[CLN-list] Re: cln cvs, cl_low_div.cc and mulu32_64

Thu Dec 21 22:51:33 CET 2006

Hi!

Thank you, Sven, for sending a patch.

I'm taking this to the mailing list, since I'll start traveling in a 
couple of hours and won't be back until next year.

There seems to be some confusion about the CLN version you're using. 
You say that you're using CLN 1.1.13. But your patch looks more like it 
is for CLN-1.2 from CVS HEAD, not for the CLN-1.1 branch.

I've only had a quick glance at your patch. What is __arch64__ and why 
is __sparc64__ not defined?

Regards
  -richy.

-- 
Richard B. Kreckel
<http://www.ginac.de/~kreckel/>

Sven Verdoolaege wrote:

>On Thu, Dec 21, 2006 at 12:50:21AM +0100, Sven Verdoolaege wrote:
>  
>
>>I was trying to compile cln 1.1.13 on a sparc and I ran into some problems,
>>so I thought I'd try the cvs version.
>>I'm using gmp 4.2.1 (64 bits) and gcc 4.1.1
>>
>>After fixing a couple of problems (see below), I'm stuck with this one:
>>
>>/bin/sh ../libtool --mode=compile g++ -m64 -mptr64 -I/home/verdoolaege/solaris-5.9-gcc-4.1.1/include/ -I../include -I../include -I./base   -c ./base/low/cl_low_div.cc
>> g++ -m64 -mptr64 -I/home/verdoolaege/solaris-5.9-gcc-4.1.1/include/ -I../include -I../include -I./base -c ./base/low/cl_low_div.cc  -fPIC -DPIC -o .libs/cl_low_div.o
>>./base/low/cl_low_div.cc: In function 'uint64 cln::divu_6464_6464_(uint64, uint64)':
>>./base/low/cl_low_div.cc:262: error: 'mulu32_64' was not declared in this scope
>>
>>It seems you introduced this in your patch from Wed Mar 22 21:45:15 2006 +0000.
>>mulu32_64 doesn't appear to be defined anywhere, neither in the current
>>version nor in the version of that date:
>>    
>>
>
>I'm assuming this should be mulu32_w.
>
>I now finally got cln compiled with the patch below, but now
>the exam test segfaults:
>
>Program received signal SIGSEGV, Segmentation fault.
>0xffffffff7edae178 in cln::cl_inc_pointer_refcount (pointer=0x0)
>    at ../include/cln/object.h:205
>205             pointer->refcount++;
>(gdb) bt
>#0  0xffffffff7edae178 in cln::cl_inc_pointer_refcount (pointer=0x0)
>    at ../include/cln/object.h:205
>#1  0xffffffff7edced28 in cln::cl_gcobject::inc_pointer_refcount (
>    this=0xffffffff7f052e58) at ../include/cln/object.h:317
>#2  0xffffffff7edd716c in cln::cl_gcobject::_as_cl_private_thing (
>    this=0xffffffff7f052e58) at ../include/cln/object.h:501
>#3  0xffffffff7ede06b4 in cln::as_cl_private_thing (x=@0xffffffff7f052e58)
>    at ../include/cln/object.h:506
>#4  0xffffffff7ee1d6ec in cl_DF (this=0xffffffff7f053400, 
>    x=@0xffffffff7f052e58) at ../include/cln/dfloat_class.h:61
>#5  0xffffffff7ed10924 in cln::cl_I_to_DF (x=@0xffffffff7fffda88)
>    at float/dfloat/elem/cl_DF_from_I.cc:34
>#6  0xffffffff7ed10db8 in cln::cl_RA_to_DF (x=@0xffffffff7fffda88)
>    at float/dfloat/elem/cl_DF_from_RA.cc:36
>#7  0xffffffff7ed8f538 in __static_initialization_and_destruction_0 (
>    __initialize_p=1, __priority=65535)
>    at real/format-output/cl_fmt_scaleexp.cc:50
>#8  0xffffffff7ed8f8f0 in global constructors keyed to _ZN3cln21format_scale_exponentERKNS_4cl_FE () at real/format-output/cl_fmt_scaleexp.cc:117
>#9  0xffffffff7ed9d9c0 in __do_global_ctors_aux ()
>    at /solapplic/gcc-4.1.1/lib/gcc/sparc-sun-solaris2.9/4.1.1/../../../../include/c++/4.1.1/iostream:76
>#10 0xffffffff7ec7feb4 in _init ()
>   from /home/verdoolaege/solaris-5.9-gcc-4.1.1/lib//libcln.so.5
>#11 0xffffffff7f612398 in call_init () from /lib/sparcv9/ld.so.1
>#12 0xffffffff7f611844 in setup () from /lib/sparcv9/ld.so.1
>#13 0xffffffff7f620a8c in _setup () from /lib/sparcv9/ld.so.1
>#14 0xffffffff7f605480 in _alias_start () from /lib/sparcv9/ld.so.1
>#15 0xffffffff7f605480 in _alias_start () from /lib/sparcv9/ld.so.1
>Previous frame identical to this frame (corrupt stack?)
>
>skimo
>
>diff --git a/include/cln/types.h b/include/cln/types.h
>index 067411e..d46aaee 100644
>--- a/include/cln/types.h
>+++ b/include/cln/types.h
>@@ -48,7 +48,7 @@
>     #undef HAVE_LONGLONG
>    #endif
>   #endif
>-  #if defined(HAVE_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__powerpc64__) || defined(__sparc64__) || defined(__x86_64__))
>+  #if defined(HAVE_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__powerpc64__) || defined(__sparc64__) || defined(__x86_64__) || defined(__arch64__))
>     // 64 bit registers in hardware
>     #define HAVE_FAST_LONGLONG
>   #endif
>diff --git a/src/base/cl_low.h b/src/base/cl_low.h
>index 46485b9..226a448 100644
>--- a/src/base/cl_low.h
>+++ b/src/base/cl_low.h
>@@ -439,7 +439,7 @@ inline uint32 mulu32_unchecked (uint32 a
>       __asm__ __volatile__ (       \
>         "wr %%g0,%%g0,%%y\n\t"     \
>         "udiv %2,%3,%0\n\t"        \
>-        "umul %0,%3,%1"            \
>+        "umul %0,%3,%1\n\t"        \
>         "sub %2,%1,%1"             \
>         : "=&r" (__q), "=&r" (__r) \
>         : "r" (__x), "r" (__y));   \
>@@ -528,7 +528,7 @@ inline uint32 mulu32_unchecked (uint32 a
>       __asm__ __volatile__ (       \
>         "wr %%g0,%%g0,%%y\n\t"     \
>         "udiv %2,%3,%0\n\t"        \
>-        "umul %0,%3,%1"            \
>+        "umul %0,%3,%1\n\t"        \
>         "sub %2,%1,%1"             \
>         : "=&r" (__q), "=&r" (__r) \
>         : "r" (__x), "r" (__y));   \
>@@ -578,7 +578,7 @@ inline uint32 mulu32_unchecked (uint32 a
>       __asm__ __volatile__ (       \
>         "wr %%g0,%%g0,%%y\n\t"     \
>         "udiv %2,%3,%0\n\t"        \
>-        "umul %0,%3,%1"            \
>+        "umul %0,%3,%1\n\t"        \
>         "sub %2,%1,%1"             \
>         : "=&r" (__q), "=&r" (__r) \
>         : "r" (__x), "r" (__y));   \
>@@ -690,7 +690,7 @@ inline uint32 mulu32_unchecked (uint32 a
>       __asm__ __volatile__ (       \
>         "wr %2,%%g0,%%y\n\t"       \
>         "udiv %3,%4,%0\n\t"        \
>-        "umul %0,%4,%1"            \
>+        "umul %0,%4,%1\n\t"        \
>         "sub %3,%1,%1"             \
>         : "=&r" (__q), "=&r" (__r) \
>         : "r" (__xhi), "r" (__xlo), "r" (__y)); \
>@@ -791,7 +791,7 @@ inline uint32 mulu32_unchecked (uint32 a
>       __asm__ __volatile__ (          \
>         "wr %2,%%g0,%%y\n\t"          \
>         "udiv %3,%4,%0\n\t"           \
>-        "umul %0,%4,%1"               \
>+        "umul %0,%4,%1\n\t"           \
>         "sub %3,%1,%1"                \
>         : "=&r" (__q), "=&r" (__r)    \
>         : "r" (__xhi), "r" (__xlo), "r" (__y)); \
>diff --git a/src/base/digitseq/cl_asm_sparc64_.cc b/src/base/digitseq/cl_asm_sparc64_.cc
>index e4acced..7936327 100644
>--- a/src/base/digitseq/cl_asm_sparc64_.cc
>+++ b/src/base/digitseq/cl_asm_sparc64_.cc
>@@ -40,6 +40,8 @@
>   #define ret   jmp %i7+8    // return from subroutine
>   #define retl  jmp %o7+8    // return from leaf subroutine (no save/restore)
> 
>+	.register %g2,#scratch
>+
>         .seg "text"
> 
>         .global C(mulu16_),C(mulu32_),C(mulu32_unchecked),C(mulu64_)
>diff --git a/src/base/digitseq/cl_asm_sparc_.cc b/src/base/digitseq/cl_asm_sparc_.cc
>index aac70ae..965ac0c 100644
>--- a/src/base/digitseq/cl_asm_sparc_.cc
>+++ b/src/base/digitseq/cl_asm_sparc_.cc
>@@ -39,6 +39,8 @@
>   #define ret   jmp %i7+8    // return from subroutine
>   #define retl  jmp %o7+8    // return from leaf subroutine (no save/restore)
> 
>+	.register %g2,#scratch
>+
>         .seg "text"
> 
>         .global C(mulu16_),C(mulu32_),C(mulu32_unchecked)
>diff --git a/src/base/low/cl_low_div.cc b/src/base/low/cl_low_div.cc
>index f631a51..64e8c36 100644
>--- a/src/base/low/cl_low_div.cc
>+++ b/src/base/low/cl_low_div.cc
>@@ -259,9 +259,9 @@ uint64 divu_6464_6464_(uint64 x, uint64
>       }
>       // q = floor(x1/(y1+1))
>       // x-q*y bilden (eine 32-mal-64-Bit-Multiplikation ohne Überlauf):
>-      x -= highlow64_0(mulu32_64(q,high32(y))); // q * high32(y) * beta
>+      x -= highlow64_0(mulu32_w(q,high32(y))); // q * high32(y) * beta
>       // gefahrlos, da q*high32(y) <= q*y/beta <= x/beta < beta
>-      x -= mulu32_64(q,low32(y)); // q * low32(y)
>+      x -= mulu32_w(q,low32(y)); // q * low32(y)
>       // gefahrlos, da q*high32(y)*beta + q*low32(y) = q*y <= x
>       // Noch höchstens 2 mal y abziehen:
>       if (x >= y)
>diff --git a/src/polynomial/elem/cl_UP_GF2.h b/src/polynomial/elem/cl_UP_GF2.h
>index fa2c5b1..f3c9936 100644
>--- a/src/polynomial/elem/cl_UP_GF2.h
>+++ b/src/polynomial/elem/cl_UP_GF2.h
>@@ -95,7 +95,7 @@ static const _cl_UP gf2_uminus (cl_heap_
> 	return x;
> }
> 
>-#if !(defined(__sparc__) || defined(__sparc64__))
>+#if (intDsize==64)
> // Multiplication of polynomials over GF(2) can unfortunately not profit
> // from hardware multiply instructions. Use a table instead.
> // This is a 2^8 x 2^4 table. Maybe a 2^6 x 2^6 table would be better?
>  
>