www.ginac.de Git - cln.git/commitdiff
Prefer GMP's multiplication routine (if GMP version >= 4.0).
author Richard Kreckel <kreckel@ginac.de>
Sun, 15 Feb 2009 21:16:11 +0000 (22:16 +0100)
committer Richard Kreckel <kreckel@ginac.de>
Sun, 15 Feb 2009 21:16:11 +0000 (22:16 +0100)
Recent GMP multiplication routines (mpn_mul) are somewhat faster than CLN's.
For huge operands, this is due to a better-tuned FFT. For medium-sized operands
it's due to more algorithms (Toom 3-way). For small operands, I couldn't find
a difference. Let's just use mpn_mul if it's available.

For huge binsplit sums, this can halve the runtimes on amd64.

src/base/digitseq/cl_DS_mul.cc

index c3d3b5ee7222cb3530d0fd5030da3b0d3fb9506c..6201e1a5935750f4828b816f649b854a64cd8697 100644 (file)
@@ -456,7 +456,6 @@ namespace cln {
 
 #endif
 
-//  int cl_mul_algo = 0;
   void cl_UDS_mul (const uintD* sourceptr1, uintC len1,
                    const uintD* sourceptr2, uintC len2,
                    uintD* destptr)
@@ -473,12 +472,9 @@ namespace cln {
         { mulu_loop_lsp(lsprefnext(sourceptr1),sourceptr2,destptr,len2); }
       else
         {
-//          if (cl_mul_algo > 0)
-//            mulu_fftcs(sourceptr1,len1,sourceptr2,len2,destptr);
-//          else
-//          if (cl_mul_algo > 0)
-//            mulu_nussbaumer(sourceptr1,len1,sourceptr2,len2,destptr);
-//          else
+#if CL_USE_GMP && __GNU_MP__ >= 4
+          mpn_mul(destptr,sourceptr2,len2,sourceptr1,len1);
+#else
           if (len1 < cl_karatsuba_threshold)
             // Multiplikation nach Schulmethode
             mulu_2loop(sourceptr1,len1,sourceptr2,len2,destptr);
@@ -493,10 +489,11 @@ namespace cln {
             //mulu_nussbaumer(sourceptr1,len1,sourceptr2,len2,destptr);
             //mulu_fft_modp3(sourceptr1,len1,sourceptr2,len2,destptr);
             mulu_fft_modm(sourceptr1,len1,sourceptr2,len2,destptr);
+#endif
           #ifdef DEBUG_MUL_XXX
           { // Check the correctness of an other multiplication algorithm:
             CL_ALLOCA_STACK;
-            var uintD tmpprod_xxx = cl_alloc_array(uintD,len1+len2);
+            var uintD* tmpprod_xxx = cl_alloc_array(uintD,len1+len2);
             mulu_xxx(sourceptr1,len1,sourceptr2,len2,arrayLSDptr(tmpprod_xxx,len1+len2));
             if (compare_loop_msp(destptr lspop (len1+len2),arrayMSDptr(tmpprod_xxx,len1+len2),len1+len2))
               throw runtime_exception();
@@ -523,10 +520,14 @@ namespace cln {
       { if (len < cl_karatsuba_threshold)
           mulu_2loop_square(sourceptr,len,destptr);
         else
+#if CL_USE_GMP && __GNU_MP__ >= 4
+            mpn_mul(destptr,sourceptr,len,sourceptr,len);
+#else
           if (!(len >= cl_fftm_threshold))
             mulu_karatsuba_square(sourceptr,len,destptr);
           else
             mulu_fft_modm(sourceptr,len,sourceptr,len,destptr);
+#endif
       }
   }