More memory efficient constants:

author Richard Kreckel <kreckel@ginac.de>

Sun, 8 Apr 2007 21:29:47 +0000 (21:29 +0000)

committer Richard Kreckel <kreckel@ginac.de>

Sun, 8 Apr 2007 21:29:47 +0000 (21:29 +0000)
author Richard Kreckel <kreckel@ginac.de>
Sun, 8 Apr 2007 21:29:47 +0000 (21:29 +0000)
committer Richard Kreckel <kreckel@ginac.de>
Sun, 8 Apr 2007 21:29:47 +0000 (21:29 +0000)
diff --git a/ChangeLog b/ChangeLog

index b02fc87896d83cb96dab9de88bee9eebc454ada0..f00389ecbd458290aa01c6b9ef604769a7153127 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2007-04-09  Richard B. Kreckel  <kreckel@ginac.de>
+
+       More memory efficient constants:
+       * src/float/transcendental/cl_LF_pi.cc (compute_pi_ramanujan_163_fast):
+       Compute series coefficients on demand, using a series stream object.
+       * src/float/transcendental/cl_LF_zeta3.cc (zeta3): Likewise.
+       * src/float/transcendental/cl_LF_catalanconst.cc
+       (compute_catalanconst_ramanujan_fast): Likewise.
+       (compute_catalanconst_lupas): New function.
+       (compute_catalanconst): Simplify, based on new benchmark.
+
  2007-04-02  Alexei Sheplyakov  <varg@theor.jinr.ru>
  
         Debian Bug#412103:
diff --git a/src/float/transcendental/cl_LF_catalanconst.cc b/src/float/transcendental/cl_LF_catalanconst.cc

index eea0258e3c4e5a39ed3fd1c2d5f05292e1374a28..4ebc46f7a47ebf3036cc8e93797c75273a51ef97 100644 (file)
--- a/src/float/transcendental/cl_LF_catalanconst.cc
+++ b/src/float/transcendental/cl_LF_catalanconst.cc
@@ -50,35 +50,37 @@ const cl_LF compute_catalanconst_ramanujan_fast (uintC len)
  {
         // Same formula as above, using a binary splitting evaluation.
         // See [Borwein, Borwein, section 10.2.3].
-       var uintC actuallen = len + 2; // 2 Schutz-Digits
+       struct rational_series_stream : cl_pqb_series_stream {
+               cl_I n;
+               static cl_pqb_series_term computenext (cl_pqb_series_stream& thisss)
+               {
+                       var rational_series_stream& thiss = (rational_series_stream&)thisss;
+                       var cl_I n = thiss.n;
+                       var cl_pqb_series_term result;
+                       if (n==0) {
+                               result.p = 1;
+                               result.q = 1;
+                               result.b = 1;
+                       } else {
+                               result.p = n;
+                               result.b = 2*n+1;
+                               result.q = result.b << 1; // 2*(2*n+1)
+                       }
+                       thiss.n = n+1;
+                       return result;
+               }
+               rational_series_stream ()
+                       : cl_pqb_series_stream (rational_series_stream::computenext),
+                         n (0) {}
+       } series;
+       var uintC actuallen = len + 2; // 2 guard digits
         // Evaluate a sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
         // with appropriate N, and
         //   a(n) = 1, b(n) = 2*n+1,
         //   p(n) = n for n>0, q(n) = 2*(2*n+1) for n>0.
         var uintC N = (intDsize/2)*actuallen;
         // 4^-N <= 2^(-intDsize*actuallen).
-       CL_ALLOCA_STACK;
-       var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I));
-       var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
-       var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
-       var uintC n;
-       init1(cl_I, bv[0]) (1);
-       init1(cl_I, pv[0]) (1);
-       init1(cl_I, qv[0]) (1);
-       for (n = 1; n < N; n++) {
-               init1(cl_I, bv[n]) (2*n+1);
-               init1(cl_I, pv[n]) (n);
-               init1(cl_I, qv[n]) (2*(2*n+1));
-       }
-       var cl_pqb_series series;
-       series.bv = bv;
-       series.pv = pv; series.qv = qv; series.qsv = NULL;
         var cl_LF fsum = eval_rational_series(N,series,actuallen);
-       for (n = 0; n < N; n++) {
-               bv[n].~cl_I();
-               pv[n].~cl_I();
-               qv[n].~cl_I();
-       }
         var cl_LF g =
           scale_float(The(cl_LF)(3*fsum)
                       + The(cl_LF)(pi(actuallen))
@@ -229,30 +231,66 @@ const cl_LF compute_catalanconst_cvz2 (uintC len)
  }
  // Bit complexity (N = len): O(log(N)^2*M(N)).
  
-// Timings of the above algorithms, on an i486 33 MHz, running Linux.
-//    N      ram    ramfast  exp1    exp2    cvz1    cvz2
-//    10     0.055   0.068   0.32    0.91    0.076   0.11
-//    25     0.17    0.26    0.95    3.78    0.23    0.43
-//    50     0.43    0.73    2.81   11.5     0.63    1.36
-//   100     1.32    2.24    8.82   34.1     1.90    4.48
-//   250     6.60   10.4    48.7   127.5    10.3    20.8
-//   500    24.0    30.9   186     329      38.4    58.6
-//  1000    83.0    89.0   944     860     149     163
-//  2500   446     352    6964    3096    1032     545
-//  5000  1547     899
-// asymp.    N^2     FAST    N^2     FAST    N^2     FAST
+
+const cl_LF compute_catalanconst_lupas (uintC len)
+{
+       // G = 19/18 * sum(n=0..infty,
+       //                 mul(m=1..n, -32*((80*m^3+72*m^2-18*m-19)*m^3)/
+       //                             (10240*m^6+14336*m^5+2560*m^4-3072*m^3-888*m^2+72*m+27))).
+       struct rational_series_stream : cl_pq_series_stream {
+               cl_I n;
+               static cl_pq_series_term computenext (cl_pq_series_stream& thisss)
+               {
+                       var rational_series_stream& thiss = (rational_series_stream&)thisss;
+                       var cl_I n = thiss.n;
+                       var cl_pq_series_term result;
+                       if (zerop(n)) {
+                               result.p = 1;
+                               result.q = 1;
+                       } else {
+                               // Compute -32*((80*n^3+72*n^2-18*n-19)*n^3) (using Horner scheme):
+                               result.p = (19+(18+(-72-80*n)*n)*n)*n*n*n << 5;
+                               // Compute 10240*n^6+14336*n^5+2560*n^4-3072*n^3-888*n^2+72*n+27:
+                               result.q = 27+(72+(-888+(-3072+(2560+(14336+10240*n)*n)*n)*n)*n)*n;
+                       }
+                       thiss.n = plus1(n);
+                       return result;
+               }
+               rational_series_stream ()
+                       : cl_pq_series_stream (rational_series_stream::computenext),
+                         n (0) {}
+       } series;
+       var uintC actuallen = len + 2; // 2 guard digits
+       var uintC N = (intDsize/2)*actuallen;
+       var cl_LF fsum = eval_rational_series(N,series,actuallen);
+       var cl_LF g = fsum*cl_I_to_LF(19,actuallen)/cl_I_to_LF(18,actuallen);
+       return shorten(g,len);
+}
+// Bit complexity (N = len): O(log(N)^2*M(N)).
+
+// Timings of the above algorithms, on an AMD Opteron 1.7 GHz, running Linux/x86.
+//    N      ram    ramfast  exp1    exp2    cvz1    cvz2    lupas
+//    25     0.0011  0.0010  0.0094  0.0107  0.0021  0.0016  0.0034
+//    50     0.0030  0.0025  0.0280  0.0317  0.0058  0.0045  0.0095
+//   100     0.0087  0.0067  0.0854  0.0941  0.0176  0.0121  0.0260
+//   250     0.043   0.029   0.462   0.393   0.088   0.055   0.109
+//   500     0.15    0.086   1.7     1.156   0.323   0.162   0.315
+//  1000     0.57    0.25    7.5     3.23    1.27    0.487   0.864
+//  2500     3.24    1.10   52.2    12.4     8.04    2.02    3.33
+//  5000    13.1     3.06  218      32.7    42.1     5.59    8.80
+// 10000    52.7     8.2   910      85.3   216.7    15.3    22.7
+// 25000   342      29.7  6403     295    1643      54.5    77.3
+// 50000            89.9                           139     195
+//100000           227                             363     483
+// asymp.    N^2     FAST    N^2     FAST    N^2     FAST    FAST
+
  // (FAST means O(log(N)^2*M(N)))
  //
-// The "exp1" and "exp2" algorithms are always about 10 to 15 times slower
-// than the "ram" and "ramfast" algorithms.
-// The break-even point between "ram" and "ramfast" is at about N = 1410.
+// The "ramfast" algorithm is always fastest.
  
  const cl_LF compute_catalanconst (uintC len)
  {
-       if (len >= 1410)
-               return compute_catalanconst_ramanujan_fast(len);
-       else
-               return compute_catalanconst_ramanujan(len);
+       return compute_catalanconst_ramanujan_fast(len);
  }
  // Bit complexity (N := len): O(log(N)^2*M(N)).
  
diff --git a/src/float/transcendental/cl_LF_pi.cc b/src/float/transcendental/cl_LF_pi.cc

index ba8d2cdbbcb24ec202254b77653b1368b0e0354d..6660f62911418e093f9d147cb7c7a453d9bed22a 100644 (file)
--- a/src/float/transcendental/cl_LF_pi.cc
+++ b/src/float/transcendental/cl_LF_pi.cc
@@ -191,6 +191,31 @@ const cl_LF compute_pi_ramanujan_163_fast (uintC len)
  {
         // Same formula as above, using a binary splitting evaluation.
         // See [Borwein, Borwein, section 10.2.3].
+       struct rational_series_stream : cl_pqa_series_stream {
+               uintC n;
+               static cl_pqa_series_term computenext (cl_pqa_series_stream& thisss)
+               {
+                       static const cl_I A = "163096908";
+                       static const cl_I B = "6541681608";
+                       static const cl_I J1 = "10939058860032000"; // 72*abs(J)
+                       var rational_series_stream& thiss = (rational_series_stream&)thisss;
+                       var uintC n = thiss.n;
+                       var cl_pqa_series_term result;
+                       if (n==0) {
+                               result.p = 1;
+                               result.q = 1;
+                       } else {
+                               result.p = -((cl_I)(6*n-5)*(cl_I)(2*n-1)*(cl_I)(6*n-1));
+                               result.q = (cl_I)n*(cl_I)n*(cl_I)n*J1;
+                       }
+                       result.a = A+n*B;
+                       thiss.n = n+1;
+                       return result;
+               }
+               rational_series_stream ()
+                       : cl_pqa_series_stream (rational_series_stream::computenext),
+                         n (0) {}
+       } series;
         var uintC actuallen = len + 4; // 4 Schutz-Digits
         static const cl_I A = "163096908";
         static const cl_I B = "6541681608";
@@ -205,32 +230,7 @@ const cl_LF compute_pi_ramanujan_163_fast (uintC len)
         var uintC N = (n_slope*actuallen)/32 + 1;
         // N > intDsize*log(2)/log(|J|) * actuallen, hence
         // |J|^-N < 2^(-intDsize*actuallen).
-       CL_ALLOCA_STACK;
-       var cl_I* av = (cl_I*) cl_alloca(N*sizeof(cl_I));
-       var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
-       var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
-       var uintC* qsv = (uintC*) cl_alloca(N*sizeof(uintC));
-       var uintC n;
-       for (n = 0; n < N; n++) {
-               init1(cl_I, av[n]) (A+n*B);
-               if (n==0) {
-                       init1(cl_I, pv[n]) (1);
-                       init1(cl_I, qv[n]) (1);
-               } else {
-                       init1(cl_I, pv[n]) (-((cl_I)(6*n-5)*(cl_I)(2*n-1)*(cl_I)(6*n-1)));
-                       init1(cl_I, qv[n]) ((cl_I)n*(cl_I)n*(cl_I)n*J1);
-               }
-       }
-       var cl_pqa_series series;
-       series.av = av;
-       series.pv = pv; series.qv = qv;
-       series.qsv = (len >= 35 ? qsv : 0); // 5% speedup for large len's
         var cl_LF fsum = eval_rational_series(N,series,actuallen);
-       for (n = 0; n < N; n++) {
-               av[n].~cl_I();
-               pv[n].~cl_I();
-               qv[n].~cl_I();
-       }
         static const cl_I J3 = "262537412640768000"; // -1728*J
         var cl_LF pires = sqrt(cl_I_to_LF(J3,actuallen)) / fsum;
         return shorten(pires,len); // verkürzen und fertig
diff --git a/src/float/transcendental/cl_LF_zeta3.cc b/src/float/transcendental/cl_LF_zeta3.cc

index 459b28d9b74e36eb67b45c64dcbae95b4d280679..31914d39a956a25a9d268836aa6c6e260824ade4 100644 (file)
--- a/src/float/transcendental/cl_LF_zeta3.cc
+++ b/src/float/transcendental/cl_LF_zeta3.cc
@@ -19,6 +19,27 @@ namespace cln {
  
  const cl_LF zeta3 (uintC len)
  {
+       struct rational_series_stream : cl_pqa_series_stream {
+               uintC n;
+               static cl_pqa_series_term computenext (cl_pqa_series_stream& thisss)
+               {
+                       var rational_series_stream& thiss = (rational_series_stream&)thisss;
+                       var uintC n = thiss.n;
+                       var cl_pqa_series_term result;
+                       if (n==0) {
+                               result.p = 1;
+                       } else {
+                               result.p = -expt_pos(n,5);
+                       }
+                       result.q = expt_pos(2*n+1,5)<<5;
+                       result.a = 205*square((cl_I)n) + 250*(cl_I)n + 77;
+                       thiss.n = n+1;
+                       return result;
+               }
+               rational_series_stream ()
+                       : cl_pqa_series_stream (rational_series_stream::computenext),
+                         n (0) {}
+       } series;
         // Method:
         //            /infinity                                  \ 
         //            | -----       (n + 1)       2              |
@@ -41,28 +62,7 @@ const cl_LF zeta3 (uintC len)
         var uintC actuallen = len+2; // 2 Schutz-Digits
         var uintC N = ceiling(actuallen*intDsize,10);
         // 1024^-N <= 2^(-intDsize*actuallen).
-       CL_ALLOCA_STACK;
-       var cl_I* av = (cl_I*) cl_alloca(N*sizeof(cl_I));
-       var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
-       var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
-       var uintC n;
-       for (n = 0; n < N; n++) {
-               init1(cl_I, av[n]) (205*square((cl_I)n) + 250*(cl_I)n + 77);
-               if (n==0)
-                       init1(cl_I, pv[n]) (1);
-               else
-                       init1(cl_I, pv[n]) (-expt_pos(n,5));
-               init1(cl_I, qv[n]) (expt_pos(2*n+1,5)<<5);
-       }
-       var cl_pqa_series series;
-       series.av = av;
-       series.pv = pv; series.qv = qv; series.qsv = NULL;
         var cl_LF sum = eval_rational_series(N,series,actuallen);
-       for (n = 0; n < N; n++) {
-               av[n].~cl_I();
-               pv[n].~cl_I();
-               qv[n].~cl_I();
-       }
         return scale_float(shorten(sum,len),-1);
  }
  // Bit complexity (N := len): O(log(N)^2*M(N)).
author	Richard Kreckel <kreckel@ginac.de>
	Sun, 8 Apr 2007 21:29:47 +0000 (21:29 +0000)
committer	Richard Kreckel <kreckel@ginac.de>
	Sun, 8 Apr 2007 21:29:47 +0000 (21:29 +0000)
ChangeLog		patch \| blob \| history
src/float/transcendental/cl_LF_catalanconst.cc		patch \| blob \| history
src/float/transcendental/cl_LF_pi.cc		patch \| blob \| history
src/float/transcendental/cl_LF_zeta3.cc		patch \| blob \| history