src/float/transcendental/cl_F_sinx.cc

   1 // sinxbyx(), sinx().
   2
   3 // General includes.
   4 #include "base/cl_sysdep.h"
   5
   6 // Specification.
   7 #include "float/transcendental/cl_F_tran.h"
   8
   9
  10 // Implementation.
  11
  12 #include "cln/float.h"
  13 #include "base/cl_low.h"
  14 #include "float/cl_F.h"
  15 #include "cln/lfloat.h"
  16 #include "float/lfloat/cl_LF.h"
  17 #include "cln/integer.h"
  18
  19 #include "base/cl_inline.h"
  20 #include "float/lfloat/elem/cl_LF_zerop.cc"
  21 #include "float/lfloat/misc/cl_LF_exponent.cc"
  22
  23 namespace cln {
  24
  25 // sinxbyx is mainly for cl_SF, cl_FF, cl_DF, where we want to avoid underflow.
  26
  27 const cl_F sinxbyx_naive (const cl_F& x)
  28 {
  29 // Methode:
  30 // e := Exponent aus (decode-float x), d := (float-digits x)
  31 // Bei x=0.0 oder e<=-d/2 liefere 1.0
  32 //   (denn bei e<=-d/2 ist x^2/6 < x^2/4 < 2^(-d)/4 = 2^(-d-2), also
  33 //   1 >= sin(x)/x > 1-x^2/6 > 1-2^(-d-2), also 1 >= (sin(x)/x)^2 > 1-2^(-d-1),
  34 //   also ist (sin(x)/x)^2, auf d Bits gerundet, gleich 1.0).
  35 // Bei e<=-sqrt(d) verwende die Potenzreihe
  36 //   sin(x)/x = sum(j=0..inf,(-x^2)^j/(2j+1)!):
  37 //   a:=-x^2, b:=1, i:=1, sum:=0,
  38 //   while (/= sum (setq sum (+ sum b))) do b:=b*a/((i+1)*(i+2)), i:=i+2.
  39 //   Ergebnis sum^2.
  40 // Sonst setze y := x/2 = (scale-float x -1),
  41 //   berechne rekursiv z:=(sin(y)/y)^2 und liefere z*(1-y^2*z).
  42 // [Die Grenze sqrt(d) ergibt sich so:
  43 //  Man braucht bei der Potenzreihe mit x=2^-k etwa j Glieder, mit
  44 //  k*j*ln 2 + j*(ln j - 1) = d, und der Aufwand beträgt etwa 2.8*(j/2)
  45 //  Multiplikationen von d-Bit-Zahlen. Bei Halbierungen bis x=2^-k ist der
  46 //  Gesamtaufwand etwa 2*(k+e)+1.4*j(k). Dieses minimieren nach k: Soll sein
  47 //  -1.4 = d/dk j(k) = (d/dj k(j))^-1 = - j^2/(d+j)*ln 2, also j^2=2(d+j),
  48 //  grob j=sqrt(2d) und damit k=sqrt(d).]
  49 // Aufwand: asymptotisch d^2.5 .
  50
  51         if (zerop(x))
  52                 return cl_float(1,x);
  53         var uintC d = float_digits(x);
  54         var sintE e = float_exponent(x);
  55         if (e <= (-(sintC)d)>>1) // e <= (-d)/2 <==> e <= -ceiling(d/2) ?
  56                 return cl_float(1,x); // ja -> 1.0 als Ergebnis
  57  {      Mutable(cl_F,x);
  58         // Bei e <= -1-limit_slope*floor(sqrt(d)) kann die Potenzreihe
  59         // angewandt werden. limit_slope = 1.0 ist sehr schlecht (ca. 30%
  60         // zu schlecht). Gute Werte bei N limbs:
  61         //    N     limit_slope
  62         //    5       0.15-0.30
  63         //   10          0.25
  64         //   25       0.25-0.35
  65         //   50       0.35-0.40
  66         //  100       0.40-0.45
  67         //  200       0.40-0.45
  68         // Wähle limit_slope = 13/32 = 0.4.
  69         var sintL e_limit = -1-floor(isqrtC(d)*13,32); // -1-floor(sqrt(d))
  70         if (e > e_limit) {
  71                 // e > -1-limit_slope*floor(sqrt(d)) -> muß |x| verkleinern.
  72                 x = scale_float(x,e_limit-e);
  73                 // Neuer Exponent = e_limit.
  74         }
  75         var cl_F x2 = square(x);        // x^2
  76         // Potenzreihe anwenden:
  77         var cl_F a = - x2; // a := -x^2
  78         var int i = 1;
  79         var cl_F b = cl_float(1,x); // b := (float 1 x)
  80         var cl_F sum = cl_float(0,x); // sum := (float 0 x)
  81         loop {
  82                 var cl_F new_sum = sum + b;
  83                 if (new_sum == sum) // = sum ?
  84                         break; // ja -> Potenzreihe abbrechen
  85                 sum = new_sum;
  86                 b = (b*a)/(cl_I)((i+1)*(i+2));
  87                 i = i+2;
  88         }
  89         var cl_F z = square(sum); // sum^2 als Ergebnis
  90         while (e > e_limit) {
  91                 z = z - x2 * square(z);
  92                 x2 = scale_float(x2,2); // x^2 := x^2*4
  93                 e--;
  94         }
  95         return z;
  96 }}
  97 // Bit complexity (N = length(x)): O(N^(1/2)*M(N)).
  98
  99 const cl_LF sinx_naive (const cl_LF& x)
 100 {
 101 // Methode:
 102 // e := Exponent aus (decode-float x), d := (float-digits x)
 103 // Bei x=0.0 oder e<=-d/2 liefere x
 104 //   (denn bei e<=-d/2 ist x^2/6 < x^2/4 < 2^(-d)/4 = 2^(-d-2), also
 105 //   1 >= sin(x)/x > 1-x^2/6 > 1-2^(-d-2), also ist sin(x)^2, auf d Bits
 106 //   gerundet, gleich x).
 107 // Bei e<=-sqrt(d) verwende die Potenzreihe
 108 //   sin(x) = sum(j=0..inf,x*(-x^2)^j/(2j+1)!):
 109 //   a:=-x^2, b:=x, i:=1, sum:=0,
 110 //   while (/= sum (setq sum (+ sum b))) do b:=b*a/((i+1)*(i+2)), i:=i+2.
 111 //   Ergebnis sum^2.
 112 // Sonst setze y := x/2 = (scale-float x -1),
 113 //   berechne rekursiv z:=sin(y)^2 und liefere 4*z*(1-z) = 1-(1-2*z)^2.
 114 // [Die Grenze sqrt(d) ergibt sich so:
 115 //  Man braucht bei der Potenzreihe mit x=2^-k etwa j Glieder, mit
 116 //  k*j*ln 2 + j*(ln j - 1) = d, und der Aufwand beträgt etwa 2.8*(j/2)
 117 //  Multiplikationen von d-Bit-Zahlen. Bei Halbierungen bis x=2^-k ist der
 118 //  Gesamtaufwand etwa 2*(k+e)+1.4*j(k). Dieses minimieren nach k: Soll sein
 119 //  -1.4 = d/dk j(k) = (d/dj k(j))^-1 = - j^2/(d+j)*ln 2, also j^2=2(d+j),
 120 //  grob j=sqrt(2d) und damit k=sqrt(d).]
 121 // Aufwand: asymptotisch d^2.5 .
 122
 123         if (zerop_inline(x))
 124                 return x;
 125         var uintC actuallen = TheLfloat(x)->len;
 126         var uintC d = float_digits(x);
 127         var sintE e = float_exponent_inline(x);
 128         if (e <= (-(sintC)d)>>1) // e <= (-d)/2 <==> e <= -ceiling(d/2) ?
 129                 return square(x); // ja -> x^2 als Ergebnis
 130  {      Mutable(cl_LF,x);
 131         var sintE ee = e;
 132         // Bei e <= -1-limit_slope*floor(sqrt(d)) kann die Potenzreihe
 133         // angewandt werden. limit_slope = 1.0 ist schlecht (ca. 10% zu
 134         // schlecht). Ein guter Wert für naive1 ist limit_slope = 0.6,
 135         // für naive3 aber limit_slope = 0.5.
 136         var sintL e_limit = -1-floor(isqrtC(d),2); // -1-floor(sqrt(d))
 137         if (e > e_limit) {
 138                 // e > -1-limit_slope*floor(sqrt(d)) -> muß |x| verkleinern.
 139                 x = scale_float(x,e_limit-e);
 140                 ee = e_limit; // Neuer Exponent = e_limit.
 141         }
 142         var cl_LF x2 = square(x); // x^2
 143         // Potenzreihe anwenden:
 144         var cl_LF powser_value;
 145         var cl_LF a = - x2; // a := -x^2
 146         var int i = 1;
 147         if (0) {
 148                 // naive1:
 149                 // fixed-point representation
 150                 d = d-ee; // fixed-point representation with d mantissa bits
 151                 var cl_I b = round1(scale_float(x,d)); // b := x
 152                 var cl_I sum = 0; // sum := (float 0 x)
 153                 loop {
 154                         if (b == 0) break;
 155                         sum = sum + b;
 156                         b = round1(round1(The(cl_LF)(b*a)),(cl_I)((i+1)*(i+2)));
 157                         i = i+2;
 158                 }
 159                 powser_value = scale_float(cl_float(sum,x),-(sintC)d);
 160         } else if (actuallen <= 7) { // Break-even-Point before extendsqrt: N<=6
 161                 // naive2:
 162                 // floating-point representation
 163                 var cl_LF b = x; // b := x
 164                 var cl_LF sum = cl_float(0,x); // sum := (float 0 x)
 165                 loop {
 166                         var cl_LF new_sum = sum + b;
 167                         if (new_sum == sum) // = sum ?
 168                                 break; // ja -> Potenzreihe abbrechen
 169                         sum = new_sum;
 170                         b = (b*a)/(cl_I)((i+1)*(i+2));
 171                         i = i+2;
 172                 }
 173                 powser_value = sum;
 174         } else {
 175                 // naive3:
 176                 // floating-point representation with smooth precision reduction
 177                 var cl_LF b = x; // b := x
 178                 var cl_LF eps = scale_float(b,-(sintC)d-10);
 179                 var cl_LF sum = cl_float(0,x); // sum := (float 0 x)
 180                 loop {
 181                         var cl_LF new_sum = sum + LF_to_LF(b,actuallen);
 182                         if (new_sum == sum) // = sum ?
 183                                 break; // ja -> Potenzreihe abbrechen
 184                         sum = new_sum;
 185                         b = cl_LF_shortenwith(b,eps);
 186                         b = (b*a)/(cl_I)((i+1)*(i+2));
 187                         i = i+2;
 188                 }
 189                 powser_value = sum;
 190         }
 191         var cl_LF z = square(powser_value); // sin^2 als Ergebnis
 192         while (e > e_limit) {
 193                 z = cl_float(1,x) - square(cl_float(1,x) - scale_float(z,1)); // z := 1-(1-2*z)^2
 194                 e--;
 195         }
 196         return z;
 197 }}
 198 // Bit complexity (N = length(x)): O(N^(1/2)*M(N)).
 199
 200 // Timings of the three variants, on an i486 33 MHz, running Linux,
 201 // applied to x = sqrt(2)-1 = 0.414...
 202 //   N     naive1  naive2  naive3  ratseries
 203 //    4     0.0064  0.0048  0.0049  0.023
 204 //    6     0.0081  0.0064  0.0065  0.031
 205 //    8     0.0103  0.0085  0.0083  0.038
 206 //   10     0.012   0.011   0.010   0.048
 207 //   25     0.043   0.047   0.035   0.119
 208 //   50     0.15    0.17    0.12    0.37
 209 //  100     0.54    0.67    0.44    1.09
 210 //  250     3.5     4.4     2.8     5.5
 211 //  500    14.7    18.5    11.6    19.4
 212 // 1000    61      78      48      64
 213 // 2500   315     361     243     261
 214 // 2700                   265     270
 215 // 3000                   294     282
 216 // 3500                   339     303
 217 // ==> ratseries faster for N >= 2750.
 218
 219 }  // namespace cln