src/float/ffloat/cl_FF.h

   1 // cl_FF internals
   2
   3 #ifndef _CL_FF_H
   4 #define _CL_FF_H
   5
   6 #include "cln/number.h"
   7 #include "cln/malloc.h"
   8 #include "cl_low.h"
   9 #include "cl_F.h"
  10
  11 #ifdef FAST_FLOAT
  12 #include "cl_N.h"
  13 #include "cl_F.h"
  14 #endif
  15
  16 namespace cln {
  17
  18 typedef uint32 ffloat; // 32-bit float in IEEE format
  19
  20 union ffloatjanus {
  21         ffloat eksplicit;       // explicit value
  22         #ifdef FAST_FLOAT
  23         float machine_float;    // value as a C `float'
  24         #endif
  25 };
  26
  27 #if defined(CL_WIDE_POINTERS)
  28 #define FF_value_shift  32
  29 inline ffloat cl_ffloat_value (const cl_FF& x)
  30 {
  31         return x.word >> FF_value_shift;
  32 }
  33 #else
  34 struct cl_heap_ffloat : cl_heap {
  35         ffloatjanus representation;
  36 };
  37 inline cl_heap_ffloat* TheFfloat (const cl_number& obj)
  38         { return (cl_heap_ffloat*)(obj.pointer); }
  39 inline ffloat cl_ffloat_value (const cl_FF& x)
  40 {
  41         return TheFfloat(x)->representation.eksplicit;
  42 }
  43 #endif
  44
  45 // The word contains:
  46 //   |..|.......|..........................|
  47 //  sign exponent             mantissa
  48
  49   #define FF_exp_len    8       // number of bits in the exponent
  50   #define FF_mant_len  23       // number of bits in the mantissa
  51                                 // (excluding the hidden bit)
  52   #define FF_exp_low   1                // minimum exponent
  53   #define FF_exp_mid   126              // exponent bias
  54   #define FF_exp_high  254              // maximum exponent, 255 is NaN/Inf
  55   #define FF_exp_shift  (FF_mant_len+FF_mant_shift) // lowest exponent bit
  56   #define FF_mant_shift  0                          // lowest mantissa bit
  57   #define FF_sign_shift  (32 - 1)       // = (FF_exp_len+FF_mant_len)
  58
  59 // Private constructor.
  60 #if !defined(CL_WIDE_POINTERS)
  61 inline cl_FF::cl_FF (cl_heap_ffloat* ptr) : cl_F ((cl_private_thing) ptr) {}
  62 #endif
  63
// Class descriptor for single-floats. The heap-allocating allocate_ffloat()
// stores its address in the `type' field of every object it creates.
extern cl_class cl_class_ffloat;
  65
  66 // Builds a float from the explicit word.
  67 #if defined(CL_WIDE_POINTERS)
  68 inline cl_FF::cl_FF (struct cl_heap_ffloat * null, cl_uint w)
  69         : cl_F ((cl_private_thing) w) { unused null; }
  70 inline const cl_FF allocate_ffloat (ffloat eksplicit)
  71 {
  72         return cl_FF((struct cl_heap_ffloat *) 0, ((cl_uint)eksplicit << FF_value_shift) | (cl_FF_tag << cl_tag_shift));
  73 }
  74 #else
  75 inline cl_heap_ffloat* allocate_ffloat (ffloat eksplicit)
  76 {
  77         cl_heap_ffloat* p = (cl_heap_ffloat*) malloc_hook(sizeof(cl_heap_ffloat));
  78         p->refcount = 1;
  79         p->type = &cl_class_ffloat;
  80         p->representation.eksplicit = eksplicit;
  81         return p;
  82 }
  83 #endif
  84
  85 // Builds a float word from sign (0 or -1), exponent and mantissa.
  86 inline uint32 make_FF_word (cl_sint sign, unsigned int exp, cl_uint mant)
  87 {
  88         return (sign << FF_sign_shift)
  89                | (exp << FF_exp_shift)
  90                | ((mant & (bit(FF_mant_len)-1)) << FF_mant_shift);
  91 }
  92
  93 // Builds a float from sign (0 or -1), exponent and mantissa.
  94 inline const cl_FF make_FF (cl_sint sign, unsigned int exp, cl_uint mant)
  95 {
  96         return allocate_ffloat(make_FF_word(sign,exp,mant));
  97 }
  98
#if defined(CL_WIDE_POINTERS)
// With wide pointers the constants are macros: each use expands to a
// make_FF() call that builds the value in place.
// Single Float 0.0
  #define cl_FF_0  make_FF(0,0,0)
// Single Float 1.0 (= 0.5 * 2^1: exponent 1, mantissa is just the hidden bit)
  #define cl_FF_1  make_FF(0,FF_exp_mid+1,bit(FF_mant_len))
// Single Float -1.0 (same as 1.0 but with sign -1)
  #define cl_FF_minus1  make_FF(-1,FF_exp_mid+1,bit(FF_mant_len))
#else
// Otherwise the constants are objects defined elsewhere.
// Single Float 0.0
  extern const cl_FF cl_FF_0;
// Single Float 1.0
  extern const cl_FF cl_FF_1;
// Single Float -1.0
  extern const cl_FF cl_FF_minus1;
#endif
 114
 115
// Unpacking a single-float:
// FF_decode(obj, zero_statement, sign=,exp=,mant=);
// decomposes a single-float obj.
// If obj=0.0, zero_statement is executed.
// Otherwise: cl_signean sign = sign (0 = +, -1 = -),
//            sintL exp = exponent (signed),
//            uintL mant = mantissa (>= 2^FF_mant_len, < 2^(FF_mant_len+1))
// The *_zuweisung ("assignment") arguments are assignment prefixes such as
// `sign=': each one is placed textually in front of the value it receives.
// Every word whose exponent field is 0 takes the zero_statement path.
// FF_uexp(x) extracts the stored exponent field of the float word x.
#define FF_uexp(x)  (((x) >> FF_mant_len) & (bit(FF_exp_len)-1))
#define FF_decode(obj, zero_statement, sign_zuweisung,exp_zuweisung,mant_zuweisung)  \
  { var ffloat _x = cl_ffloat_value(obj);                               \
    var uintL uexp = FF_uexp(_x);                                       \
    if (uexp==0)                                                        \
      { zero_statement } /* e=0 -> number 0.0 */                        \
      else                                                              \
      { exp_zuweisung (sintL)(uexp - FF_exp_mid); /* exponent */        \
        unused (sign_zuweisung sign_of((sint32)(_x))); /* sign */\
        mant_zuweisung (bit(FF_mant_len) | (_x & (bit(FF_mant_len)-1))); \
  }   }
 134
 135 // Einpacken eines Single-Float:
 136 // encode_FF(sign,exp,mant);
 137 // liefert ein Single-Float.
 138 // > cl_signean sign: Vorzeichen, 0 für +, -1 für negativ.
 139 // > sintL exp: Exponent
 140 // > uintL mant: Mantisse, sollte >= 2^FF_mant_len und < 2^(FF_mant_len+1) sein.
 141 // < object ergebnis: ein Single-Float
 142 // Der Exponent wird auf Überlauf/Unterlauf getestet.
 143 inline const cl_FF encode_FF (cl_signean sign, sintL exp, uintL mant)
 144 {
 145         if (exp < (sintL)(FF_exp_low-FF_exp_mid))
 146                 { if (underflow_allowed())
 147                         { cl_error_floating_point_underflow(); }
 148                         else
 149                         { return cl_FF_0; }
 150                 }
 151         else
 152         if (exp > (sintL)(FF_exp_high-FF_exp_mid))
 153                 { cl_error_floating_point_overflow(); }
 154         else
 155         return make_FF(sign, exp+FF_exp_mid, mant & (bit(FF_mant_len)-1));
 156 }
 157
 158 #ifdef FAST_FLOAT
 159 // Auspacken eines Floats:
 160 inline float FF_to_float (const cl_FF& obj)
 161 {
 162   #if defined(CL_WIDE_POINTERS) // eines der beiden 32-Bit-Wörter
 163     #if defined(__GNUC__)
 164       return ((ffloatjanus) { eksplicit: cl_ffloat_value(obj) }).machine_float;
 165     #else
 166       return *(float*)(&((uint32*)&(obj))[BIG_ENDIAN_P+(1-2*BIG_ENDIAN_P)*(FF_value_shift/32)]);
 167     #endif
 168   #else
 169     return TheFfloat(obj)->representation.machine_float;
 170   #endif
 171 }
// Checking and packing an IEEE float delivered by the 'float' routines.
// float_to_FF(expr, result=, maybe_overflow, maybe_subnormal,
//             maybe_underflow, maybe_divide_0, maybe_nan);
// evaluates expr once and either assigns a single-float through the
// assignment prefix ergebnis_zuweisung ("result assignment") or calls one
// of the cl_error_* functions.
// Classification:
//   1 <= e <= 254 : normalized number
//   e=0, m/=0: subnormal number
//   e=0, m=0: signed 0.0
//   e=255, m=0: signed Infinity
//   e=255, m/=0: NaN
// Specification of the special cases that may occur:
//   maybe_overflow: operation overflows, yields IEEE Infinity
//   maybe_subnormal: result very small, yields IEEE subnormal number
//   maybe_underflow: result very small and /=0, yields IEEE zero
//   maybe_divide_0: result undefined, yields IEEE Infinity
//   maybe_nan: result undefined, yields IEEE NaN
// Notes:
//   - If e=0 and no underflow error is raised, the result is cl_FF_0, also
//     for subnormal words.
//   - The e=255 test is made only if maybe_overflow or maybe_divide_0 is
//     set; otherwise such a word is passed to allocate_ffloat() unchanged.
//   - With both maybe_overflow and maybe_divide_0 set, Infinity is reported
//     as division by 0.
  #define float_to_FF(expr,ergebnis_zuweisung,maybe_overflow,maybe_subnormal,maybe_underflow,maybe_divide_0,maybe_nan)  \
    { var ffloatjanus _erg; _erg.machine_float = (expr);                \
      if ((_erg.eksplicit & ((uint32)bit(FF_exp_len+FF_mant_len)-bit(FF_mant_len))) == 0) /* e=0 ? */\
        { if ((maybe_underflow                                          \
               || (maybe_subnormal && !((_erg.eksplicit << 1) == 0))    \
              )                                                         \
              && underflow_allowed()                                    \
             )                                                          \
            { cl_error_floating_point_underflow(); } /* subnormal or even smaller -> underflow */\
            else                                                        \
            { ergebnis_zuweisung cl_FF_0; } /* +/- 0.0 -> 0.0 */        \
        }                                                               \
      elif ((maybe_overflow || maybe_divide_0)                          \
            && (((~_erg.eksplicit) & ((uint32)bit(FF_exp_len+FF_mant_len)-bit(FF_mant_len))) == 0) /* e=255 ? */\
           )                                                            \
        { if (maybe_nan && !((_erg.eksplicit << (32-FF_mant_len)) == 0)) \
            { cl_error_division_by_0(); } /* NaN, i.e. a singularity -> "division by 0" */\
          else /* Infinity */                                           \
          if (!maybe_overflow || maybe_divide_0)                        \
            { cl_error_division_by_0(); } /* Infinity, division by 0 */\
            else                                                        \
            { cl_error_floating_point_overflow(); } /* Infinity, overflow */\
        }                                                               \
      else                                                              \
        { ergebnis_zuweisung allocate_ffloat(_erg.eksplicit); }         \
    }
 212 #endif
 213
// For a single-float x, returns (futruncate x), an FF.
// x is rounded away from 0 to the next integer.
extern const cl_FF futruncate (const cl_FF& x);

// cl_FF_to_I(x) converts a single-float x that represents an integer
// into an Integer.
extern const cl_I cl_FF_to_I (const cl_FF& x);

// cl_I_to_FF(x) converts an Integer x into a single-float, rounding it.
extern const cl_FF cl_I_to_FF (const cl_I& x);

// cl_RA_to_FF(x) converts a rational number x into a single-float,
// rounding it.
extern const cl_FF cl_RA_to_FF (const cl_RA& x);
 228
 229
// IEEE single float:
// Bit 31 = s, bits 30..23 = e, bits 22..0 = m.
//   e=0, m=0: signed 0.0
//   e=0, m/=0: subnormal number,
//     value = (-1)^s * 2^(1-126) * [ 0 . 0 m22 ... m0 ]
//   1 <= e <= 254 : normalized number,
//     value = (-1)^s * 2^(e-126) * [ 0 . 1 m22 ... m0 ]
//   e=255, m=0: signed Infinity
//   e=255, m/=0: NaN
 239
// cl_float_to_FF(val) converts an IEEE single float val into a single-float.
// The conversion itself is done by cl_float_to_FF_pointer(), declared here
// and defined elsewhere; the inline wrapper turns its cl_private_thing
// result into a cl_FF.
extern cl_private_thing cl_float_to_FF_pointer (const ffloatjanus& val);
inline const cl_FF cl_float_to_FF (const ffloatjanus& val)
        { return cl_float_to_FF_pointer(val); }
 244
// cl_FF_to_float(obj,&val);
// converts a single-float obj into an IEEE single float val.
// > obj: the single-float to convert
// < *val_: receives the IEEE single float
extern void cl_FF_to_float (const cl_FF& obj, ffloatjanus* val_);
 248
 249 }  // namespace cln
 250
 251 #endif /* _CL_FF_H */