// cl_make_heap_GV_I().

// General includes.
#include "cl_sysdep.h"

CL_PROVIDE(cl_GV_I)

// Specification.
#include "cl_GV_integer.h"


// Implementation.

#include "cl_I.h"
#include "cl_DS.h"
#include "cl_abort.h"
#include "cl_offsetof.h"


// Memory-efficient integer vectors: If all entries are known in advance to
// be >= 0 and < 2^m, we reserve only m bits for each entry (m = 1,2,4,8,16,32).
// Thus we end up with 6 kinds of bit/byte vectors, plus the general integer
// vectors.
// So that m can be queried later, we store it in the vectorops table. Because
// of this, treating a cl_GV_RA as a cl_GV_I is wrong. In particular, we cannot
// use cl_null_GV_N to initialize a cl_GV_I; a special cl_null_GV_I is needed.
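// Example: with intDsize = 32 and m = 4, eight entries are packed into each
// word, so a vector of 100 such entries occupies ceiling(100*4/32) = 13 words
// instead of 100 full cl_I objects.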


static void cl_gvector_integer_destructor (cl_heap* pointer)
{
#if (defined(__mips__) || defined(__mips64__)) && !defined(__GNUC__) // workaround SGI CC bug
        (*(cl_heap_GV_I*)pointer).~cl_heap_GV();
#else
        (*(cl_heap_GV_I*)pointer).~cl_heap_GV_I();
#endif
}

cl_class cl_class_gvector_integer = {
        cl_gvector_integer_destructor,
        0
};


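// Recover the surrounding cl_heap_GV_I object from a pointer to its embedded
// cl_GV_inner<cl_I> member v, by subtracting the member's offset.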
static inline cl_heap_GV_I * outcast (cl_GV_inner<cl_I>* vec)
{
        return (cl_heap_GV_I *)((char *) vec - offsetof(cl_heap_GV_I,v));
}
static inline const cl_heap_GV_I * outcast (const cl_GV_inner<cl_I>* vec)
{
        return (const cl_heap_GV_I *)((const char *) vec - offsetof(cl_heap_GV_I,v));
}


// Add more info to the vectorops tables.

struct cl_GV_I_vectorops {
        cl_GV_vectorops<cl_I> ops;
        sintL m; // for maxbits
};

static inline cl_GV_I_vectorops* outcast (cl_GV_vectorops<cl_I>* vectorops)
{
        return (cl_GV_I_vectorops*)((char *) vectorops - offsetof(cl_GV_I_vectorops,ops));
}


// Vectors of general integers.

struct cl_heap_GV_I_general : public cl_heap_GV_I {
        cl_I data[1];
        // Standard allocation disabled.
        void* operator new (size_t size) { unused size; cl_abort(); return (void*)1; }
        // Standard deallocation disabled.
        void operator delete (void* ptr) { unused ptr; cl_abort(); }
        // No default constructor.
        cl_heap_GV_I_general ();
};

static const cl_I general_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        return ((const cl_heap_GV_I_general *) outcast(vec))->data[index];
}

static void general_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        ((cl_heap_GV_I_general *) outcast(vec))->data[index] = x;
}

static void general_do_delete (cl_GV_inner<cl_I>* vec)
{
        var cl_heap_GV_I_general* hv = (cl_heap_GV_I_general *) outcast(vec);
        var uintL len = hv->v.length();
        for (var uintL i = 0; i < len; i++)
                hv->data[i].~cl_I();
}

static void general_copy_elements (const cl_GV_inner<cl_I>* srcvec, uintL srcindex, cl_GV_inner<cl_I>* destvec, uintL destindex, uintL count)
{
        if (count > 0) {
                var const cl_heap_GV_I_general* srcv =
                  (const cl_heap_GV_I_general *) outcast(srcvec);
                var cl_heap_GV_I_general* destv =
                  (cl_heap_GV_I_general *) outcast(destvec);
                var uintL srclen = srcv->v.length();
                var uintL destlen = destv->v.length();
                if (!(srcindex <= srcindex+count && srcindex+count <= srclen))
                        cl_abort();
                if (!(destindex <= destindex+count && destindex+count <= destlen))
                        cl_abort();
                do {
                        destv->data[destindex++] = srcv->data[srcindex++];
                } while (--count > 0);
        }
}

static cl_GV_I_vectorops general_vectorops = {{
        general_element,
        general_set_element,
        general_do_delete,
        general_copy_elements },
        -1
};

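// Allocate a general integer vector of length len in one block: the header
// immediately followed by len cl_I slots, each default-constructed.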
cl_heap_GV_I* cl_make_heap_GV_I (uintL len)
{
        var cl_heap_GV_I_general* hv = (cl_heap_GV_I_general*) cl_malloc_hook(offsetofa(cl_heap_GV_I_general,data)+sizeof(cl_I)*len);
        hv->refcount = 1;
        hv->type = &cl_class_gvector_integer;
        new (&hv->v) cl_GV_inner<cl_I> (len,&general_vectorops.ops);
        for (var uintL i = 0; i < len; i++)
                init1(cl_I, hv->data[i]) ();
        return hv;
}


// Vectors of integers requiring only a few bits each.

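// The macro below defines, for a given field width m and word type uint_t,
// the heap structure cl_heap_GV_I_bits<m>, declares the per-m element
// accessors (their definitions follow further down), defines the
// copy_elements routine, and sets up the corresponding vectorops table.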
#define DEFINE_cl_heap_GV_I_bits(m,uint_t)  \
struct cl_heap_GV_I_bits##m : public cl_heap_GV_I {                     \
        uint_t data[1];                                                 \
        /* Standard allocation disabled. */                             \
        void* operator new (size_t size) { unused size; cl_abort(); return (void*)1; } \
        /* Standard deallocation disabled. */                           \
        void operator delete (void* ptr) { unused ptr; cl_abort(); }    \
        /* No default constructor. */                                   \
        cl_heap_GV_I_bits##m ();                                        \
};                                                                      \
static const cl_I bits##m##_element (const cl_GV_inner<cl_I>* vec, uintL index); \
static void bits##m##_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x); \
static void bits##m##_copy_elements (const cl_GV_inner<cl_I>* srcvec, uintL srcindex, cl_GV_inner<cl_I>* destvec, uintL destindex, uintL count) \
{                                                                               \
        if (count > 0) {                                                        \
                var const cl_heap_GV_I_bits##m * srcv =                         \
                  (const cl_heap_GV_I_bits##m *) outcast(srcvec);               \
                var cl_heap_GV_I_bits##m * destv =                              \
                  (cl_heap_GV_I_bits##m *) outcast(destvec);                    \
                var uintL srclen = srcv->v.length();                            \
                var uintL destlen = destv->v.length();                          \
                if (!(srcindex <= srcindex+count && srcindex+count <= srclen))  \
                        cl_abort();                                             \
                if (!(destindex <= destindex+count && destindex+count <= destlen)) \
                        cl_abort();                                             \
                if (m == intDsize) {                                            \
                        var const uintD* srcptr = &srcv->data[srcindex];        \
                        var uintD* destptr = &destv->data[destindex];           \
                        do {                                                    \
                                *destptr++ = *srcptr++;                         \
                        } while (--count > 0);                                  \
                } else                                                          \
                        bits_copy(srcv->data,m*srcindex,destv->data,m*destindex,m*count); \
        }                                                                       \
}                                                                       \
static cl_GV_I_vectorops bits##m##_vectorops = {{                       \
        bits##m##_element,                                              \
        bits##m##_set_element,                                          \
        bits_do_delete,                                                 \
        bits##m##_copy_elements },                                      \
        m                                                               \
};

static void bits_do_delete (cl_GV_inner<cl_I>* vec)
{
        unused vec;
}

// Copy bits srcptr.bits[srcindex..srcindex+count-1] into destptr.bits[destindex..destindex+count-1].
// Assumes that all range checks have already been performed.
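// Bit positions are counted from bit 0 of the first word upwards; e.g. with
// intDsize = 32, bit index 37 refers to bit 5 of the second word.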
static void bits_copy (const uintD* srcptr, uintL srcindex, uintD* destptr, uintL destindex, uintL count)
{
        srcptr += floor(srcindex,intDsize);
        destptr += floor(destindex,intDsize);
        srcindex = srcindex%intDsize;
        destindex = destindex%intDsize;
        // Now 0 <= srcindex < intDsize and 0 <= destindex < intDsize.
        if (srcindex == destindex) {
                // src and dest are aligned with respect to each other.
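                // Note: the idiom  x ^= (x ^ y) & mask  used below copies exactly
                // the bits of y selected by mask into x; it is equivalent to
                // x = (x & ~mask) | (y & mask).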
                if (srcindex > 0) {
                        if (count <= intDsize-srcindex) {
                                *destptr ^= (*destptr ^ *srcptr) & ((uintD)(bit(count)-1) << srcindex);
                                return;
                        }
                        *destptr ^= (*destptr ^ *srcptr) & (uintD)minus_bit(srcindex);
                        srcptr++;
                        destptr++;
                        count -= intDsize-srcindex;
                }
                // Now srcindex and destindex can be assumed to be 0.
                var uintL count1 = count%intDsize;
                count = floor(count,intDsize);
                if (count > 0) {
                        do {
                                *destptr++ = *srcptr++;
                        } while (--count > 0);
                }
                if (count1 > 0) {
                        *destptr ^= (*destptr ^ *srcptr) & (uintD)(bit(count1)-1);
                }
        } else {
                var uintL i = destindex - srcindex;
                var uintD tmp;
                if (destindex >= srcindex) { // i > 0
                        if (count <= intDsize-destindex) {
                                *destptr ^= (*destptr ^ (*srcptr << i)) & ((uintD)(bit(count)-1) << destindex);
                                return;
                        }
                        *destptr ^= (*destptr ^ (*srcptr << i)) & (uintD)minus_bit(destindex);
                        destptr++;
                        tmp = *srcptr >> (intDsize-i);
                        count -= intDsize-destindex;
                } else { // i < 0
                        if (count <= intDsize-srcindex) {
                                *destptr ^= (*destptr ^ (*srcptr >> -i)) & ((uintD)(bit(count)-1) << destindex);
                                return;
                        }
                        tmp = (*destptr & (uintD)(bit(destindex)-1)) | ((*srcptr >> srcindex) << destindex);
                        count += destindex;
                        i += intDsize;
                }
                srcptr++;
                // tmp now contains the low i bits to be put into *destptr.
                var uintL count1 = count%intDsize;
                count = floor(count,intDsize);
                var uintD lastdest;
                if (count == 0)
                        lastdest = tmp;
                else {
                        lastdest = shiftleftcopy_loop_up(srcptr,destptr,count,i);
                        *destptr |= tmp;
                }
                // lastdest now contains the i bits shifted out of the top of the source.
                if (count1 > 0) {
                        destptr += count;
                        if (count1 > i)
                                lastdest |= *(srcptr += count) << i;
                        *destptr ^= (*destptr ^ lastdest) & (uintD)(bit(count1)-1);
                }
        }
}



// It would be most natural to use the following type for uint_t:
// m = 1: uint_t = uint8
// m = 2: uint_t = uint8
// m = 4: uint_t = uint8
// m = 8: uint_t = uint8
// m = 16: uint_t = uint16
// m = 32: uint_t = uint32
// But we want to have a fast copy_elements routine. And for m=1,
// we also want to use the fast shiftxor_loop_up() function for addition.
// Hence we use uint_t = uintD in all cases. (NB: intDsize>=32.)

// The last ceiling(len*m/intDsize)*intDsize-len*m unused bits of the last word
// are always 0. This simplifies routines which work on entire words: they
// don't need to special-case the last word.
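// Example: len = 5, m = 8, intDsize = 32 gives ceiling(40/32)*32 - 40 = 24
// trailing bits, which stay zero.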


DEFINE_cl_heap_GV_I_bits(1,uintD)

static const cl_I bits1_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        return (unsigned int)((((const cl_heap_GV_I_bits1 *) outcast(vec))->data[index/intDsize] >> (index%intDsize)) & 0x1);
}
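// Setter for m = 1: only fixnum values 0 and 1 are accepted; anything else
// aborts. The read-modify-write below overwrites just the selected bit. The
// setters for m = 2, 4, 8, 16 follow the same pattern (with a direct
// byte/halfword store on little-endian machines for m = 8 and 16).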
static void bits1_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0x1) {
                        var uintD* ptr = &((cl_heap_GV_I_bits1 *) outcast(vec))->data[index/intDsize];
                        index = index%intDsize;
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0x1 << index));
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(2,uintD)

static const cl_I bits2_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        return (unsigned int)((((const cl_heap_GV_I_bits2 *) outcast(vec))->data[index/(intDsize/2)] >> (2*(index%(intDsize/2)))) & 0x3);
}
static void bits2_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0x3) {
                        var uintD* ptr = &((cl_heap_GV_I_bits2 *) outcast(vec))->data[index/(intDsize/2)];
                        index = 2*(index%(intDsize/2));
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0x3 << index));
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(4,uintD)

static const cl_I bits4_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        return (unsigned int)((((const cl_heap_GV_I_bits4 *) outcast(vec))->data[index/(intDsize/4)] >> (4*(index%(intDsize/4)))) & 0xF);
}
static void bits4_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0xF) {
                        var uintD* ptr = &((cl_heap_GV_I_bits4 *) outcast(vec))->data[index/(intDsize/4)];
                        index = 4*(index%(intDsize/4));
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0xF << index));
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(8,uintD)

static const cl_I bits8_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        #if CL_CPU_BIG_ENDIAN_P
        return (unsigned int)((((const cl_heap_GV_I_bits8 *) outcast(vec))->data[index/(intDsize/8)] >> (8*(index%(intDsize/8)))) & 0xFF);
        #else
        // Optimization which assumes little-endian storage of uint8 in an uintD
        return (unsigned int)(((uint8*)(((const cl_heap_GV_I_bits8 *) outcast(vec))->data))[index]);
        #endif
}
static void bits8_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0xFF) {
                        #if CL_CPU_BIG_ENDIAN_P
                        var uintD* ptr = &((cl_heap_GV_I_bits8 *) outcast(vec))->data[index/(intDsize/8)];
                        index = 8*(index%(intDsize/8));
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0xFF << index));
                        #else
                        // Optimization which assumes little-endian storage of uint8 in an uintD
                        ((uint8*)(((cl_heap_GV_I_bits8 *) outcast(vec))->data))[index] = xval;
                        #endif
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(16,uintD)

static const cl_I bits16_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        #if CL_CPU_BIG_ENDIAN_P
        return (unsigned int)((((const cl_heap_GV_I_bits16 *) outcast(vec))->data[index/(intDsize/16)] >> (16*(index%(intDsize/16)))) & 0xFFFF);
        #else
        // Optimization which assumes little-endian storage of uint16 in an uintD
        return (unsigned int)(((uint16*)(((const cl_heap_GV_I_bits16 *) outcast(vec))->data))[index]);
        #endif
}
static void bits16_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0xFFFF) {
                        #if CL_CPU_BIG_ENDIAN_P
                        var uintD* ptr = &((cl_heap_GV_I_bits16 *) outcast(vec))->data[index/(intDsize/16)];
                        index = 16*(index%(intDsize/16));
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0xFFFF << index));
                        #else
                        // Optimization which assumes little-endian storage of uint16 in an uintD
                        ((uint16*)(((cl_heap_GV_I_bits16 *) outcast(vec))->data))[index] = xval;
                        #endif
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(32,uintD)

static const cl_I bits32_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        #if (intDsize==32)
        return (unsigned long)(((const cl_heap_GV_I_bits32 *) outcast(vec))->data[index]);
        #elif CL_CPU_BIG_ENDIAN_P
        return (unsigned long)((((const cl_heap_GV_I_bits32 *) outcast(vec))->data[index/(intDsize/32)] >> (32*(index%(intDsize/32)))) & 0xFFFFFFFF);
        #else
        // Optimization which assumes little-endian storage of uint32 in an uintD
        return (unsigned long)(((uint32*)(((const cl_heap_GV_I_bits32 *) outcast(vec))->data))[index]);
        #endif
}
static void bits32_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval = cl_I_to_UL(x);
        #if (intDsize==32)
        ((cl_heap_GV_I_bits32 *) outcast(vec))->data[index] = xval;
        #elif CL_CPU_BIG_ENDIAN_P
        var uintD* ptr = &((cl_heap_GV_I_bits32 *) outcast(vec))->data[index/(intDsize/32)];
        index = 32*(index%(intDsize/32));
        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0xFFFFFFFF << index));
        #else
        // Optimization which assumes little-endian storage of uint32 in an uintD
        ((uint32*)(((cl_heap_GV_I_bits32 *) outcast(vec))->data))[index] = xval;
        #endif
}


static cl_GV_I_vectorops* bits_vectorops[6] = {
        &bits1_vectorops,
        &bits2_vectorops,
        &bits4_vectorops,
        &bits8_vectorops,
        &bits16_vectorops,
        &bits32_vectorops
};

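// Allocate a packed integer vector of length len whose entries will satisfy
// 0 <= x < 2^m. m is rounded up to the next power of two in {1,2,4,8,16,32}
// (index log2_bits into bits_vectorops[]); any other m falls back to a
// general integer vector. All data words start out zeroed.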
cl_heap_GV_I* cl_make_heap_GV_I (uintL len, sintL m)
{
        // Determine log2(bits).
        var uintL log2_bits;
        switch (m) {
                case 0: case 1:
                        log2_bits = 0; break;
                case 2:
                        log2_bits = 1; break;
                case 3: case 4:
                        log2_bits = 2; break;
                case 5: case 6: case 7: case 8:
                        log2_bits = 3; break;
                case 9: case 10: case 11: case 12:
                case 13: case 14: case 15: case 16:
                        log2_bits = 4; break;
                case 17: case 18: case 19: case 20:
                case 21: case 22: case 23: case 24:
                case 25: case 26: case 27: case 28:
                case 29: case 30: case 31: case 32:
                        log2_bits = 5; break;
                default:
                        return cl_make_heap_GV_I(len);
        }
        // For room allocation purposes, be pessimistic: assume the uintD case (since intDsize>=32).
        var uintL words = // ceiling(len*2^log2_bits,intDsize)
          (((sintL)len-1)>>(log2_intDsize-log2_bits))+1;
        var cl_heap_GV_I_bits32* hv = (cl_heap_GV_I_bits32*) cl_malloc_hook(offsetofa(cl_heap_GV_I_bits32,data)+sizeof(uintD)*words);
        hv->refcount = 1;
        hv->type = &cl_class_gvector_integer;
        new (&hv->v) cl_GV_inner<cl_I> (len,&bits_vectorops[log2_bits]->ops);
        var uintD* ptr = (uintD*)(hv->data);
        for (var uintL i = 0; i < words; i++)
                ptr[i] = 0;
        return (cl_heap_GV_I*) hv;
}


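// maxbits() returns the number of bits reserved per entry (1,2,4,8,16 or 32),
// i.e. every entry satisfies 0 <= x < 2^maxbits(); for general integer
// vectors it returns -1.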
sintL cl_heap_GV_I::maxbits () const
{
        return outcast(v.vectorops)->m;
}


// An empty vector.
const cl_GV_I cl_null_GV_I = cl_GV_I((uintL)0);

CL_PROVIDE_END(cl_GV_I)