// cl_make_heap_GV_I().

// General includes.
#include "cl_sysdep.h"

CL_PROVIDE(cl_GV_I)

// Specification.
#include "cl_GV_integer.h"


// Implementation.

#include "cl_I.h"
#include "cl_DS.h"
#include "cl_abort.h"
#include "cl_offsetof.h"


// Memory-efficient integer vectors: If all entries are known in advance to
// be >= 0 and < 2^m, we reserve only m bits for each entry (m = 1,2,4,8,16,32).
// Thus we end up with 6 kinds of bit/byte vectors, plus the general integer
// vectors.
// So that m can be queried later, we store it in the vectorops table. Because
// of this, treating a cl_GV_RA as a cl_GV_I is wrong. In particular, we cannot
// use cl_null_GV_N to initialize a cl_GV_I; a special cl_null_GV_I is needed.
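// Example: with intDsize = 32 and m = 4, eight entries are packed into each
// word, so a vector of 100 such entries occupies ceiling(100*4/32) = 13 words
// instead of 100 full cl_I objects.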


static void cl_gvector_integer_destructor (cl_heap* pointer)
{
#if (defined(__mips__) || defined(__mips64__)) && !defined(__GNUC__) // workaround SGI CC bug
        (*(cl_heap_GV_I*)pointer).~cl_heap_GV();
#else
        (*(cl_heap_GV_I*)pointer).~cl_heap_GV_I();
#endif
}

cl_class cl_class_gvector_integer = {
        cl_gvector_integer_destructor,
        0
};


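// Recover the surrounding cl_heap_GV_I object from a pointer to its embedded
// cl_GV_inner<cl_I> member v, by subtracting the member's offset.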
static inline cl_heap_GV_I * outcast (cl_GV_inner<cl_I>* vec)
{
        return (cl_heap_GV_I *)((char *) vec - offsetof(cl_heap_GV_I,v));
}
static inline const cl_heap_GV_I * outcast (const cl_GV_inner<cl_I>* vec)
{
        return (const cl_heap_GV_I *)((const char *) vec - offsetof(cl_heap_GV_I,v));
}


// Add more info to the vectorops tables.

struct cl_GV_I_vectorops {
        cl_GV_vectorops<cl_I> ops;
        sintL m; // for maxbits
};

static inline cl_GV_I_vectorops* outcast (cl_GV_vectorops<cl_I>* vectorops)
{
        return (cl_GV_I_vectorops*)((char *) vectorops - offsetof(cl_GV_I_vectorops,ops));
}


// Vectors of general integers.

struct cl_heap_GV_I_general : public cl_heap_GV_I {
        cl_I data[1];
        // Standard allocation disabled.
        void* operator new (size_t size) { unused size; cl_abort(); return (void*)1; }
        // Standard deallocation disabled.
        void operator delete (void* ptr) { unused ptr; cl_abort(); }
        // No default constructor.
        cl_heap_GV_I_general ();
};

static const cl_I general_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        return ((const cl_heap_GV_I_general *) outcast(vec))->data[index];
}

static void general_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        ((cl_heap_GV_I_general *) outcast(vec))->data[index] = x;
}

static void general_do_delete (cl_GV_inner<cl_I>* vec)
{
        var cl_heap_GV_I_general* hv = (cl_heap_GV_I_general *) outcast(vec);
        var uintL len = hv->v.length();
        for (var uintL i = 0; i < len; i++)
                hv->data[i].~cl_I();
}

static void general_copy_elements (const cl_GV_inner<cl_I>* srcvec, uintL srcindex, cl_GV_inner<cl_I>* destvec, uintL destindex, uintL count)
{
        if (count > 0) {
                var const cl_heap_GV_I_general* srcv =
                  (const cl_heap_GV_I_general *) outcast(srcvec);
                var cl_heap_GV_I_general* destv =
                  (cl_heap_GV_I_general *) outcast(destvec);
                var uintL srclen = srcv->v.length();
                var uintL destlen = destv->v.length();
                if (!(srcindex <= srcindex+count && srcindex+count <= srclen))
                        cl_abort();
                if (!(destindex <= destindex+count && destindex+count <= destlen))
                        cl_abort();
                do {
                        destv->data[destindex++] = srcv->data[srcindex++];
                } while (--count > 0);
        }
}

static cl_GV_I_vectorops general_vectorops = {{
        general_element,
        general_set_element,
        general_do_delete,
        general_copy_elements },
        -1
};

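// Allocate a general integer vector of length len in one block: the header
// immediately followed by len cl_I slots, each default-constructed.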
cl_heap_GV_I* cl_make_heap_GV_I (uintL len)
{
        var cl_heap_GV_I_general* hv = (cl_heap_GV_I_general*) cl_malloc_hook(offsetofa(cl_heap_GV_I_general,data)+sizeof(cl_I)*len);
        hv->refcount = 1;
        hv->type = &cl_class_gvector_integer;
        new (&hv->v) cl_GV_inner<cl_I> (len,&general_vectorops.ops);
        for (var uintL i = 0; i < len; i++)
                init1(cl_I, hv->data[i]) ();
        return hv;
}


// Vectors of integers requiring only a few bits each.

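// The macro below defines, for a given field width m and word type uint_t,
// the heap structure cl_heap_GV_I_bits<m>, declares the per-m element
// accessors (their definitions follow further down), defines the
// copy_elements routine, and sets up the corresponding vectorops table.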
#define DEFINE_cl_heap_GV_I_bits(m,uint_t)  \
struct cl_heap_GV_I_bits##m : public cl_heap_GV_I {                     \
        uint_t data[1];                                                 \
        /* Standard allocation disabled. */                             \
        void* operator new (size_t size) { unused size; cl_abort(); return (void*)1; } \
        /* Standard deallocation disabled. */                           \
        void operator delete (void* ptr) { unused ptr; cl_abort(); }    \
        /* No default constructor. */                                   \
        cl_heap_GV_I_bits##m ();                                        \
};                                                                      \
static const cl_I bits##m##_element (const cl_GV_inner<cl_I>* vec, uintL index); \
static void bits##m##_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x); \
static void bits##m##_copy_elements (const cl_GV_inner<cl_I>* srcvec, uintL srcindex, cl_GV_inner<cl_I>* destvec, uintL destindex, uintL count) \
{                                                                               \
        if (count > 0) {                                                        \
                var const cl_heap_GV_I_bits##m * srcv =                         \
                  (const cl_heap_GV_I_bits##m *) outcast(srcvec);               \
                var cl_heap_GV_I_bits##m * destv =                              \
                  (cl_heap_GV_I_bits##m *) outcast(destvec);                    \
                var uintL srclen = srcv->v.length();                            \
                var uintL destlen = destv->v.length();                          \
                if (!(srcindex <= srcindex+count && srcindex+count <= srclen))  \
                        cl_abort();                                             \
                if (!(destindex <= destindex+count && destindex+count <= destlen)) \
                        cl_abort();                                             \
                if (m == intDsize) {                                            \
                        var const uintD* srcptr = &srcv->data[srcindex];        \
                        var uintD* destptr = &destv->data[destindex];           \
                        do {                                                    \
                                *destptr++ = *srcptr++;                         \
                        } while (--count > 0);                                  \
                } else                                                          \
                        bits_copy(srcv->data,m*srcindex,destv->data,m*destindex,m*count); \
        }                                                                       \
}                                                                       \
static cl_GV_I_vectorops bits##m##_vectorops = {{                       \
        bits##m##_element,                                              \
        bits##m##_set_element,                                          \
        bits_do_delete,                                                 \
        bits##m##_copy_elements },                                      \
        m                                                               \
};

static void bits_do_delete (cl_GV_inner<cl_I>* vec)
{
        unused vec;
}

// Copy bits srcptr.bits[srcindex..srcindex+count-1] into destptr.bits[destindex..destindex+count-1].
// Assumes that all range checks have already been performed.
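// Bit positions are counted from bit 0 of the first word upwards; e.g. with
// intDsize = 32, bit index 37 refers to bit 5 of the second word.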
static void bits_copy (const uintD* srcptr, uintL srcindex, uintD* destptr, uintL destindex, uintL count)
{
        srcptr += floor(srcindex,intDsize);
        destptr += floor(destindex,intDsize);
        srcindex = srcindex%intDsize;
        destindex = destindex%intDsize;
        // Now 0 <= srcindex < intDsize and 0 <= destindex < intDsize.
        if (srcindex == destindex) {
                // src and dest are aligned with respect to each other.
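                // Note: the idiom  x ^= (x ^ y) & mask  used below copies exactly
                // the bits of y selected by mask into x; it is equivalent to
                // x = (x & ~mask) | (y & mask).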
                if (srcindex > 0) {
                        if (count <= intDsize-srcindex) {
                                *destptr ^= (*destptr ^ *srcptr) & ((uintD)(bit(count)-1) << srcindex);
                                return;
                        }
                        *destptr ^= (*destptr ^ *srcptr) & (uintD)minus_bit(srcindex);
                        srcptr++;
                        destptr++;
                        count -= intDsize-srcindex;
                }
                // Now srcindex and destindex can be assumed to be 0.
                var uintL count1 = count%intDsize;
                count = floor(count,intDsize);
                if (count > 0) {
                        do {
                                *destptr++ = *srcptr++;
                        } while (--count > 0);
                }
                if (count1 > 0) {
                        *destptr ^= (*destptr ^ *srcptr) & (uintD)(bit(count1)-1);
                }
        } else {
                var uintL i = destindex - srcindex;
                var uintD tmp;
                if (destindex >= srcindex) { // i > 0
                        if (count <= intDsize-destindex) {
                                *destptr ^= (*destptr ^ (*srcptr << i)) & ((uintD)(bit(count)-1) << destindex);
                                return;
                        }
                        *destptr ^= (*destptr ^ (*srcptr << i)) & (uintD)minus_bit(destindex);
                        destptr++;
                        tmp = *srcptr >> (intDsize-i);
                        count -= intDsize-destindex;
                } else { // i < 0
                        if (count <= intDsize-srcindex) {
                                *destptr ^= (*destptr ^ (*srcptr >> -i)) & ((uintD)(bit(count)-1) << destindex);
                                return;
                        }
                        tmp = (*destptr & (uintD)(bit(destindex)-1)) | ((*srcptr >> srcindex) << destindex);
                        count += destindex;
                        i += intDsize;
                }
                srcptr++;
                // tmp now contains the low i bits to be put into *destptr.
                var uintL count1 = count%intDsize;
                count = floor(count,intDsize);
                var uintD lastdest;
                if (count == 0)
                        lastdest = tmp;
                else {
                        lastdest = shiftleftcopy_loop_up(srcptr,destptr,count,i);
                        *destptr |= tmp;
                }
                // lastdest now contains the i bits shifted out of the top of the source.
                if (count1 > 0) {
                        destptr += count;
                        if (count1 > i)
                                lastdest |= *(srcptr += count) << i;
                        *destptr ^= (*destptr ^ lastdest) & (uintD)(bit(count1)-1);
                }
        }
}



// It would be most natural to use the following type for uint_t:
// m = 1: uint_t = uint8
// m = 2: uint_t = uint8
// m = 4: uint_t = uint8
// m = 8: uint_t = uint8
// m = 16: uint_t = uint16
// m = 32: uint_t = uint32
// But we want to have a fast copy_elements routine. And for m=1,
// we also want to use the fast shiftxor_loop_up() function for addition.
// Hence we use uint_t = uintD in all cases. (NB: intDsize>=32.)

// The last ceiling(len*m/intDsize)*intDsize-len*m unused bits of the last word
// are always 0. This simplifies routines which work on entire words: they
// don't need to special-case the last word.
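// Example: len = 5, m = 8, intDsize = 32 gives ceiling(40/32)*32 - 40 = 24
// trailing bits, which stay zero.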


DEFINE_cl_heap_GV_I_bits(1,uintD)

static const cl_I bits1_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        return (unsigned int)((((const cl_heap_GV_I_bits1 *) outcast(vec))->data[index/intDsize] >> (index%intDsize)) & 0x1);
}
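// Setter for m = 1: only fixnum values 0 and 1 are accepted; anything else
// aborts. The read-modify-write below overwrites just the selected bit. The
// setters for m = 2, 4, 8, 16 follow the same pattern (with a direct
// byte/halfword store on little-endian machines for m = 8 and 16).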
static void bits1_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0x1) {
                        var uintD* ptr = &((cl_heap_GV_I_bits1 *) outcast(vec))->data[index/intDsize];
                        index = index%intDsize;
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0x1 << index));
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(2,uintD)

static const cl_I bits2_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        return (unsigned int)((((const cl_heap_GV_I_bits2 *) outcast(vec))->data[index/(intDsize/2)] >> (2*(index%(intDsize/2)))) & 0x3);
}
static void bits2_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0x3) {
                        var uintD* ptr = &((cl_heap_GV_I_bits2 *) outcast(vec))->data[index/(intDsize/2)];
                        index = 2*(index%(intDsize/2));
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0x3 << index));
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(4,uintD)

static const cl_I bits4_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        return (unsigned int)((((const cl_heap_GV_I_bits4 *) outcast(vec))->data[index/(intDsize/4)] >> (4*(index%(intDsize/4)))) & 0xF);
}
static void bits4_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0xF) {
                        var uintD* ptr = &((cl_heap_GV_I_bits4 *) outcast(vec))->data[index/(intDsize/4)];
                        index = 4*(index%(intDsize/4));
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0xF << index));
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(8,uintD)

static const cl_I bits8_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        #if CL_CPU_BIG_ENDIAN_P
        return (unsigned int)((((const cl_heap_GV_I_bits8 *) outcast(vec))->data[index/(intDsize/8)] >> (8*(index%(intDsize/8)))) & 0xFF);
        #else
        // Optimization which assumes little-endian storage of uint8 in an uintD
        return (unsigned int)(((uint8*)(((const cl_heap_GV_I_bits8 *) outcast(vec))->data))[index]);
        #endif
}
static void bits8_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0xFF) {
                        #if CL_CPU_BIG_ENDIAN_P
                        var uintD* ptr = &((cl_heap_GV_I_bits8 *) outcast(vec))->data[index/(intDsize/8)];
                        index = 8*(index%(intDsize/8));
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0xFF << index));
                        #else
                        // Optimization which assumes little-endian storage of uint8 in an uintD
                        ((uint8*)(((cl_heap_GV_I_bits8 *) outcast(vec))->data))[index] = xval;
                        #endif
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(16,uintD)

static const cl_I bits16_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        #if CL_CPU_BIG_ENDIAN_P
        return (unsigned int)((((const cl_heap_GV_I_bits16 *) outcast(vec))->data[index/(intDsize/16)] >> (16*(index%(intDsize/16)))) & 0xFFFF);
        #else
        // Optimization which assumes little-endian storage of uint16 in an uintD
        return (unsigned int)(((uint16*)(((const cl_heap_GV_I_bits16 *) outcast(vec))->data))[index]);
        #endif
}
static void bits16_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval;
        if (fixnump(x)) {
                xval = FN_to_UL(x);
                if (xval <= 0xFFFF) {
                        #if CL_CPU_BIG_ENDIAN_P
                        var uintD* ptr = &((cl_heap_GV_I_bits16 *) outcast(vec))->data[index/(intDsize/16)];
                        index = 16*(index%(intDsize/16));
                        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0xFFFF << index));
                        #else
                        // Optimization which assumes little-endian storage of uint16 in an uintD
                        ((uint16*)(((cl_heap_GV_I_bits16 *) outcast(vec))->data))[index] = xval;
                        #endif
                        return;
                }
        }
        cl_abort();
}


DEFINE_cl_heap_GV_I_bits(32,uintD)

static const cl_I bits32_element (const cl_GV_inner<cl_I>* vec, uintL index)
{
        #if (intDsize==32)
        return (unsigned long)(((const cl_heap_GV_I_bits32 *) outcast(vec))->data[index]);
        #elif CL_CPU_BIG_ENDIAN_P
        return (unsigned long)((((const cl_heap_GV_I_bits32 *) outcast(vec))->data[index/(intDsize/32)] >> (32*(index%(intDsize/32)))) & 0xFFFFFFFF);
        #else
        // Optimization which assumes little-endian storage of uint32 in an uintD
        return (unsigned long)(((uint32*)(((const cl_heap_GV_I_bits32 *) outcast(vec))->data))[index]);
        #endif
}
static void bits32_set_element (cl_GV_inner<cl_I>* vec, uintL index, const cl_I& x)
{
        var uint32 xval = cl_I_to_UL(x);
        #if (intDsize==32)
        ((cl_heap_GV_I_bits32 *) outcast(vec))->data[index] = xval;
        #elif CL_CPU_BIG_ENDIAN_P
        var uintD* ptr = &((cl_heap_GV_I_bits32 *) outcast(vec))->data[index/(intDsize/32)];
        index = 32*(index%(intDsize/32));
        *ptr = *ptr ^ ((*ptr ^ ((uintD)xval << index)) & ((uintD)0xFFFFFFFF << index));
        #else
        // Optimization which assumes little-endian storage of uint32 in an uintD
        ((uint32*)(((cl_heap_GV_I_bits32 *) outcast(vec))->data))[index] = xval;
        #endif
}


static cl_GV_I_vectorops* bits_vectorops[6] = {
        &bits1_vectorops,
        &bits2_vectorops,
        &bits4_vectorops,
        &bits8_vectorops,
        &bits16_vectorops,
        &bits32_vectorops
};

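// Allocate a packed integer vector of length len whose entries will satisfy
// 0 <= x < 2^m. m is rounded up to the next power of two in {1,2,4,8,16,32}
// (index log2_bits into bits_vectorops[]); any other m falls back to a
// general integer vector. All data words start out zeroed.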
cl_heap_GV_I* cl_make_heap_GV_I (uintL len, sintL m)
{
        // Determine log2(bits).
        var uintL log2_bits;
        switch (m) {
                case 0: case 1:
                        log2_bits = 0; break;
                case 2:
                        log2_bits = 1; break;
                case 3: case 4:
                        log2_bits = 2; break;
                case 5: case 6: case 7: case 8:
                        log2_bits = 3; break;
                case 9: case 10: case 11: case 12:
                case 13: case 14: case 15: case 16:
                        log2_bits = 4; break;
                case 17: case 18: case 19: case 20:
                case 21: case 22: case 23: case 24:
                case 25: case 26: case 27: case 28:
                case 29: case 30: case 31: case 32:
                        log2_bits = 5; break;
                default:
                        return cl_make_heap_GV_I(len);
        }
        // For room allocation purposes, be pessimistic: assume the uintD case (since intDsize>=32).
        var uintL words = // ceiling(len*2^log2_bits,intDsize)
          (((sintL)len-1)>>(log2_intDsize-log2_bits))+1;
        var cl_heap_GV_I_bits32* hv = (cl_heap_GV_I_bits32*) cl_malloc_hook(offsetofa(cl_heap_GV_I_bits32,data)+sizeof(uintD)*words);
        hv->refcount = 1;
        hv->type = &cl_class_gvector_integer;
        new (&hv->v) cl_GV_inner<cl_I> (len,&bits_vectorops[log2_bits]->ops);
        var uintD* ptr = (uintD*)(hv->data);
        for (var uintL i = 0; i < words; i++)
                ptr[i] = 0;
        return (cl_heap_GV_I*) hv;
}


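// maxbits() returns the number of bits reserved per entry (1,2,4,8,16 or 32),
// i.e. every entry satisfies 0 <= x < 2^maxbits(); for general integer
// vectors it returns -1.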
sintL cl_heap_GV_I::maxbits () const
{
        return outcast(v.vectorops)->m;
}


// An empty vector.
const cl_GV_I cl_null_GV_I = cl_GV_I((uintL)0);

CL_PROVIDE_END(cl_GV_I)