From: Richard Kreckel Date: Wed, 27 Apr 2022 09:21:34 +0000 (+0200) Subject: Remove ARM asm implementation of divu_6432_3232_(). X-Git-Tag: cln_1-3-7~7 X-Git-Url: https://www.ginac.de/CLN/cln.git//cln.git?a=commitdiff_plain;h=b4d44895b6c6153c87d290abadc7088e37fbea49;p=cln.git Remove ARM asm implementation of divu_6432_3232_(). --- diff --git a/src/base/cl_low.h b/src/base/cl_low.h index d1ab041..8ee7d39 100644 --- a/src/base/cl_low.h +++ b/src/base/cl_low.h @@ -693,11 +693,7 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) // < uint32 q: floor(x/y) // < uint32 r: x mod y // < x = q*y+r -#if defined(__GNUC__) && defined(__arm__) && !defined(NO_ASM) && 0 - extern "C" uint64 divu_6432_3232_ (uint32 xhi, uint32 xlo, uint32 y); // -> Quotient q -#else - extern "C" uint32 divu_6432_3232_ (uint32 xhi, uint32 xlo, uint32 y); // -> Quotient q -#endif +extern "C" uint32 divu_6432_3232_ (uint32 xhi, uint32 xlo, uint32 y); // -> Quotient q #ifdef _MSC_VER // Workaround MSVC compiler bug. } extern "C" uint32 divu_32_rest; namespace cln { // -> Rest r @@ -742,12 +738,6 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) var uint32 _r __asm__("%g1"); \ cl_unused (q_zuweisung _q); r_zuweisung _r; \ }) -#elif defined(__GNUC__) && defined(__arm__) && !defined(NO_ASM) && 0 - #define divu_6432_3232(xhi,xlo,y,q_zuweisung,r_zuweisung) \ - ({ var uint64 _q = divu_6432_3232_(xhi,xlo,y); /* extern in Assembler */\ - q_zuweisung retval64_r0(_q); \ - r_zuweisung retval64_r1(_q); \ - }) #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && !defined(NO_ASM) #define divu_6432_3232(xhi,xlo,y,q_zuweisung,r_zuweisung) \ ({var uint32 __xhi = (xhi); \ @@ -779,7 +769,7 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) #else #define divu_6432_3232(xhi,xlo,y,q_zuweisung,r_zuweisung) \ { cl_unused (q_zuweisung divu_6432_3232_(xhi,xlo,y)); r_zuweisung divu_32_rest; } - #if (defined(__m68k__) || defined(__sparc__) || defined(__sparc64__) || (defined(__arm__) && 0) || (defined(__i386__) && !defined(MICROSOFT)) || defined(__x86_64__) || defined(__hppa__)) && !defined(NO_ASM) + #if (defined(__m68k__) || defined(__sparc__) || defined(__sparc64__) || (defined(__i386__) && !defined(MICROSOFT)) || defined(__x86_64__) || defined(__hppa__)) && !defined(NO_ASM) // divu_6432_3232_ extern in Assembler #if defined(__sparc__) || defined(__sparc64__) extern "C" uint32 _get_g1 (void); diff --git a/src/base/digitseq/cl_asm_arm_.cc b/src/base/digitseq/cl_asm_arm_.cc index 769aeb7..8941e27 100644 --- a/src/base/digitseq/cl_asm_arm_.cc +++ b/src/base/digitseq/cl_asm_arm_.cc @@ -139,104 +139,6 @@ GLABEL(divu_3216_1616_) MOV a1,a1,LSR#16 // and back down again BX lr -#if 0 -// extern uint32 divu_6432_3232_ (uint32 xhi, uint32 xlo, uint32 y); // -> Quotient q -// extern uint32 divu_32_rest; // -> Rest r -// see cl_low_div.cc for algorithm -// entry -// a1 = xhi (dividend) -// a2 = xlo (dividend) -// a3 = y (divisor) -// exit -// a1 = 32 bit quotient -// a2 = 32 bit remainder -// a3, a4 destroyed - EXPORT(divu_6432_3232_) - DECLARE_FUNCTION(divu_6432_3232_) -GLABEL(divu_6432_3232_) - STMFD sp!, {v1,v2,v3,v4,v5,v6,lr} - MOV v2, a2 // = xlo - MOV v1, a3 // = y - CMP a3,#0x10000 // y <= (uint32)(bit(16)-1) - BCS divu_6432_3232_l1 - MOV a2, v2, LSR #16 - ORR a1, a2, a1, ASL #16 // = highlow32(low16(xhi),high16(xlo)) - MOV a2, v1 - BL C(divu_3216_1616_) - MOV v3, a1 // = q1 - MOV a1, v2, ASL #16 - MOV a1, a1, LSR #16 - ORR a1, a1, a2, ASL #16 // = highlow32(r1,low16(xlo)) - MOV a2, v1 - BL C(divu_3216_1616_) - ORR a1, a1, v3, ASL #16 // = highlow32(q1,q0) - LDMFD sp!, {v1,v2,v3,v4,v5,v6,pc} - -LABEL(divu_6432_3232_l1) - MOV v3, #0 // s = 0 - MOVS a4, v1, LSR #16 // while ((sint32)y >= 0) - ADDEQ v3, v3, #16 // { y = y<<1; s++; } - MOVEQ v1, v1, ASL #16 - MOVS a4, v1, LSR #24 - ADDEQ v3, v3, #8 - MOVEQ v1, v1, ASL #8 - MOVS a4, v1, LSR #28 - ADDEQ v3, v3, #4 - MOVEQ v1, v1, ASL #4 - MOVS a4, v1, LSR #30 - ADDEQ v3, v3, #2 - MOVEQ v1, v1, ASL #2 - MOVS a4, v1, LSR #31 - ADDEQ v3, v3, #1 - MOVEQ v1, v1, ASL #1 - - CMP v3, #0 - MOVNE a2, a1, ASL v3 // if (!(s==0)) - RSBNE a1, v3, #32 // { xhi = (xhi << s) - ORRNE a1, a2, v2, LSR a1 // | (xlo >> (32-s)); - MOVNE v2, v2, ASL v3 // xlo = xlo << s; } - ADD a2, v1, #0x10000 // y1_1 = high16(y)+1 - MOVS v5, a2, LSR #16 // if (y1_1 = 0) - MOVEQ v4, a1, ASL #16 // r16 = low16(xhi) * 2^16 - MOVEQ a1, a1, LSR #16 // q1 = high16(xhi) - MOVNE a2, v5 - BLNE C(divu_3216_1616_) // divu_3216_1616(xhi,y1_1, q1=,r16=) - MOVNE v4, a2, ASL #16 // r16 = r16 * 2^16 - ORR v4, v4, v2, LSR #16 // r = highlow32(r16,high16(xlo)) - MOV a4, v1, ASL #16 // tmp = mulu16(low16(y),q1) - MOV a4, a4, LSR #16 - MUL a3, a4, a1 - RSB a3, a3, a1, ASL #16 // r2 = highlow32_0(q1) - tmp - MOV v6, a1 // = q1 - ADDS a1, v4, a3 // r += r2 - ADDCS v6, v6, #1 // if ( r < r2 ) { q1 += 1 - SUBCS a1, a1, v1 // r -= y } - CMP a1, v1 // if (r >= y) - ADDCS v6, v6, #1 // { q1 += 1 - SUBCS a1, a1, v1 // r -= y } - CMP v5, #0 // if (y1_1 = 0) - MOVEQ v4, a1, ASL #16 // { r16 = low16(r) * 2^16 - MOVEQ a1, a1, LSR #16 // q0 = high16(r) } - MOVNE a2, v5 - BLNE C(divu_3216_1616_) // divu_3216_1616(r,y1_1, q0=,r16=) - MOVNE v4, a2, ASL #16 // r16 = r16 * 2^16 - MOV v2, v2, ASL #16 - ORR v4, v4, v2, LSR #16 // r = highlow32(r16,low16(xlo)) - MOV a4, v1, ASL #16 // tmp = mulu16(low16(y),q0) - MOV a4, a4, LSR #16 - MUL a3, a4, a1 - RSB a3, a3, a1, ASL #16 // r2 = highlow32_0(q0) - tmp - ADDS v4, v4, a3 // r += r2 - ADDCS a1, a1, #1 // if ( r < r2 ) { q0 += 1 - SUBCS v4, v4, v1 // r -= y } - CMP v4, v1 // if (r >= y) - ADDCS a1, a1, #1 // { q0 += 1 - SUBCS v4, v4, v1 // r -= y } - MOV a2, v4, LSR v3 // remainder = r >> s - ORR a1, a1, v6, ASL #16 // return highlow32(q1,q0) - LDMFD sp!, {v1,v2,v3,v4,v5,v6,pc} -#endif - // extern uintD* copy_loop_up (uintD* sourceptr, uintD* destptr, uintC count); // entry // a1 = source pointer