From d58e9198b54f2d89c48478f02336560c14016cae Mon Sep 17 00:00:00 2001
From: Richard Kreckel <kreckel@ginac.de>
Date: Sun, 24 Apr 2022 01:14:14 +0200
Subject: [PATCH] Fix ARM implementation of mulusmall_loop_up(digit, ptr, len,
 newdigit)...

...by removing the shortcut for small 'digit' and calling
mulu32_64_vregs instead (which handles digits >= 2^16 fine).

The requirement that 'digit' be small (<= 36) is documented in cl_DS.h
but nowhere enforced. Indeed, it is clearly violated in the base-N
input function digits_to_I_baseN().

I am not sure what to do. This ARM asm seems to be the only code which
relies on small 'digit'. A future patch should either fix the input
method to guarantee that 'digit' is small or accept that it can be
large and remove the shortcut in mulusmall_loop_up for good (and also
fix the comments in cl_DS.h).
---
 src/base/digitseq/cl_asm_arm_.cc     | 13 ++++++++-----
 src/integer/conv/cl_I_from_digits.cc |  1 +
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/base/digitseq/cl_asm_arm_.cc b/src/base/digitseq/cl_asm_arm_.cc
index 0e23d9a..75ce5db 100644
--- a/src/base/digitseq/cl_asm_arm_.cc
+++ b/src/base/digitseq/cl_asm_arm_.cc
@@ -3278,12 +3278,15 @@ LABEL(mulusmall_loop_up_l1)
         MOV     a1,a4                   // return carry
         LDMFD   sp!,{v1,pc}
 #else
-        STMFD   sp!,{v1-v2,lr}
+        STMFD   sp!,{v1-v4,lr}
 LABEL(mulusmall_loop_up_l1)
         LDR     ip,[a2]
 
-//      BL      mulu32_64_vregs         // muluD(digit,*ptr,hi=,lo=)
-// replaced by multiplication of a small x = a1 and a big y = ip :
+#if 1
+        BL      mulu32_64_vregs         // muluD(digit,*ptr,hi=,lo=)
+#else
+// this code requires that digit is small, but this condition is violated in
+// digits_to_I_baseN(const char * MSBptr, uintC len, uintD base)
         MOV     v1,ip,LSR #16           // top half of y
         BIC     ip,ip,v1,LSL #16        // bottom half of y
         MUL     v2,a1,v1                // middle section of result
@@ -3291,14 +3294,14 @@ LABEL(mulusmall_loop_up_l1)
         MOV     ip,#0                   // high section of result
         ADDS    v1,v1,v2,LSL #16        // bottom 32 bits of result
         ADC     ip,ip,v2,LSR #16        // top 32 bits of result
-
+#endif
         ADDS    v1,v1,a4                // lo += carry
         ADC     a4,ip,#0                // if (lo<carry) { hi += 1 }; carry=hi
         STR     v1,[a2],#4              // *ptr++ = lo
         SUBS    a3,a3,#1                // len--
         BNE     mulusmall_loop_up_l1    // until len==0
         MOV     a1,a4                   // return carry
-        LDMFD   sp!,{v1-v2,pc}
+        LDMFD   sp!,{v1-v4,pc}
 #endif
 
 // extern void mulu_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
diff --git a/src/integer/conv/cl_I_from_digits.cc b/src/integer/conv/cl_I_from_digits.cc
index 681c51d..1819f5e 100644
--- a/src/integer/conv/cl_I_from_digits.cc
+++ b/src/integer/conv/cl_I_from_digits.cc
@@ -134,6 +134,7 @@ static const cl_I digits_to_I_baseN (const char * MSBptr, uintC len, uintD base)
 			chx++;
 			len--;
 		}
+		// FIXME: mulusmall_loop_up/down are documented to require a small factor (<= 36).
 		var uintD carry = mulusmall_loop_lsp(factor,erg_LSDptr,erg_len,newdigit);
 		if (carry!=0) {
 			// need to extend NUDS:
-- 
2.49.0