aboutsummaryrefslogtreecommitdiff
path: root/src/math/big
diff options
context:
space:
mode:
Diffstat (limited to 'src/math/big')
-rw-r--r--src/math/big/arith_arm64.s113
-rw-r--r--src/math/big/arith_test.go106
-rw-r--r--src/math/big/example_test.go7
-rw-r--r--src/math/big/float.go7
4 files changed, 205 insertions, 28 deletions
diff --git a/src/math/big/arith_arm64.s b/src/math/big/arith_arm64.s
index 18e513e2c3..da6e408e19 100644
--- a/src/math/big/arith_arm64.s
+++ b/src/math/big/arith_arm64.s
@@ -109,13 +109,59 @@ done:
MOVD R0, c+72(FP)
RET
+#define vwOneOp(instr, op1) \
+ MOVD.P 8(R1), R4; \
+ instr op1, R4; \
+ MOVD.P R4, 8(R3);
+
+// handle the first 1~4 elements before starting iteration in addVW/subVW
+#define vwPreIter(instr1, instr2, counter, target) \
+ vwOneOp(instr1, R2); \
+ SUB $1, counter; \
+ CBZ counter, target; \
+ vwOneOp(instr2, $0); \
+ SUB $1, counter; \
+ CBZ counter, target; \
+ vwOneOp(instr2, $0); \
+ SUB $1, counter; \
+ CBZ counter, target; \
+ vwOneOp(instr2, $0);
+
+// do one iteration of add or sub in addVW/subVW
+#define vwOneIter(instr, counter, exit) \
+ CBZ counter, exit; \ // careful not to touch the carry flag
+ LDP.P 32(R1), (R4, R5); \
+ LDP -16(R1), (R6, R7); \
+ instr $0, R4, R8; \
+ instr $0, R5, R9; \
+ instr $0, R6, R10; \
+ instr $0, R7, R11; \
+ STP.P (R8, R9), 32(R3); \
+ STP (R10, R11), -16(R3); \
+ SUB $4, counter;
+
+// do one iteration of copy in addVW/subVW
+#define vwOneIterCopy(counter, exit) \
+ CBZ counter, exit; \
+ LDP.P 32(R1), (R4, R5); \
+ LDP -16(R1), (R6, R7); \
+ STP.P (R4, R5), 32(R3); \
+ STP (R6, R7), -16(R3); \
+ SUB $4, counter;
// func addVW(z, x []Word, y Word) (c Word)
+// The 'large' branch handles large 'z'. It checks the carry flag on every iteration
+// and switches to copy if we are done with carries. The copying is skipped as well
+// if 'x' and 'z' happen to share the same underlying storage.
+// The overhead of the checking and branching is visible when 'z' are small (~5%),
+// so set a threshold of 32, and remain the small-sized part entirely untouched.
TEXT ·addVW(SB),NOSPLIT,$0
MOVD z+0(FP), R3
MOVD z_len+8(FP), R0
MOVD x+24(FP), R1
MOVD y+48(FP), R2
+ CMP $32, R0
+ BGE large // large-sized 'z' and 'x'
CBZ R0, len0 // the length of z is 0
MOVD.P 8(R1), R4
ADDS R2, R4 // z[0] = x[0] + y, set carry
@@ -135,29 +181,46 @@ two: // do it twice
STP.P (R8, R9), 16(R3)
SUB $2, R0
loop: // do four times per round
- CBZ R0, len1 // careful not to touch the carry flag
- LDP.P 32(R1), (R4, R5)
- LDP -16(R1), (R6, R7)
- ADCS $0, R4, R8
- ADCS $0, R5, R9
- ADCS $0, R6, R10
- ADCS $0, R7, R11
- STP.P (R8, R9), 32(R3)
- STP (R10, R11), -16(R3)
- SUB $4, R0
+ vwOneIter(ADCS, R0, len1)
B loop
len1:
CSET HS, R2 // extract carry flag
len0:
MOVD R2, c+56(FP)
+done:
RET
+large:
+ AND $0x3, R0, R10
+ AND $~0x3, R0
+ // unrolling for the first 1~4 elements to avoid saving the carry
+ // flag in each step, adjust $R0 if we unrolled 4 elements
+ vwPreIter(ADDS, ADCS, R10, add4)
+ SUB $4, R0
+add4:
+ BCC copy
+ vwOneIter(ADCS, R0, len1)
+ B add4
+copy:
+ MOVD ZR, c+56(FP)
+ CMP R1, R3
+ BEQ done
+copy_4: // no carry flag, copy the rest
+ vwOneIterCopy(R0, done)
+ B copy_4
// func subVW(z, x []Word, y Word) (c Word)
+// The 'large' branch handles large 'z'. It checks the carry flag on every iteration
+// and switches to copy if we are done with carries. The copying is skipped as well
+// if 'x' and 'z' happen to share the same underlying storage.
+// The overhead of the checking and branching is visible when 'z' are small (~5%),
+// so set a threshold of 32, and remain the small-sized part entirely untouched.
TEXT ·subVW(SB),NOSPLIT,$0
MOVD z+0(FP), R3
MOVD z_len+8(FP), R0
MOVD x+24(FP), R1
MOVD y+48(FP), R2
+ CMP $32, R0
+ BGE large // large-sized 'z' and 'x'
CBZ R0, len0 // the length of z is 0
MOVD.P 8(R1), R4
SUBS R2, R4 // z[0] = x[0] - y, set carry
@@ -177,22 +240,32 @@ two: // do it twice
STP.P (R8, R9), 16(R3)
SUB $2, R0
loop: // do four times per round
- CBZ R0, len1 // careful not to touch the carry flag
- LDP.P 32(R1), (R4, R5)
- LDP -16(R1), (R6, R7)
- SBCS $0, R4, R8
- SBCS $0, R5, R9
- SBCS $0, R6, R10
- SBCS $0, R7, R11
- STP.P (R8, R9), 32(R3)
- STP (R10, R11), -16(R3)
- SUB $4, R0
+ vwOneIter(SBCS, R0, len1)
B loop
len1:
CSET LO, R2 // extract carry flag
len0:
MOVD R2, c+56(FP)
+done:
RET
+large:
+ AND $0x3, R0, R10
+ AND $~0x3, R0
+ // unrolling for the first 1~4 elements to avoid saving the carry
+ // flag in each step, adjust $R0 if we unrolled 4 elements
+ vwPreIter(SUBS, SBCS, R10, sub4)
+ SUB $4, R0
+sub4:
+ BCS copy
+ vwOneIter(SBCS, R0, len1)
+ B sub4
+copy:
+ MOVD ZR, c+56(FP)
+ CMP R1, R3
+ BEQ done
+copy_4: // no carry flag, copy the rest
+ vwOneIterCopy(R0, done)
+ B copy_4
// func shlVU(z, x []Word, s uint) (c Word)
// This implementation handles the shift operation from the high word to the low word,
diff --git a/src/math/big/arith_test.go b/src/math/big/arith_test.go
index 05136f1895..fc205934c5 100644
--- a/src/math/big/arith_test.go
+++ b/src/math/big/arith_test.go
@@ -179,6 +179,23 @@ func testFunVW(t *testing.T, msg string, f funVW, a argVW) {
}
}
+func testFunVWext(t *testing.T, msg string, f funVW, f_g funVW, a argVW) {
+ // using the result of addVW_g/subVW_g as golden
+ z_g := make(nat, len(a.z))
+ c_g := f_g(z_g, a.x, a.y)
+ c := f(a.z, a.x, a.y)
+
+ for i, zi := range a.z {
+ if zi != z_g[i] {
+ t.Errorf("%s\n\tgot z[%d] = %#x; want %#x", msg, i, zi, z_g[i])
+ break
+ }
+ }
+ if c != c_g {
+ t.Errorf("%s\n\tgot c = %#x; want %#x", msg, c, c_g)
+ }
+}
+
func makeFunVW(f func(z, x []Word, s uint) (c Word)) funVW {
return func(z, x []Word, s Word) (c Word) {
return f(z, x, uint(s))
@@ -213,6 +230,49 @@ func TestFunVW(t *testing.T) {
}
}
+// Construct a vector comprising the same word, usually '0' or 'maximum uint'
+func makeWordVec(e Word, n int) []Word {
+ v := make([]Word, n)
+ for i := range v {
+ v[i] = e
+ }
+ return v
+}
+
+// Extended testing to addVW and subVW using various kinds of input data.
+// We utilize the results of addVW_g and subVW_g as golden reference to check
+// correctness.
+func TestFunVWExt(t *testing.T) {
+ // 32 is the current threshold that triggers an optimized version of
+ // calculation for large-sized vector, ensure we have sizes around it tested.
+ var vwSizes = []int{0, 1, 3, 4, 5, 8, 9, 23, 31, 32, 33, 34, 35, 36, 50, 120}
+ for _, n := range vwSizes {
+ // vector of random numbers, using the result of addVW_g/subVW_g as golden
+ x := rndV(n)
+ y := rndW()
+ z := make(nat, n)
+ arg := argVW{z, x, y, 0}
+ testFunVWext(t, "addVW, random inputs", addVW, addVW_g, arg)
+ testFunVWext(t, "subVW, random inputs", subVW, subVW_g, arg)
+
+ // vector of random numbers, but make 'x' and 'z' share storage
+ arg = argVW{x, x, y, 0}
+ testFunVWext(t, "addVW, random inputs, sharing storage", addVW, addVW_g, arg)
+ testFunVWext(t, "subVW, random inputs, sharing storage", subVW, subVW_g, arg)
+
+ // vector of maximum uint, to force carry flag set in each 'add'
+ y = ^Word(0)
+ x = makeWordVec(y, n)
+ arg = argVW{z, x, y, 0}
+ testFunVWext(t, "addVW, vector of max uint", addVW, addVW_g, arg)
+
+ // vector of '0', to force carry flag set in each 'sub'
+ x = makeWordVec(0, n)
+ arg = argVW{z, x, 1, 0}
+ testFunVWext(t, "subVW, vector of zero", subVW, subVW_g, arg)
+ }
+}
+
type argVU struct {
d []Word // d is a Word slice, the input parameters x and z come from this array.
l uint // l is the length of the input parameters x and z.
@@ -241,20 +301,20 @@ var argshrVU = []argVU{
}
func testShiftFunc(t *testing.T, f func(z, x []Word, s uint) Word, a argVU) {
- // save a.d for error message, or it will be overwritten.
+ // work on copy of a.d to preserve the original data.
b := make([]Word, len(a.d))
copy(b, a.d)
- z := a.d[a.zp : a.zp+a.l]
- x := a.d[a.xp : a.xp+a.l]
+ z := b[a.zp : a.zp+a.l]
+ x := b[a.xp : a.xp+a.l]
c := f(z, x, a.s)
for i, zi := range z {
if zi != a.r[i] {
- t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot z[%d] = %#x; want %#x", b, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, i, zi, a.r[i])
+ t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot z[%d] = %#x; want %#x", a.d, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, i, zi, a.r[i])
break
}
}
if c != a.c {
- t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot c = %#x; want %#x", b, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, c, a.c)
+ t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot c = %#x; want %#x", a.d, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, c, a.c)
}
}
@@ -299,6 +359,24 @@ func BenchmarkAddVW(b *testing.B) {
}
}
+// Benchmarking addVW using vector of maximum uint to force carry flag set
+func BenchmarkAddVWext(b *testing.B) {
+ for _, n := range benchSizes {
+ if isRaceBuilder && n > 1e3 {
+ continue
+ }
+ y := ^Word(0)
+ x := makeWordVec(y, n)
+ z := make([]Word, n)
+ b.Run(fmt.Sprint(n), func(b *testing.B) {
+ b.SetBytes(int64(n * _S))
+ for i := 0; i < b.N; i++ {
+ addVW(z, x, y)
+ }
+ })
+ }
+}
+
func BenchmarkSubVW(b *testing.B) {
for _, n := range benchSizes {
if isRaceBuilder && n > 1e3 {
@@ -316,6 +394,24 @@ func BenchmarkSubVW(b *testing.B) {
}
}
+// Benchmarking subVW using vector of zero to force carry flag set
+func BenchmarkSubVWext(b *testing.B) {
+ for _, n := range benchSizes {
+ if isRaceBuilder && n > 1e3 {
+ continue
+ }
+ x := makeWordVec(0, n)
+ y := Word(1)
+ z := make([]Word, n)
+ b.Run(fmt.Sprint(n), func(b *testing.B) {
+ b.SetBytes(int64(n * _S))
+ for i := 0; i < b.N; i++ {
+ subVW(z, x, y)
+ }
+ })
+ }
+}
+
type funVWW func(z, x []Word, y, r Word) (c Word)
type argVWW struct {
z, x nat
diff --git a/src/math/big/example_test.go b/src/math/big/example_test.go
index cfc77351d4..31ca784154 100644
--- a/src/math/big/example_test.go
+++ b/src/math/big/example_test.go
@@ -25,6 +25,13 @@ func ExampleInt_SetString() {
// Output: 420
}
+func ExampleFloat_SetString() {
+ f := new(big.Float)
+ f.SetString("3.14159")
+ fmt.Println(f)
+ // Output: 3.14159
+}
+
func ExampleRat_Scan() {
// The Scan function is rarely used directly;
// the fmt package recognizes it as an implementation of fmt.Scanner.
diff --git a/src/math/big/float.go b/src/math/big/float.go
index da964eef3e..42050e2c39 100644
--- a/src/math/big/float.go
+++ b/src/math/big/float.go
@@ -322,10 +322,11 @@ func (z *Float) SetMantExp(mant *Float, exp int) *Float {
mant.validate()
}
z.Copy(mant)
- if z.form != finite {
- return z
+
+ if z.form == finite {
+ // 0 < |mant| < +Inf
+ z.setExpAndRound(int64(z.exp)+int64(exp), 0)
}
- z.setExpAndRound(int64(z.exp)+int64(exp), 0)
return z
}