aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Griesemer <gri@golang.org>2010-04-30 11:53:37 -0700
committerRobert Griesemer <gri@golang.org>2010-04-30 11:53:37 -0700
commitf78b09e6734ef69f3acfbf4defdaecc000466f6b (patch)
treef524147d19787ec834622cd716ca6b73a8c8578d
parent2b0a30c4b5db7a88b3cdd403bffe701fee6e094b (diff)
downloadgo-f78b09e6734ef69f3acfbf4defdaecc000466f6b.tar.gz
go-f78b09e6734ef69f3acfbf4defdaecc000466f6b.zip
big: assembly routines for 386 long shifts
R=rsc CC=golang-dev https://golang.org/cl/974043
-rw-r--r--src/pkg/big/arith_386.s99
1 files changed, 67 insertions, 32 deletions
diff --git a/src/pkg/big/arith_386.s b/src/pkg/big/arith_386.s
index 83398db944..2887ccc3e5 100644
--- a/src/pkg/big/arith_386.s
+++ b/src/pkg/big/arith_386.s
@@ -11,8 +11,8 @@ TEXT ·addVV(SB),7,$0
MOVL x+4(FP), SI
MOVL y+8(FP), CX
MOVL n+12(FP), BP
- MOVL $0, BX // i = 0
- MOVL $0, DX // c = 0
+ MOVL $0, BX // i = 0
+ MOVL $0, DX // c = 0
JMP E1
L1: MOVL (SI)(BX*4), AX
@@ -20,7 +20,7 @@ L1: MOVL (SI)(BX*4), AX
ADCL (CX)(BX*4), AX
RCLL $1, DX
MOVL AX, (DI)(BX*4)
- ADDL $1, BX // i++
+ ADDL $1, BX // i++
E1: CMPL BX, BP // i < n
JL L1
@@ -36,8 +36,8 @@ TEXT ·subVV(SB),7,$0
MOVL x+4(FP), SI
MOVL y+8(FP), CX
MOVL n+12(FP), BP
- MOVL $0, BX // i = 0
- MOVL $0, DX // c = 0
+ MOVL $0, BX // i = 0
+ MOVL $0, DX // c = 0
JMP E2
L2: MOVL (SI)(BX*4), AX
@@ -45,9 +45,9 @@ L2: MOVL (SI)(BX*4), AX
SBBL (CX)(BX*4), AX
RCLL $1, DX
MOVL AX, (DI)(BX*4)
- ADDL $1, BX // i++
+ ADDL $1, BX // i++
-E2: CMPL BX, BP // i < n
+E2: CMPL BX, BP // i < n
JL L2
MOVL DX, c+16(FP)
@@ -58,18 +58,18 @@ E2: CMPL BX, BP // i < n
TEXT ·addVW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+4(FP), SI
- MOVL y+8(FP), AX // c = y
+ MOVL y+8(FP), AX // c = y
MOVL n+12(FP), BP
- MOVL $0, BX // i = 0
+ MOVL $0, BX // i = 0
JMP E3
L3: ADDL (SI)(BX*4), AX
MOVL AX, (DI)(BX*4)
RCLL $1, AX
ANDL $1, AX
- ADDL $1, BX // i++
+ ADDL $1, BX // i++
-E3: CMPL BX, BP // i < n
+E3: CMPL BX, BP // i < n
JL L3
MOVL AX, c+16(FP)
@@ -80,9 +80,9 @@ E3: CMPL BX, BP // i < n
TEXT ·subVW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+4(FP), SI
- MOVL y+8(FP), AX // c = y
+ MOVL y+8(FP), AX // c = y
MOVL n+12(FP), BP
- MOVL $0, BX // i = 0
+ MOVL $0, BX // i = 0
JMP E4
L4: MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL?
@@ -90,9 +90,9 @@ L4: MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL?
MOVL DX, (DI)(BX*4)
RCLL $1, AX
ANDL $1, AX
- ADDL $1, BX // i++
+ ADDL $1, BX // i++
-E4: CMPL BX, BP // i < n
+E4: CMPL BX, BP // i < n
JL L4
MOVL AX, c+16(FP)
@@ -100,17 +100,52 @@ E4: CMPL BX, BP // i < n
// func shlVW(z, x *Word, s Word, n int) (c Word)
-// TODO(gri) implement this routine
TEXT ·shlVW(SB),7,$0
- NOP // work around bug in linker
- JMP ·shlVW_g(SB)
+ MOVL z+0(FP), DI
+ MOVL x+4(FP), SI
+ MOVL s+8(FP), CX
+ MOVL n+12(FP), BP
+ MOVL $0, AX // c = 0
+ MOVL $0, BX // i = 0
+ JMP E8
+
+L8: MOVL (SI)(BX*8), DX
+ SHLL CX, DX:AX
+ MOVL DX, (DI)(BX*8)
+ MOVL (SI)(BX*8), AX // reload (not enough regs to save original DX)
+ ADDL $1, BX // i++
+
+E8: CMPL BX, BP // i < n
+ JL L8
+
+ MOVL $0, DX
+ SHLL CX, DX:AX
+ MOVL DX, c+16(FP)
+ RET
// func shrVW(z, x *Word, s Word, n int) (c Word)
-// TODO(gri) implement this routine
TEXT ·shrVW(SB),7,$0
- NOP // work around bug in linker
- JMP ·shrVW_g(SB)
+ MOVL z+0(FP), DI
+ MOVL x+4(FP), SI
+ MOVL s+8(FP), CX
+ MOVL n+12(FP), BX // i = n
+ MOVL $0, AX // c = 0
+ JMP E9
+
+L9: MOVL (SI)(BX*8), DX
+ MOVL DX, BP
+ SHRL CX, DX:AX
+ MOVL DX, (DI)(BX*8)
+ MOVL BP, AX
+
+E9: SUBL $1, BX // i--
+ JGE L9
+
+ MOVL $0, DX
+ SHRL CX, DX:AX
+ MOVL DX, c+16(FP)
+ RET
// func mulAddVWW(z, x *Word, y, r Word, n int) (c Word)
@@ -118,7 +153,7 @@ TEXT ·mulAddVWW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+4(FP), SI
MOVL y+8(FP), BP
- MOVL r+12(FP), CX // c = r
+ MOVL r+12(FP), CX // c = r
MOVL n+16(FP), BX
LEAL (SI)(BX*4), SI
LEAL (DI)(BX*4), DI
@@ -131,9 +166,9 @@ L5: MOVL (SI)(BX*4), AX
ADCL $0, DX
MOVL AX, (DI)(BX*4)
MOVL DX, CX
- ADDL $1, BX // i++
+ ADDL $1, BX // i++
-E5: CMPL BX, $0 // i < 0
+E5: CMPL BX, $0 // i < 0
JL L5
MOVL CX, c+20(FP)
@@ -148,8 +183,8 @@ TEXT ·addMulVVW(SB),7,$0
MOVL n+12(FP), BX
LEAL (SI)(BX*4), SI
LEAL (DI)(BX*4), DI
- NEGL BX // i = -n
- MOVL $0, CX // c = 0
+ NEGL BX // i = -n
+ MOVL $0, CX // c = 0
JMP E6
L6: MOVL (SI)(BX*4), AX
@@ -160,9 +195,9 @@ L6: MOVL (SI)(BX*4), AX
ADCL $0, DX
MOVL AX, (DI)(BX*4)
MOVL DX, CX
- ADDL $1, BX // i++
+ ADDL $1, BX // i++
-E6: CMPL BX, $0 // i < 0
+E6: CMPL BX, $0 // i < 0
JL L6
MOVL CX, c+16(FP)
@@ -172,18 +207,18 @@ E6: CMPL BX, $0 // i < 0
// divWVW(z* Word, xn Word, x *Word, y Word, n int) (r Word)
TEXT ·divWVW(SB),7,$0
MOVL z+0(FP), DI
- MOVL xn+4(FP), DX // r = xn
+ MOVL xn+4(FP), DX // r = xn
MOVL x+8(FP), SI
MOVL y+12(FP), CX
- MOVL n+16(FP), BX // i = n
+ MOVL n+16(FP), BX // i = n
JMP E7
L7: MOVL (SI)(BX*4), AX
DIVL CX
MOVL AX, (DI)(BX*4)
-E7: SUBL $1, BX // i--
- JGE L7 // i >= 0
+E7: SUBL $1, BX // i--
+ JGE L7 // i >= 0
MOVL DX, r+20(FP)
RET