diff options
author | Robert Griesemer <gri@golang.org> | 2010-04-30 11:53:37 -0700 |
---|---|---|
committer | Robert Griesemer <gri@golang.org> | 2010-04-30 11:53:37 -0700 |
commit | f78b09e6734ef69f3acfbf4defdaecc000466f6b (patch) | |
tree | f524147d19787ec834622cd716ca6b73a8c8578d | |
parent | 2b0a30c4b5db7a88b3cdd403bffe701fee6e094b (diff) | |
download | go-f78b09e6734ef69f3acfbf4defdaecc000466f6b.tar.gz go-f78b09e6734ef69f3acfbf4defdaecc000466f6b.zip |
big: assembly routines for 386 long shifts
R=rsc
CC=golang-dev
https://golang.org/cl/974043
-rw-r--r-- | src/pkg/big/arith_386.s | 99 |
1 files changed, 67 insertions, 32 deletions
diff --git a/src/pkg/big/arith_386.s b/src/pkg/big/arith_386.s index 83398db944..2887ccc3e5 100644 --- a/src/pkg/big/arith_386.s +++ b/src/pkg/big/arith_386.s @@ -11,8 +11,8 @@ TEXT ·addVV(SB),7,$0 MOVL x+4(FP), SI MOVL y+8(FP), CX MOVL n+12(FP), BP - MOVL $0, BX // i = 0 - MOVL $0, DX // c = 0 + MOVL $0, BX // i = 0 + MOVL $0, DX // c = 0 JMP E1 L1: MOVL (SI)(BX*4), AX @@ -20,7 +20,7 @@ L1: MOVL (SI)(BX*4), AX ADCL (CX)(BX*4), AX RCLL $1, DX MOVL AX, (DI)(BX*4) - ADDL $1, BX // i++ + ADDL $1, BX // i++ E1: CMPL BX, BP // i < n JL L1 @@ -36,8 +36,8 @@ TEXT ·subVV(SB),7,$0 MOVL x+4(FP), SI MOVL y+8(FP), CX MOVL n+12(FP), BP - MOVL $0, BX // i = 0 - MOVL $0, DX // c = 0 + MOVL $0, BX // i = 0 + MOVL $0, DX // c = 0 JMP E2 L2: MOVL (SI)(BX*4), AX @@ -45,9 +45,9 @@ L2: MOVL (SI)(BX*4), AX SBBL (CX)(BX*4), AX RCLL $1, DX MOVL AX, (DI)(BX*4) - ADDL $1, BX // i++ + ADDL $1, BX // i++ -E2: CMPL BX, BP // i < n +E2: CMPL BX, BP // i < n JL L2 MOVL DX, c+16(FP) @@ -58,18 +58,18 @@ E2: CMPL BX, BP // i < n TEXT ·addVW(SB),7,$0 MOVL z+0(FP), DI MOVL x+4(FP), SI - MOVL y+8(FP), AX // c = y + MOVL y+8(FP), AX // c = y MOVL n+12(FP), BP - MOVL $0, BX // i = 0 + MOVL $0, BX // i = 0 JMP E3 L3: ADDL (SI)(BX*4), AX MOVL AX, (DI)(BX*4) RCLL $1, AX ANDL $1, AX - ADDL $1, BX // i++ + ADDL $1, BX // i++ -E3: CMPL BX, BP // i < n +E3: CMPL BX, BP // i < n JL L3 MOVL AX, c+16(FP) @@ -80,9 +80,9 @@ E3: CMPL BX, BP // i < n TEXT ·subVW(SB),7,$0 MOVL z+0(FP), DI MOVL x+4(FP), SI - MOVL y+8(FP), AX // c = y + MOVL y+8(FP), AX // c = y MOVL n+12(FP), BP - MOVL $0, BX // i = 0 + MOVL $0, BX // i = 0 JMP E4 L4: MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL? @@ -90,9 +90,9 @@ L4: MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL? MOVL DX, (DI)(BX*4) RCLL $1, AX ANDL $1, AX - ADDL $1, BX // i++ + ADDL $1, BX // i++ -E4: CMPL BX, BP // i < n +E4: CMPL BX, BP // i < n JL L4 MOVL AX, c+16(FP) @@ -100,17 +100,52 @@ E4: CMPL BX, BP // i < n // func shlVW(z, x *Word, s Word, n int) (c Word) -// TODO(gri) implement this routine TEXT ·shlVW(SB),7,$0 - NOP // work around bug in linker - JMP ·shlVW_g(SB) + MOVL z+0(FP), DI + MOVL x+4(FP), SI + MOVL s+8(FP), CX + MOVL n+12(FP), BP + MOVL $0, AX // c = 0 + MOVL $0, BX // i = 0 + JMP E8 + +L8: MOVL (SI)(BX*8), DX + SHLL CX, DX:AX + MOVL DX, (DI)(BX*8) + MOVL (SI)(BX*8), AX // reload (not enough regs to save original DX) + ADDL $1, BX // i++ + +E8: CMPL BX, BP // i < n + JL L8 + + MOVL $0, DX + SHLL CX, DX:AX + MOVL DX, c+16(FP) + RET // func shrVW(z, x *Word, s Word, n int) (c Word) -// TODO(gri) implement this routine TEXT ·shrVW(SB),7,$0 - NOP // work around bug in linker - JMP ·shrVW_g(SB) + MOVL z+0(FP), DI + MOVL x+4(FP), SI + MOVL s+8(FP), CX + MOVL n+12(FP), BX // i = n + MOVL $0, AX // c = 0 + JMP E9 + +L9: MOVL (SI)(BX*8), DX + MOVL DX, BP + SHRL CX, DX:AX + MOVL DX, (DI)(BX*8) + MOVL BP, AX + +E9: SUBL $1, BX // i-- + JGE L9 + + MOVL $0, DX + SHRL CX, DX:AX + MOVL DX, c+16(FP) + RET // func mulAddVWW(z, x *Word, y, r Word, n int) (c Word) @@ -118,7 +153,7 @@ TEXT ·mulAddVWW(SB),7,$0 MOVL z+0(FP), DI MOVL x+4(FP), SI MOVL y+8(FP), BP - MOVL r+12(FP), CX // c = r + MOVL r+12(FP), CX // c = r MOVL n+16(FP), BX LEAL (SI)(BX*4), SI LEAL (DI)(BX*4), DI @@ -131,9 +166,9 @@ L5: MOVL (SI)(BX*4), AX ADCL $0, DX MOVL AX, (DI)(BX*4) MOVL DX, CX - ADDL $1, BX // i++ + ADDL $1, BX // i++ -E5: CMPL BX, $0 // i < 0 +E5: CMPL BX, $0 // i < 0 JL L5 MOVL CX, c+20(FP) @@ -148,8 +183,8 @@ TEXT ·addMulVVW(SB),7,$0 MOVL n+12(FP), BX LEAL (SI)(BX*4), SI LEAL (DI)(BX*4), DI - NEGL BX // i = -n - MOVL $0, CX // c = 0 + NEGL BX // i = -n + MOVL $0, CX // c = 0 JMP E6 L6: MOVL (SI)(BX*4), AX @@ -160,9 +195,9 @@ L6: MOVL (SI)(BX*4), AX ADCL $0, DX MOVL AX, (DI)(BX*4) MOVL DX, CX - ADDL $1, BX // i++ + ADDL $1, BX // i++ -E6: CMPL BX, $0 // i < 0 +E6: CMPL BX, $0 // i < 0 JL L6 MOVL CX, c+16(FP) @@ -172,18 +207,18 @@ E6: CMPL BX, $0 // i < 0 // divWVW(z* Word, xn Word, x *Word, y Word, n int) (r Word) TEXT ·divWVW(SB),7,$0 MOVL z+0(FP), DI - MOVL xn+4(FP), DX // r = xn + MOVL xn+4(FP), DX // r = xn MOVL x+8(FP), SI MOVL y+12(FP), CX - MOVL n+16(FP), BX // i = n + MOVL n+16(FP), BX // i = n JMP E7 L7: MOVL (SI)(BX*4), AX DIVL CX MOVL AX, (DI)(BX*4) -E7: SUBL $1, BX // i-- - JGE L7 // i >= 0 +E7: SUBL $1, BX // i-- + JGE L7 // i >= 0 MOVL DX, r+20(FP) RET |