Diffstat (limited to 'src/runtime/internal/atomic')
24 files changed, 786 insertions, 118 deletions
diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s
index 9b9dc14a60..d82faef1f0 100644
--- a/src/runtime/internal/atomic/asm_386.s
+++ b/src/runtime/internal/atomic/asm_386.s
@@ -3,6 +3,7 @@
 // license that can be found in the LICENSE file.
 
 #include "textflag.h"
+#include "funcdata.h"
 
 // bool Cas(int32 *val, int32 old, int32 new)
 // Atomically:
@@ -11,7 +12,7 @@
 //	return 1;
 // }else
 //	return 0;
-TEXT runtime∕internal∕atomic·Cas(SB), NOSPLIT, $0-13
+TEXT ·Cas(SB), NOSPLIT, $0-13
 	MOVL	ptr+0(FP), BX
 	MOVL	old+4(FP), AX
 	MOVL	new+8(FP), CX
@@ -20,32 +21,31 @@ TEXT runtime∕internal∕atomic·Cas(SB), NOSPLIT, $0-13
 	SETEQ	ret+12(FP)
 	RET
 
-TEXT runtime∕internal∕atomic·Casuintptr(SB), NOSPLIT, $0-13
-	JMP	runtime∕internal∕atomic·Cas(SB)
+TEXT ·Casuintptr(SB), NOSPLIT, $0-13
+	JMP	·Cas(SB)
 
-TEXT runtime∕internal∕atomic·CasRel(SB), NOSPLIT, $0-13
-	JMP	runtime∕internal∕atomic·Cas(SB)
+TEXT ·CasRel(SB), NOSPLIT, $0-13
+	JMP	·Cas(SB)
 
-TEXT runtime∕internal∕atomic·Loaduintptr(SB), NOSPLIT, $0-8
-	JMP	runtime∕internal∕atomic·Load(SB)
+TEXT ·Loaduintptr(SB), NOSPLIT, $0-8
+	JMP	·Load(SB)
 
-TEXT runtime∕internal∕atomic·Loaduint(SB), NOSPLIT, $0-8
-	JMP	runtime∕internal∕atomic·Load(SB)
+TEXT ·Loaduint(SB), NOSPLIT, $0-8
+	JMP	·Load(SB)
 
-TEXT runtime∕internal∕atomic·Storeuintptr(SB), NOSPLIT, $0-8
-	JMP	runtime∕internal∕atomic·Store(SB)
-
-TEXT runtime∕internal∕atomic·Xadduintptr(SB), NOSPLIT, $0-12
-	JMP	runtime∕internal∕atomic·Xadd(SB)
+TEXT ·Storeuintptr(SB), NOSPLIT, $0-8
+	JMP	·Store(SB)
 
-TEXT runtime∕internal∕atomic·Loadint64(SB), NOSPLIT, $0-12
-	JMP	runtime∕internal∕atomic·Load64(SB)
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-12
+	JMP	·Xadd(SB)
 
-TEXT runtime∕internal∕atomic·Xaddint64(SB), NOSPLIT, $0-20
-	JMP	runtime∕internal∕atomic·Xadd64(SB)
+TEXT ·Loadint64(SB), NOSPLIT, $0-12
+	JMP	·Load64(SB)
 
+TEXT ·Xaddint64(SB), NOSPLIT, $0-20
+	JMP	·Xadd64(SB)
 
-// bool runtime∕internal∕atomic·Cas64(uint64 *val, uint64 old, uint64 new)
+// bool ·Cas64(uint64 *val, uint64 old, uint64 new)
 // Atomically:
 // if(*val == *old){
 //	*val = new;
@@ -53,11 +53,12 @@ TEXT runtime∕internal∕atomic·Xaddint64(SB), NOSPLIT, $0-20
 // } else {
 //	return 0;
 // }
-TEXT runtime∕internal∕atomic·Cas64(SB), NOSPLIT, $0-21
+TEXT ·Cas64(SB), NOSPLIT, $0-21
+	NO_LOCAL_POINTERS
 	MOVL	ptr+0(FP), BP
 	TESTL	$7, BP
 	JZ	2(PC)
-	MOVL	0, BP // crash with nil ptr deref
+	CALL	·panicUnaligned(SB)
 	MOVL	old_lo+4(FP), AX
 	MOVL	old_hi+8(FP), DX
 	MOVL	new_lo+12(FP), BX
@@ -74,7 +75,7 @@
 //	return 1;
 // }else
 //	return 0;
-TEXT runtime∕internal∕atomic·Casp1(SB), NOSPLIT, $0-13
+TEXT ·Casp1(SB), NOSPLIT, $0-13
 	MOVL	ptr+0(FP), BX
 	MOVL	old+4(FP), AX
 	MOVL	new+8(FP), CX
@@ -87,7 +88,7 @@ TEXT runtime∕internal∕atomic·Casp1(SB), NOSPLIT, $0-13
 // Atomically:
 //	*val += delta;
 //	return *val;
-TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-12
+TEXT ·Xadd(SB), NOSPLIT, $0-12
 	MOVL	ptr+0(FP), BX
 	MOVL	delta+4(FP), AX
 	MOVL	AX, CX
@@ -97,12 +98,13 @@ TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-12
 	MOVL	AX, ret+8(FP)
 	RET
 
-TEXT runtime∕internal∕atomic·Xadd64(SB), NOSPLIT, $0-20
+TEXT ·Xadd64(SB), NOSPLIT, $0-20
+	NO_LOCAL_POINTERS
 	// no XADDQ so use CMPXCHG8B loop
 	MOVL	ptr+0(FP), BP
 	TESTL	$7, BP
 	JZ	2(PC)
-	MOVL	0, AX // crash when unaligned
+	CALL	·panicUnaligned(SB)
 	// DI:SI = delta
 	MOVL	delta_lo+4(FP), SI
 	MOVL	delta_hi+8(FP), DI
@@ -133,22 +135,23 @@ addloop:
 	MOVL	CX, ret_hi+16(FP)
 	RET
 
-TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12
+TEXT ·Xchg(SB), NOSPLIT, $0-12
 	MOVL	ptr+0(FP), BX
 	MOVL	new+4(FP), AX
 	XCHGL	AX, 0(BX)
 	MOVL	AX, ret+8(FP)
 	RET
 
-TEXT runtime∕internal∕atomic·Xchguintptr(SB), NOSPLIT, $0-12
-	JMP	runtime∕internal∕atomic·Xchg(SB)
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-12
+	JMP	·Xchg(SB)
 
-TEXT runtime∕internal∕atomic·Xchg64(SB),NOSPLIT,$0-20
+TEXT ·Xchg64(SB),NOSPLIT,$0-20
+	NO_LOCAL_POINTERS
 	// no XCHGQ so use CMPXCHG8B loop
 	MOVL	ptr+0(FP), BP
 	TESTL	$7, BP
 	JZ	2(PC)
-	MOVL	0, AX // crash when unaligned
+	CALL	·panicUnaligned(SB)
 	// CX:BX = new
 	MOVL	new_lo+4(FP), BX
 	MOVL	new_hi+8(FP), CX
@@ -171,38 +174,43 @@ swaploop:
 	MOVL	DX, ret_hi+16(FP)
 	RET
 
-TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-8
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-8
 	MOVL	ptr+0(FP), BX
 	MOVL	val+4(FP), AX
 	XCHGL	AX, 0(BX)
 	RET
 
-TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-8
+TEXT ·Store(SB), NOSPLIT, $0-8
 	MOVL	ptr+0(FP), BX
 	MOVL	val+4(FP), AX
 	XCHGL	AX, 0(BX)
 	RET
 
-TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-8
+TEXT ·StoreRel(SB), NOSPLIT, $0-8
+	JMP	·Store(SB)
+
+TEXT runtime∕internal∕atomic·StoreReluintptr(SB), NOSPLIT, $0-8
 	JMP	runtime∕internal∕atomic·Store(SB)
 
 // uint64 atomicload64(uint64 volatile* addr);
-TEXT runtime∕internal∕atomic·Load64(SB), NOSPLIT, $0-12
+TEXT ·Load64(SB), NOSPLIT, $0-12
+	NO_LOCAL_POINTERS
 	MOVL	ptr+0(FP), AX
 	TESTL	$7, AX
 	JZ	2(PC)
-	MOVL	0, AX // crash with nil ptr deref
+	CALL	·panicUnaligned(SB)
 	MOVQ	(AX), M0
 	MOVQ	M0, ret+4(FP)
 	EMMS
 	RET
 
-// void runtime∕internal∕atomic·Store64(uint64 volatile* addr, uint64 v);
-TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-12
+// void ·Store64(uint64 volatile* addr, uint64 v);
+TEXT ·Store64(SB), NOSPLIT, $0-12
+	NO_LOCAL_POINTERS
 	MOVL	ptr+0(FP), AX
 	TESTL	$7, AX
 	JZ	2(PC)
-	MOVL	0, AX // crash with nil ptr deref
+	CALL	·panicUnaligned(SB)
 	// MOVQ and EMMS were introduced on the Pentium MMX.
 	MOVQ	val+4(FP), M0
 	MOVQ	M0, (AX)
@@ -214,24 +222,40 @@ TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-12
 	XADDL	AX, (SP)
 	RET
 
-// void runtime∕internal∕atomic·Or8(byte volatile*, byte);
-TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-5
+// void ·Or8(byte volatile*, byte);
+TEXT ·Or8(SB), NOSPLIT, $0-5
 	MOVL	ptr+0(FP), AX
 	MOVB	val+4(FP), BX
 	LOCK
 	ORB	BX, (AX)
 	RET
 
-// void runtime∕internal∕atomic·And8(byte volatile*, byte);
-TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-5
+// void ·And8(byte volatile*, byte);
+TEXT ·And8(SB), NOSPLIT, $0-5
 	MOVL	ptr+0(FP), AX
 	MOVB	val+4(FP), BX
 	LOCK
 	ANDB	BX, (AX)
 	RET
 
-TEXT runtime∕internal∕atomic·Store8(SB), NOSPLIT, $0-5
+TEXT ·Store8(SB), NOSPLIT, $0-5
 	MOVL	ptr+0(FP), BX
 	MOVB	val+4(FP), AX
 	XCHGB	AX, 0(BX)
 	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+	MOVL	ptr+0(FP), AX
+	MOVL	val+4(FP), BX
+	LOCK
+	ORL	BX, (AX)
+	RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+	MOVL	ptr+0(FP), AX
+	MOVL	val+4(FP), BX
+	LOCK
+	ANDL	BX, (AX)
+	RET
diff --git a/src/runtime/internal/atomic/asm_amd64.s b/src/runtime/internal/atomic/asm_amd64.s
index 90c56424c9..2cf7c55870 100644
--- a/src/runtime/internal/atomic/asm_amd64.s
+++ b/src/runtime/internal/atomic/asm_amd64.s
@@ -136,6 +136,12 @@ TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-12
 TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12
 	JMP	runtime∕internal∕atomic·Store(SB)
 
+TEXT runtime∕internal∕atomic·StoreRel64(SB), NOSPLIT, $0-16
+	JMP	runtime∕internal∕atomic·Store64(SB)
+
+TEXT runtime∕internal∕atomic·StoreReluintptr(SB), NOSPLIT, $0-16
+	JMP	runtime∕internal∕atomic·Store64(SB)
+
 TEXT runtime∕internal∕atomic·Store8(SB), NOSPLIT, $0-9
 	MOVQ	ptr+0(FP), BX
 	MOVB	val+8(FP), AX
@@ -163,3 +169,19 @@ TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9
 	LOCK
 	ANDB	BX, (AX)
 	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·Or(SB), NOSPLIT, $0-12
+	MOVQ	ptr+0(FP), AX
+	MOVL	val+8(FP), BX
+	LOCK
+	ORL	BX, (AX)
+	RET
+
+// func And(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·And(SB), NOSPLIT, $0-12
+	MOVQ	ptr+0(FP), AX
+	MOVL	val+8(FP), BX
+	LOCK
+	ANDL	BX, (AX)
+	RET
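Note: on 386 and amd64 the new And/Or entry points are a single LOCK-prefixed ANDL/ORL. Their contract is the same atomic read-modify-write that the arm port implements with a Cas loop in atomic_arm.go further down this diff. A minimal Go model of that contract, written against the public sync/atomic package rather than the runtime-internal one (an illustrative sketch, not the runtime's code):

    package main

    import (
    	"fmt"
    	"sync/atomic"
    )

    // orModel applies *addr |= v atomically, which is the contract of the
    // new runtime/internal/atomic.Or. The Cas loop mirrors the arm
    // fallback in this diff; on x86 the runtime does it in one LOCK ORL.
    func orModel(addr *uint32, v uint32) {
    	for {
    		old := atomic.LoadUint32(addr)
    		if atomic.CompareAndSwapUint32(addr, old, old|v) {
    			return
    		}
    	}
    }

    func main() {
    	var x uint32
    	orModel(&x, 0b1010)
    	fmt.Printf("%b\n", x) // 1010
    }

The Cas-loop form is correct everywhere but can retry under contention; the dedicated LOCK instruction never retries, which is what makes the per-architecture assembly worthwhile.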
diff --git a/src/runtime/internal/atomic/asm_arm.s b/src/runtime/internal/atomic/asm_arm.s
index d4ef11560e..274925ed60 100644
--- a/src/runtime/internal/atomic/asm_arm.s
+++ b/src/runtime/internal/atomic/asm_arm.s
@@ -3,6 +3,7 @@
 // license that can be found in the LICENSE file.
 
 #include "textflag.h"
+#include "funcdata.h"
 
 // bool armcas(int32 *val, int32 old, int32 new)
 // Atomically:
@@ -12,13 +13,13 @@
 //	}else
 //		return 0;
 //
-// To implement runtime∕internal∕atomic·cas in sys_$GOOS_arm.s
+// To implement ·cas in sys_$GOOS_arm.s
 // using the native instructions, use:
 //
-//	TEXT runtime∕internal∕atomic·cas(SB),NOSPLIT,$0
-//		B	runtime∕internal∕atomic·armcas(SB)
+//	TEXT ·cas(SB),NOSPLIT,$0
+//		B	·armcas(SB)
 //
-TEXT runtime∕internal∕atomic·armcas(SB),NOSPLIT,$0-13
+TEXT ·armcas(SB),NOSPLIT,$0-13
 	MOVW	ptr+0(FP), R1
 	MOVW	old+4(FP), R2
 	MOVW	new+8(FP), R3
@@ -50,44 +51,50 @@ casfail:
 
 // stubs
 
-TEXT runtime∕internal∕atomic·Loadp(SB),NOSPLIT|NOFRAME,$0-8
-	B	runtime∕internal∕atomic·Load(SB)
+TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-8
+	B	·Load(SB)
 
-TEXT runtime∕internal∕atomic·LoadAcq(SB),NOSPLIT|NOFRAME,$0-8
-	B	runtime∕internal∕atomic·Load(SB)
+TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-8
+	B	·Load(SB)
 
-TEXT runtime∕internal∕atomic·Casuintptr(SB),NOSPLIT,$0-13
-	B	runtime∕internal∕atomic·Cas(SB)
+TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-8
+	B	·Load(SB)
 
-TEXT runtime∕internal∕atomic·Casp1(SB),NOSPLIT,$0-13
-	B	runtime∕internal∕atomic·Cas(SB)
+TEXT ·Casuintptr(SB),NOSPLIT,$0-13
+	B	·Cas(SB)
 
-TEXT runtime∕internal∕atomic·CasRel(SB),NOSPLIT,$0-13
-	B	runtime∕internal∕atomic·Cas(SB)
+TEXT ·Casp1(SB),NOSPLIT,$0-13
+	B	·Cas(SB)
 
-TEXT runtime∕internal∕atomic·Loaduintptr(SB),NOSPLIT,$0-8
-	B	runtime∕internal∕atomic·Load(SB)
+TEXT ·CasRel(SB),NOSPLIT,$0-13
+	B	·Cas(SB)
 
-TEXT runtime∕internal∕atomic·Loaduint(SB),NOSPLIT,$0-8
-	B	runtime∕internal∕atomic·Load(SB)
+TEXT ·Loaduintptr(SB),NOSPLIT,$0-8
+	B	·Load(SB)
 
-TEXT runtime∕internal∕atomic·Storeuintptr(SB),NOSPLIT,$0-8
-	B	runtime∕internal∕atomic·Store(SB)
+TEXT ·Loaduint(SB),NOSPLIT,$0-8
+	B	·Load(SB)
 
-TEXT runtime∕internal∕atomic·StorepNoWB(SB),NOSPLIT,$0-8
-	B	runtime∕internal∕atomic·Store(SB)
+TEXT ·Storeuintptr(SB),NOSPLIT,$0-8
+	B	·Store(SB)
 
-TEXT runtime∕internal∕atomic·StoreRel(SB),NOSPLIT,$0-8
-	B	runtime∕internal∕atomic·Store(SB)
+TEXT ·StorepNoWB(SB),NOSPLIT,$0-8
+	B	·Store(SB)
 
-TEXT runtime∕internal∕atomic·Xadduintptr(SB),NOSPLIT,$0-12
-	B	runtime∕internal∕atomic·Xadd(SB)
+TEXT ·StoreRel(SB),NOSPLIT,$0-8
+	B	·Store(SB)
 
-TEXT runtime∕internal∕atomic·Loadint64(SB),NOSPLIT,$0-12
-	B	runtime∕internal∕atomic·Load64(SB)
+TEXT ·StoreReluintptr(SB),NOSPLIT,$0-8
+	B	·Store(SB)
 
-TEXT runtime∕internal∕atomic·Xaddint64(SB),NOSPLIT,$0-20
-	B	runtime∕internal∕atomic·Xadd64(SB)
+TEXT ·Xadduintptr(SB),NOSPLIT,$0-12
+	B	·Xadd(SB)
+
+TEXT ·Loadint64(SB),NOSPLIT,$0-12
+	B	·Load64(SB)
+
+TEXT ·Xaddint64(SB),NOSPLIT,$0-20
+	B	·Xadd64(SB)
 
 // 64-bit atomics
 // The native ARM implementations use LDREXD/STREXD, which are
@@ -95,12 +102,8 @@ TEXT runtime∕internal∕atomic·Xaddint64(SB),NOSPLIT,$0-20
 // On older ARM, we use Go implementations which simulate 64-bit
 // atomics with locks.
 
-TEXT armCas64<>(SB),NOSPLIT,$0-21
-	MOVW	addr+0(FP), R1
-	// make unaligned atomic access panic
-	AND.S	$7, R1, R2
-	BEQ 	2(PC)
-	MOVW	R2, (R2)	// crash. AND.S above left only low 3 bits in R2.
+TEXT armCas64<>(SB),NOSPLIT,$0-21
+	// addr is already in R1
 	MOVW	old_lo+4(FP), R2
 	MOVW	old_hi+8(FP), R3
 	MOVW	new_lo+12(FP), R4
@@ -128,12 +131,8 @@ cas64fail:
 	MOVBU	R0, swapped+20(FP)
 	RET
 
-TEXT armXadd64<>(SB),NOSPLIT,$0-20
-	MOVW	addr+0(FP), R1
-	// make unaligned atomic access panic
-	AND.S	$7, R1, R2
-	BEQ 	2(PC)
-	MOVW	R2, (R2)	// crash. AND.S above left only low 3 bits in R2.
+TEXT armXadd64<>(SB),NOSPLIT,$0-20
+	// addr is already in R1
 	MOVW	delta_lo+4(FP), R2
 	MOVW	delta_hi+8(FP), R3
 
@@ -154,12 +153,8 @@ add64loop:
 	MOVW	R5, new_hi+16(FP)
 	RET
 
-TEXT armXchg64<>(SB),NOSPLIT,$0-20
-	MOVW	addr+0(FP), R1
-	// make unaligned atomic access panic
-	AND.S	$7, R1, R2
-	BEQ 	2(PC)
-	MOVW	R2, (R2)	// crash. AND.S above left only low 3 bits in R2.
+TEXT armXchg64<>(SB),NOSPLIT,$0-20
+	// addr is already in R1
 	MOVW	new_lo+4(FP), R2
 	MOVW	new_hi+8(FP), R3
 
@@ -178,12 +173,8 @@ swap64loop:
 	MOVW	R5, old_hi+16(FP)
 	RET
 
-TEXT armLoad64<>(SB),NOSPLIT,$0-12
-	MOVW	addr+0(FP), R1
-	// make unaligned atomic access panic
-	AND.S	$7, R1, R2
-	BEQ 	2(PC)
-	MOVW	R2, (R2)	// crash. AND.S above left only low 3 bits in R2.
+TEXT armLoad64<>(SB),NOSPLIT,$0-12
+	// addr is already in R1
 
 	LDREXD	(R1), R2	// loads R2 and R3
 	DMB	MB_ISH
@@ -192,12 +183,8 @@ TEXT armLoad64<>(SB),NOSPLIT,$0-12
 	MOVW	R3, val_hi+8(FP)
 	RET
 
-TEXT armStore64<>(SB),NOSPLIT,$0-12
-	MOVW	addr+0(FP), R1
-	// make unaligned atomic access panic
-	AND.S	$7, R1, R2
-	BEQ 	2(PC)
-	MOVW	R2, (R2)	// crash. AND.S above left only low 3 bits in R2.
+TEXT armStore64<>(SB),NOSPLIT,$0-12
+	// addr is already in R1
 	MOVW	val_lo+4(FP), R2
 	MOVW	val_hi+8(FP), R3
 
@@ -213,35 +200,83 @@ store64loop:
 	DMB	MB_ISH
 	RET
 
-TEXT ·Cas64(SB),NOSPLIT,$0-21
+// The following functions all panic if their address argument isn't
+// 8-byte aligned. Since we're calling back into Go code to do this,
+// we have to cooperate with stack unwinding. In the normal case, the
+// functions tail-call into the appropriate implementation, which
+// means they must not open a frame. Hence, when they go down the
+// panic path, at that point they push the LR to create a real frame
+// (they don't need to pop it because panic won't return).
+
+TEXT ·Cas64(SB),NOSPLIT,$-4-21
+	NO_LOCAL_POINTERS
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ 	3(PC)
+	MOVW.W	R14, -4(R13) // prepare a real frame
+	BL	·panicUnaligned(SB)
+
 	MOVB	runtime·goarm(SB), R11
 	CMP	$7, R11
 	BLT	2(PC)
 	JMP	armCas64<>(SB)
 	JMP	·goCas64(SB)
 
-TEXT ·Xadd64(SB),NOSPLIT,$0-20
+TEXT ·Xadd64(SB),NOSPLIT,$-4-20
+	NO_LOCAL_POINTERS
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ 	3(PC)
+	MOVW.W	R14, -4(R13) // prepare a real frame
+	BL	·panicUnaligned(SB)
+
 	MOVB	runtime·goarm(SB), R11
 	CMP	$7, R11
 	BLT	2(PC)
 	JMP	armXadd64<>(SB)
 	JMP	·goXadd64(SB)
 
-TEXT ·Xchg64(SB),NOSPLIT,$0-20
+TEXT ·Xchg64(SB),NOSPLIT,$-4-20
+	NO_LOCAL_POINTERS
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ 	3(PC)
+	MOVW.W	R14, -4(R13) // prepare a real frame
+	BL	·panicUnaligned(SB)
+
 	MOVB	runtime·goarm(SB), R11
 	CMP	$7, R11
 	BLT	2(PC)
 	JMP	armXchg64<>(SB)
 	JMP	·goXchg64(SB)
 
-TEXT ·Load64(SB),NOSPLIT,$0-12
+TEXT ·Load64(SB),NOSPLIT,$-4-12
+	NO_LOCAL_POINTERS
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ 	3(PC)
+	MOVW.W	R14, -4(R13) // prepare a real frame
+	BL	·panicUnaligned(SB)
+
 	MOVB	runtime·goarm(SB), R11
 	CMP	$7, R11
 	BLT	2(PC)
 	JMP	armLoad64<>(SB)
 	JMP	·goLoad64(SB)
 
-TEXT ·Store64(SB),NOSPLIT,$0-12
+TEXT ·Store64(SB),NOSPLIT,$-4-12
+	NO_LOCAL_POINTERS
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ 	3(PC)
+	MOVW.W	R14, -4(R13) // prepare a real frame
+	BL	·panicUnaligned(SB)
+
 	MOVB	runtime·goarm(SB), R11
 	CMP	$7, R11
 	BLT	2(PC)
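Note: the arm stubs above (and the 386 ones earlier) now route misaligned 64-bit addresses to ·panicUnaligned, a real Go panic with a stable message (see the new unaligned.go at the end of this diff), instead of faking a crash with a wild store. The behavior is observable through the public sync/atomic package, whose 32-bit assembly tail-calls these runtime entry points. A sketch, assuming a 32-bit GOARCH such as 386 or arm (on 64-bit targets the misaligned access may simply succeed):

    package main

    import (
    	"fmt"
    	"sync/atomic"
    	"unsafe"
    )

    func main() {
    	// Carve a deliberately misaligned *uint64 out of a byte slice.
    	// The slice data is at least word aligned, so base+1 is odd.
    	buf := make([]byte, 16)
    	p := (*uint64)(unsafe.Pointer(&buf[1]))

    	defer func() {
    		// On 32-bit targets after this change, the expected value is
    		// "unaligned 64-bit atomic operation".
    		fmt.Println("recovered:", recover())
    	}()
    	atomic.AddUint64(p, 1)
    }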
diff --git a/src/runtime/internal/atomic/asm_mips64x.s b/src/runtime/internal/atomic/asm_mips64x.s
index 3290fb726a..a515683ebb 100644
--- a/src/runtime/internal/atomic/asm_mips64x.s
+++ b/src/runtime/internal/atomic/asm_mips64x.s
@@ -158,6 +158,12 @@ TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
 TEXT ·StoreRel(SB), NOSPLIT, $0-12
 	JMP	·Store(SB)
 
+TEXT ·StoreRel64(SB), NOSPLIT, $0-16
+	JMP	·Store64(SB)
+
+TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
+	JMP	·Store64(SB)
+
 TEXT ·Store(SB), NOSPLIT, $0-12
 	MOVV	ptr+0(FP), R1
 	MOVW	val+8(FP), R2
@@ -237,3 +243,29 @@ TEXT ·And8(SB), NOSPLIT, $0-9
 	BEQ	R4, -4(PC)
 	SYNC
 	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+	MOVV	ptr+0(FP), R1
+	MOVW	val+8(FP), R2
+
+	SYNC
+	LL	(R1), R3
+	OR	R2, R3
+	SC	R3, (R1)
+	BEQ	R3, -4(PC)
+	SYNC
+	RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+	MOVV	ptr+0(FP), R1
+	MOVW	val+8(FP), R2
+
+	SYNC
+	LL	(R1), R3
+	AND	R2, R3
+	SC	R3, (R1)
+	BEQ	R3, -4(PC)
+	SYNC
+	RET
diff --git a/src/runtime/internal/atomic/asm_mipsx.s b/src/runtime/internal/atomic/asm_mipsx.s
index 62811a6599..2b2cfabe08 100644
--- a/src/runtime/internal/atomic/asm_mipsx.s
+++ b/src/runtime/internal/atomic/asm_mipsx.s
@@ -122,6 +122,9 @@ TEXT ·StorepNoWB(SB),NOSPLIT,$0-8
 TEXT ·StoreRel(SB),NOSPLIT,$0-8
 	JMP	·Store(SB)
 
+TEXT ·StoreReluintptr(SB),NOSPLIT,$0-8
+	JMP	·Store(SB)
+
 // void	Or8(byte volatile*, byte);
 TEXT ·Or8(SB),NOSPLIT,$0-5
 	MOVW	ptr+0(FP), R1
@@ -169,3 +172,29 @@ try_and8:
 	BEQ	R4, try_and8
 	SYNC
 	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+	MOVW	ptr+0(FP), R1
+	MOVW	val+4(FP), R2
+
+	SYNC
+	LL	(R1), R3
+	OR	R2, R3
+	SC	R3, (R1)
+	BEQ	R3, -4(PC)
+	SYNC
+	RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+	MOVW	ptr+0(FP), R1
+	MOVW	val+4(FP), R2
+
+	SYNC
+	LL	(R1), R3
+	AND	R2, R3
+	SC	R3, (R1)
+	BEQ	R3, -4(PC)
+	SYNC
+	RET
diff --git a/src/runtime/internal/atomic/asm_ppc64x.s b/src/runtime/internal/atomic/asm_ppc64x.s
index 06dc931bf4..bb009ab34d 100644
--- a/src/runtime/internal/atomic/asm_ppc64x.s
+++ b/src/runtime/internal/atomic/asm_ppc64x.s
@@ -83,12 +83,18 @@ TEXT runtime∕internal∕atomic·Casuintptr(SB), NOSPLIT, $0-25
 TEXT runtime∕internal∕atomic·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
 	BR	runtime∕internal∕atomic·Load64(SB)
 
+TEXT runtime∕internal∕atomic·LoadAcquintptr(SB), NOSPLIT|NOFRAME, $0-16
+	BR	runtime∕internal∕atomic·LoadAcq64(SB)
+
 TEXT runtime∕internal∕atomic·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
 	BR	runtime∕internal∕atomic·Load64(SB)
 
 TEXT runtime∕internal∕atomic·Storeuintptr(SB), NOSPLIT, $0-16
 	BR	runtime∕internal∕atomic·Store64(SB)
 
+TEXT runtime∕internal∕atomic·StoreReluintptr(SB), NOSPLIT, $0-16
+	BR	runtime∕internal∕atomic·StoreRel64(SB)
+
 TEXT runtime∕internal∕atomic·Xadduintptr(SB), NOSPLIT, $0-24
 	BR	runtime∕internal∕atomic·Xadd64(SB)
 
@@ -191,6 +197,13 @@ TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12
 	MOVW	R4, 0(R3)
 	RET
 
+TEXT runtime∕internal∕atomic·StoreRel64(SB), NOSPLIT, $0-16
+	MOVD	ptr+0(FP), R3
+	MOVD	val+8(FP), R4
+	LWSYNC
+	MOVD	R4, 0(R3)
+	RET
+
 // void runtime∕internal∕atomic·Or8(byte volatile*, byte);
 TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-9
 	MOVD	ptr+0(FP), R3
@@ -209,8 +222,32 @@ TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9
 	MOVBZ	val+8(FP), R4
 	LWSYNC
again:
-	LBAR	(R3),R6
-	AND	R4,R6
-	STBCCC	R6,(R3)
+	LBAR	(R3), R6
+	AND	R4, R6
+	STBCCC	R6, (R3)
 	BNE	again
 	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·Or(SB), NOSPLIT, $0-12
+	MOVD	ptr+0(FP), R3
+	MOVW	val+8(FP), R4
+	LWSYNC
+again:
+	LWAR	(R3), R6
+	OR	R4, R6
+	STWCCC	R6, (R3)
+	BNE	again
+	RET
+
+// func And(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·And(SB), NOSPLIT, $0-12
+	MOVD	ptr+0(FP), R3
+	MOVW	val+8(FP), R4
+	LWSYNC
+again:
+	LWAR	(R3),R6
+	AND	R4, R6
+	STWCCC	R6, (R3)
+	BNE	again
+	RET
diff --git a/src/runtime/internal/atomic/asm_s390x.s b/src/runtime/internal/atomic/asm_s390x.s
index 9a19bc0ece..daf1f3cc9f 100644
--- a/src/runtime/internal/atomic/asm_s390x.s
+++ b/src/runtime/internal/atomic/asm_s390x.s
@@ -174,8 +174,8 @@ TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
 
 // func Or8(addr *uint8, v uint8)
 TEXT ·Or8(SB), NOSPLIT, $0-9
-	MOVD   ptr+0(FP), R3
-	MOVBZ  val+8(FP), R4
+	MOVD	ptr+0(FP), R3
+	MOVBZ	val+8(FP), R4
 	// We don't have atomic operations that work on individual bytes so we
 	// need to align addr down to a word boundary and create a mask
 	// containing v to OR with the entire word atomically.
@@ -188,8 +188,8 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
 
 // func And8(addr *uint8, v uint8)
 TEXT ·And8(SB), NOSPLIT, $0-9
-	MOVD   ptr+0(FP), R3
-	MOVBZ  val+8(FP), R4
+	MOVD	ptr+0(FP), R3
+	MOVBZ	val+8(FP), R4
 	// We don't have atomic operations that work on individual bytes so we
 	// need to align addr down to a word boundary and create a mask
 	// containing v to AND with the entire word atomically.
@@ -200,3 +200,17 @@ TEXT ·And8(SB), NOSPLIT, $0-9
 	RLL	R5, R4, R4        // R4 = rotl(R4, R5)
 	LAN	R4, R6, 0(R3)     // R6 = *R3; *R3 &= R4; (atomic)
 	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+	MOVD	ptr+0(FP), R3
+	MOVW	val+8(FP), R4
+	LAO	R4, R6, 0(R3)     // R6 = *R3; *R3 |= R4; (atomic)
+	RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+	MOVD	ptr+0(FP), R3
+	MOVW	val+8(FP), R4
+	LAN	R4, R6, 0(R3)     // R6 = *R3; *R3 &= R4; (atomic)
+	RET
diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go
index 8d002ebfe3..1bfcb1143d 100644
--- a/src/runtime/internal/atomic/atomic_386.go
+++ b/src/runtime/internal/atomic/atomic_386.go
@@ -30,6 +30,12 @@ func LoadAcq(ptr *uint32) uint32 {
 	return *ptr
 }
 
+//go:nosplit
+//go:noinline
+func LoadAcquintptr(ptr *uintptr) uintptr {
+	return *ptr
+}
+
 //go:noescape
 func Xadd64(ptr *uint64, delta int64) uint64
 
@@ -63,6 +69,12 @@ func And8(ptr *uint8, val uint8)
 //go:noescape
 func Or8(ptr *uint8, val uint8)
 
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
 // NOTE: Do not add atomicxor8 (XOR is not idempotent).
 
 //go:noescape
@@ -83,5 +95,8 @@ func Store64(ptr *uint64, val uint64)
 //go:noescape
 func StoreRel(ptr *uint32, val uint32)
 
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
+
 // NO go:noescape annotation; see atomic_pointer.go.
 func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_amd64.go b/src/runtime/internal/atomic/atomic_amd64.go
index 14b8101720..e36eb83a11 100644
--- a/src/runtime/internal/atomic/atomic_amd64.go
+++ b/src/runtime/internal/atomic/atomic_amd64.go
@@ -35,6 +35,18 @@ func LoadAcq(ptr *uint32) uint32 {
 	return *ptr
 }
 
+//go:nosplit
+//go:noinline
+func LoadAcq64(ptr *uint64) uint64 {
+	return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcquintptr(ptr *uintptr) uintptr {
+	return *ptr
+}
+
 //go:noescape
 func Xadd(ptr *uint32, delta int32) uint32
 
@@ -65,6 +77,12 @@ func And8(ptr *uint8, val uint8)
 //go:noescape
 func Or8(ptr *uint8, val uint8)
 
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
 // NOTE: Do not add atomicxor8 (XOR is not idempotent).
 
 //go:noescape
@@ -85,6 +103,12 @@ func Store64(ptr *uint64, val uint64)
 //go:noescape
 func StoreRel(ptr *uint32, val uint32)
 
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
+
 // StorepNoWB performs *ptr = val atomically and without a write
 // barrier.
 //
diff --git a/src/runtime/internal/atomic/atomic_arm.go b/src/runtime/internal/atomic/atomic_arm.go
index 95713afcc1..546b3d6120 100644
--- a/src/runtime/internal/atomic/atomic_arm.go
+++ b/src/runtime/internal/atomic/atomic_arm.go
@@ -81,6 +81,9 @@ func Store(addr *uint32, v uint32)
 //go:noescape
 func StoreRel(addr *uint32, v uint32)
 
+//go:noescape
+func StoreReluintptr(addr *uintptr, v uintptr)
+
 //go:nosplit
 func goCas64(addr *uint64, old, new uint64) bool {
 	if uintptr(unsafe.Pointer(addr))&7 != 0 {
@@ -180,6 +183,26 @@ func And8(addr *uint8, v uint8) {
 }
 
 //go:nosplit
+func Or(addr *uint32, v uint32) {
+	for {
+		old := *addr
+		if Cas(addr, old, old|v) {
+			return
+		}
+	}
+}
+
+//go:nosplit
+func And(addr *uint32, v uint32) {
+	for {
+		old := *addr
+		if Cas(addr, old, old&v) {
+			return
+		}
+	}
+}
+
+//go:nosplit
 func armcas(ptr *uint32, old, new uint32) bool
 
 //go:noescape
@@ -195,6 +218,9 @@ func Load8(addr *uint8) uint8
 func LoadAcq(addr *uint32) uint32
 
 //go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
 func Cas64(addr *uint64, old, new uint64) bool
 
 //go:noescape
diff --git a/src/runtime/internal/atomic/atomic_arm64.go b/src/runtime/internal/atomic/atomic_arm64.go
index 26ca94d54c..d49bee8936 100644
--- a/src/runtime/internal/atomic/atomic_arm64.go
+++ b/src/runtime/internal/atomic/atomic_arm64.go
@@ -42,12 +42,24 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer
 func LoadAcq(addr *uint32) uint32
 
 //go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
 func Or8(ptr *uint8, val uint8)
 
 //go:noescape
 func And8(ptr *uint8, val uint8)
 
 //go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
 //go:noescape
@@ -67,3 +79,9 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
 
 //go:noescape
 func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
diff --git a/src/runtime/internal/atomic/atomic_arm64.s b/src/runtime/internal/atomic/atomic_arm64.s
index a2eb7568d2..0cf3c40223 100644
--- a/src/runtime/internal/atomic/atomic_arm64.s
+++ b/src/runtime/internal/atomic/atomic_arm64.s
@@ -36,12 +36,26 @@ TEXT ·Loadp(SB),NOSPLIT,$0-16
 TEXT ·LoadAcq(SB),NOSPLIT,$0-12
 	B	·Load(SB)
 
+// uint64 runtime∕internal∕atomic·LoadAcq64(uint64 volatile* addr)
+TEXT ·LoadAcq64(SB),NOSPLIT,$0-16
+	B	·Load64(SB)
+
+// uintptr runtime∕internal∕atomic·LoadAcquintptr(uintptr volatile* addr)
+TEXT ·LoadAcquintptr(SB),NOSPLIT,$0-16
+	B	·Load64(SB)
+
 TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-16
 	B	runtime∕internal∕atomic·Store64(SB)
 
 TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12
 	B	runtime∕internal∕atomic·Store(SB)
 
+TEXT runtime∕internal∕atomic·StoreRel64(SB), NOSPLIT, $0-16
+	B	runtime∕internal∕atomic·Store64(SB)
+
+TEXT runtime∕internal∕atomic·StoreReluintptr(SB), NOSPLIT, $0-16
+	B	runtime∕internal∕atomic·Store64(SB)
+
 TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-12
 	MOVD	ptr+0(FP), R0
 	MOVW	val+8(FP), R1
@@ -150,3 +164,22 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
 	CBNZ	R3, -3(PC)
 	RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+	MOVD	ptr+0(FP), R0
+	MOVW	val+8(FP), R1
+	LDAXRW	(R0), R2
+	AND	R1, R2
+	STLXRW	R2, (R0), R3
+	CBNZ	R3, -3(PC)
+	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+	MOVD	ptr+0(FP), R0
+	MOVW	val+8(FP), R1
+	LDAXRW	(R0), R2
+	ORR	R1, R2
+	STLXRW	R2, (R0), R3
+	CBNZ	R3, -3(PC)
+	RET
diff --git a/src/runtime/internal/atomic/atomic_mips64x.go b/src/runtime/internal/atomic/atomic_mips64x.go
index 1d9977850b..b0109d72b0 100644
--- a/src/runtime/internal/atomic/atomic_mips64x.go
+++ b/src/runtime/internal/atomic/atomic_mips64x.go
@@ -42,6 +42,12 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer
 func LoadAcq(ptr *uint32) uint32
 
 //go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
 func And8(ptr *uint8, val uint8)
 
 //go:noescape
@@ -50,6 +56,12 @@ func Or8(ptr *uint8, val uint8)
 // NOTE: Do not add atomicxor8 (XOR is not idempotent).
 
 //go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
 //go:noescape
@@ -69,3 +81,9 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
 
 //go:noescape
 func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
diff --git a/src/runtime/internal/atomic/atomic_mips64x.s b/src/runtime/internal/atomic/atomic_mips64x.s
index 1ed90937c9..125c0c221c 100644
--- a/src/runtime/internal/atomic/atomic_mips64x.s
+++ b/src/runtime/internal/atomic/atomic_mips64x.s
@@ -47,3 +47,11 @@ TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-16
 // uint32 runtime∕internal∕atomic·LoadAcq(uint32 volatile* ptr)
 TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
 	JMP	atomic·Load(SB)
+
+// uint64 runtime∕internal∕atomic·LoadAcq64(uint64 volatile* ptr)
+TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16
+	JMP	atomic·Load64(SB)
+
+// uintptr runtime∕internal∕atomic·LoadAcquintptr(uintptr volatile* ptr)
+TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16
+	JMP	atomic·Load64(SB)
diff --git a/src/runtime/internal/atomic/atomic_mipsx.go b/src/runtime/internal/atomic/atomic_mipsx.go
index 0e2d77ade1..1336b50121 100644
--- a/src/runtime/internal/atomic/atomic_mipsx.go
+++ b/src/runtime/internal/atomic/atomic_mipsx.go
@@ -34,7 +34,7 @@ func spinUnlock(state *uint32)
 func lockAndCheck(addr *uint64) {
 	// ensure 8-byte alignment
 	if uintptr(unsafe.Pointer(addr))&7 != 0 {
-		addr = nil
+		panicUnaligned()
 	}
 	// force dereference before taking lock
 	_ = *addr
@@ -133,12 +133,21 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer
 func LoadAcq(ptr *uint32) uint32
 
 //go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
 func And8(ptr *uint8, val uint8)
 
 //go:noescape
 func Or8(ptr *uint8, val uint8)
 
 //go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
 func Store(ptr *uint32, val uint32)
 
 //go:noescape
@@ -151,4 +160,7 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
 func StoreRel(ptr *uint32, val uint32)
 
 //go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
+
+//go:noescape
 func CasRel(addr *uint32, old, new uint32) bool
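Note: atomic_mipsx.go is the one port whose 64-bit atomics are emulated in Go under a runtime spinlock, which is why its alignment check was already Go code; it now calls panicUnaligned() instead of nil-ing the pointer to force a fault. A simplified sketch of that emulation scheme, with a sync.Mutex standing in for the runtime's spinlock (names and locking here are illustrative, not the real runtime code):

    package main

    import (
    	"fmt"
    	"sync"
    	"unsafe"
    )

    var lock sync.Mutex // stands in for the runtime's spinlock

    // xadd64 emulates an atomic 64-bit add the way a port without native
    // 64-bit atomics can: check alignment, then do the read-modify-write
    // under one global lock.
    func xadd64(addr *uint64, delta int64) uint64 {
    	if uintptr(unsafe.Pointer(addr))&7 != 0 {
    		panic("unaligned 64-bit atomic operation")
    	}
    	lock.Lock()
    	defer lock.Unlock()
    	*addr += uint64(delta)
    	return *addr
    }

    func main() {
    	var x uint64
    	fmt.Println(xadd64(&x, 42)) // 42
    }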
diff --git a/src/runtime/internal/atomic/atomic_ppc64x.go b/src/runtime/internal/atomic/atomic_ppc64x.go
index a48ecf5ee8..e4b109f0ec 100644
--- a/src/runtime/internal/atomic/atomic_ppc64x.go
+++ b/src/runtime/internal/atomic/atomic_ppc64x.go
@@ -42,6 +42,12 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer
 func LoadAcq(ptr *uint32) uint32
 
 //go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
 func And8(ptr *uint8, val uint8)
 
 //go:noescape
@@ -50,6 +56,12 @@ func Or8(ptr *uint8, val uint8)
 // NOTE: Do not add atomicxor8 (XOR is not idempotent).
 
 //go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
 //go:noescape
@@ -67,5 +79,11 @@ func Store64(ptr *uint64, val uint64)
 //go:noescape
 func StoreRel(ptr *uint32, val uint32)
 
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
+
 // NO go:noescape annotation; see atomic_pointer.go.
 func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_ppc64x.s b/src/runtime/internal/atomic/atomic_ppc64x.s
index c2f696fb34..b79cdbca34 100644
--- a/src/runtime/internal/atomic/atomic_ppc64x.s
+++ b/src/runtime/internal/atomic/atomic_ppc64x.s
@@ -6,6 +6,15 @@
 
 #include "textflag.h"
 
+// For more details about how various memory models are
+// enforced on POWER, the following paper provides more
+// details about how they enforce C/C++ like models. This
+// gives context about why the strange looking code
+// sequences below work.
+//
+// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+
 // uint32 runtime∕internal∕atomic·Load(uint32 volatile* ptr)
 TEXT ·Load(SB),NOSPLIT|NOFRAME,$-8-12
 	MOVD	ptr+0(FP), R3
@@ -56,5 +65,16 @@ TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12
 	MOVWZ	0(R3), R3
 	CMPW	R3, R3, CR7
 	BC	4, 30, 1(PC) // bne- cr7, 0x4
+	ISYNC
 	MOVW	R3, ret+8(FP)
 	RET
+
+// uint64 runtime∕internal∕atomic·LoadAcq64(uint64 volatile* ptr)
+TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
+	MOVD	ptr+0(FP), R3
+	MOVD	0(R3), R3
+	CMP	R3, R3, CR7
+	BC	4, 30, 1(PC) // bne- cr7, 0x4
+	ISYNC
+	MOVD	R3, ret+8(FP)
+	RET
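Note: LoadAcq64/StoreRel64 give the runtime a 64-bit acquire-load/release-store pair that is cheaper than a full barrier. On ppc64 that is the LWSYNC before the store and the CMP; BC; ISYNC sequence after the load shown above, while on TSO machines (amd64 and s390x, and the single-threaded wasm port below) both reduce to plain memory accesses. A sketch of the intended publish/consume pairing, written with the public sync/atomic package, which is at least as strong as acquire/release (illustrative only; runtime code calls runtime/internal/atomic directly):

    package main

    import (
    	"fmt"
    	"sync/atomic"
    )

    var (
    	data  uint64 // payload, written before the flag is raised
    	ready uint32 // publication flag
    )

    func producer() {
    	atomic.StoreUint64(&data, 123) // plays the StoreRel64 role
    	atomic.StoreUint32(&ready, 1)  // release: publish after the payload
    }

    func consumer() {
    	for atomic.LoadUint32(&ready) == 0 { // plays the LoadAcq role
    	}
    	// The acquire load above orders this read after the flag check,
    	// so the payload is guaranteed to be visible here.
    	fmt.Println(atomic.LoadUint64(&data)) // 123
    }

    func main() {
    	go producer()
    	consumer()
    }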
diff --git a/src/runtime/internal/atomic/atomic_riscv64.go b/src/runtime/internal/atomic/atomic_riscv64.go
index d52512369e..8f24d61625 100644
--- a/src/runtime/internal/atomic/atomic_riscv64.go
+++ b/src/runtime/internal/atomic/atomic_riscv64.go
@@ -40,12 +40,24 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer
 func LoadAcq(ptr *uint32) uint32
 
 //go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
 func Or8(ptr *uint8, val uint8)
 
 //go:noescape
 func And8(ptr *uint8, val uint8)
 
 //go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
 //go:noescape
@@ -65,3 +77,9 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
 
 //go:noescape
 func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
diff --git a/src/runtime/internal/atomic/atomic_riscv64.s b/src/runtime/internal/atomic/atomic_riscv64.s
index d005325ca3..74c896cea6 100644
--- a/src/runtime/internal/atomic/atomic_riscv64.s
+++ b/src/runtime/internal/atomic/atomic_riscv64.s
@@ -150,6 +150,12 @@ TEXT ·Xaddint64(SB),NOSPLIT,$0-24
 TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
 	JMP	·Load(SB)
 
+TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16
+	JMP	·Load64(SB)
+
+TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16
+	JMP	·Load64(SB)
+
 // func Loadp(ptr unsafe.Pointer) unsafe.Pointer
 TEXT ·Loadp(SB),NOSPLIT,$0-16
 	JMP	·Load64(SB)
@@ -161,6 +167,12 @@ TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
 TEXT ·StoreRel(SB), NOSPLIT, $0-12
 	JMP	·Store(SB)
 
+TEXT ·StoreRel64(SB), NOSPLIT, $0-16
+	JMP	·Store64(SB)
+
+TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
+	JMP	·Store64(SB)
+
 // func Xchg(ptr *uint32, new uint32) uint32
 TEXT ·Xchg(SB), NOSPLIT, $0-20
 	MOV	ptr+0(FP), A0
@@ -230,3 +242,17 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
 	SLL	A2, A1
 	AMOORW	A1, (A0), ZERO
 	RET
+
+// func And(ptr *uint32, val uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+	MOV	ptr+0(FP), A0
+	MOVW	val+8(FP), A1
+	AMOANDW	A1, (A0), ZERO
+	RET
+
+// func Or(ptr *uint32, val uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+	MOV	ptr+0(FP), A0
+	MOVW	val+8(FP), A1
+	AMOORW	A1, (A0), ZERO
+	RET
diff --git a/src/runtime/internal/atomic/atomic_s390x.go b/src/runtime/internal/atomic/atomic_s390x.go
index 4d73b39baf..a058d60102 100644
--- a/src/runtime/internal/atomic/atomic_s390x.go
+++ b/src/runtime/internal/atomic/atomic_s390x.go
@@ -41,6 +41,18 @@ func LoadAcq(ptr *uint32) uint32 {
 	return *ptr
 }
 
+//go:nosplit
+//go:noinline
+func LoadAcq64(ptr *uint64) uint64 {
+	return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcquintptr(ptr *uintptr) uintptr {
+	return *ptr
+}
+
 //go:noescape
 func Store(ptr *uint32, val uint32)
 
@@ -59,6 +71,18 @@ func StoreRel(ptr *uint32, val uint32) {
 	*ptr = val
 }
 
+//go:nosplit
+//go:noinline
+func StoreRel64(ptr *uint64, val uint64) {
+	*ptr = val
+}
+
+//go:nosplit
+//go:noinline
+func StoreReluintptr(ptr *uintptr, val uintptr) {
+	*ptr = val
+}
+
 //go:noescape
 func And8(ptr *uint8, val uint8)
 
@@ -68,6 +92,12 @@ func Or8(ptr *uint8, val uint8)
 // NOTE: Do not add atomicxor8 (XOR is not idempotent).
 
 //go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
 func Xadd(ptr *uint32, delta int32) uint32
 
 //go:noescape
diff --git a/src/runtime/internal/atomic/atomic_test.go b/src/runtime/internal/atomic/atomic_test.go
index b0a8fa0610..c9c2eba248 100644
--- a/src/runtime/internal/atomic/atomic_test.go
+++ b/src/runtime/internal/atomic/atomic_test.go
@@ -73,8 +73,15 @@ func TestXadduintptrOnUint64(t *testing.T) {
 
 func shouldPanic(t *testing.T, name string, f func()) {
 	defer func() {
-		if recover() == nil {
+		// Check that all GC maps are sane.
+		runtime.GC()
+
+		err := recover()
+		want := "unaligned 64-bit atomic operation"
+		if err == nil {
 			t.Errorf("%s did not panic", name)
+		} else if s, _ := err.(string); s != want {
+			t.Errorf("%s: wanted panic %q, got %q", name, want, err)
 		}
 	}()
 	f()
@@ -143,6 +150,45 @@ func TestAnd8(t *testing.T) {
 	}
 }
 
+func TestAnd(t *testing.T) {
+	// Basic sanity check.
+	x := uint32(0xffffffff)
+	for i := uint32(0); i < 32; i++ {
+		atomic.And(&x, ^(1 << i))
+		if r := uint32(0xffffffff) << (i + 1); x != r {
+			t.Fatalf("clearing bit %#x: want %#x, got %#x", uint32(1<<i), r, x)
+		}
+	}
+
+	// Set every bit in array to 1.
+	a := make([]uint32, 1<<12)
+	for i := range a {
+		a[i] = 0xffffffff
+	}
+
+	// Clear array bit-by-bit in different goroutines.
+	done := make(chan bool)
+	for i := 0; i < 32; i++ {
+		m := ^uint32(1 << i)
+		go func() {
+			for i := range a {
+				atomic.And(&a[i], m)
+			}
+			done <- true
+		}()
+	}
+	for i := 0; i < 32; i++ {
+		<-done
+	}
+
+	// Check that the array has been totally cleared.
+	for i, v := range a {
+		if v != 0 {
+			t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint32(0), v)
+		}
+	}
+}
+
 func TestOr8(t *testing.T) {
 	// Basic sanity check.
 	x := uint8(0)
@@ -179,7 +225,43 @@ func TestOr8(t *testing.T) {
 	}
 }
 
-func TestBitwiseContended(t *testing.T) {
+func TestOr(t *testing.T) {
+	// Basic sanity check.
+	x := uint32(0)
+	for i := uint32(0); i < 32; i++ {
+		atomic.Or(&x, 1<<i)
+		if r := (uint32(1) << (i + 1)) - 1; x != r {
+			t.Fatalf("setting bit %#x: want %#x, got %#x", uint32(1)<<i, r, x)
+		}
+	}
+
+	// Start with every bit in array set to 0.
+	a := make([]uint32, 1<<12)
+
+	// Set every bit in array bit-by-bit in different goroutines.
+	done := make(chan bool)
+	for i := 0; i < 32; i++ {
+		m := uint32(1 << i)
+		go func() {
+			for i := range a {
+				atomic.Or(&a[i], m)
+			}
+			done <- true
+		}()
+	}
+	for i := 0; i < 32; i++ {
+		<-done
+	}
+
+	// Check that the array has been totally set.
+	for i, v := range a {
+		if v != 0xffffffff {
+			t.Fatalf("a[%v] not fully set: want %#x, got %#x", i, uint32(0xffffffff), v)
+		}
+	}
+}
+
+func TestBitwiseContended8(t *testing.T) {
 	// Start with every bit in array set to 0.
 	a := make([]uint8, 16)
 
@@ -221,6 +303,48 @@ func TestBitwiseContended(t *testing.T) {
 	}
 }
 
+func TestBitwiseContended(t *testing.T) {
+	// Start with every bit in array set to 0.
+	a := make([]uint32, 16)
+
+	// Iterations to try.
+	N := 1 << 16
+	if testing.Short() {
+		N = 1 << 10
+	}
+
+	// Set and then clear every bit in the array bit-by-bit in different goroutines.
+	done := make(chan bool)
+	for i := 0; i < 32; i++ {
+		m := uint32(1 << i)
+		go func() {
+			for n := 0; n < N; n++ {
+				for i := range a {
+					atomic.Or(&a[i], m)
+					if atomic.Load(&a[i])&m != m {
+						t.Errorf("a[%v] bit %#x not set", i, m)
+					}
+					atomic.And(&a[i], ^m)
+					if atomic.Load(&a[i])&m != 0 {
+						t.Errorf("a[%v] bit %#x not clear", i, m)
+					}
+				}
+			}
+			done <- true
+		}()
+	}
+	for i := 0; i < 32; i++ {
+		<-done
+	}
+
+	// Check that the array has been totally cleared.
+	for i, v := range a {
+		if v != 0 {
+			t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint32(0), v)
+		}
+	}
+}
+
 func TestStorepNoWB(t *testing.T) {
 	var p [2]*int
 	for i := range p {
diff --git a/src/runtime/internal/atomic/atomic_wasm.go b/src/runtime/internal/atomic/atomic_wasm.go
index 2c0c3a8174..b05d98ed51 100644
--- a/src/runtime/internal/atomic/atomic_wasm.go
+++ b/src/runtime/internal/atomic/atomic_wasm.go
@@ -47,6 +47,18 @@ func LoadAcq(ptr *uint32) uint32 {
 
 //go:nosplit
 //go:noinline
+func LoadAcq64(ptr *uint64) uint64 {
+	return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcquintptr(ptr *uintptr) uintptr {
+	return *ptr
+}
+
+//go:nosplit
+//go:noinline
 func Load8(ptr *uint8) uint8 {
 	return *ptr
 }
@@ -121,6 +133,18 @@ func Or8(ptr *uint8, val uint8) {
 
 //go:nosplit
 //go:noinline
+func And(ptr *uint32, val uint32) {
+	*ptr = *ptr & val
+}
+
+//go:nosplit
+//go:noinline
+func Or(ptr *uint32, val uint32) {
+	*ptr = *ptr | val
+}
+
+//go:nosplit
+//go:noinline
 func Cas64(ptr *uint64, old, new uint64) bool {
 	if *ptr == old {
 		*ptr = new
@@ -143,6 +167,18 @@ func StoreRel(ptr *uint32, val uint32) {
 
 //go:nosplit
 //go:noinline
+func StoreRel64(ptr *uint64, val uint64) {
+	*ptr = val
+}
+
+//go:nosplit
+//go:noinline
+func StoreReluintptr(ptr *uintptr, val uintptr) {
+	*ptr = val
+}
+
+//go:nosplit
+//go:noinline
 func Store8(ptr *uint8, val uint8) {
 	*ptr = val
 }
diff --git a/src/runtime/internal/atomic/bench_test.go b/src/runtime/internal/atomic/bench_test.go
index de71b0f2c7..434aa6d434 100644
--- a/src/runtime/internal/atomic/bench_test.go
+++ b/src/runtime/internal/atomic/bench_test.go
@@ -51,6 +51,14 @@ func BenchmarkAnd8(b *testing.B) {
 	}
 }
 
+func BenchmarkAnd(b *testing.B) {
+	var x [128]uint32 // give x its own cache line
+	sink = &x
+	for i := 0; i < b.N; i++ {
+		atomic.And(&x[63], uint32(i))
+	}
+}
+
 func BenchmarkAnd8Parallel(b *testing.B) {
 	var x [512]uint8 // give byte its own cache line
 	sink = &x
@@ -63,6 +71,18 @@ func BenchmarkAnd8Parallel(b *testing.B) {
 	})
 }
 
+func BenchmarkAndParallel(b *testing.B) {
+	var x [128]uint32 // give x its own cache line
+	sink = &x
+	b.RunParallel(func(pb *testing.PB) {
+		i := uint32(0)
+		for pb.Next() {
+			atomic.And(&x[63], i)
+			i++
+		}
+	})
+}
+
 func BenchmarkOr8(b *testing.B) {
 	var x [512]uint8 // give byte its own cache line
 	sink = &x
@@ -71,6 +91,14 @@ func BenchmarkOr8(b *testing.B) {
 	}
 }
 
+func BenchmarkOr(b *testing.B) {
+	var x [128]uint32 // give x its own cache line
+	sink = &x
+	for i := 0; i < b.N; i++ {
+		atomic.Or(&x[63], uint32(i))
+	}
+}
+
 func BenchmarkOr8Parallel(b *testing.B) {
 	var x [512]uint8 // give byte its own cache line
 	sink = &x
@@ -83,6 +111,18 @@ func BenchmarkOr8Parallel(b *testing.B) {
 	})
 }
 
+func BenchmarkOrParallel(b *testing.B) {
+	var x [128]uint32 // give x its own cache line
+	sink = &x
+	b.RunParallel(func(pb *testing.PB) {
+		i := uint32(0)
+		for pb.Next() {
+			atomic.Or(&x[63], i)
+			i++
+		}
+	})
+}
+
 func BenchmarkXadd(b *testing.B) {
 	var x uint32
 	ptr := &x
diff --git a/src/runtime/internal/atomic/unaligned.go b/src/runtime/internal/atomic/unaligned.go
new file mode 100644
index 0000000000..a859de4144
--- /dev/null
+++ b/src/runtime/internal/atomic/unaligned.go
@@ -0,0 +1,9 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic
+
+func panicUnaligned() {
+	panic("unaligned 64-bit atomic operation")
+}
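Note: the new BenchmarkAnd/BenchmarkOr functions above hammer x[63] inside a [128]uint32 array so that the hot word sits well away from other variables and the measurement is not polluted by false sharing. The same padding trick in isolation, assuming a 64-byte cache line (an assumption of this sketch, not something the change states):

    package main

    import (
    	"fmt"
    	"unsafe"
    )

    // padded keeps hot from sharing a cache line with any neighboring
    // variable by surrounding it with padding on both sides
    // (64-byte cache lines assumed).
    type padded struct {
    	_   [64]byte
    	hot uint32
    	_   [60]byte
    }

    func main() {
    	var p padded
    	// Size 128, hot at offset 64: the hot word owns its line.
    	fmt.Println(unsafe.Sizeof(p), unsafe.Offsetof(p.hot))
    }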