Diffstat (limited to 'src/runtime/internal/atomic')
-rw-r--r--  src/runtime/internal/atomic/asm_386.s          | 110
-rw-r--r--  src/runtime/internal/atomic/asm_amd64.s        |  22
-rw-r--r--  src/runtime/internal/atomic/asm_arm.s          | 165
-rw-r--r--  src/runtime/internal/atomic/asm_mips64x.s      |  32
-rw-r--r--  src/runtime/internal/atomic/asm_mipsx.s        |  29
-rw-r--r--  src/runtime/internal/atomic/asm_ppc64x.s       |  43
-rw-r--r--  src/runtime/internal/atomic/asm_s390x.s        |  22
-rw-r--r--  src/runtime/internal/atomic/atomic_386.go      |  15
-rw-r--r--  src/runtime/internal/atomic/atomic_amd64.go    |  24
-rw-r--r--  src/runtime/internal/atomic/atomic_arm.go      |  26
-rw-r--r--  src/runtime/internal/atomic/atomic_arm64.go    |  18
-rw-r--r--  src/runtime/internal/atomic/atomic_arm64.s     |  33
-rw-r--r--  src/runtime/internal/atomic/atomic_mips64x.go  |  18
-rw-r--r--  src/runtime/internal/atomic/atomic_mips64x.s   |   8
-rw-r--r--  src/runtime/internal/atomic/atomic_mipsx.go    |  14
-rw-r--r--  src/runtime/internal/atomic/atomic_ppc64x.go   |  18
-rw-r--r--  src/runtime/internal/atomic/atomic_ppc64x.s    |  20
-rw-r--r--  src/runtime/internal/atomic/atomic_riscv64.go  |  18
-rw-r--r--  src/runtime/internal/atomic/atomic_riscv64.s   |  26
-rw-r--r--  src/runtime/internal/atomic/atomic_s390x.go    |  30
-rw-r--r--  src/runtime/internal/atomic/atomic_test.go     | 128
-rw-r--r--  src/runtime/internal/atomic/atomic_wasm.go     |  36
-rw-r--r--  src/runtime/internal/atomic/bench_test.go      |  40
-rw-r--r--  src/runtime/internal/atomic/unaligned.go       |   9
24 files changed, 786 insertions, 118 deletions
diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s
index 9b9dc14a60..d82faef1f0 100644
--- a/src/runtime/internal/atomic/asm_386.s
+++ b/src/runtime/internal/atomic/asm_386.s
@@ -3,6 +3,7 @@
// license that can be found in the LICENSE file.
#include "textflag.h"
+#include "funcdata.h"
// bool Cas(int32 *val, int32 old, int32 new)
// Atomically:
@@ -11,7 +12,7 @@
// return 1;
// }else
// return 0;
-TEXT runtime∕internal∕atomic·Cas(SB), NOSPLIT, $0-13
+TEXT ·Cas(SB), NOSPLIT, $0-13
MOVL ptr+0(FP), BX
MOVL old+4(FP), AX
MOVL new+8(FP), CX
@@ -20,32 +21,31 @@ TEXT runtime∕internal∕atomic·Cas(SB), NOSPLIT, $0-13
SETEQ ret+12(FP)
RET
-TEXT runtime∕internal∕atomic·Casuintptr(SB), NOSPLIT, $0-13
- JMP runtime∕internal∕atomic·Cas(SB)
+TEXT ·Casuintptr(SB), NOSPLIT, $0-13
+ JMP ·Cas(SB)
-TEXT runtime∕internal∕atomic·CasRel(SB), NOSPLIT, $0-13
- JMP runtime∕internal∕atomic·Cas(SB)
+TEXT ·CasRel(SB), NOSPLIT, $0-13
+ JMP ·Cas(SB)
-TEXT runtime∕internal∕atomic·Loaduintptr(SB), NOSPLIT, $0-8
- JMP runtime∕internal∕atomic·Load(SB)
+TEXT ·Loaduintptr(SB), NOSPLIT, $0-8
+ JMP ·Load(SB)
-TEXT runtime∕internal∕atomic·Loaduint(SB), NOSPLIT, $0-8
- JMP runtime∕internal∕atomic·Load(SB)
+TEXT ·Loaduint(SB), NOSPLIT, $0-8
+ JMP ·Load(SB)
-TEXT runtime∕internal∕atomic·Storeuintptr(SB), NOSPLIT, $0-8
- JMP runtime∕internal∕atomic·Store(SB)
-
-TEXT runtime∕internal∕atomic·Xadduintptr(SB), NOSPLIT, $0-12
- JMP runtime∕internal∕atomic·Xadd(SB)
+TEXT ·Storeuintptr(SB), NOSPLIT, $0-8
+ JMP ·Store(SB)
-TEXT runtime∕internal∕atomic·Loadint64(SB), NOSPLIT, $0-12
- JMP runtime∕internal∕atomic·Load64(SB)
+TEXT ·Xadduintptr(SB), NOSPLIT, $0-12
+ JMP ·Xadd(SB)
-TEXT runtime∕internal∕atomic·Xaddint64(SB), NOSPLIT, $0-20
- JMP runtime∕internal∕atomic·Xadd64(SB)
+TEXT ·Loadint64(SB), NOSPLIT, $0-12
+ JMP ·Load64(SB)
+TEXT ·Xaddint64(SB), NOSPLIT, $0-20
+ JMP ·Xadd64(SB)
-// bool runtime∕internal∕atomic·Cas64(uint64 *val, uint64 old, uint64 new)
+// bool ·Cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
// if(*val == *old){
// *val = new;
@@ -53,11 +53,12 @@ TEXT runtime∕internal∕atomic·Xaddint64(SB), NOSPLIT, $0-20
// } else {
// return 0;
// }
-TEXT runtime∕internal∕atomic·Cas64(SB), NOSPLIT, $0-21
+TEXT ·Cas64(SB), NOSPLIT, $0-21
+ NO_LOCAL_POINTERS
MOVL ptr+0(FP), BP
TESTL $7, BP
JZ 2(PC)
- MOVL 0, BP // crash with nil ptr deref
+ CALL ·panicUnaligned(SB)
MOVL old_lo+4(FP), AX
MOVL old_hi+8(FP), DX
MOVL new_lo+12(FP), BX
@@ -74,7 +75,7 @@ TEXT runtime∕internal∕atomic·Cas64(SB), NOSPLIT, $0-21
// return 1;
// }else
// return 0;
-TEXT runtime∕internal∕atomic·Casp1(SB), NOSPLIT, $0-13
+TEXT ·Casp1(SB), NOSPLIT, $0-13
MOVL ptr+0(FP), BX
MOVL old+4(FP), AX
MOVL new+8(FP), CX
@@ -87,7 +88,7 @@ TEXT runtime∕internal∕atomic·Casp1(SB), NOSPLIT, $0-13
// Atomically:
// *val += delta;
// return *val;
-TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-12
+TEXT ·Xadd(SB), NOSPLIT, $0-12
MOVL ptr+0(FP), BX
MOVL delta+4(FP), AX
MOVL AX, CX
@@ -97,12 +98,13 @@ TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-12
MOVL AX, ret+8(FP)
RET
-TEXT runtime∕internal∕atomic·Xadd64(SB), NOSPLIT, $0-20
+TEXT ·Xadd64(SB), NOSPLIT, $0-20
+ NO_LOCAL_POINTERS
// no XADDQ so use CMPXCHG8B loop
MOVL ptr+0(FP), BP
TESTL $7, BP
JZ 2(PC)
- MOVL 0, AX // crash when unaligned
+ CALL ·panicUnaligned(SB)
// DI:SI = delta
MOVL delta_lo+4(FP), SI
MOVL delta_hi+8(FP), DI
@@ -133,22 +135,23 @@ addloop:
MOVL CX, ret_hi+16(FP)
RET
-TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12
+TEXT ·Xchg(SB), NOSPLIT, $0-12
MOVL ptr+0(FP), BX
MOVL new+4(FP), AX
XCHGL AX, 0(BX)
MOVL AX, ret+8(FP)
RET
-TEXT runtime∕internal∕atomic·Xchguintptr(SB), NOSPLIT, $0-12
- JMP runtime∕internal∕atomic·Xchg(SB)
+TEXT ·Xchguintptr(SB), NOSPLIT, $0-12
+ JMP ·Xchg(SB)
-TEXT runtime∕internal∕atomic·Xchg64(SB),NOSPLIT,$0-20
+TEXT ·Xchg64(SB),NOSPLIT,$0-20
+ NO_LOCAL_POINTERS
// no XCHGQ so use CMPXCHG8B loop
MOVL ptr+0(FP), BP
TESTL $7, BP
JZ 2(PC)
- MOVL 0, AX // crash when unaligned
+ CALL ·panicUnaligned(SB)
// CX:BX = new
MOVL new_lo+4(FP), BX
MOVL new_hi+8(FP), CX
@@ -171,38 +174,43 @@ swaploop:
MOVL DX, ret_hi+16(FP)
RET
-TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-8
+TEXT ·StorepNoWB(SB), NOSPLIT, $0-8
MOVL ptr+0(FP), BX
MOVL val+4(FP), AX
XCHGL AX, 0(BX)
RET
-TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-8
+TEXT ·Store(SB), NOSPLIT, $0-8
MOVL ptr+0(FP), BX
MOVL val+4(FP), AX
XCHGL AX, 0(BX)
RET
-TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-8
+TEXT ·StoreRel(SB), NOSPLIT, $0-8
+ JMP ·Store(SB)
+
+TEXT runtime∕internal∕atomic·StoreReluintptr(SB), NOSPLIT, $0-8
JMP runtime∕internal∕atomic·Store(SB)
// uint64 atomicload64(uint64 volatile* addr);
-TEXT runtime∕internal∕atomic·Load64(SB), NOSPLIT, $0-12
+TEXT ·Load64(SB), NOSPLIT, $0-12
+ NO_LOCAL_POINTERS
MOVL ptr+0(FP), AX
TESTL $7, AX
JZ 2(PC)
- MOVL 0, AX // crash with nil ptr deref
+ CALL ·panicUnaligned(SB)
MOVQ (AX), M0
MOVQ M0, ret+4(FP)
EMMS
RET
-// void runtime∕internal∕atomic·Store64(uint64 volatile* addr, uint64 v);
-TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-12
+// void ·Store64(uint64 volatile* addr, uint64 v);
+TEXT ·Store64(SB), NOSPLIT, $0-12
+ NO_LOCAL_POINTERS
MOVL ptr+0(FP), AX
TESTL $7, AX
JZ 2(PC)
- MOVL 0, AX // crash with nil ptr deref
+ CALL ·panicUnaligned(SB)
// MOVQ and EMMS were introduced on the Pentium MMX.
MOVQ val+4(FP), M0
MOVQ M0, (AX)
@@ -214,24 +222,40 @@ TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-12
XADDL AX, (SP)
RET
-// void runtime∕internal∕atomic·Or8(byte volatile*, byte);
-TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-5
+// void ·Or8(byte volatile*, byte);
+TEXT ·Or8(SB), NOSPLIT, $0-5
MOVL ptr+0(FP), AX
MOVB val+4(FP), BX
LOCK
ORB BX, (AX)
RET
-// void runtime∕internal∕atomic·And8(byte volatile*, byte);
-TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-5
+// void ·And8(byte volatile*, byte);
+TEXT ·And8(SB), NOSPLIT, $0-5
MOVL ptr+0(FP), AX
MOVB val+4(FP), BX
LOCK
ANDB BX, (AX)
RET
-TEXT runtime∕internal∕atomic·Store8(SB), NOSPLIT, $0-5
+TEXT ·Store8(SB), NOSPLIT, $0-5
MOVL ptr+0(FP), BX
MOVB val+4(FP), AX
XCHGB AX, 0(BX)
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), AX
+ MOVL val+4(FP), BX
+ LOCK
+ ORL BX, (AX)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), AX
+ MOVL val+4(FP), BX
+ LOCK
+ ANDL BX, (AX)
+ RET
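
For context, the 32-bit Or/And entry points added above have the same contract as
the Go compare-and-swap fallback this patch adds in atomic_arm.go further down; a
minimal sketch of that contract (shown standalone, using the package's own Cas):

	// Or atomically performs *addr |= v.
	func Or(addr *uint32, v uint32) {
		for {
			old := *addr
			if Cas(addr, old, old|v) {
				return // And is identical, with old&v.
			}
		}
	}

On 386 and amd64 the loop is unnecessary: a single LOCK-prefixed ORL/ANDL does the
read-modify-write atomically.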
diff --git a/src/runtime/internal/atomic/asm_amd64.s b/src/runtime/internal/atomic/asm_amd64.s
index 90c56424c9..2cf7c55870 100644
--- a/src/runtime/internal/atomic/asm_amd64.s
+++ b/src/runtime/internal/atomic/asm_amd64.s
@@ -136,6 +136,12 @@ TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-12
TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12
JMP runtime∕internal∕atomic·Store(SB)
+TEXT runtime∕internal∕atomic·StoreRel64(SB), NOSPLIT, $0-16
+ JMP runtime∕internal∕atomic·Store64(SB)
+
+TEXT runtime∕internal∕atomic·StoreReluintptr(SB), NOSPLIT, $0-16
+ JMP runtime∕internal∕atomic·Store64(SB)
+
TEXT runtime∕internal∕atomic·Store8(SB), NOSPLIT, $0-9
MOVQ ptr+0(FP), BX
MOVB val+8(FP), AX
@@ -163,3 +169,19 @@ TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9
LOCK
ANDB BX, (AX)
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·Or(SB), NOSPLIT, $0-12
+ MOVQ ptr+0(FP), AX
+ MOVL val+8(FP), BX
+ LOCK
+ ORL BX, (AX)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·And(SB), NOSPLIT, $0-12
+ MOVQ ptr+0(FP), AX
+ MOVL val+8(FP), BX
+ LOCK
+ ANDL BX, (AX)
+ RET
diff --git a/src/runtime/internal/atomic/asm_arm.s b/src/runtime/internal/atomic/asm_arm.s
index d4ef11560e..274925ed60 100644
--- a/src/runtime/internal/atomic/asm_arm.s
+++ b/src/runtime/internal/atomic/asm_arm.s
@@ -3,6 +3,7 @@
// license that can be found in the LICENSE file.
#include "textflag.h"
+#include "funcdata.h"
// bool armcas(int32 *val, int32 old, int32 new)
// Atomically:
@@ -12,13 +13,13 @@
// }else
// return 0;
//
-// To implement runtime∕internal∕atomic·cas in sys_$GOOS_arm.s
+// To implement ·cas in sys_$GOOS_arm.s
// using the native instructions, use:
//
-// TEXT runtime∕internal∕atomic·cas(SB),NOSPLIT,$0
-// B runtime∕internal∕atomic·armcas(SB)
+// TEXT ·cas(SB),NOSPLIT,$0
+// B ·armcas(SB)
//
-TEXT runtime∕internal∕atomic·armcas(SB),NOSPLIT,$0-13
+TEXT ·armcas(SB),NOSPLIT,$0-13
MOVW ptr+0(FP), R1
MOVW old+4(FP), R2
MOVW new+8(FP), R3
@@ -50,44 +51,50 @@ casfail:
// stubs
-TEXT runtime∕internal∕atomic·Loadp(SB),NOSPLIT|NOFRAME,$0-8
- B runtime∕internal∕atomic·Load(SB)
+TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-8
+ B ·Load(SB)
-TEXT runtime∕internal∕atomic·LoadAcq(SB),NOSPLIT|NOFRAME,$0-8
- B runtime∕internal∕atomic·Load(SB)
+TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-8
+ B ·Load(SB)
-TEXT runtime∕internal∕atomic·Casuintptr(SB),NOSPLIT,$0-13
- B runtime∕internal∕atomic·Cas(SB)
+TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-8
+ B ·Load(SB)
-TEXT runtime∕internal∕atomic·Casp1(SB),NOSPLIT,$0-13
- B runtime∕internal∕atomic·Cas(SB)
+TEXT ·Casuintptr(SB),NOSPLIT,$0-13
+ B ·Cas(SB)
-TEXT runtime∕internal∕atomic·CasRel(SB),NOSPLIT,$0-13
- B runtime∕internal∕atomic·Cas(SB)
+TEXT ·Casp1(SB),NOSPLIT,$0-13
+ B ·Cas(SB)
-TEXT runtime∕internal∕atomic·Loaduintptr(SB),NOSPLIT,$0-8
- B runtime∕internal∕atomic·Load(SB)
+TEXT ·CasRel(SB),NOSPLIT,$0-13
+ B ·Cas(SB)
-TEXT runtime∕internal∕atomic·Loaduint(SB),NOSPLIT,$0-8
- B runtime∕internal∕atomic·Load(SB)
+TEXT ·Loaduintptr(SB),NOSPLIT,$0-8
+ B ·Load(SB)
-TEXT runtime∕internal∕atomic·Storeuintptr(SB),NOSPLIT,$0-8
- B runtime∕internal∕atomic·Store(SB)
+TEXT ·Loaduint(SB),NOSPLIT,$0-8
+ B ·Load(SB)
-TEXT runtime∕internal∕atomic·StorepNoWB(SB),NOSPLIT,$0-8
- B runtime∕internal∕atomic·Store(SB)
+TEXT ·Storeuintptr(SB),NOSPLIT,$0-8
+ B ·Store(SB)
-TEXT runtime∕internal∕atomic·StoreRel(SB),NOSPLIT,$0-8
- B runtime∕internal∕atomic·Store(SB)
+TEXT ·StorepNoWB(SB),NOSPLIT,$0-8
+ B ·Store(SB)
-TEXT runtime∕internal∕atomic·Xadduintptr(SB),NOSPLIT,$0-12
- B runtime∕internal∕atomic·Xadd(SB)
+TEXT ·StoreRel(SB),NOSPLIT,$0-8
+ B ·Store(SB)
-TEXT runtime∕internal∕atomic·Loadint64(SB),NOSPLIT,$0-12
- B runtime∕internal∕atomic·Load64(SB)
+TEXT ·StoreReluintptr(SB),NOSPLIT,$0-8
+ B ·Store(SB)
-TEXT runtime∕internal∕atomic·Xaddint64(SB),NOSPLIT,$0-20
- B runtime∕internal∕atomic·Xadd64(SB)
+TEXT ·Xadduintptr(SB),NOSPLIT,$0-12
+ B ·Xadd(SB)
+
+TEXT ·Loadint64(SB),NOSPLIT,$0-12
+ B ·Load64(SB)
+
+TEXT ·Xaddint64(SB),NOSPLIT,$0-20
+ B ·Xadd64(SB)
// 64-bit atomics
// The native ARM implementations use LDREXD/STREXD, which are
@@ -95,12 +102,8 @@ TEXT runtime∕internal∕atomic·Xaddint64(SB),NOSPLIT,$0-20
// On older ARM, we use Go implementations which simulate 64-bit
// atomics with locks.
-TEXT armCas64<>(SB),NOSPLIT,$0-21
- MOVW addr+0(FP), R1
- // make unaligned atomic access panic
- AND.S $7, R1, R2
- BEQ 2(PC)
- MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+TEXT armCas64<>(SB),NOSPLIT,$0-21
+ // addr is already in R1
MOVW old_lo+4(FP), R2
MOVW old_hi+8(FP), R3
MOVW new_lo+12(FP), R4
@@ -128,12 +131,8 @@ cas64fail:
MOVBU R0, swapped+20(FP)
RET
-TEXT armXadd64<>(SB),NOSPLIT,$0-20
- MOVW addr+0(FP), R1
- // make unaligned atomic access panic
- AND.S $7, R1, R2
- BEQ 2(PC)
- MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+TEXT armXadd64<>(SB),NOSPLIT,$0-20
+ // addr is already in R1
MOVW delta_lo+4(FP), R2
MOVW delta_hi+8(FP), R3
@@ -154,12 +153,8 @@ add64loop:
MOVW R5, new_hi+16(FP)
RET
-TEXT armXchg64<>(SB),NOSPLIT,$0-20
- MOVW addr+0(FP), R1
- // make unaligned atomic access panic
- AND.S $7, R1, R2
- BEQ 2(PC)
- MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+TEXT armXchg64<>(SB),NOSPLIT,$0-20
+ // addr is already in R1
MOVW new_lo+4(FP), R2
MOVW new_hi+8(FP), R3
@@ -178,12 +173,8 @@ swap64loop:
MOVW R5, old_hi+16(FP)
RET
-TEXT armLoad64<>(SB),NOSPLIT,$0-12
- MOVW addr+0(FP), R1
- // make unaligned atomic access panic
- AND.S $7, R1, R2
- BEQ 2(PC)
- MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+TEXT armLoad64<>(SB),NOSPLIT,$0-12
+ // addr is already in R1
LDREXD (R1), R2 // loads R2 and R3
DMB MB_ISH
@@ -192,12 +183,8 @@ TEXT armLoad64<>(SB),NOSPLIT,$0-12
MOVW R3, val_hi+8(FP)
RET
-TEXT armStore64<>(SB),NOSPLIT,$0-12
- MOVW addr+0(FP), R1
- // make unaligned atomic access panic
- AND.S $7, R1, R2
- BEQ 2(PC)
- MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+TEXT armStore64<>(SB),NOSPLIT,$0-12
+ // addr is already in R1
MOVW val_lo+4(FP), R2
MOVW val_hi+8(FP), R3
@@ -213,35 +200,83 @@ store64loop:
DMB MB_ISH
RET
-TEXT ·Cas64(SB),NOSPLIT,$0-21
+// The following functions all panic if their address argument isn't
+// 8-byte aligned. Since we're calling back into Go code to do this,
+// we have to cooperate with stack unwinding. In the normal case, the
+// functions tail-call into the appropriate implementation, which
+// means they must not open a frame. Hence, when they go down the
+// panic path, at that point they push the LR to create a real frame
+// (they don't need to pop it because panic won't return).
+
+TEXT ·Cas64(SB),NOSPLIT,$-4-21
+ NO_LOCAL_POINTERS
+ MOVW addr+0(FP), R1
+ // make unaligned atomic access panic
+ AND.S $7, R1, R2
+ BEQ 3(PC)
+ MOVW.W R14, -4(R13) // prepare a real frame
+ BL ·panicUnaligned(SB)
+
MOVB runtime·goarm(SB), R11
CMP $7, R11
BLT 2(PC)
JMP armCas64<>(SB)
JMP ·goCas64(SB)
-TEXT ·Xadd64(SB),NOSPLIT,$0-20
+TEXT ·Xadd64(SB),NOSPLIT,$-4-20
+ NO_LOCAL_POINTERS
+ MOVW addr+0(FP), R1
+ // make unaligned atomic access panic
+ AND.S $7, R1, R2
+ BEQ 3(PC)
+ MOVW.W R14, -4(R13) // prepare a real frame
+ BL ·panicUnaligned(SB)
+
MOVB runtime·goarm(SB), R11
CMP $7, R11
BLT 2(PC)
JMP armXadd64<>(SB)
JMP ·goXadd64(SB)
-TEXT ·Xchg64(SB),NOSPLIT,$0-20
+TEXT ·Xchg64(SB),NOSPLIT,$-4-20
+ NO_LOCAL_POINTERS
+ MOVW addr+0(FP), R1
+ // make unaligned atomic access panic
+ AND.S $7, R1, R2
+ BEQ 3(PC)
+ MOVW.W R14, -4(R13) // prepare a real frame
+ BL ·panicUnaligned(SB)
+
MOVB runtime·goarm(SB), R11
CMP $7, R11
BLT 2(PC)
JMP armXchg64<>(SB)
JMP ·goXchg64(SB)
-TEXT ·Load64(SB),NOSPLIT,$0-12
+TEXT ·Load64(SB),NOSPLIT,$-4-12
+ NO_LOCAL_POINTERS
+ MOVW addr+0(FP), R1
+ // make unaligned atomic access panic
+ AND.S $7, R1, R2
+ BEQ 3(PC)
+ MOVW.W R14, -4(R13) // prepare a real frame
+ BL ·panicUnaligned(SB)
+
MOVB runtime·goarm(SB), R11
CMP $7, R11
BLT 2(PC)
JMP armLoad64<>(SB)
JMP ·goLoad64(SB)
-TEXT ·Store64(SB),NOSPLIT,$0-12
+TEXT ·Store64(SB),NOSPLIT,$-4-12
+ NO_LOCAL_POINTERS
+ MOVW addr+0(FP), R1
+ // make unaligned atomic access panic
+ AND.S $7, R1, R2
+ BEQ 3(PC)
+ MOVW.W R14, -4(R13) // prepare a real frame
+ BL ·panicUnaligned(SB)
+
MOVB runtime·goarm(SB), R11
CMP $7, R11
BLT 2(PC)
JMP armStore64<>(SB)
JMP ·goStore64(SB)
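
In Go terms, the alignment guard each of these ARM stubs now runs before
dispatching is equivalent to the check already used by the Go fallbacks in
atomic_arm.go (a sketch; checkAligned is an illustrative name, not part of
the patch):

	// checkAligned panics the same way the assembly panic path does:
	// any *uint64 that is not 8-byte aligned trips panicUnaligned.
	func checkAligned(addr *uint64) {
		if uintptr(unsafe.Pointer(addr))&7 != 0 {
			panicUnaligned() // defined in the new unaligned.go below
		}
	}
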
diff --git a/src/runtime/internal/atomic/asm_mips64x.s b/src/runtime/internal/atomic/asm_mips64x.s
index 3290fb726a..a515683ebb 100644
--- a/src/runtime/internal/atomic/asm_mips64x.s
+++ b/src/runtime/internal/atomic/asm_mips64x.s
@@ -158,6 +158,12 @@ TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
TEXT ·StoreRel(SB), NOSPLIT, $0-12
JMP ·Store(SB)
+TEXT ·StoreRel64(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
TEXT ·Store(SB), NOSPLIT, $0-12
MOVV ptr+0(FP), R1
MOVW val+8(FP), R2
@@ -237,3 +243,29 @@ TEXT ·And8(SB), NOSPLIT, $0-9
BEQ R4, -4(PC)
SYNC
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVV ptr+0(FP), R1
+ MOVW val+8(FP), R2
+
+ SYNC
+ LL (R1), R3
+ OR R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVV ptr+0(FP), R1
+ MOVW val+8(FP), R2
+
+ SYNC
+ LL (R1), R3
+ AND R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
diff --git a/src/runtime/internal/atomic/asm_mipsx.s b/src/runtime/internal/atomic/asm_mipsx.s
index 62811a6599..2b2cfabe08 100644
--- a/src/runtime/internal/atomic/asm_mipsx.s
+++ b/src/runtime/internal/atomic/asm_mipsx.s
@@ -122,6 +122,9 @@ TEXT ·StorepNoWB(SB),NOSPLIT,$0-8
TEXT ·StoreRel(SB),NOSPLIT,$0-8
JMP ·Store(SB)
+TEXT ·StoreReluintptr(SB),NOSPLIT,$0-8
+ JMP ·Store(SB)
+
// void Or8(byte volatile*, byte);
TEXT ·Or8(SB),NOSPLIT,$0-5
MOVW ptr+0(FP), R1
@@ -169,3 +172,29 @@ try_and8:
BEQ R4, try_and8
SYNC
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+ MOVW ptr+0(FP), R1
+ MOVW val+4(FP), R2
+
+ SYNC
+ LL (R1), R3
+ OR R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+ MOVW ptr+0(FP), R1
+ MOVW val+4(FP), R2
+
+ SYNC
+ LL (R1), R3
+ AND R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
diff --git a/src/runtime/internal/atomic/asm_ppc64x.s b/src/runtime/internal/atomic/asm_ppc64x.s
index 06dc931bf4..bb009ab34d 100644
--- a/src/runtime/internal/atomic/asm_ppc64x.s
+++ b/src/runtime/internal/atomic/asm_ppc64x.s
@@ -83,12 +83,18 @@ TEXT runtime∕internal∕atomic·Casuintptr(SB), NOSPLIT, $0-25
TEXT runtime∕internal∕atomic·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
BR runtime∕internal∕atomic·Load64(SB)
+TEXT runtime∕internal∕atomic·LoadAcquintptr(SB), NOSPLIT|NOFRAME, $0-16
+ BR runtime∕internal∕atomic·LoadAcq64(SB)
+
TEXT runtime∕internal∕atomic·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
BR runtime∕internal∕atomic·Load64(SB)
TEXT runtime∕internal∕atomic·Storeuintptr(SB), NOSPLIT, $0-16
BR runtime∕internal∕atomic·Store64(SB)
+TEXT runtime∕internal∕atomic·StoreReluintptr(SB), NOSPLIT, $0-16
+ BR runtime∕internal∕atomic·StoreRel64(SB)
+
TEXT runtime∕internal∕atomic·Xadduintptr(SB), NOSPLIT, $0-24
BR runtime∕internal∕atomic·Xadd64(SB)
@@ -191,6 +197,13 @@ TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12
MOVW R4, 0(R3)
RET
+TEXT runtime∕internal∕atomic·StoreRel64(SB), NOSPLIT, $0-16
+ MOVD ptr+0(FP), R3
+ MOVD val+8(FP), R4
+ LWSYNC
+ MOVD R4, 0(R3)
+ RET
+
// void runtime∕internal∕atomic·Or8(byte volatile*, byte);
TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-9
MOVD ptr+0(FP), R3
@@ -209,8 +222,32 @@ TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9
MOVBZ val+8(FP), R4
LWSYNC
again:
- LBAR (R3),R6
- AND R4,R6
- STBCCC R6,(R3)
+ LBAR (R3), R6
+ AND R4, R6
+ STBCCC R6, (R3)
+ BNE again
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·Or(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LWSYNC
+again:
+ LWAR (R3), R6
+ OR R4, R6
+ STWCCC R6, (R3)
+ BNE again
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·And(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LWSYNC
+again:
+ LWAR (R3), R6
+ AND R4, R6
+ STWCCC R6, (R3)
BNE again
RET
diff --git a/src/runtime/internal/atomic/asm_s390x.s b/src/runtime/internal/atomic/asm_s390x.s
index 9a19bc0ece..daf1f3cc9f 100644
--- a/src/runtime/internal/atomic/asm_s390x.s
+++ b/src/runtime/internal/atomic/asm_s390x.s
@@ -174,8 +174,8 @@ TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
// func Or8(addr *uint8, v uint8)
TEXT ·Or8(SB), NOSPLIT, $0-9
- MOVD ptr+0(FP), R3
- MOVBZ val+8(FP), R4
+ MOVD ptr+0(FP), R3
+ MOVBZ val+8(FP), R4
// We don't have atomic operations that work on individual bytes so we
// need to align addr down to a word boundary and create a mask
// containing v to OR with the entire word atomically.
@@ -188,8 +188,8 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
// func And8(addr *uint8, v uint8)
TEXT ·And8(SB), NOSPLIT, $0-9
- MOVD ptr+0(FP), R3
- MOVBZ val+8(FP), R4
+ MOVD ptr+0(FP), R3
+ MOVBZ val+8(FP), R4
// We don't have atomic operations that work on individual bytes so we
// need to align addr down to a word boundary and create a mask
// containing v to AND with the entire word atomically.
@@ -200,3 +200,17 @@ TEXT ·And8(SB), NOSPLIT, $0-9
RLL R5, R4, R4 // R4 = rotl(R4, R5)
LAN R4, R6, 0(R3) // R6 = *R3; *R3 &= R4; (atomic)
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LAO R4, R6, 0(R3) // R6 = *R3; *R3 |= R4; (atomic)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LAN R4, R6, 0(R3) // R6 = *R3; *R3 &= R4; (atomic)
+ RET
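
The comments in Or8/And8 above describe the word-masking technique; roughly, for
the OR case in Go (or8ViaWord is an illustrative name; the shift uses big-endian
byte numbering as on s390x):

	// or8ViaWord ORs v into a single byte by widening it into a
	// word-sized mask and applying the 32-bit atomic OR to the word
	// containing that byte. The other three byte lanes of the mask
	// are zero, so the OR leaves them unchanged.
	func or8ViaWord(addr *uint8, v uint8) {
		a := uintptr(unsafe.Pointer(addr))
		word := (*uint32)(unsafe.Pointer(a &^ 3)) // round down to word
		shift := (3 - a&3) * 8                    // big-endian byte position
		Or(word, uint32(v)<<shift)
	}

And8 needs the complementary mask, with all-ones in the other byte lanes, so
that the AND leaves those lanes unchanged.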
diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go
index 8d002ebfe3..1bfcb1143d 100644
--- a/src/runtime/internal/atomic/atomic_386.go
+++ b/src/runtime/internal/atomic/atomic_386.go
@@ -30,6 +30,12 @@ func LoadAcq(ptr *uint32) uint32 {
return *ptr
}
+//go:nosplit
+//go:noinline
+func LoadAcquintptr(ptr *uintptr) uintptr {
+ return *ptr
+}
+
//go:noescape
func Xadd64(ptr *uint64, delta int64) uint64
@@ -63,6 +69,12 @@ func And8(ptr *uint8, val uint8)
//go:noescape
func Or8(ptr *uint8, val uint8)
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape
@@ -83,5 +95,8 @@ func Store64(ptr *uint64, val uint64)
//go:noescape
func StoreRel(ptr *uint32, val uint32)
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
+
// NO go:noescape annotation; see atomic_pointer.go.
func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_amd64.go b/src/runtime/internal/atomic/atomic_amd64.go
index 14b8101720..e36eb83a11 100644
--- a/src/runtime/internal/atomic/atomic_amd64.go
+++ b/src/runtime/internal/atomic/atomic_amd64.go
@@ -35,6 +35,18 @@ func LoadAcq(ptr *uint32) uint32 {
return *ptr
}
+//go:nosplit
+//go:noinline
+func LoadAcq64(ptr *uint64) uint64 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcquintptr(ptr *uintptr) uintptr {
+ return *ptr
+}
+
//go:noescape
func Xadd(ptr *uint32, delta int32) uint32
@@ -65,6 +77,12 @@ func And8(ptr *uint8, val uint8)
//go:noescape
func Or8(ptr *uint8, val uint8)
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape
@@ -85,6 +103,12 @@ func Store64(ptr *uint64, val uint64)
//go:noescape
func StoreRel(ptr *uint32, val uint32)
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
+
// StorepNoWB performs *ptr = val atomically and without a write
// barrier.
//
diff --git a/src/runtime/internal/atomic/atomic_arm.go b/src/runtime/internal/atomic/atomic_arm.go
index 95713afcc1..546b3d6120 100644
--- a/src/runtime/internal/atomic/atomic_arm.go
+++ b/src/runtime/internal/atomic/atomic_arm.go
@@ -81,6 +81,9 @@ func Store(addr *uint32, v uint32)
//go:noescape
func StoreRel(addr *uint32, v uint32)
+//go:noescape
+func StoreReluintptr(addr *uintptr, v uintptr)
+
//go:nosplit
func goCas64(addr *uint64, old, new uint64) bool {
if uintptr(unsafe.Pointer(addr))&7 != 0 {
@@ -180,6 +183,26 @@ func And8(addr *uint8, v uint8) {
}
//go:nosplit
+func Or(addr *uint32, v uint32) {
+ for {
+ old := *addr
+ if Cas(addr, old, old|v) {
+ return
+ }
+ }
+}
+
+//go:nosplit
+func And(addr *uint32, v uint32) {
+ for {
+ old := *addr
+ if Cas(addr, old, old&v) {
+ return
+ }
+ }
+}
+
+//go:nosplit
func armcas(ptr *uint32, old, new uint32) bool
//go:noescape
@@ -195,6 +218,9 @@ func Load8(addr *uint8) uint8
func LoadAcq(addr *uint32) uint32
//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
func Cas64(addr *uint64, old, new uint64) bool
//go:noescape
diff --git a/src/runtime/internal/atomic/atomic_arm64.go b/src/runtime/internal/atomic/atomic_arm64.go
index 26ca94d54c..d49bee8936 100644
--- a/src/runtime/internal/atomic/atomic_arm64.go
+++ b/src/runtime/internal/atomic/atomic_arm64.go
@@ -42,12 +42,24 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer
func LoadAcq(addr *uint32) uint32
//go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
func Or8(ptr *uint8, val uint8)
//go:noescape
func And8(ptr *uint8, val uint8)
//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
func Cas64(ptr *uint64, old, new uint64) bool
//go:noescape
@@ -67,3 +79,9 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
//go:noescape
func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
diff --git a/src/runtime/internal/atomic/atomic_arm64.s b/src/runtime/internal/atomic/atomic_arm64.s
index a2eb7568d2..0cf3c40223 100644
--- a/src/runtime/internal/atomic/atomic_arm64.s
+++ b/src/runtime/internal/atomic/atomic_arm64.s
@@ -36,12 +36,26 @@ TEXT ·Loadp(SB),NOSPLIT,$0-16
TEXT ·LoadAcq(SB),NOSPLIT,$0-12
B ·Load(SB)
+// uint64 runtime∕internal∕atomic·LoadAcq64(uint64 volatile* addr)
+TEXT ·LoadAcq64(SB),NOSPLIT,$0-16
+ B ·Load64(SB)
+
+// uintptr runtime∕internal∕atomic·LoadAcquintptr(uintptr volatile* addr)
+TEXT ·LoadAcquintptr(SB),NOSPLIT,$0-16
+ B ·Load64(SB)
+
TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-16
B runtime∕internal∕atomic·Store64(SB)
TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12
B runtime∕internal∕atomic·Store(SB)
+TEXT runtime∕internal∕atomic·StoreRel64(SB), NOSPLIT, $0-16
+ B runtime∕internal∕atomic·Store64(SB)
+
+TEXT runtime∕internal∕atomic·StoreReluintptr(SB), NOSPLIT, $0-16
+ B runtime∕internal∕atomic·Store64(SB)
+
TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-12
MOVD ptr+0(FP), R0
MOVW val+8(FP), R1
@@ -150,3 +164,22 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
CBNZ R3, -3(PC)
RET
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R0
+ MOVW val+8(FP), R1
+ LDAXRW (R0), R2
+ AND R1, R2
+ STLXRW R2, (R0), R3
+ CBNZ R3, -3(PC)
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R0
+ MOVW val+8(FP), R1
+ LDAXRW (R0), R2
+ ORR R1, R2
+ STLXRW R2, (R0), R3
+ CBNZ R3, -3(PC)
+ RET
diff --git a/src/runtime/internal/atomic/atomic_mips64x.go b/src/runtime/internal/atomic/atomic_mips64x.go
index 1d9977850b..b0109d72b0 100644
--- a/src/runtime/internal/atomic/atomic_mips64x.go
+++ b/src/runtime/internal/atomic/atomic_mips64x.go
@@ -42,6 +42,12 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer
func LoadAcq(ptr *uint32) uint32
//go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
func And8(ptr *uint8, val uint8)
//go:noescape
@@ -50,6 +56,12 @@ func Or8(ptr *uint8, val uint8)
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
func Cas64(ptr *uint64, old, new uint64) bool
//go:noescape
@@ -69,3 +81,9 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
//go:noescape
func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
diff --git a/src/runtime/internal/atomic/atomic_mips64x.s b/src/runtime/internal/atomic/atomic_mips64x.s
index 1ed90937c9..125c0c221c 100644
--- a/src/runtime/internal/atomic/atomic_mips64x.s
+++ b/src/runtime/internal/atomic/atomic_mips64x.s
@@ -47,3 +47,11 @@ TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-16
// uint32 runtime∕internal∕atomic·LoadAcq(uint32 volatile* ptr)
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
JMP atomic·Load(SB)
+
+// uint64 runtime∕internal∕atomic·LoadAcq64(uint64 volatile* ptr)
+TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16
+ JMP atomic·Load64(SB)
+
+// uintptr runtime∕internal∕atomic·LoadAcquintptr(uintptr volatile* ptr)
+TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16
+ JMP atomic·Load64(SB)
diff --git a/src/runtime/internal/atomic/atomic_mipsx.go b/src/runtime/internal/atomic/atomic_mipsx.go
index 0e2d77ade1..1336b50121 100644
--- a/src/runtime/internal/atomic/atomic_mipsx.go
+++ b/src/runtime/internal/atomic/atomic_mipsx.go
@@ -34,7 +34,7 @@ func spinUnlock(state *uint32)
func lockAndCheck(addr *uint64) {
// ensure 8-byte alignment
if uintptr(unsafe.Pointer(addr))&7 != 0 {
- addr = nil
+ panicUnaligned()
}
// force dereference before taking lock
_ = *addr
@@ -133,12 +133,21 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer
func LoadAcq(ptr *uint32) uint32
//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
func And8(ptr *uint8, val uint8)
//go:noescape
func Or8(ptr *uint8, val uint8)
//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
func Store(ptr *uint32, val uint32)
//go:noescape
@@ -151,4 +160,7 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
func StoreRel(ptr *uint32, val uint32)
//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
+
+//go:noescape
func CasRel(addr *uint32, old, new uint32) bool
diff --git a/src/runtime/internal/atomic/atomic_ppc64x.go b/src/runtime/internal/atomic/atomic_ppc64x.go
index a48ecf5ee8..e4b109f0ec 100644
--- a/src/runtime/internal/atomic/atomic_ppc64x.go
+++ b/src/runtime/internal/atomic/atomic_ppc64x.go
@@ -42,6 +42,12 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer
func LoadAcq(ptr *uint32) uint32
//go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
func And8(ptr *uint8, val uint8)
//go:noescape
@@ -50,6 +56,12 @@ func Or8(ptr *uint8, val uint8)
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
func Cas64(ptr *uint64, old, new uint64) bool
//go:noescape
@@ -67,5 +79,11 @@ func Store64(ptr *uint64, val uint64)
//go:noescape
func StoreRel(ptr *uint32, val uint32)
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
+
// NO go:noescape annotation; see atomic_pointer.go.
func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/internal/atomic/atomic_ppc64x.s b/src/runtime/internal/atomic/atomic_ppc64x.s
index c2f696fb34..b79cdbca34 100644
--- a/src/runtime/internal/atomic/atomic_ppc64x.s
+++ b/src/runtime/internal/atomic/atomic_ppc64x.s
@@ -6,6 +6,15 @@
#include "textflag.h"
+
+// For more details about how various memory models are
+// enforced on POWER, the following paper describes how
+// C/C++-like models are implemented there; it gives context
+// for why the strange looking code sequences below work.
+//
+// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+
// uint32 runtime∕internal∕atomic·Load(uint32 volatile* ptr)
TEXT ·Load(SB),NOSPLIT|NOFRAME,$-8-12
MOVD ptr+0(FP), R3
@@ -56,5 +65,16 @@ TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12
MOVWZ 0(R3), R3
CMPW R3, R3, CR7
BC 4, 30, 1(PC) // bne- cr7, 0x4
+ ISYNC
MOVW R3, ret+8(FP)
RET
+
+// uint64 runtime∕internal∕atomic·LoadAcq64(uint64 volatile* ptr)
+TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
+ MOVD ptr+0(FP), R3
+ MOVD 0(R3), R3
+ CMP R3, R3, CR7
+ BC 4, 30, 1(PC) // bne- cr7, 0x4
+ ISYNC
+ MOVD R3, ret+8(FP)
+ RET
diff --git a/src/runtime/internal/atomic/atomic_riscv64.go b/src/runtime/internal/atomic/atomic_riscv64.go
index d52512369e..8f24d61625 100644
--- a/src/runtime/internal/atomic/atomic_riscv64.go
+++ b/src/runtime/internal/atomic/atomic_riscv64.go
@@ -40,12 +40,24 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer
func LoadAcq(ptr *uint32) uint32
//go:noescape
+func LoadAcq64(ptr *uint64) uint64
+
+//go:noescape
+func LoadAcquintptr(ptr *uintptr) uintptr
+
+//go:noescape
func Or8(ptr *uint8, val uint8)
//go:noescape
func And8(ptr *uint8, val uint8)
//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
func Cas64(ptr *uint64, old, new uint64) bool
//go:noescape
@@ -65,3 +77,9 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
//go:noescape
func StoreRel(ptr *uint32, val uint32)
+
+//go:noescape
+func StoreRel64(ptr *uint64, val uint64)
+
+//go:noescape
+func StoreReluintptr(ptr *uintptr, val uintptr)
diff --git a/src/runtime/internal/atomic/atomic_riscv64.s b/src/runtime/internal/atomic/atomic_riscv64.s
index d005325ca3..74c896cea6 100644
--- a/src/runtime/internal/atomic/atomic_riscv64.s
+++ b/src/runtime/internal/atomic/atomic_riscv64.s
@@ -150,6 +150,12 @@ TEXT ·Xaddint64(SB),NOSPLIT,$0-24
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
JMP ·Load(SB)
+TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16
+ JMP ·Load64(SB)
+
+TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16
+ JMP ·Load64(SB)
+
// func Loadp(ptr unsafe.Pointer) unsafe.Pointer
TEXT ·Loadp(SB),NOSPLIT,$0-16
JMP ·Load64(SB)
@@ -161,6 +167,12 @@ TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
TEXT ·StoreRel(SB), NOSPLIT, $0-12
JMP ·Store(SB)
+TEXT ·StoreRel64(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
+TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
+ JMP ·Store64(SB)
+
// func Xchg(ptr *uint32, new uint32) uint32
TEXT ·Xchg(SB), NOSPLIT, $0-20
MOV ptr+0(FP), A0
@@ -230,3 +242,17 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
SLL A2, A1
AMOORW A1, (A0), ZERO
RET
+
+// func And(ptr *uint32, val uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOV ptr+0(FP), A0
+ MOVW val+8(FP), A1
+ AMOANDW A1, (A0), ZERO
+ RET
+
+// func Or(ptr *uint32, val uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOV ptr+0(FP), A0
+ MOVW val+8(FP), A1
+ AMOORW A1, (A0), ZERO
+ RET
diff --git a/src/runtime/internal/atomic/atomic_s390x.go b/src/runtime/internal/atomic/atomic_s390x.go
index 4d73b39baf..a058d60102 100644
--- a/src/runtime/internal/atomic/atomic_s390x.go
+++ b/src/runtime/internal/atomic/atomic_s390x.go
@@ -41,6 +41,18 @@ func LoadAcq(ptr *uint32) uint32 {
return *ptr
}
+//go:nosplit
+//go:noinline
+func LoadAcq64(ptr *uint64) uint64 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcquintptr(ptr *uintptr) uintptr {
+ return *ptr
+}
+
//go:noescape
func Store(ptr *uint32, val uint32)
@@ -59,6 +71,18 @@ func StoreRel(ptr *uint32, val uint32) {
*ptr = val
}
+//go:nosplit
+//go:noinline
+func StoreRel64(ptr *uint64, val uint64) {
+ *ptr = val
+}
+
+//go:nosplit
+//go:noinline
+func StoreReluintptr(ptr *uintptr, val uintptr) {
+ *ptr = val
+}
+
//go:noescape
func And8(ptr *uint8, val uint8)
@@ -68,6 +92,12 @@ func Or8(ptr *uint8, val uint8)
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
func Xadd(ptr *uint32, delta int32) uint32
//go:noescape
diff --git a/src/runtime/internal/atomic/atomic_test.go b/src/runtime/internal/atomic/atomic_test.go
index b0a8fa0610..c9c2eba248 100644
--- a/src/runtime/internal/atomic/atomic_test.go
+++ b/src/runtime/internal/atomic/atomic_test.go
@@ -73,8 +73,15 @@ func TestXadduintptrOnUint64(t *testing.T) {
func shouldPanic(t *testing.T, name string, f func()) {
defer func() {
- if recover() == nil {
+ // Check that all GC maps are sane.
+ runtime.GC()
+
+ err := recover()
+ want := "unaligned 64-bit atomic operation"
+ if err == nil {
t.Errorf("%s did not panic", name)
+ } else if s, _ := err.(string); s != want {
+ t.Errorf("%s: wanted panic %q, got %q", name, want, err)
}
}()
f()
@@ -143,6 +150,45 @@ func TestAnd8(t *testing.T) {
}
}
+func TestAnd(t *testing.T) {
+ // Basic sanity check.
+ x := uint32(0xffffffff)
+ for i := uint32(0); i < 32; i++ {
+ atomic.And(&x, ^(1 << i))
+ if r := uint32(0xffffffff) << (i + 1); x != r {
+ t.Fatalf("clearing bit %#x: want %#x, got %#x", uint32(1<<i), r, x)
+ }
+ }
+
+ // Set every bit in array to 1.
+ a := make([]uint32, 1<<12)
+ for i := range a {
+ a[i] = 0xffffffff
+ }
+
+ // Clear array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 32; i++ {
+ m := ^uint32(1 << i)
+ go func() {
+ for i := range a {
+ atomic.And(&a[i], m)
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 32; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally cleared.
+ for i, v := range a {
+ if v != 0 {
+ t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint32(0), v)
+ }
+ }
+}
+
func TestOr8(t *testing.T) {
// Basic sanity check.
x := uint8(0)
@@ -179,7 +225,43 @@ func TestOr8(t *testing.T) {
}
}
-func TestBitwiseContended(t *testing.T) {
+func TestOr(t *testing.T) {
+ // Basic sanity check.
+ x := uint32(0)
+ for i := uint32(0); i < 32; i++ {
+ atomic.Or(&x, 1<<i)
+ if r := (uint32(1) << (i + 1)) - 1; x != r {
+ t.Fatalf("setting bit %#x: want %#x, got %#x", uint32(1)<<i, r, x)
+ }
+ }
+
+ // Start with every bit in array set to 0.
+ a := make([]uint32, 1<<12)
+
+ // Set every bit in array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 32; i++ {
+ m := uint32(1 << i)
+ go func() {
+ for i := range a {
+ atomic.Or(&a[i], m)
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 32; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally set.
+ for i, v := range a {
+ if v != 0xffffffff {
+ t.Fatalf("a[%v] not fully set: want %#x, got %#x", i, uint32(0xffffffff), v)
+ }
+ }
+}
+
+func TestBitwiseContended8(t *testing.T) {
// Start with every bit in array set to 0.
a := make([]uint8, 16)
@@ -221,6 +303,48 @@ func TestBitwiseContended(t *testing.T) {
}
}
+func TestBitwiseContended(t *testing.T) {
+ // Start with every bit in array set to 0.
+ a := make([]uint32, 16)
+
+ // Iterations to try.
+ N := 1 << 16
+ if testing.Short() {
+ N = 1 << 10
+ }
+
+ // Set and then clear every bit in the array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 32; i++ {
+ m := uint32(1 << i)
+ go func() {
+ for n := 0; n < N; n++ {
+ for i := range a {
+ atomic.Or(&a[i], m)
+ if atomic.Load(&a[i])&m != m {
+ t.Errorf("a[%v] bit %#x not set", i, m)
+ }
+ atomic.And(&a[i], ^m)
+ if atomic.Load(&a[i])&m != 0 {
+ t.Errorf("a[%v] bit %#x not clear", i, m)
+ }
+ }
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 32; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally cleared.
+ for i, v := range a {
+ if v != 0 {
+ t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint32(0), v)
+ }
+ }
+}
+
func TestStorepNoWB(t *testing.T) {
var p [2]*int
for i := range p {
diff --git a/src/runtime/internal/atomic/atomic_wasm.go b/src/runtime/internal/atomic/atomic_wasm.go
index 2c0c3a8174..b05d98ed51 100644
--- a/src/runtime/internal/atomic/atomic_wasm.go
+++ b/src/runtime/internal/atomic/atomic_wasm.go
@@ -47,6 +47,18 @@ func LoadAcq(ptr *uint32) uint32 {
//go:nosplit
//go:noinline
+func LoadAcq64(ptr *uint64) uint64 {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
+func LoadAcquintptr(ptr *uintptr) uintptr {
+ return *ptr
+}
+
+//go:nosplit
+//go:noinline
func Load8(ptr *uint8) uint8 {
return *ptr
}
@@ -121,6 +133,18 @@ func Or8(ptr *uint8, val uint8) {
//go:nosplit
//go:noinline
+func And(ptr *uint32, val uint32) {
+ *ptr = *ptr & val
+}
+
+//go:nosplit
+//go:noinline
+func Or(ptr *uint32, val uint32) {
+ *ptr = *ptr | val
+}
+
+//go:nosplit
+//go:noinline
func Cas64(ptr *uint64, old, new uint64) bool {
if *ptr == old {
*ptr = new
@@ -143,6 +167,18 @@ func StoreRel(ptr *uint32, val uint32) {
//go:nosplit
//go:noinline
+func StoreRel64(ptr *uint64, val uint64) {
+ *ptr = val
+}
+
+//go:nosplit
+//go:noinline
+func StoreReluintptr(ptr *uintptr, val uintptr) {
+ *ptr = val
+}
+
+//go:nosplit
+//go:noinline
func Store8(ptr *uint8, val uint8) {
*ptr = val
}
diff --git a/src/runtime/internal/atomic/bench_test.go b/src/runtime/internal/atomic/bench_test.go
index de71b0f2c7..434aa6d434 100644
--- a/src/runtime/internal/atomic/bench_test.go
+++ b/src/runtime/internal/atomic/bench_test.go
@@ -51,6 +51,14 @@ func BenchmarkAnd8(b *testing.B) {
}
}
+func BenchmarkAnd(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.And(&x[63], uint32(i))
+ }
+}
+
func BenchmarkAnd8Parallel(b *testing.B) {
var x [512]uint8 // give byte its own cache line
sink = &x
@@ -63,6 +71,18 @@ func BenchmarkAnd8Parallel(b *testing.B) {
})
}
+func BenchmarkAndParallel(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint32(0)
+ for pb.Next() {
+ atomic.And(&x[63], i)
+ i++
+ }
+ })
+}
+
func BenchmarkOr8(b *testing.B) {
var x [512]uint8 // give byte its own cache line
sink = &x
@@ -71,6 +91,14 @@ func BenchmarkOr8(b *testing.B) {
}
}
+func BenchmarkOr(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.Or(&x[63], uint32(i))
+ }
+}
+
func BenchmarkOr8Parallel(b *testing.B) {
var x [512]uint8 // give byte its own cache line
sink = &x
@@ -83,6 +111,18 @@ func BenchmarkOr8Parallel(b *testing.B) {
})
}
+func BenchmarkOrParallel(b *testing.B) {
+ var x [128]uint32 // give x its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint32(0)
+ for pb.Next() {
+ atomic.Or(&x[63], i)
+ i++
+ }
+ })
+}
+
func BenchmarkXadd(b *testing.B) {
var x uint32
ptr := &x
diff --git a/src/runtime/internal/atomic/unaligned.go b/src/runtime/internal/atomic/unaligned.go
new file mode 100644
index 0000000000..a859de4144
--- /dev/null
+++ b/src/runtime/internal/atomic/unaligned.go
@@ -0,0 +1,9 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic
+
+func panicUnaligned() {
+ panic("unaligned 64-bit atomic operation")
+}