author	Michał Derkacz <michal@Lnet.pl>	2020-06-14 00:06:24 +0200
committer	Joel Sing <joel@sing.id.au>	2020-10-28 05:02:44 +0000
commit	150d2448e5a213cd679396371c0a147918dc2125 (patch)
tree	7ae047f123b042c4bab3b1ffe27d33dd5c9c959b
parent	c95bd2e6a99ab06efadb265bf42bbaf8d964904f (diff)
cmd/compile,cmd/internal/obj/riscv,runtime: use Duff's devices on riscv64
Implement runtime.duffzero and runtime.duffcopy for riscv64.

Use obj.ADUFFZERO/obj.ADUFFCOPY for medium size, word aligned
zeroing/moving.

Change-Id: I42ec622055630c94cb77e286d8d33dbe7c9f846c
Reviewed-on: https://go-review.googlesource.com/c/go/+/237797
Run-TryBot: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Joel Sing <joel@sing.id.au>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
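
Note: a Duff's device is a fully unrolled loop with many entry points; the caller jumps partway into the body so that exactly the needed number of iterations run before the final RET, and the jump offset therefore encodes the size. A minimal Go sketch of the offset arithmetic used throughout this change (illustrative only; it assumes the 128-iteration, 8-bytes-of-code-per-iteration duffzero layout generated by mkduff.go below):

	package main

	import "fmt"

	// Each unrolled duffzero iteration is two 4-byte instructions
	// (MOV ZERO, (X10); ADD $8, X10), i.e. 8 bytes of code. Entering
	// at byte offset 8*(128-n) runs only the last n iterations before
	// the RET, zeroing exactly n*8 bytes.
	func entryOffset(size int64) int64 { return 8 * (128 - size/8) }

	func main() {
		for _, s := range []int64{16, 64, 1024} {
			fmt.Printf("Zero [%d] -> DUFFZERO [%d]\n", s, entryOffset(s))
		}
	}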
-rw-r--r--	src/cmd/compile/internal/riscv64/ggen.go	10
-rw-r--r--	src/cmd/compile/internal/riscv64/ssa.go	14
-rw-r--r--	src/cmd/compile/internal/ssa/gen/RISCV64.rules	14
-rw-r--r--	src/cmd/compile/internal/ssa/gen/RISCV64Ops.go	38
-rw-r--r--	src/cmd/compile/internal/ssa/opGen.go	28
-rw-r--r--	src/cmd/compile/internal/ssa/rewriteRISCV64.go	33
-rw-r--r--	src/cmd/internal/obj/riscv/obj.go	8
-rw-r--r--	src/runtime/duff_riscv64.s	907
-rw-r--r--	src/runtime/mkduff.go	28
9 files changed, 1076 insertions, 4 deletions
diff --git a/src/cmd/compile/internal/riscv64/ggen.go b/src/cmd/compile/internal/riscv64/ggen.go
index be31fad441..f7c03fe7c2 100644
--- a/src/cmd/compile/internal/riscv64/ggen.go
+++ b/src/cmd/compile/internal/riscv64/ggen.go
@@ -25,7 +25,15 @@ func zeroRange(pp *gc.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog {
return p
}
- // TODO(jsing): Add a duff zero implementation for medium sized ranges.
+ if cnt <= int64(128*gc.Widthptr) {
+ p = pp.Appendpp(p, riscv.AADDI, obj.TYPE_CONST, 0, off, obj.TYPE_REG, riscv.REG_A0, 0)
+ p.Reg = riscv.REG_SP
+ p = pp.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
+ p.To.Name = obj.NAME_EXTERN
+ p.To.Sym = gc.Duffzero
+ p.To.Offset = 8 * (128 - cnt/int64(gc.Widthptr))
+ return p
+ }
// Loop, zeroing pointer width bytes at a time.
// ADD $(off), SP, T0
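
Note: for a hypothetical off=8, cnt=64 (values chosen only for illustration), the two Progs appended above correspond roughly to the following assembler output; the exact printed syntax is a sketch, not compiler output:

	ADDI	$8, SP, A0			// A0 = SP + off, first byte of the range
	DUFFZERO	runtime.duffzero+960(SB)	// 8 * (128 - 64/8) = 960: run the last 8 iterations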
diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go
index 064a1ca111..0beb5b4bd1 100644
--- a/src/cmd/compile/internal/riscv64/ssa.go
+++ b/src/cmd/compile/internal/riscv64/ssa.go
@@ -608,6 +608,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
+ case ssa.OpRISCV64DUFFZERO:
+ p := s.Prog(obj.ADUFFZERO)
+ p.To.Type = obj.TYPE_MEM
+ p.To.Name = obj.NAME_EXTERN
+ p.To.Sym = gc.Duffzero
+ p.To.Offset = v.AuxInt
+
+ case ssa.OpRISCV64DUFFCOPY:
+ p := s.Prog(obj.ADUFFCOPY)
+ p.To.Type = obj.TYPE_MEM
+ p.To.Name = obj.NAME_EXTERN
+ p.To.Sym = gc.Duffcopy
+ p.To.Offset = v.AuxInt
+
default:
v.Fatalf("Unhandled op %v", v.Op)
}
diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64.rules b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
index 3bc2e8498a..325cbeb825 100644
--- a/src/cmd/compile/internal/ssa/gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
@@ -360,6 +360,13 @@
(Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst) mem)
(Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst) mem)
+// Medium zeroing uses a Duff's device
+// 8 and 128 are magic constants, see runtime/mkduff.go
+(Zero [s] {t} ptr mem)
+ && s%8 == 0 && s >= 16 && s <= 8*128
+ && t.Alignment()%8 == 0 && !config.noDuffDevice =>
+ (DUFFZERO [8 * (128 - s/8)] ptr mem)
+
// Generic zeroing uses a loop
(Zero [s] {t} ptr mem) =>
(LoweredZero [t.Alignment()]
@@ -395,6 +402,13 @@
(Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem)
(Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem)
+// Medium move uses a Duff's device
+// 16 and 128 are magic constants, see runtime/mkduff.go
+(Move [s] {t} dst src mem)
+ && s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0
+ && !config.noDuffDevice && logLargeCopy(v, s) =>
+ (DUFFCOPY [16 * (128 - s/8)] dst src mem)
+
// Generic move uses a loop
(Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) =>
(LoweredMove [t.Alignment()]
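
Note on the multipliers (8 for DUFFZERO, 16 for DUFFCOPY): every instruction in the generated devices is 4 bytes, a duffzero iteration is two instructions (store, advance) and a duffcopy iteration is four (load, advance src, store, advance dst). A minimal sketch of the entry offsets these rules compute, assuming those per-iteration code sizes:

	// Entry offsets selected by the two rules above for an s-byte
	// operation (s%8 == 0, 16 <= s <= 1024).
	func duffEntryOffsets(s int64) (zero, cp int64) {
		iters := s / 8           // 8 bytes of memory handled per iteration
		zero = 8 * (128 - iters) // duffzero: 8 bytes of code per iteration
		cp = 16 * (128 - iters)  // duffcopy: 16 bytes of code per iteration
		return
	}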
diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go
index ebd515b7fc..f64319230b 100644
--- a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go
@@ -240,6 +240,44 @@ func init() {
{name: "CALLclosure", argLength: 3, reg: callClosure, aux: "CallOff", call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
{name: "CALLinter", argLength: 2, reg: callInter, aux: "CallOff", call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem
+ // duffzero
+ // arg0 = address of memory to zero (in X10, changed as side effect)
+ // arg1 = mem
+ // auxint = offset into duffzero code to start executing
+ // X1 (link register) changed because of function call
+ // returns mem
+ {
+ name: "DUFFZERO",
+ aux: "Int64",
+ argLength: 2,
+ reg: regInfo{
+ inputs: []regMask{regNamed["X10"]},
+ clobbers: regNamed["X1"] | regNamed["X10"],
+ },
+ typ: "Mem",
+ faultOnNilArg0: true,
+ },
+
+ // duffcopy
+ // arg0 = address of dst memory (in X11, changed as side effect)
+ // arg1 = address of src memory (in X10, changed as side effect)
+ // arg2 = mem
+ // auxint = offset into duffcopy code to start executing
+ // X1 (link register) changed because of function call
+ // returns mem
+ {
+ name: "DUFFCOPY",
+ aux: "Int64",
+ argLength: 3,
+ reg: regInfo{
+ inputs: []regMask{regNamed["X11"], regNamed["X10"]},
+ clobbers: regNamed["X1"] | regNamed["X10"] | regNamed["X11"],
+ },
+ typ: "Mem",
+ faultOnNilArg0: true,
+ faultOnNilArg1: true,
+ },
+
// Generic moves and zeros
// general unaligned zeroing
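
Note: declaring X1 clobbered matters because the Duff entry points are reached via a call that writes the return address to the link register; without it, the register allocator could keep a live value in X1 across the DUFFZERO/DUFFCOPY and have it silently destroyed.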
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index bb1cbc0baa..96aa3adedd 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -2111,6 +2111,8 @@ const (
OpRISCV64CALLstatic
OpRISCV64CALLclosure
OpRISCV64CALLinter
+ OpRISCV64DUFFZERO
+ OpRISCV64DUFFCOPY
OpRISCV64LoweredZero
OpRISCV64LoweredMove
OpRISCV64LoweredAtomicLoad8
@@ -28163,6 +28165,32 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "DUFFZERO",
+ auxType: auxInt64,
+ argLen: 2,
+ faultOnNilArg0: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 512}, // X10
+ },
+ clobbers: 512, // X10
+ },
+ },
+ {
+ name: "DUFFCOPY",
+ auxType: auxInt64,
+ argLen: 3,
+ faultOnNilArg0: true,
+ faultOnNilArg1: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1024}, // X11
+ {1, 512}, // X10
+ },
+ clobbers: 1536, // X10 X11
+ },
+ },
+ {
name: "LoweredZero",
auxType: auxInt64,
argLen: 3,
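
Note: the generated masks decompose into single register bits; a sketch of the arithmetic, using the bit positions implied by the annotations above (512 is X10, 1024 is X11):

	const (
		maskX10 = 1 << 9  // 512, the "// X10" annotation above
		maskX11 = 1 << 10 // 1024, the "// X11" annotation
	)

	// DUFFCOPY clobbers both pointer registers, so its clobber mask
	// is the union of the two bits.
	const duffcopyClobbers = maskX10 | maskX11 // 1536, printed as "X10 X11"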
diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
index ac92945753..c337ffbfe7 100644
--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go
+++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
@@ -2017,6 +2017,23 @@ func rewriteValueRISCV64_OpMove(v *Value) bool {
return true
}
// match: (Move [s] {t} dst src mem)
+ // cond: s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s)
+ // result: (DUFFCOPY [16 * (128 - s/8)] dst src mem)
+ for {
+ s := auxIntToInt64(v.AuxInt)
+ t := auxToType(v.Aux)
+ dst := v_0
+ src := v_1
+ mem := v_2
+ if !(s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s)) {
+ break
+ }
+ v.reset(OpRISCV64DUFFCOPY)
+ v.AuxInt = int64ToAuxInt(16 * (128 - s/8))
+ v.AddArg3(dst, src, mem)
+ return true
+ }
+ // match: (Move [s] {t} dst src mem)
// cond: (s <= 16 || logLargeCopy(v, s))
// result: (LoweredMove [t.Alignment()] dst src (ADDI <src.Type> [s-moveSize(t.Alignment(), config)] src) mem)
for {
@@ -5650,6 +5667,22 @@ func rewriteValueRISCV64_OpZero(v *Value) bool {
return true
}
// match: (Zero [s] {t} ptr mem)
+ // cond: s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice
+ // result: (DUFFZERO [8 * (128 - s/8)] ptr mem)
+ for {
+ s := auxIntToInt64(v.AuxInt)
+ t := auxToType(v.Aux)
+ ptr := v_0
+ mem := v_1
+ if !(s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice) {
+ break
+ }
+ v.reset(OpRISCV64DUFFZERO)
+ v.AuxInt = int64ToAuxInt(8 * (128 - s/8))
+ v.AddArg2(ptr, mem)
+ return true
+ }
+ // match: (Zero [s] {t} ptr mem)
// result: (LoweredZero [t.Alignment()] ptr (ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.Alignment(), config)])) mem)
for {
s := auxIntToInt64(v.AuxInt)
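
Worked example: an 8-byte-aligned (Zero [64] ...) satisfies the condition above and is rewritten to (DUFFZERO [960] ptr mem), since 8 * (128 - 64/8) = 960; the matching (Move [64] ...) becomes (DUFFCOPY [1920] dst src mem), since 16 * (128 - 64/8) = 1920.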
diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go
index da49f67138..5301e44002 100644
--- a/src/cmd/internal/obj/riscv/obj.go
+++ b/src/cmd/internal/obj/riscv/obj.go
@@ -33,7 +33,7 @@ func buildop(ctxt *obj.Link) {}
// lr is the link register to use for the JALR.
// p must be a CALL, JMP or RET.
func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *obj.Prog {
- if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET {
+ if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET && p.As != obj.ADUFFZERO && p.As != obj.ADUFFCOPY {
ctxt.Diag("unexpected Prog in jalrToSym: %v", p)
return p
}
@@ -417,7 +417,7 @@ func containsCall(sym *obj.LSym) bool {
// CALLs are CALL or JAL(R) with link register LR.
for p := sym.Func().Text; p != nil; p = p.Link {
switch p.As {
- case obj.ACALL:
+ case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
return true
case AJAL, AJALR:
if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR {
@@ -656,7 +656,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p.From.Reg = REG_SP
}
- case obj.ACALL:
+ case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
switch p.To.Type {
case obj.TYPE_MEM:
jalrToSym(ctxt, p, newprog, REG_LR)
@@ -1696,6 +1696,8 @@ var encodings = [ALAST & obj.AMask]encoding{
obj.APCDATA: pseudoOpEncoding,
obj.ATEXT: pseudoOpEncoding,
obj.ANOP: pseudoOpEncoding,
+ obj.ADUFFZERO: pseudoOpEncoding,
+ obj.ADUFFCOPY: pseudoOpEncoding,
}
// encodingForAs returns the encoding for an obj.As.
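
Note: the common thread through these obj changes is that ADUFFZERO and ADUFFCOPY are calls in all but name. jalrToSym must accept them so they can be lowered to a jump-and-link through the link register, containsCall must report them so the containing function is not treated as a leaf, preprocess expands them exactly like obj.ACALL, and they use pseudoOpEncoding because they no longer exist by the time instructions are encoded.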
diff --git a/src/runtime/duff_riscv64.s b/src/runtime/duff_riscv64.s
new file mode 100644
index 0000000000..f7bd3f326e
--- /dev/null
+++ b/src/runtime/duff_riscv64.s
@@ -0,0 +1,907 @@
+// Code generated by mkduff.go; DO NOT EDIT.
+// Run go generate from src/runtime to update.
+// See mkduff.go for comments.
+
+#include "textflag.h"
+
+TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ RET
+
+TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ RET
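
Note: the generated bodies are position-significant: duffzero is 128 iterations of 8 bytes of code (1024 bytes) before its RET, duffcopy 128 iterations of 16 bytes (2048 bytes). A small sketch relating an entry offset back to the memory processed, assuming each MOV/ADD above assembles to a single 4-byte instruction:

	// Bytes zeroed when entering runtime.duffzero at byte offset e:
	// each remaining 8-byte chunk of code zeroes 8 bytes of memory.
	func zeroedBytes(e int64) int64 { return 8*128 - e }

	// Bytes copied when entering runtime.duffcopy at byte offset e:
	// each remaining 16-byte chunk of code copies 8 bytes.
	func copiedBytes(e int64) int64 { return (16*128 - e) / 2 }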
diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go
index 8859ed68cc..6ddf0256e9 100644
--- a/src/runtime/mkduff.go
+++ b/src/runtime/mkduff.go
@@ -38,6 +38,7 @@ func main() {
gen("arm64", notags, zeroARM64, copyARM64)
gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x)
gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x)
+ gen("riscv64", notags, zeroRISCV64, copyRISCV64)
}
func gen(arch string, tags, zero, copy func(io.Writer)) {
@@ -227,3 +228,30 @@ func copyMIPS64x(w io.Writer) {
}
fmt.Fprintln(w, "\tRET")
}
+
+func zeroRISCV64(w io.Writer) {
+ // ZERO: always zero
+ // X10: ptr to memory to be zeroed
+ // X10 is updated as a side effect.
+ fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
+ for i := 0; i < 128; i++ {
+ fmt.Fprintln(w, "\tMOV\tZERO, (X10)")
+ fmt.Fprintln(w, "\tADD\t$8, X10")
+ }
+ fmt.Fprintln(w, "\tRET")
+}
+
+func copyRISCV64(w io.Writer) {
+ // X10: ptr to source memory
+ // X11: ptr to destination memory
+ // X10 and X11 are updated as a side effect
+ fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
+ for i := 0; i < 128; i++ {
+ fmt.Fprintln(w, "\tMOV\t(X10), X31")
+ fmt.Fprintln(w, "\tADD\t$8, X10")
+ fmt.Fprintln(w, "\tMOV\tX31, (X11)")
+ fmt.Fprintln(w, "\tADD\t$8, X11")
+ fmt.Fprintln(w)
+ }
+ fmt.Fprintln(w, "\tRET")
+}
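
Note: per the header of the generated file ("Run go generate from src/runtime to update"), regenerating duff_riscv64.s after changing these generators is a matter of running go generate in src/runtime.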