diff options
-rw-r--r-- | src/cmd/compile/internal/riscv64/ggen.go | 10 | ||||
-rw-r--r-- | src/cmd/compile/internal/riscv64/ssa.go | 14 | ||||
-rw-r--r-- | src/cmd/compile/internal/ssa/gen/RISCV64.rules | 14 | ||||
-rw-r--r-- | src/cmd/compile/internal/ssa/gen/RISCV64Ops.go | 38 | ||||
-rw-r--r-- | src/cmd/compile/internal/ssa/opGen.go | 28 | ||||
-rw-r--r-- | src/cmd/compile/internal/ssa/rewriteRISCV64.go | 33 | ||||
-rw-r--r-- | src/cmd/internal/obj/riscv/obj.go | 8 | ||||
-rw-r--r-- | src/runtime/duff_riscv64.s | 907 | ||||
-rw-r--r-- | src/runtime/mkduff.go | 28 |
9 files changed, 1076 insertions, 4 deletions
diff --git a/src/cmd/compile/internal/riscv64/ggen.go b/src/cmd/compile/internal/riscv64/ggen.go index be31fad441..f7c03fe7c2 100644 --- a/src/cmd/compile/internal/riscv64/ggen.go +++ b/src/cmd/compile/internal/riscv64/ggen.go @@ -25,7 +25,15 @@ func zeroRange(pp *gc.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog { return p } - // TODO(jsing): Add a duff zero implementation for medium sized ranges. + if cnt <= int64(128*gc.Widthptr) { + p = pp.Appendpp(p, riscv.AADDI, obj.TYPE_CONST, 0, off, obj.TYPE_REG, riscv.REG_A0, 0) + p.Reg = riscv.REG_SP + p = pp.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) + p.To.Name = obj.NAME_EXTERN + p.To.Sym = gc.Duffzero + p.To.Offset = 8 * (128 - cnt/int64(gc.Widthptr)) + return p + } // Loop, zeroing pointer width bytes at a time. // ADD $(off), SP, T0 diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go index 064a1ca111..0beb5b4bd1 100644 --- a/src/cmd/compile/internal/riscv64/ssa.go +++ b/src/cmd/compile/internal/riscv64/ssa.go @@ -608,6 +608,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() + case ssa.OpRISCV64DUFFZERO: + p := s.Prog(obj.ADUFFZERO) + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_EXTERN + p.To.Sym = gc.Duffzero + p.To.Offset = v.AuxInt + + case ssa.OpRISCV64DUFFCOPY: + p := s.Prog(obj.ADUFFCOPY) + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_EXTERN + p.To.Sym = gc.Duffcopy + p.To.Offset = v.AuxInt + default: v.Fatalf("Unhandled op %v", v.Op) } diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64.rules b/src/cmd/compile/internal/ssa/gen/RISCV64.rules index 3bc2e8498a..325cbeb825 100644 --- a/src/cmd/compile/internal/ssa/gen/RISCV64.rules +++ b/src/cmd/compile/internal/ssa/gen/RISCV64.rules @@ -360,6 +360,13 @@ (Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst) mem) (Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst) mem) +// Medium zeroing uses a Duff's device +// 8 and 128 are magic constants, see runtime/mkduff.go +(Zero [s] {t} ptr mem) + && s%8 == 0 && s >= 16 && s <= 8*128 + && t.Alignment()%8 == 0 && !config.noDuffDevice => + (DUFFZERO [8 * (128 - s/8)] ptr mem) + // Generic zeroing uses a loop (Zero [s] {t} ptr mem) => (LoweredZero [t.Alignment()] @@ -395,6 +402,13 @@ (Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem) (Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem) +// Medium move uses a Duff's device +// 16 and 128 are magic constants, see runtime/mkduff.go +(Move [s] {t} dst src mem) + && s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 + && !config.noDuffDevice && logLargeCopy(v, s) => + (DUFFCOPY [16 * (128 - s/8)] dst src mem) + // Generic move uses a loop (Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) => (LoweredMove [t.Alignment()] diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go index ebd515b7fc..f64319230b 100644 --- a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go @@ -240,6 +240,44 @@ func init() { {name: "CALLclosure", argLength: 3, reg: callClosure, aux: "CallOff", call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: 2, reg: callInter, aux: "CallOff", call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem + // duffzero + // arg0 = address of memory to zero (in X10, changed as side effect) + // arg1 = mem + // auxint = offset into duffzero code to start executing + // X1 (link register) changed because of function call + // returns mem + { + name: "DUFFZERO", + aux: "Int64", + argLength: 2, + reg: regInfo{ + inputs: []regMask{regNamed["X10"]}, + clobbers: regNamed["X1"] | regNamed["X10"], + }, + typ: "Mem", + faultOnNilArg0: true, + }, + + // duffcopy + // arg0 = address of dst memory (in X11, changed as side effect) + // arg1 = address of src memory (in X10, changed as side effect) + // arg2 = mem + // auxint = offset into duffcopy code to start executing + // X1 (link register) changed because of function call + // returns mem + { + name: "DUFFCOPY", + aux: "Int64", + argLength: 3, + reg: regInfo{ + inputs: []regMask{regNamed["X11"], regNamed["X10"]}, + clobbers: regNamed["X1"] | regNamed["X10"] | regNamed["X11"], + }, + typ: "Mem", + faultOnNilArg0: true, + faultOnNilArg1: true, + }, + // Generic moves and zeros // general unaligned zeroing diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index bb1cbc0baa..96aa3adedd 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2111,6 +2111,8 @@ const ( OpRISCV64CALLstatic OpRISCV64CALLclosure OpRISCV64CALLinter + OpRISCV64DUFFZERO + OpRISCV64DUFFCOPY OpRISCV64LoweredZero OpRISCV64LoweredMove OpRISCV64LoweredAtomicLoad8 @@ -28163,6 +28165,32 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "DUFFZERO", + auxType: auxInt64, + argLen: 2, + faultOnNilArg0: true, + reg: regInfo{ + inputs: []inputInfo{ + {0, 512}, // X10 + }, + clobbers: 512, // X10 + }, + }, + { + name: "DUFFCOPY", + auxType: auxInt64, + argLen: 3, + faultOnNilArg0: true, + faultOnNilArg1: true, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1024}, // X11 + {1, 512}, // X10 + }, + clobbers: 1536, // X10 X11 + }, + }, + { name: "LoweredZero", auxType: auxInt64, argLen: 3, diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go index ac92945753..c337ffbfe7 100644 --- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go +++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go @@ -2017,6 +2017,23 @@ func rewriteValueRISCV64_OpMove(v *Value) bool { return true } // match: (Move [s] {t} dst src mem) + // cond: s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s) + // result: (DUFFCOPY [16 * (128 - s/8)] dst src mem) + for { + s := auxIntToInt64(v.AuxInt) + t := auxToType(v.Aux) + dst := v_0 + src := v_1 + mem := v_2 + if !(s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s)) { + break + } + v.reset(OpRISCV64DUFFCOPY) + v.AuxInt = int64ToAuxInt(16 * (128 - s/8)) + v.AddArg3(dst, src, mem) + return true + } + // match: (Move [s] {t} dst src mem) // cond: (s <= 16 || logLargeCopy(v, s)) // result: (LoweredMove [t.Alignment()] dst src (ADDI <src.Type> [s-moveSize(t.Alignment(), config)] src) mem) for { @@ -5650,6 +5667,22 @@ func rewriteValueRISCV64_OpZero(v *Value) bool { return true } // match: (Zero [s] {t} ptr mem) + // cond: s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice + // result: (DUFFZERO [8 * (128 - s/8)] ptr mem) + for { + s := auxIntToInt64(v.AuxInt) + t := auxToType(v.Aux) + ptr := v_0 + mem := v_1 + if !(s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice) { + break + } + v.reset(OpRISCV64DUFFZERO) + v.AuxInt = int64ToAuxInt(8 * (128 - s/8)) + v.AddArg2(ptr, mem) + return true + } + // match: (Zero [s] {t} ptr mem) // result: (LoweredZero [t.Alignment()] ptr (ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.Alignment(), config)])) mem) for { s := auxIntToInt64(v.AuxInt) diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index da49f67138..5301e44002 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -33,7 +33,7 @@ func buildop(ctxt *obj.Link) {} // lr is the link register to use for the JALR. // p must be a CALL, JMP or RET. func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *obj.Prog { - if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET { + if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET && p.As != obj.ADUFFZERO && p.As != obj.ADUFFCOPY { ctxt.Diag("unexpected Prog in jalrToSym: %v", p) return p } @@ -417,7 +417,7 @@ func containsCall(sym *obj.LSym) bool { // CALLs are CALL or JAL(R) with link register LR. for p := sym.Func().Text; p != nil; p = p.Link { switch p.As { - case obj.ACALL: + case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY: return true case AJAL, AJALR: if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR { @@ -656,7 +656,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.From.Reg = REG_SP } - case obj.ACALL: + case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY: switch p.To.Type { case obj.TYPE_MEM: jalrToSym(ctxt, p, newprog, REG_LR) @@ -1696,6 +1696,8 @@ var encodings = [ALAST & obj.AMask]encoding{ obj.APCDATA: pseudoOpEncoding, obj.ATEXT: pseudoOpEncoding, obj.ANOP: pseudoOpEncoding, + obj.ADUFFZERO: pseudoOpEncoding, + obj.ADUFFCOPY: pseudoOpEncoding, } // encodingForAs returns the encoding for an obj.As. diff --git a/src/runtime/duff_riscv64.s b/src/runtime/duff_riscv64.s new file mode 100644 index 0000000000..f7bd3f326e --- /dev/null +++ b/src/runtime/duff_riscv64.s @@ -0,0 +1,907 @@ +// Code generated by mkduff.go; DO NOT EDIT. +// Run go generate from src/runtime to update. +// See mkduff.go for comments. + +#include "textflag.h" + +TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + RET + +TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0 + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + RET diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go index 8859ed68cc..6ddf0256e9 100644 --- a/src/runtime/mkduff.go +++ b/src/runtime/mkduff.go @@ -38,6 +38,7 @@ func main() { gen("arm64", notags, zeroARM64, copyARM64) gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x) gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x) + gen("riscv64", notags, zeroRISCV64, copyRISCV64) } func gen(arch string, tags, zero, copy func(io.Writer)) { @@ -227,3 +228,30 @@ func copyMIPS64x(w io.Writer) { } fmt.Fprintln(w, "\tRET") } + +func zeroRISCV64(w io.Writer) { + // ZERO: always zero + // X10: ptr to memory to be zeroed + // X10 is updated as a side effect. + fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0") + for i := 0; i < 128; i++ { + fmt.Fprintln(w, "\tMOV\tZERO, (X10)") + fmt.Fprintln(w, "\tADD\t$8, X10") + } + fmt.Fprintln(w, "\tRET") +} + +func copyRISCV64(w io.Writer) { + // X10: ptr to source memory + // X11: ptr to destination memory + // X10 and X11 are updated as a side effect + fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0") + for i := 0; i < 128; i++ { + fmt.Fprintln(w, "\tMOV\t(X10), X31") + fmt.Fprintln(w, "\tADD\t$8, X10") + fmt.Fprintln(w, "\tMOV\tX31, (X11)") + fmt.Fprintln(w, "\tADD\t$8, X11") + fmt.Fprintln(w) + } + fmt.Fprintln(w, "\tRET") +} |