// Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // +build ignore // The gen command generates Go code (in the parent directory) for all // the architecture-specific opcodes, blocks, and rewrites. package main import ( "bytes" "flag" "fmt" "go/format" "io/ioutil" "log" "os" "path" "regexp" "runtime" "runtime/pprof" "runtime/trace" "sort" "strings" "sync" ) // TODO: capitalize these types, so that we can more easily tell variable names // apart from type names, and avoid awkward func parameters like "arch arch". type arch struct { name string pkg string // obj package to import for this arch. genfile string // source file containing opcode code generation. ops []opData blocks []blockData regnames []string gpregmask regMask fpregmask regMask fp32regmask regMask fp64regmask regMask specialregmask regMask framepointerreg int8 linkreg int8 generic bool imports []string } type opData struct { name string reg regInfo asm string typ string // default result type aux string rematerializeable bool argLength int32 // number of arguments, if -1, then this operation has a variable number of arguments commutative bool // this operation is commutative on its first 2 arguments (e.g. addition) resultInArg0 bool // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register resultNotInArgs bool // outputs must not be allocated to the same registers as inputs clobberFlags bool // this op clobbers flags register call bool // is a function call nilCheck bool // this op is a nil check on arg0 faultOnNilArg0 bool // this op will fault if arg0 is nil (and aux encodes a small offset) faultOnNilArg1 bool // this op will fault if arg1 is nil (and aux encodes a small offset) usesScratch bool // this op requires scratch memory space hasSideEffects bool // for "reasons", not to be eliminated. E.g., atomic store, #19182. zeroWidth bool // op never translates into any machine code. example: copy, which may sometimes translate to machine code, is not zero-width. unsafePoint bool // this op is an unsafe point, i.e. not safe for async preemption symEffect string // effect this op has on symbol in aux scale uint8 // amd64/386 indexed load scale } type blockData struct { name string // the suffix for this block ("EQ", "LT", etc.) controls int // the number of control values this type of block requires aux string // the type of the Aux/AuxInt value, if any } type regInfo struct { // inputs[i] encodes the set of registers allowed for the i'th input. // Inputs that don't use registers (flags, memory, etc.) should be 0. inputs []regMask // clobbers encodes the set of registers that are overwritten by // the instruction (other than the output registers). clobbers regMask // outputs[i] encodes the set of registers allowed for the i'th output. outputs []regMask } type regMask uint64 func (a arch) regMaskComment(r regMask) string { var buf bytes.Buffer for i := uint64(0); r != 0; i++ { if r&1 != 0 { if buf.Len() == 0 { buf.WriteString(" //") } buf.WriteString(" ") buf.WriteString(a.regnames[i]) } r >>= 1 } return buf.String() } var archs []arch var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") var memprofile = flag.String("memprofile", "", "write memory profile to `file`") var tracefile = flag.String("trace", "", "write trace to `file`") func main() { flag.Parse() if *cpuprofile != "" { f, err := os.Create(*cpuprofile) if err != nil { log.Fatal("could not create CPU profile: ", err) } defer f.Close() if err := pprof.StartCPUProfile(f); err != nil { log.Fatal("could not start CPU profile: ", err) } defer pprof.StopCPUProfile() } if *tracefile != "" { f, err := os.Create(*tracefile) if err != nil { log.Fatalf("failed to create trace output file: %v", err) } defer func() { if err := f.Close(); err != nil { log.Fatalf("failed to close trace file: %v", err) } }() if err := trace.Start(f); err != nil { log.Fatalf("failed to start trace: %v", err) } defer trace.Stop() } sort.Sort(ArchsByName(archs)) // The generate tasks are run concurrently, since they are CPU-intensive // that can easily make use of many cores on a machine. // // Note that there is no limit on the concurrency at the moment. On a // four-core laptop at the time of writing, peak RSS usually reaches // ~200MiB, which seems doable by practically any machine nowadays. If // that stops being the case, we can cap this func to a fixed number of // architectures being generated at once. tasks := []func(){ genOp, } for _, a := range archs { a := a // the funcs are ran concurrently at a later time tasks = append(tasks, func() { genRules(a) genSplitLoadRules(a) }) } var wg sync.WaitGroup for _, task := range tasks { task := task wg.Add(1) go func() { task() wg.Done() }() } wg.Wait() if *memprofile != "" { f, err := os.Create(*memprofile) if err != nil { log.Fatal("could not create memory profile: ", err) } defer f.Close() runtime.GC() // get up-to-date statistics if err := pprof.WriteHeapProfile(f); err != nil { log.Fatal("could not write memory profile: ", err) } } } func genOp() { w := new(bytes.Buffer) fmt.Fprintf(w, "// Code generated from gen/*Ops.go; DO NOT EDIT.\n") fmt.Fprintln(w) fmt.Fprintln(w, "package ssa") fmt.Fprintln(w, "import (") fmt.Fprintln(w, "\"cmd/internal/obj\"") for _, a := range archs { if a.pkg != "" { fmt.Fprintf(w, "%q\n", a.pkg) } } fmt.Fprintln(w, ")") // generate Block* declarations fmt.Fprintln(w, "const (") fmt.Fprintln(w, "BlockInvalid BlockKind = iota") for _, a := range archs { fmt.Fprintln(w) for _, d := range a.blocks { fmt.Fprintf(w, "Block%s%s\n", a.Name(), d.name) } } fmt.Fprintln(w, ")") // generate block kind string method fmt.Fprintln(w, "var blockString = [...]string{") fmt.Fprintln(w, "BlockInvalid:\"BlockInvalid\",") for _, a := range archs { fmt.Fprintln(w) for _, b := range a.blocks { fmt.Fprintf(w, "Block%s%s:\"%s\",\n", a.Name(), b.name, b.name) } } fmt.Fprintln(w, "}") fmt.Fprintln(w, "func (k BlockKind) String() string {return blockString[k]}") // generate block kind auxint method fmt.Fprintln(w, "func (k BlockKind) AuxIntType() string {") fmt.Fprintln(w, "switch k {") for _, a := range archs { for _, b := range a.blocks { if b.auxIntType() == "invalid" { continue } fmt.Fprintf(w, "case Block%s%s: return \"%s\"\n", a.Name(), b.name, b.auxIntType()) } } fmt.Fprintln(w, "}") fmt.Fprintln(w, "return \"\"") fmt.Fprintln(w, "}") // generate Op* declarations fmt.Fprintln(w, "const (") fmt.Fprintln(w, "OpInvalid Op = iota") // make sure OpInvalid is 0. for _, a := range archs { fmt.Fprintln(w) for _, v := range a.ops { if v.name == "Invalid" { continue } fmt.Fprintf(w, "Op%s%s\n", a.Name(), v.name) } } fmt.Fprintln(w, ")") // generate OpInfo table fmt.Fprintln(w, "var opcodeTable = [...]opInfo{") fmt.Fprintln(w, " { name: \"OpInvalid\" },") for _, a := range archs { fmt.Fprintln(w) pkg := path.Base(a.pkg) for _, v := range a.ops { if v.name == "Invalid" { continue } fmt.Fprintln(w, "{") fmt.Fprintf(w, "name:\"%s\",\n", v.name) // flags if v.aux != "" { fmt.Fprintf(w, "auxType: aux%s,\n", v.aux) } fmt.Fprintf(w, "argLen: %d,\n", v.argLength) if v.rematerializeable { if v.reg.clobbers != 0 { log.Fatalf("%s is rematerializeable and clobbers registers", v.name) } if v.clobberFlags { log.Fatalf("%s is rematerializeable and clobbers flags", v.name) } fmt.Fprintln(w, "rematerializeable: true,") } if v.commutative { fmt.Fprintln(w, "commutative: true,") } if v.resultInArg0 { fmt.Fprintln(w, "resultInArg0: true,") // OpConvert's register mask is selected dynamically, // so don't try to check it in the static table. if v.name != "Convert" && v.reg.inputs[0] != v.reg.outputs[0] { log.Fatalf("%s: input[0] and output[0] must use the same registers for %s", a.name, v.name) } if v.name != "Convert" && v.commutative && v.reg.inputs[1] != v.reg.outputs[0] { log.Fatalf("%s: input[1] and output[0] must use the same registers for %s", a.name, v.name) } } if v.resultNotInArgs { fmt.Fprintln(w, "resultNotInArgs: true,") } if v.clobberFlags { fmt.Fprintln(w, "clobberFlags: true,") } if v.call { fmt.Fprintln(w, "call: true,") } if v.nilCheck { fmt.Fprintln(w, "nilCheck: true,") } if v.faultOnNilArg0 { fmt.Fprintln(w, "faultOnNilArg0: true,") if v.aux != "Sym" && v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" { log.Fatalf("faultOnNilArg0 with aux %s not allowed", v.aux) } } if v.faultOnNilArg1 { fmt.Fprintln(w, "faultOnNilArg1: true,") if v.aux != "Sym" && v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" { log.Fatalf("faultOnNilArg1 with aux %s not allowed", v.aux) } } if v.usesScratch { fmt.Fprintln(w, "usesScratch: true,") } if v.hasSideEffects { fmt.Fprintln(w, "hasSideEffects: true,") } if v.zeroWidth { fmt.Fprintln(w, "zeroWidth: true,") } if v.unsafePoint { fmt.Fprintln(w, "unsafePoint: true,") } needEffect := strings.HasPrefix(v.aux, "Sym") if v.symEffect != "" { if !needEffect { log.Fatalf("symEffect with aux %s not allowed", v.aux) } fmt.Fprintf(w, "symEffect: Sym%s,\n", strings.Replace(v.symEffect, ",", "|Sym", -1)) } else if needEffect { log.Fatalf("symEffect needed for aux %s", v.aux) } if a.name == "generic" { fmt.Fprintln(w, "generic:true,") fmt.Fprintln(w, "},") // close op // generic ops have no reg info or asm continue } if v.asm != "" { fmt.Fprintf(w, "asm: %s.A%s,\n", pkg, v.asm) } if v.scale != 0 { fmt.Fprintf(w, "scale: %d,\n", v.scale) } fmt.Fprintln(w, "reg:regInfo{") // Compute input allocation order. We allocate from the // most to the least constrained input. This order guarantees // that we will always be able to find a register. var s []intPair for i, r := range v.reg.inputs { if r != 0 { s = append(s, intPair{countRegs(r), i}) } } if len(s) > 0 { sort.Sort(byKey(s)) fmt.Fprintln(w, "inputs: []inputInfo{") for _, p := range s { r := v.reg.inputs[p.val] fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r)) } fmt.Fprintln(w, "},") } if v.reg.clobbers > 0 { fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers)) } // reg outputs s = s[:0] for i, r := range v.reg.outputs { s = append(s, intPair{countRegs(r), i}) } if len(s) > 0 { sort.Sort(byKey(s)) fmt.Fprintln(w, "outputs: []outputInfo{") for _, p := range s { r := v.reg.outputs[p.val] fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r)) } fmt.Fprintln(w, "},") } fmt.Fprintln(w, "},") // close reg info fmt.Fprintln(w, "},") // close op } } fmt.Fprintln(w, "}") fmt.Fprintln(w, "func (o Op) Asm() obj.As {return opcodeTable[o].asm}") fmt.Fprintln(w, "func (o Op) Scale() int16 {return int16(opcodeTable[o].scale)}") // generate op string method fmt.Fprintln(w, "func (o Op) String() string {return opcodeTable[o].name }") fmt.Fprintln(w, "func (o Op) UsesScratch() bool { return opcodeTable[o].usesScratch }") fmt.Fprintln(w, "func (o Op) SymEffect() SymEffect { return opcodeTable[o].symEffect }") fmt.Fprintln(w, "func (o Op) IsCall() bool { return opcodeTable[o].call }") fmt.Fprintln(w, "func (o Op) HasSideEffects() bool { return opcodeTable[o].hasSideEffects }") fmt.Fprintln(w, "func (o Op) UnsafePoint() bool { return opcodeTable[o].unsafePoint }") // generate registers for _, a := range archs { if a.generic { continue } fmt.Fprintf(w, "var registers%s = [...]Register {\n", a.name) var gcRegN int for i, r := range a.regnames { pkg := a.pkg[len("cmd/internal/obj/"):] var objname string // name in cmd/internal/obj/$ARCH switch r { case "SB": // SB isn't a real register. cmd/internal/obj expects 0 in this case. objname = "0" case "SP": objname = pkg + ".REGSP" case "g": objname = pkg + ".REGG" default: objname = pkg + ".REG_" + r } // Assign a GC register map index to registers // that may contain pointers. gcRegIdx := -1 if a.gpregmask&(1< 32 { // Won't fit in a uint32 mask. log.Fatalf("too many GC registers (%d > 32) on %s", gcRegN, a.name) } fmt.Fprintln(w, "}") fmt.Fprintf(w, "var gpRegMask%s = regMask(%d)\n", a.name, a.gpregmask) fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask) if a.fp32regmask != 0 { fmt.Fprintf(w, "var fp32RegMask%s = regMask(%d)\n", a.name, a.fp32regmask) } if a.fp64regmask != 0 { fmt.Fprintf(w, "var fp64RegMask%s = regMask(%d)\n", a.name, a.fp64regmask) } fmt.Fprintf(w, "var specialRegMask%s = regMask(%d)\n", a.name, a.specialregmask) fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg) fmt.Fprintf(w, "var linkReg%s = int8(%d)\n", a.name, a.linkreg) } // gofmt result b := w.Bytes() var err error b, err = format.Source(b) if err != nil { fmt.Printf("%s\n", w.Bytes()) panic(err) } if err := ioutil.WriteFile("../opGen.go", b, 0666); err != nil { log.Fatalf("can't write output: %v\n", err) } // Check that the arch genfile handles all the arch-specific opcodes. // This is very much a hack, but it is better than nothing. // // Do a single regexp pass to record all ops being handled in a map, and // then compare that with the ops list. This is much faster than one // regexp pass per opcode. for _, a := range archs { if a.genfile == "" { continue } pattern := fmt.Sprintf(`\Wssa\.Op%s([a-zA-Z0-9_]+)\W`, a.name) rxOp, err := regexp.Compile(pattern) if err != nil { log.Fatalf("bad opcode regexp %s: %v", pattern, err) } src, err := ioutil.ReadFile(a.genfile) if err != nil { log.Fatalf("can't read %s: %v", a.genfile, err) } seen := make(map[string]bool, len(a.ops)) for _, m := range rxOp.FindAllSubmatch(src, -1) { seen[string(m[1])] = true } for _, op := range a.ops { if !seen[op.name] { log.Fatalf("Op%s%s has no code generation in %s", a.name, op.name, a.genfile) } } } } // Name returns the name of the architecture for use in Op* and Block* enumerations. func (a arch) Name() string { s := a.name if s == "generic" { s = "" } return s } // countRegs returns the number of set bits in the register mask. func countRegs(r regMask) int { n := 0 for r != 0 { n += int(r & 1) r >>= 1 } return n } // for sorting a pair of integers by key type intPair struct { key, val int } type byKey []intPair func (a byKey) Len() int { return len(a) } func (a byKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a byKey) Less(i, j int) bool { return a[i].key < a[j].key } type ArchsByName []arch func (x ArchsByName) Len() int { return len(x) } func (x ArchsByName) Swap(i, j int) { x[i], x[j] = x[j], x[i] } func (x ArchsByName) Less(i, j int) bool { return x[i].name < x[j].name }