author    | Matthew Dempsky <mdempsky@google.com> | 2021-01-25 17:53:08 -0800
committer | Matthew Dempsky <mdempsky@google.com> | 2021-01-25 17:53:50 -0800
commit    | 34704e374f08ea126786b7d454fc9b647663f95a (patch)
tree      | ca17b071c0b292023c5914fabd5de6576bf47ee0 /src/cmd
parent    | c97af0036b8cd8ab2a7ed3f68c3ba72968637e4d (diff)
parent    | 5e4a0cdde3672b9c774564c428058858d09795ea (diff)
download  | go-34704e374f08ea126786b7d454fc9b647663f95a.tar.gz, go-34704e374f08ea126786b7d454fc9b647663f95a.zip
[dev.typeparams] all: merge dev.regabi (5e4a0cd) into dev.typeparams
Merge List:
+ 2021-01-25 5e4a0cdde3 [dev.regabi] all: merge master (bf0f7c9) into dev.regabi
+ 2021-01-25 bf0f7c9d78 doc/go1.16: mention os.DirFS in os section
+ 2021-01-25 deaf29a8a8 cmd/compile: fix order-of-assignment issue w/ defers
+ 2021-01-25 ad2ca26a52 doc/go1.16: mention os.DirEntry and types moved from os to io/fs
+ 2021-01-25 a51921fa5b doc/go1.16: mention new testing/iotest functions
+ 2021-01-25 e6b6d107f7 doc/go1.16: mention deprecation of io/ioutil
+ 2021-01-25 7eaaf28cae [dev.regabi] cmd/compile: disallow taking address of SSA'd values
+ 2021-01-25 96a276363b doc/go1.16: mention go/build changes
+ 2021-01-25 3d85c69a0b html/template: revert "avoid race when escaping updates template"
+ 2021-01-25 54514c6b28 cmd/go: fix TestScript/cgo_path, cgo_path_space when CC set
+ 2021-01-25 6f5e79f470 [dev.regabi] cmd/compile/internal: specify memory layout
+ 2021-01-25 cabffc199d [dev.regabi] cmd/compile/internal: add internal ABI specification
+ 2021-01-25 6de8443f3b doc/asm: add a section on go_asm.h, clean up go_tls.h section
+ 2021-01-25 6a4739ccc5 [dev.regabi] cmd/compile: enable rational constant arithmetic
+ 2021-01-25 be9612a832 [dev.regabi] os: disable TestDirFS until #42637 is fixed
+ 2021-01-25 8ee3d39838 [dev.regabi] cmd/go: workaround -race issue on ppc64le
+ 2021-01-25 54b251f542 lib/time, time/tzdata: update tzdata to 2021a
+ 2021-01-25 5a76c3d548 [dev.regabi] cmd/compile: modify abiutils for recently updated ABI
+ 2021-01-25 ff82cc971a os: force consistent mtime before running fstest on directory on Windows
+ 2021-01-25 044f937a73 doc/go1.16: fix WalkDir and Walk links
+ 2021-01-23 b634f5d97a doc/go1.16: add crypto/x509 memory optimization
+ 2021-01-23 9897655c61 doc/go1.16: reword ambiguously parsable sentence
+ 2021-01-23 cd99385ff4 cmd/internal/obj/arm64: fix VMOVQ instruction encoding error
+ 2021-01-23 66ee8b158f runtime: restore cgo_import_dynamic for libc.so on openbsd
+ 2021-01-22 25c39e4fb5 io/ioutil: fix example test for WriteFile to allow it to run in the playground
+ 2021-01-22 eb21b31e48 runtime: define dummy msanmove
+ 2021-01-22 3a778ff50f runtime: check for g0 stack last in signal handler
+ 2021-01-22 a2cef9b544 cmd/go: don't lookup the path for CC when invoking cgo
Change-Id: Iede4f98ba5ddbee2e16075d20186f8a9c095e378
Diffstat (limited to 'src/cmd')
-rw-r--r-- | src/cmd/compile/internal-abi.md | 628
-rw-r--r-- | src/cmd/compile/internal/abi/abiutils.go | 146
-rw-r--r-- | src/cmd/compile/internal/noder/noder.go | 8
-rw-r--r-- | src/cmd/compile/internal/ssagen/ssa.go | 5
-rw-r--r-- | src/cmd/compile/internal/test/abiutils_test.go | 214
-rw-r--r-- | src/cmd/compile/internal/test/abiutilsaux_test.go | 18
-rw-r--r-- | src/cmd/compile/internal/typecheck/iexport.go | 51
-rw-r--r-- | src/cmd/compile/internal/typecheck/iimport.go | 27
-rw-r--r-- | src/cmd/compile/internal/walk/assign.go | 40
-rw-r--r-- | src/cmd/go/internal/work/action.go | 3
-rw-r--r-- | src/cmd/go/internal/work/exec.go | 27
-rw-r--r-- | src/cmd/go/internal/work/gc.go | 6
-rw-r--r-- | src/cmd/go/testdata/script/cgo_path.txt | 12
-rw-r--r-- | src/cmd/go/testdata/script/cgo_path_space.txt | 56
-rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 38
-rw-r--r-- | src/cmd/internal/obj/arm64/asm_arm64_test.go (renamed from src/cmd/internal/obj/arm64/asm_test.go) | 18
-rw-r--r-- | src/cmd/internal/obj/arm64/asm_arm64_test.s | 14
17 files changed, 1120 insertions, 191 deletions
diff --git a/src/cmd/compile/internal-abi.md b/src/cmd/compile/internal-abi.md new file mode 100644 index 0000000000..f4ef2cc869 --- /dev/null +++ b/src/cmd/compile/internal-abi.md @@ -0,0 +1,628 @@ +# Go internal ABI specification + +This document describes Go’s internal application binary interface +(ABI), known as ABIInternal. +Go's ABI defines the layout of data in memory and the conventions for +calling between Go functions. +This ABI is *unstable* and will change between Go versions. +If you’re writing assembly code, please instead refer to Go’s +[assembly documentation](/doc/asm.html), which describes Go’s stable +ABI, known as ABI0. + +All functions defined in Go source follow ABIInternal. +However, ABIInternal and ABI0 functions are able to call each other +through transparent *ABI wrappers*, described in the [internal calling +convention proposal](https://golang.org/design/27539-internal-abi). + +Go uses a common ABI design across all architectures. +We first describe the common ABI, and then cover per-architecture +specifics. + +*Rationale*: For the reasoning behind using a common ABI across +architectures instead of the platform ABI, see the [register-based Go +calling convention proposal](https://golang.org/design/40724-register-calling). + +## Memory layout + +Go's built-in types have the following sizes and alignments. +Many, though not all, of these sizes are guaranteed by the [language +specification](/doc/go_spec.html#Size_and_alignment_guarantees). +Those that aren't guaranteed may change in future versions of Go (for +example, we've considered changing the alignment of int64 on 32-bit). + +| Type | 64-bit | | 32-bit | | +| --- | --- | --- | --- | --- | +| | Size | Align | Size | Align | +| bool, uint8, int8 | 1 | 1 | 1 | 1 | +| uint16, int16 | 2 | 2 | 2 | 2 | +| uint32, int32 | 4 | 4 | 4 | 4 | +| uint64, int64 | 8 | 8 | 8 | 4 | +| int, uint | 8 | 8 | 4 | 4 | +| float32 | 4 | 4 | 4 | 4 | +| float64 | 8 | 8 | 8 | 4 | +| complex64 | 8 | 4 | 8 | 4 | +| complex128 | 16 | 8 | 16 | 4 | +| uintptr, *T, unsafe.Pointer | 8 | 8 | 4 | 4 | + +The types `byte` and `rune` are aliases for `uint8` and `int32`, +respectively, and hence have the same size and alignment as these +types. + +The layout of `map`, `chan`, and `func` types is equivalent to *T. + +To describe the layout of the remaining composite types, we first +define the layout of a *sequence* S of N fields with types +t<sub>1</sub>, t<sub>2</sub>, ..., t<sub>N</sub>. +We define the byte offset at which each field begins relative to a +base address of 0, as well as the size and alignment of the sequence +as follows: + +``` +offset(S, i) = 0 if i = 1 + = align(offset(S, i-1) + sizeof(t_(i-1)), alignof(t_i)) +alignof(S) = 1 if N = 0 + = max(alignof(t_i) | 1 <= i <= N) +sizeof(S) = 0 if N = 0 + = align(offset(S, N) + sizeof(t_N), alignof(S)) +``` + +Where sizeof(T) and alignof(T) are the size and alignment of type T, +respectively, and align(x, y) rounds x up to a multiple of y. + +The `interface{}` type is a sequence of 1. a pointer to the runtime type +description for the interface's dynamic type and 2. an `unsafe.Pointer` +data field. +Any other interface type (besides the empty interface) is a sequence +of 1. a pointer to the runtime "itab" that gives the method pointers and +the type of the data field and 2. an `unsafe.Pointer` data field. 
+An interface can be "direct" or "indirect" depending on the dynamic +type: a direct interface stores the value directly in the data field, +and an indirect interface stores a pointer to the value in the data +field. +An interface can only be direct if the value consists of a single +pointer word. + +An array type `[N]T` is a sequence of N fields of type T. + +The slice type `[]T` is a sequence of a `*[cap]T` pointer to the slice +backing store, an `int` giving the `len` of the slice, and an `int` +giving the `cap` of the slice. + +The `string` type is a sequence of a `*[len]byte` pointer to the +string backing store, and an `int` giving the `len` of the string. + +A struct type `struct { f1 t1; ...; fM tM }` is laid out as the +sequence t1, ..., tM, tP, where tP is either: + +- Type `byte` if sizeof(tM) = 0 and any of sizeof(t*i*) ≠ 0. +- Empty (size 0 and align 1) otherwise. + +The padding byte prevents creating a past-the-end pointer by taking +the address of the final, empty fN field. + +Note that user-written assembly code should generally not depend on Go +type layout and should instead use the constants defined in +[`go_asm.h`](/doc/asm.html#data-offsets). + +## Function call argument and result passing + +Function calls pass arguments and results using a combination of the +stack and machine registers. +Each argument or result is passed either entirely in registers or +entirely on the stack. +Because access to registers is generally faster than access to the +stack, arguments and results are preferentially passed in registers. +However, any argument or result that contains a non-trivial array or +does not fit entirely in the remaining available registers is passed +on the stack. + +Each architecture defines a sequence of integer registers and a +sequence of floating-point registers. +At a high level, arguments and results are recursively broken down +into values of base types and these base values are assigned to +registers from these sequences. + +Arguments and results can share the same registers, but do not share +the same stack space. +Beyond the arguments and results passed on the stack, the caller also +reserves spill space on the stack for all register-based arguments +(but does not populate this space). + +The receiver, arguments, and results of function or method F are +assigned to registers or the stack using the following algorithm: + +1. Let NI and NFP be the length of integer and floating-point register + sequences defined by the architecture. + Let I and FP be 0; these are the indexes of the next integer and + floating-pointer register. + Let S, the type sequence defining the stack frame, be empty. +1. If F is a method, assign F’s receiver. +1. For each argument A of F, assign A. +1. Add a pointer-alignment field to S. This has size 0 and the same + alignment as `uintptr`. +1. Reset I and FP to 0. +1. For each result R of F, assign R. +1. Add a pointer-alignment field to S. +1. For each register-assigned receiver and argument of F, let T be its + type and add T to the stack sequence S. + This is the argument's (or receiver's) spill space and will be + uninitialized at the call. +1. Add a pointer-alignment field to S. + +Assigning a receiver, argument, or result V of underlying type T works +as follows: + +1. Remember I and FP. +1. Try to register-assign V. +1. If step 2 failed, reset I and FP to the values from step 1, add T + to the stack sequence S, and assign V to this field in S. + +Register-assignment of a value V of underlying type T works as follows: + +1. 
If T is a boolean or integral type that fits in an integer + register, assign V to register I and increment I. +1. If T is an integral type that fits in two integer registers, assign + the least significant and most significant halves of V to registers + I and I+1, respectively, and increment I by 2 +1. If T is a floating-point type and can be represented without loss + of precision in a floating-point register, assign V to register FP + and increment FP. +1. If T is a complex type, recursively register-assign its real and + imaginary parts. +1. If T is a pointer type, map type, channel type, or function type, + assign V to register I and increment I. +1. If T is a string type, interface type, or slice type, recursively + register-assign V’s components (2 for strings and interfaces, 3 for + slices). +1. If T is a struct type, recursively register-assign each field of V. +1. If T is an array type of length 0, do nothing. +1. If T is an array type of length 1, recursively register-assign its + one element. +1. If T is an array type of length > 1, fail. +1. If I > NI or FP > NFP, fail. +1. If any recursive assignment above fails, fail. + +The above algorithm produces an assignment of each receiver, argument, +and result to registers or to a field in the stack sequence. +The final stack sequence looks like: stack-assigned receiver, +stack-assigned arguments, pointer-alignment, stack-assigned results, +pointer-alignment, spill space for each register-assigned argument, +pointer-alignment. +The following diagram shows what this stack frame looks like on the +stack, using the typical convention where address 0 is at the bottom: + + +------------------------------+ + | . . . | + | 2nd reg argument spill space | + | 1st reg argument spill space | + | <pointer-sized alignment> | + | . . . | + | 2nd stack-assigned result | + | 1st stack-assigned result | + | <pointer-sized alignment> | + | . . . | + | 2nd stack-assigned argument | + | 1st stack-assigned argument | + | stack-assigned receiver | + +------------------------------+ ↓ lower addresses + +To perform a call, the caller reserves space starting at the lowest +address in its stack frame for the call stack frame, stores arguments +in the registers and argument stack fields determined by the above +algorithm, and performs the call. +At the time of a call, spill space, result stack fields, and result +registers are left uninitialized. +Upon return, the callee must have stored results to all result +registers and result stack fields determined by the above algorithm. + +There are no callee-save registers, so a call may overwrite any +register that doesn’t have a fixed meaning, including argument +registers. + +### Example + +Consider the function `func f(a1 uint8, a2 [2]uintptr, a3 uint8) (r1 +struct { x uintptr; y [2]uintptr }, r2 string)` on a 64-bit +architecture with hypothetical integer registers R0–R9. + +On entry, `a1` is assigned to `R0`, `a3` is assigned to `R1` and the +stack frame is laid out in the following sequence: + + a2 [2]uintptr + r1.x uintptr + r1.y [2]uintptr + a1Spill uint8 + a2Spill uint8 + _ [6]uint8 // alignment padding + +In the stack frame, only the `a2` field is initialized on entry; the +rest of the frame is left uninitialized. + +On exit, `r2.base` is assigned to `R0`, `r2.len` is assigned to `R1`, +and `r1.x` and `r1.y` are initialized in the stack frame. + +There are several things to note in this example. +First, `a2` and `r1` are stack-assigned because they contain arrays. 
+The other arguments and results are register-assigned. +Result `r2` is decomposed into its components, which are individually +register-assigned. +On the stack, the stack-assigned arguments appear at lower addresses +than the stack-assigned results, which appear at lower addresses than +the argument spill area. +Only arguments, not results, are assigned a spill area on the stack. + +### Rationale + +Each base value is assigned to its own register to optimize +construction and access. +An alternative would be to pack multiple sub-word values into +registers, or to simply map an argument's in-memory layout to +registers (this is common in C ABIs), but this typically adds cost to +pack and unpack these values. +Modern architectures have more than enough registers to pass all +arguments and results this way for nearly all functions (see the +appendix), so there’s little downside to spreading base values across +registers. + +Arguments that can’t be fully assigned to registers are passed +entirely on the stack in case the callee takes the address of that +argument. +If an argument could be split across the stack and registers and the +callee took its address, it would need to be reconstructed in memory, +a process that would be proportional to the size of the argument. + +Non-trivial arrays are always passed on the stack because indexing +into an array typically requires a computed offset, which generally +isn’t possible with registers. +Arrays in general are rare in function signatures (only 0.7% of +functions in the Go 1.15 standard library and 0.2% in kubelet). +We considered allowing array fields to be passed on the stack while +the rest of an argument’s fields are passed in registers, but this +creates the same problems as other large structs if the callee takes +the address of an argument, and would benefit <0.1% of functions in +kubelet (and even these very little). + +We make exceptions for 0 and 1-element arrays because these don’t +require computed offsets, and 1-element arrays are already decomposed +in the compiler’s SSA representation. + +The ABI assignment algorithm above is equivalent to Go’s stack-based +ABI0 calling convention if there are zero architecture registers. +This is intended to ease the transition to the register-based internal +ABI and make it easy for the compiler to generate either calling +convention. +An architecture may still define register meanings that aren’t +compatible with ABI0, but these differences should be easy to account +for in the compiler. + +The algorithm reserves spill space for arguments in the caller’s frame +so that the compiler can generate a stack growth path that spills into +this reserved space. +If the callee has to grow the stack, it may not be able to reserve +enough additional stack space in its own frame to spill these, which +is why it’s important that the caller do so. +These slots also act as the home location if these arguments need to +be spilled for any other reason, which simplifies traceback printing. + +There are several options for how to lay out the argument spill space. +We chose to lay out each argument according to its type's usual memory +layout but to separate the spill space from the regular argument +space. +Using the usual memory layout simplifies the compiler because it +already understands this layout. 
+Also, if a function takes the address of a register-assigned argument, +the compiler must spill that argument to memory in its usual memory +layout and it's more convenient to use the argument spill space for +this purpose. + +Alternatively, the spill space could be structured around argument +registers. +In this approach, the stack growth spill path would spill each +argument register to a register-sized stack word. +However, if the function takes the address of a register-assigned +argument, the compiler would have to reconstruct it in memory layout +elsewhere on the stack. + +The spill space could also be interleaved with the stack-assigned +arguments so the arguments appear in order whether they are register- +or stack-assigned. +This would be close to ABI0, except that register-assigned arguments +would be uninitialized on the stack and there's no need to reserve +stack space for register-assigned results. +We expect separating the spill space to perform better because of +memory locality. +Separating the space is also potentially simpler for `reflect` calls +because this allows `reflect` to summarize the spill space as a single +number. +Finally, the long-term intent is to remove reserved spill slots +entirely – allowing most functions to be called without any stack +setup and easing the introduction of callee-save registers – and +separating the spill space makes that transition easier. + +## Closures + +A func value (e.g., `var x func()`) is a pointer to a closure object. +A closure object begins with a pointer-sized program counter +representing the entry point of the function, followed by zero or more +bytes containing the closed-over environment. + +Closure calls follow the same conventions as static function and +method calls, with one addition. Each architecture specifies a +*closure context pointer* register and calls to closures store the +address of the closure object in the closure context pointer register +prior to the call. + +## Software floating-point mode + +In "softfloat" mode, the ABI simply treats the hardware as having zero +floating-point registers. +As a result, any arguments containing floating-point values will be +passed on the stack. + +*Rationale*: Softfloat mode is about compatibility over performance +and is not commonly used. +Hence, we keep the ABI as simple as possible in this case, rather than +adding additional rules for passing floating-point values in integer +registers. + +## Architecture specifics + +This section describes per-architecture register mappings, as well as +other per-architecture special cases. + +### amd64 architecture + +The amd64 architecture uses the following sequence of 9 registers for +integer arguments and results: + + RAX, RBX, RCX, RDI, RSI, R8, R9, R10, R11 + +It uses X0 – X14 for floating-point arguments and results. + +*Rationale*: These sequences are chosen from the available registers +to be relatively easy to remember. + +Registers R12 and R13 are permanent scratch registers. +R15 is a scratch register except in dynamically linked binaries. + +*Rationale*: Some operations such as stack growth and reflection calls +need dedicated scratch registers in order to manipulate call frames +without corrupting arguments or results. 
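As a minimal, non-normative sketch of how these amd64 sequences get consumed, the following Go program applies the "one register per base value" counting described earlier to a hypothetical flattened signature. The signature, names, and the per-value stack fallback are illustrative assumptions only; the actual algorithm sends an entire argument to the stack if any of its base values fails to fit.

```
package main

import "fmt"

// amd64 ABIInternal integer argument registers, in the order listed above.
var intRegs = []string{"RAX", "RBX", "RCX", "RDI", "RSI", "R8", "R9", "R10", "R11"}

const numFloatRegs = 15 // X0 through X14

// baseValue is one register-sized piece of a hypothetical flattened argument.
type baseValue struct {
	name    string
	isFloat bool
}

func main() {
	// Hypothetical flattened signature: func f(a int, b float64, s string).
	// A string contributes two integer base values (data pointer and length).
	args := []baseValue{
		{"a", false},
		{"b", true},
		{"s.ptr", false},
		{"s.len", false},
	}

	nextInt, nextFloat := 0, 0
	for _, v := range args {
		switch {
		case v.isFloat && nextFloat < numFloatRegs:
			fmt.Printf("%-6s -> X%d\n", v.name, nextFloat)
			nextFloat++
		case !v.isFloat && nextInt < len(intRegs):
			fmt.Printf("%-6s -> %s\n", v.name, intRegs[nextInt])
			nextInt++
		default:
			// Simplification: the real rule would send the whole
			// containing argument to the stack, not just this value.
			fmt.Printf("%-6s -> stack\n", v.name)
		}
	}
}
```

Running this prints `a -> RAX`, `b -> X0`, `s.ptr -> RBX`, `s.len -> RCX`, which is the same assignment the earlier worked example implies once its hypothetical R0, R1, ... registers are replaced by the amd64 integer sequence.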
+ +Special-purpose registers are as follows: + +| Register | Call meaning | Body meaning | +| --- | --- | --- | +| RSP | Stack pointer | Fixed | +| RBP | Frame pointer | Fixed | +| RDX | Closure context pointer | Scratch | +| R12 | None | Scratch | +| R13 | None | Scratch | +| R14 | Current goroutine | Scratch | +| R15 | GOT reference temporary | Fixed if dynlink | +| X15 | Zero value | Fixed | + +TODO: We may start with the existing TLS-based g and move to R14 +later. + +*Rationale*: These register meanings are compatible with Go’s +stack-based calling convention except for R14 and X15, which will have +to be restored on transitions from ABI0 code to ABIInternal code. +In ABI0, these are undefined, so transitions from ABIInternal to ABI0 +can ignore these registers. + +*Rationale*: For the current goroutine pointer, we chose a register +that requires an additional REX byte. +While this adds one byte to every function prologue, it is hardly ever +accessed outside the function prologue and we expect making more +single-byte registers available to be a net win. + +*Rationale*: We designate X15 as a fixed zero register because +functions often have to bulk zero their stack frames, and this is more +efficient with a designated zero register. + +#### Stack layout + +The stack pointer, RSP, grows down and is always aligned to 8 bytes. + +The amd64 architecture does not use a link register. + +A function's stack frame is laid out as follows: + + +------------------------------+ + | return PC | + | RBP on entry | + | ... locals ... | + | ... outgoing arguments ... | + +------------------------------+ ↓ lower addresses + +The "return PC" is pushed as part of the standard amd64 `CALL` +operation. +On entry, a function subtracts from RSP to open its stack frame and +saves the value of RBP directly below the return PC. +A leaf function that does not require any stack space may omit the +saved RBP. + +The Go ABI's use of RBP as a frame pointer register is compatible with +amd64 platform conventions so that Go can inter-operate with platform +debuggers and profilers. + +#### Flags + +The direction flag (D) is always cleared (set to the “forward” +direction) at a call. +The arithmetic status flags are treated like scratch registers and not +preserved across calls. +All other bits in RFLAGS are system flags. + +The CPU is always in MMX technology state (not x87 mode). + +*Rationale*: Go on amd64 uses the XMM registers and never uses the x87 +registers, so it makes sense to assume the CPU is in MMX mode. +Otherwise, any function that used the XMM registers would have to +execute an EMMS instruction before calling another function or +returning (this is the case in the SysV ABI). + +At calls, the MXCSR control bits are always set as follows: + +| Flag | Bit | Value | Meaning | +| --- | --- | --- | --- | +| FZ | 15 | 0 | Do not flush to zero | +| RC | 14/13 | 0 (RN) | Round to nearest | +| PM | 12 | 1 | Precision masked | +| UM | 11 | 1 | Underflow masked | +| OM | 10 | 1 | Overflow masked | +| ZM | 9 | 1 | Divide-by-zero masked | +| DM | 8 | 1 | Denormal operations masked | +| IM | 7 | 1 | Invalid operations masked | +| DAZ | 6 | 0 | Do not zero de-normals | + +The MXCSR status bits are callee-save. + +*Rationale*: Having a fixed MXCSR control configuration allows Go +functions to use SSE operations without modifying or saving the MXCSR. +Functions are allowed to modify it between calls (as long as they +restore it), but as of this writing Go code never does. 
+The above fixed configuration matches the process initialization +control bits specified by the ELF AMD64 ABI. + +The x87 floating-point control word is not used by Go on amd64. + +## Future directions + +### Spill path improvements + +The ABI currently reserves spill space for argument registers so the +compiler can statically generate an argument spill path before calling +into `runtime.morestack` to grow the stack. +This ensures there will be sufficient spill space even when the stack +is nearly exhausted and keeps stack growth and stack scanning +essentially unchanged from ABI0. + +However, this wastes stack space (the median wastage is 16 bytes per +call), resulting in larger stacks and increased cache footprint. +A better approach would be to reserve stack space only when spilling. +One way to ensure enough space is available to spill would be for +every function to ensure there is enough space for the function's own +frame *as well as* the spill space of all functions it calls. +For most functions, this would change the threshold for the prologue +stack growth check. +For `nosplit` functions, this would change the threshold used in the +linker's static stack size check. + +Allocating spill space in the callee rather than the caller may also +allow for faster reflection calls in the common case where a function +takes only register arguments, since it would allow reflection to make +these calls directly without allocating any frame. + +The statically-generated spill path also increases code size. +It is possible to instead have a generic spill path in the runtime, as +part of `morestack`. +However, this complicates reserving the spill space, since spilling +all possible register arguments would, in most cases, take +significantly more space than spilling only those used by a particular +function. +Some options are to spill to a temporary space and copy back only the +registers used by the function, or to grow the stack if necessary +before spilling to it (using a temporary space if necessary), or to +use a heap-allocated space if insufficient stack space is available. +These options all add enough complexity that we will have to make this +decision based on the actual code size growth caused by the static +spill paths. + +### Clobber sets + +As defined, the ABI does not use callee-save registers. +This significantly simplifies the garbage collector and the compiler's +register allocator, but at some performance cost. +A potentially better balance for Go code would be to use *clobber +sets*: for each function, the compiler records the set of registers it +clobbers (including those clobbered by functions it calls) and any +register not clobbered by function F can remain live across calls to +F. + +This is generally a good fit for Go because Go's package DAG allows +function metadata like the clobber set to flow up the call graph, even +across package boundaries. +Clobber sets would require relatively little change to the garbage +collector, unlike general callee-save registers. +One disadvantage of clobber sets over callee-save registers is that +they don't help with indirect function calls or interface method +calls, since static information isn't available in these cases. + +### Large aggregates + +Go encourages passing composite values by value, and this simplifies +reasoning about mutation and races. +However, this comes at a performance cost for large composite values. 
+It may be possible to instead transparently pass large composite +values by reference and delay copying until it is actually necessary. + +## Appendix: Register usage analysis + +In order to understand the impacts of the above design on register +usage, we +[analyzed](https://github.com/aclements/go-misc/tree/master/abi) the +impact of the above ABI on a large code base: cmd/kubelet from +[Kubernetes](https://github.com/kubernetes/kubernetes) at tag v1.18.8. + +The following table shows the impact of different numbers of available +integer and floating-point registers on argument assignment: + +``` +| | | | stack args | spills | stack total | +| ints | floats | % fit | p50 | p95 | p99 | p50 | p95 | p99 | p50 | p95 | p99 | +| 0 | 0 | 6.3% | 32 | 152 | 256 | 0 | 0 | 0 | 32 | 152 | 256 | +| 0 | 8 | 6.4% | 32 | 152 | 256 | 0 | 0 | 0 | 32 | 152 | 256 | +| 1 | 8 | 21.3% | 24 | 144 | 248 | 8 | 8 | 8 | 32 | 152 | 256 | +| 2 | 8 | 38.9% | 16 | 128 | 224 | 8 | 16 | 16 | 24 | 136 | 240 | +| 3 | 8 | 57.0% | 0 | 120 | 224 | 16 | 24 | 24 | 24 | 136 | 240 | +| 4 | 8 | 73.0% | 0 | 120 | 216 | 16 | 32 | 32 | 24 | 136 | 232 | +| 5 | 8 | 83.3% | 0 | 112 | 216 | 16 | 40 | 40 | 24 | 136 | 232 | +| 6 | 8 | 87.5% | 0 | 112 | 208 | 16 | 48 | 48 | 24 | 136 | 232 | +| 7 | 8 | 89.8% | 0 | 112 | 208 | 16 | 48 | 56 | 24 | 136 | 232 | +| 8 | 8 | 91.3% | 0 | 112 | 200 | 16 | 56 | 64 | 24 | 136 | 232 | +| 9 | 8 | 92.1% | 0 | 112 | 192 | 16 | 56 | 72 | 24 | 136 | 232 | +| 10 | 8 | 92.6% | 0 | 104 | 192 | 16 | 56 | 72 | 24 | 136 | 232 | +| 11 | 8 | 93.1% | 0 | 104 | 184 | 16 | 56 | 80 | 24 | 128 | 232 | +| 12 | 8 | 93.4% | 0 | 104 | 176 | 16 | 56 | 88 | 24 | 128 | 232 | +| 13 | 8 | 94.0% | 0 | 88 | 176 | 16 | 56 | 96 | 24 | 128 | 232 | +| 14 | 8 | 94.4% | 0 | 80 | 152 | 16 | 64 | 104 | 24 | 128 | 232 | +| 15 | 8 | 94.6% | 0 | 80 | 152 | 16 | 64 | 112 | 24 | 128 | 232 | +| 16 | 8 | 94.9% | 0 | 16 | 152 | 16 | 64 | 112 | 24 | 128 | 232 | +| ∞ | 8 | 99.8% | 0 | 0 | 0 | 24 | 112 | 216 | 24 | 120 | 216 | +``` + +The first two columns show the number of available integer and +floating-point registers. +The first row shows the results for 0 integer and 0 floating-point +registers, which is equivalent to ABI0. +We found that any reasonable number of floating-point registers has +the same effect, so we fixed it at 8 for all other rows. + +The “% fit” column gives the fraction of functions where all arguments +and results are register-assigned and no arguments are passed on the +stack. +The three “stack args” columns give the median, 95th and 99th +percentile number of bytes of stack arguments. +The “spills” columns likewise summarize the number of bytes in +on-stack spill space. +And “stack total” summarizes the sum of stack arguments and on-stack +spill slots. +Note that these are three different distributions; for example, +there’s no single function that takes 0 stack argument bytes, 16 spill +bytes, and 24 total stack bytes. + +From this, we can see that the fraction of functions that fit entirely +in registers grows very slowly once it reaches about 90%, though +curiously there is a small minority of functions that could benefit +from a huge number of registers. +Making 9 integer registers available on amd64 puts it in this realm. +We also see that the stack space required for most functions is fairly +small. 
+While the increasing space required for spills largely balances out +the decreasing space required for stack arguments as the number of +available registers increases, there is a general reduction in the +total stack space required with more available registers. +This does, however, suggest that eliminating spill slots in the future +would noticeably reduce stack requirements. diff --git a/src/cmd/compile/internal/abi/abiutils.go b/src/cmd/compile/internal/abi/abiutils.go index 3ac59e6f75..e935821802 100644 --- a/src/cmd/compile/internal/abi/abiutils.go +++ b/src/cmd/compile/internal/abi/abiutils.go @@ -25,9 +25,8 @@ import ( type ABIParamResultInfo struct { inparams []ABIParamAssignment // Includes receiver for method calls. Does NOT include hidden closure pointer. outparams []ABIParamAssignment - intSpillSlots int - floatSpillSlots int offsetToSpillArea int64 + spillAreaSize int64 config *ABIConfig // to enable String() method } @@ -47,18 +46,14 @@ func (a *ABIParamResultInfo) OutParam(i int) ABIParamAssignment { return a.outparams[i] } -func (a *ABIParamResultInfo) IntSpillCount() int { - return a.intSpillSlots -} - -func (a *ABIParamResultInfo) FloatSpillCount() int { - return a.floatSpillSlots -} - func (a *ABIParamResultInfo) SpillAreaOffset() int64 { return a.offsetToSpillArea } +func (a *ABIParamResultInfo) SpillAreaSize() int64 { + return a.spillAreaSize +} + // RegIndex stores the index into the set of machine registers used by // the ABI on a specific architecture for parameter passing. RegIndex // values 0 through N-1 (where N is the number of integer registers @@ -78,7 +73,27 @@ type RegIndex uint8 type ABIParamAssignment struct { Type *types.Type Registers []RegIndex - Offset int32 + offset int32 +} + +// Offset returns the stack offset for addressing the parameter that "a" describes. +// This will panic if "a" describes a register-allocated parameter. +func (a *ABIParamAssignment) Offset() int32 { + if len(a.Registers) > 0 { + panic("Register allocated parameters have no offset") + } + return a.offset +} + +// SpillOffset returns the offset *within the spill area* for the parameter that "a" describes. +// Registers will be spilled here; if a memory home is needed (for a pointer method e.g.) +// then that will be the address. +// This will panic if "a" describes a stack-allocated parameter. +func (a *ABIParamAssignment) SpillOffset() int32 { + if len(a.Registers) == 0 { + panic("Stack-allocated parameters have no spill offset") + } + return a.offset } // RegAmounts holds a specified number of integer/float registers. @@ -91,20 +106,58 @@ type RegAmounts struct { // by the ABI rules for parameter passing and result returning. type ABIConfig struct { // Do we need anything more than this? - regAmounts RegAmounts + regAmounts RegAmounts + regsForTypeCache map[*types.Type]int } // NewABIConfig returns a new ABI configuration for an architecture with // iRegsCount integer/pointer registers and fRegsCount floating point registers. func NewABIConfig(iRegsCount, fRegsCount int) *ABIConfig { - return &ABIConfig{RegAmounts{iRegsCount, fRegsCount}} + return &ABIConfig{regAmounts: RegAmounts{iRegsCount, fRegsCount}, regsForTypeCache: make(map[*types.Type]int)} +} + +// NumParamRegs returns the number of parameter registers used for a given type, +// without regard for the number available. 
+func (a *ABIConfig) NumParamRegs(t *types.Type) int { + if n, ok := a.regsForTypeCache[t]; ok { + return n + } + + if t.IsScalar() || t.IsPtrShaped() { + var n int + if t.IsComplex() { + n = 2 + } else { + n = (int(t.Size()) + types.RegSize - 1) / types.RegSize + } + a.regsForTypeCache[t] = n + return n + } + typ := t.Kind() + n := 0 + switch typ { + case types.TARRAY: + n = a.NumParamRegs(t.Elem()) * int(t.NumElem()) + case types.TSTRUCT: + for _, f := range t.FieldSlice() { + n += a.NumParamRegs(f.Type) + } + case types.TSLICE: + n = a.NumParamRegs(synthSlice) + case types.TSTRING: + n = a.NumParamRegs(synthString) + case types.TINTER: + n = a.NumParamRegs(synthIface) + } + a.regsForTypeCache[t] = n + return n } // ABIAnalyze takes a function type 't' and an ABI rules description // 'config' and analyzes the function to determine how its parameters // and results will be passed (in registers or on the stack), returning // an ABIParamResultInfo object that holds the results of the analysis. -func ABIAnalyze(t *types.Type, config *ABIConfig) ABIParamResultInfo { +func (config *ABIConfig) ABIAnalyze(t *types.Type) ABIParamResultInfo { setup() s := assignState{ rTotal: config.regAmounts, @@ -116,28 +169,27 @@ func ABIAnalyze(t *types.Type, config *ABIConfig) ABIParamResultInfo { if t.NumRecvs() != 0 { rfsl := ft.Receiver.FieldSlice() result.inparams = append(result.inparams, - s.assignParamOrReturn(rfsl[0].Type)) + s.assignParamOrReturn(rfsl[0].Type, false)) } // Inputs ifsl := ft.Params.FieldSlice() for _, f := range ifsl { result.inparams = append(result.inparams, - s.assignParamOrReturn(f.Type)) + s.assignParamOrReturn(f.Type, false)) } s.stackOffset = types.Rnd(s.stackOffset, int64(types.RegSize)) - // Record number of spill slots needed. - result.intSpillSlots = s.rUsed.intRegs - result.floatSpillSlots = s.rUsed.floatRegs - // Outputs s.rUsed = RegAmounts{} ofsl := ft.Results.FieldSlice() for _, f := range ofsl { - result.outparams = append(result.outparams, s.assignParamOrReturn(f.Type)) + result.outparams = append(result.outparams, s.assignParamOrReturn(f.Type, true)) } - result.offsetToSpillArea = s.stackOffset + // The spill area is at a register-aligned offset and its size is rounded up to a register alignment. + // TODO in theory could align offset only to minimum required by spilled data types. + result.offsetToSpillArea = alignTo(s.stackOffset, types.RegSize) + result.spillAreaSize = alignTo(s.spillOffset, types.RegSize) return result } @@ -160,10 +212,14 @@ func (c *RegAmounts) regString(r RegIndex) string { // form, suitable for debugging or unit testing. 
func (ri *ABIParamAssignment) toString(config *ABIConfig) string { regs := "R{" + offname := "spilloffset" // offset is for spill for register(s) + if len(ri.Registers) == 0 { + offname = "offset" // offset is for memory arg + } for _, r := range ri.Registers { regs += " " + config.regAmounts.regString(r) } - return fmt.Sprintf("%s } offset: %d typ: %v", regs, ri.Offset, ri.Type) + return fmt.Sprintf("%s } %s: %d typ: %v", regs, offname, ri.offset, ri.Type) } // toString method renders an ABIParamResultInfo in human-readable @@ -176,8 +232,8 @@ func (ri *ABIParamResultInfo) String() string { for k, r := range ri.outparams { res += fmt.Sprintf("OUT %d: %s\n", k, r.toString(ri.config)) } - res += fmt.Sprintf("intspill: %d floatspill: %d offsetToSpillArea: %d", - ri.intSpillSlots, ri.floatSpillSlots, ri.offsetToSpillArea) + res += fmt.Sprintf("offsetToSpillArea: %d spillAreaSize: %d", + ri.offsetToSpillArea, ri.spillAreaSize) return res } @@ -188,16 +244,27 @@ type assignState struct { rUsed RegAmounts // regs used by params completely assigned so far pUsed RegAmounts // regs used by the current param (or pieces therein) stackOffset int64 // current stack offset + spillOffset int64 // current spill offset +} + +// align returns a rounded up to t's alignment +func align(a int64, t *types.Type) int64 { + return alignTo(a, int(t.Align)) +} + +// alignTo returns a rounded up to t, where t must be 0 or a power of 2. +func alignTo(a int64, t int) int64 { + if t == 0 { + return a + } + return types.Rnd(a, int64(t)) } // stackSlot returns a stack offset for a param or result of the // specified type. func (state *assignState) stackSlot(t *types.Type) int64 { - if t.Align > 0 { - state.stackOffset = types.Rnd(state.stackOffset, int64(t.Align)) - } - rv := state.stackOffset - state.stackOffset += t.Width + rv := align(state.stackOffset, t) + state.stackOffset = rv + t.Width return rv } @@ -225,11 +292,17 @@ func (state *assignState) allocateRegs() []RegIndex { // regAllocate creates a register ABIParamAssignment object for a param // or result with the specified type, as a final step (this assumes // that all of the safety/suitability analysis is complete). -func (state *assignState) regAllocate(t *types.Type) ABIParamAssignment { +func (state *assignState) regAllocate(t *types.Type, isReturn bool) ABIParamAssignment { + spillLoc := int64(-1) + if !isReturn { + // Spill for register-resident t must be aligned for storage of a t. + spillLoc = align(state.spillOffset, t) + state.spillOffset = spillLoc + t.Size() + } return ABIParamAssignment{ Type: t, Registers: state.allocateRegs(), - Offset: -1, + offset: int32(spillLoc), } } @@ -239,7 +312,7 @@ func (state *assignState) regAllocate(t *types.Type) ABIParamAssignment { func (state *assignState) stackAllocate(t *types.Type) ABIParamAssignment { return ABIParamAssignment{ Type: t, - Offset: int32(state.stackSlot(t)), + offset: int32(state.stackSlot(t)), } } @@ -261,6 +334,9 @@ func (state *assignState) floatUsed() int { // accordingly). func (state *assignState) regassignIntegral(t *types.Type) bool { regsNeeded := int(types.Rnd(t.Width, int64(types.PtrSize)) / int64(types.PtrSize)) + if t.IsComplex() { + regsNeeded = 2 + } // Floating point and complex. if t.IsFloat() || t.IsComplex() { @@ -371,14 +447,14 @@ func (state *assignState) regassign(pt *types.Type) bool { // of type 'pt' to determine whether it can be register assigned. // The result of the analysis is recorded in the result // ABIParamResultInfo held in 'state'. 
-func (state *assignState) assignParamOrReturn(pt *types.Type) ABIParamAssignment { +func (state *assignState) assignParamOrReturn(pt *types.Type, isReturn bool) ABIParamAssignment { state.pUsed = RegAmounts{} if pt.Width == types.BADWIDTH { panic("should never happen") } else if pt.Width == 0 { return state.stackAllocate(pt) } else if state.regassign(pt) { - return state.regAllocate(pt) + return state.regAllocate(pt, isReturn) } else { return state.stackAllocate(pt) } diff --git a/src/cmd/compile/internal/noder/noder.go b/src/cmd/compile/internal/noder/noder.go index 887205b9fb..1c38f1a934 100644 --- a/src/cmd/compile/internal/noder/noder.go +++ b/src/cmd/compile/internal/noder/noder.go @@ -1463,14 +1463,6 @@ func (p *noder) basicLit(lit *syntax.BasicLit) constant.Value { p.errorAt(lit.Pos(), "malformed constant: %s", lit.Value) } - // go/constant uses big.Rat by default, which is more precise, but - // causes toolstash -cmp and some tests to fail. For now, convert - // to big.Float to match cmd/compile's historical precision. - // TODO(mdempsky): Remove. - if v.Kind() == constant.Float { - v = constant.Make(ir.BigFloat(v)) - } - return v } diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index ecf3294082..e49a9716fe 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -434,6 +434,7 @@ func buildssa(fn *ir.Func, worker int) *ssa.Func { // bitmask showing which of the open-coded defers in this function // have been activated. deferBitsTemp := typecheck.TempAt(src.NoXPos, s.curfn, types.Types[types.TUINT8]) + deferBitsTemp.SetAddrtaken(true) s.deferBitsTemp = deferBitsTemp // For this value, AuxInt is initialized to zero by default startDeferBits := s.entryNewValue0(ssa.OpConst8, types.Types[types.TUINT8]) @@ -5086,6 +5087,10 @@ func (s *state) addr(n ir.Node) *ssa.Value { defer s.popLine() } + if s.canSSA(n) { + s.Fatalf("addr of canSSA expression: %+v", n) + } + t := types.NewPtr(n.Type()) linksymOffset := func(lsym *obj.LSym, offset int64) *ssa.Value { v := s.entryNewValue1A(ssa.OpAddr, t, lsym, s.sb) diff --git a/src/cmd/compile/internal/test/abiutils_test.go b/src/cmd/compile/internal/test/abiutils_test.go index ae7d484062..decc29667e 100644 --- a/src/cmd/compile/internal/test/abiutils_test.go +++ b/src/cmd/compile/internal/test/abiutils_test.go @@ -21,7 +21,7 @@ import ( // AMD64 registers available: // - integer: RAX, RBX, RCX, RDI, RSI, R8, R9, r10, R11 // - floating point: X0 - X14 -var configAMD64 = abi.NewABIConfig(9,15) +var configAMD64 = abi.NewABIConfig(9, 15) func TestMain(m *testing.M) { ssagen.Arch.LinkArch = &x86.Linkamd64 @@ -46,9 +46,9 @@ func TestABIUtilsBasic1(t *testing.T) { // expected results exp := makeExpectedDump(` - IN 0: R{ I0 } offset: -1 typ: int32 - OUT 0: R{ I0 } offset: -1 typ: int32 - intspill: 1 floatspill: 0 offsetToSpillArea: 0 + IN 0: R{ I0 } spilloffset: 0 typ: int32 + OUT 0: R{ I0 } spilloffset: -1 typ: int32 + offsetToSpillArea: 0 spillAreaSize: 8 `) abitest(t, ft, exp) @@ -75,39 +75,39 @@ func TestABIUtilsBasic2(t *testing.T) { i8, i16, i32, i64}, []*types.Type{i32, f64, f64}) exp := makeExpectedDump(` - IN 0: R{ I0 } offset: -1 typ: int8 - IN 1: R{ I1 } offset: -1 typ: int16 - IN 2: R{ I2 } offset: -1 typ: int32 - IN 3: R{ I3 } offset: -1 typ: int64 - IN 4: R{ F0 } offset: -1 typ: float32 - IN 5: R{ F1 } offset: -1 typ: float32 - IN 6: R{ F2 } offset: -1 typ: float64 - IN 7: R{ F3 } offset: -1 typ: float64 - IN 8: R{ I4 } offset: -1 typ: int8 - IN 9: R{ I5 
} offset: -1 typ: int16 - IN 10: R{ I6 } offset: -1 typ: int32 - IN 11: R{ I7 } offset: -1 typ: int64 - IN 12: R{ F4 } offset: -1 typ: float32 - IN 13: R{ F5 } offset: -1 typ: float32 - IN 14: R{ F6 } offset: -1 typ: float64 - IN 15: R{ F7 } offset: -1 typ: float64 - IN 16: R{ F8 F9 } offset: -1 typ: complex128 - IN 17: R{ F10 F11 } offset: -1 typ: complex128 - IN 18: R{ F12 F13 } offset: -1 typ: complex128 - IN 19: R{ } offset: 0 typ: complex128 - IN 20: R{ F14 } offset: -1 typ: complex64 - IN 21: R{ I8 } offset: -1 typ: int8 - IN 22: R{ } offset: 16 typ: int16 - IN 23: R{ } offset: 20 typ: int32 - IN 24: R{ } offset: 24 typ: int64 - IN 25: R{ } offset: 32 typ: int8 - IN 26: R{ } offset: 34 typ: int16 - IN 27: R{ } offset: 36 typ: int32 - IN 28: R{ } offset: 40 typ: int64 - OUT 0: R{ I0 } offset: -1 typ: int32 - OUT 1: R{ F0 } offset: -1 typ: float64 - OUT 2: R{ F1 } offset: -1 typ: float64 - intspill: 9 floatspill: 15 offsetToSpillArea: 48 + IN 0: R{ I0 } spilloffset: 0 typ: int8 + IN 1: R{ I1 } spilloffset: 2 typ: int16 + IN 2: R{ I2 } spilloffset: 4 typ: int32 + IN 3: R{ I3 } spilloffset: 8 typ: int64 + IN 4: R{ F0 } spilloffset: 16 typ: float32 + IN 5: R{ F1 } spilloffset: 20 typ: float32 + IN 6: R{ F2 } spilloffset: 24 typ: float64 + IN 7: R{ F3 } spilloffset: 32 typ: float64 + IN 8: R{ I4 } spilloffset: 40 typ: int8 + IN 9: R{ I5 } spilloffset: 42 typ: int16 + IN 10: R{ I6 } spilloffset: 44 typ: int32 + IN 11: R{ I7 } spilloffset: 48 typ: int64 + IN 12: R{ F4 } spilloffset: 56 typ: float32 + IN 13: R{ F5 } spilloffset: 60 typ: float32 + IN 14: R{ F6 } spilloffset: 64 typ: float64 + IN 15: R{ F7 } spilloffset: 72 typ: float64 + IN 16: R{ F8 F9 } spilloffset: 80 typ: complex128 + IN 17: R{ F10 F11 } spilloffset: 96 typ: complex128 + IN 18: R{ F12 F13 } spilloffset: 112 typ: complex128 + IN 19: R{ } offset: 0 typ: complex128 + IN 20: R{ } offset: 16 typ: complex64 + IN 21: R{ I8 } spilloffset: 128 typ: int8 + IN 22: R{ } offset: 24 typ: int16 + IN 23: R{ } offset: 28 typ: int32 + IN 24: R{ } offset: 32 typ: int64 + IN 25: R{ } offset: 40 typ: int8 + IN 26: R{ } offset: 42 typ: int16 + IN 27: R{ } offset: 44 typ: int32 + IN 28: R{ } offset: 48 typ: int64 + OUT 0: R{ I0 } spilloffset: -1 typ: int32 + OUT 1: R{ F0 } spilloffset: -1 typ: float64 + OUT 2: R{ F1 } spilloffset: -1 typ: float64 + offsetToSpillArea: 56 spillAreaSize: 136 `) abitest(t, ft, exp) @@ -123,15 +123,15 @@ func TestABIUtilsArrays(t *testing.T) { []*types.Type{a2, a1, ae, aa1}) exp := makeExpectedDump(` - IN 0: R{ I0 } offset: -1 typ: [1]int32 - IN 1: R{ } offset: 0 typ: [0]int32 - IN 2: R{ I1 } offset: -1 typ: [1][1]int32 - IN 3: R{ } offset: 0 typ: [2]int32 - OUT 0: R{ } offset: 8 typ: [2]int32 - OUT 1: R{ I0 } offset: -1 typ: [1]int32 - OUT 2: R{ } offset: 16 typ: [0]int32 - OUT 3: R{ I1 } offset: -1 typ: [1][1]int32 - intspill: 2 floatspill: 0 offsetToSpillArea: 16 + IN 0: R{ I0 } spilloffset: 0 typ: [1]int32 + IN 1: R{ } offset: 0 typ: [0]int32 + IN 2: R{ I1 } spilloffset: 4 typ: [1][1]int32 + IN 3: R{ } offset: 0 typ: [2]int32 + OUT 0: R{ } offset: 8 typ: [2]int32 + OUT 1: R{ I0 } spilloffset: -1 typ: [1]int32 + OUT 2: R{ } offset: 16 typ: [0]int32 + OUT 3: R{ I1 } spilloffset: -1 typ: [1][1]int32 + offsetToSpillArea: 16 spillAreaSize: 8 `) abitest(t, ft, exp) @@ -147,13 +147,13 @@ func TestABIUtilsStruct1(t *testing.T) { []*types.Type{s, i8, i32}) exp := makeExpectedDump(` - IN 0: R{ I0 } offset: -1 typ: int8 - IN 1: R{ I1 I2 I3 I4 } offset: -1 typ: struct { int8; int8; struct {}; int8; int16 } - IN 2: R{ I5 } 
offset: -1 typ: int64 - OUT 0: R{ I0 I1 I2 I3 } offset: -1 typ: struct { int8; int8; struct {}; int8; int16 } - OUT 1: R{ I4 } offset: -1 typ: int8 - OUT 2: R{ I5 } offset: -1 typ: int32 - intspill: 6 floatspill: 0 offsetToSpillArea: 0 + IN 0: R{ I0 } spilloffset: 0 typ: int8 + IN 1: R{ I1 I2 I3 I4 } spilloffset: 2 typ: struct { int8; int8; struct {}; int8; int16 } + IN 2: R{ I5 } spilloffset: 8 typ: int64 + OUT 0: R{ I0 I1 I2 I3 } spilloffset: -1 typ: struct { int8; int8; struct {}; int8; int16 } + OUT 1: R{ I4 } spilloffset: -1 typ: int8 + OUT 2: R{ I5 } spilloffset: -1 typ: int32 + offsetToSpillArea: 0 spillAreaSize: 16 `) abitest(t, ft, exp) @@ -168,12 +168,12 @@ func TestABIUtilsStruct2(t *testing.T) { []*types.Type{fs, fs}) exp := makeExpectedDump(` - IN 0: R{ I0 } offset: -1 typ: struct { int64; struct {} } - IN 1: R{ I1 } offset: -1 typ: struct { int64; struct {} } - IN 2: R{ I2 F0 } offset: -1 typ: struct { float64; struct { int64; struct {} }; struct {} } - OUT 0: R{ I0 F0 } offset: -1 typ: struct { float64; struct { int64; struct {} }; struct {} } - OUT 1: R{ I1 F1 } offset: -1 typ: struct { float64; struct { int64; struct {} }; struct {} } - intspill: 3 floatspill: 1 offsetToSpillArea: 0 + IN 0: R{ I0 } spilloffset: 0 typ: struct { int64; struct {} } + IN 1: R{ I1 } spilloffset: 16 typ: struct { int64; struct {} } + IN 2: R{ I2 F0 } spilloffset: 32 typ: struct { float64; struct { int64; struct {} }; struct {} } + OUT 0: R{ I0 F0 } spilloffset: -1 typ: struct { float64; struct { int64; struct {} }; struct {} } + OUT 1: R{ I1 F1 } spilloffset: -1 typ: struct { float64; struct { int64; struct {} }; struct {} } + offsetToSpillArea: 0 spillAreaSize: 64 `) abitest(t, ft, exp) @@ -189,19 +189,19 @@ func TestABIUtilsSliceString(t *testing.T) { []*types.Type{str, i64, str, sli32}) exp := makeExpectedDump(` - IN 0: R{ I0 I1 I2 } offset: -1 typ: []int32 - IN 1: R{ I3 } offset: -1 typ: int8 - IN 2: R{ I4 I5 I6 } offset: -1 typ: []int32 - IN 3: R{ I7 } offset: -1 typ: int8 - IN 4: R{ } offset: 0 typ: string - IN 5: R{ I8 } offset: -1 typ: int8 - IN 6: R{ } offset: 16 typ: int64 - IN 7: R{ } offset: 24 typ: []int32 - OUT 0: R{ I0 I1 } offset: -1 typ: string - OUT 1: R{ I2 } offset: -1 typ: int64 - OUT 2: R{ I3 I4 } offset: -1 typ: string - OUT 3: R{ I5 I6 I7 } offset: -1 typ: []int32 - intspill: 9 floatspill: 0 offsetToSpillArea: 48 + IN 0: R{ I0 I1 I2 } spilloffset: 0 typ: []int32 + IN 1: R{ I3 } spilloffset: 24 typ: int8 + IN 2: R{ I4 I5 I6 } spilloffset: 32 typ: []int32 + IN 3: R{ I7 } spilloffset: 56 typ: int8 + IN 4: R{ } offset: 0 typ: string + IN 5: R{ I8 } spilloffset: 57 typ: int8 + IN 6: R{ } offset: 16 typ: int64 + IN 7: R{ } offset: 24 typ: []int32 + OUT 0: R{ I0 I1 } spilloffset: -1 typ: string + OUT 1: R{ I2 } spilloffset: -1 typ: int64 + OUT 2: R{ I3 I4 } spilloffset: -1 typ: string + OUT 3: R{ I5 I6 I7 } spilloffset: -1 typ: []int32 + offsetToSpillArea: 48 spillAreaSize: 64 `) abitest(t, ft, exp) @@ -219,17 +219,17 @@ func TestABIUtilsMethod(t *testing.T) { []*types.Type{a7, f64, i64}) exp := makeExpectedDump(` - IN 0: R{ I0 I1 I2 } offset: -1 typ: struct { int16; int16; int16 } - IN 1: R{ I3 } offset: -1 typ: *struct { int16; int16; int16 } - IN 2: R{ } offset: 0 typ: [7]*struct { int16; int16; int16 } - IN 3: R{ F0 } offset: -1 typ: float64 - IN 4: R{ I4 } offset: -1 typ: int16 - IN 5: R{ I5 } offset: -1 typ: int16 - IN 6: R{ I6 } offset: -1 typ: int16 - OUT 0: R{ } offset: 56 typ: [7]*struct { int16; int16; int16 } - OUT 1: R{ F0 } offset: -1 typ: float64 - OUT 2: R{ I0 } 
offset: -1 typ: int64 - intspill: 7 floatspill: 1 offsetToSpillArea: 112 + IN 0: R{ I0 I1 I2 } spilloffset: 0 typ: struct { int16; int16; int16 } + IN 1: R{ I3 } spilloffset: 8 typ: *struct { int16; int16; int16 } + IN 2: R{ } offset: 0 typ: [7]*struct { int16; int16; int16 } + IN 3: R{ F0 } spilloffset: 16 typ: float64 + IN 4: R{ I4 } spilloffset: 24 typ: int16 + IN 5: R{ I5 } spilloffset: 26 typ: int16 + IN 6: R{ I6 } spilloffset: 28 typ: int16 + OUT 0: R{ } offset: 56 typ: [7]*struct { int16; int16; int16 } + OUT 1: R{ F0 } spilloffset: -1 typ: float64 + OUT 2: R{ I0 } spilloffset: -1 typ: int64 + offsetToSpillArea: 112 spillAreaSize: 32 `) abitest(t, ft, exp) @@ -252,18 +252,44 @@ func TestABIUtilsInterfaces(t *testing.T) { []*types.Type{ei, nei, pei}) exp := makeExpectedDump(` - IN 0: R{ I0 I1 I2 } offset: -1 typ: struct { int16; int16; bool } - IN 1: R{ I3 I4 } offset: -1 typ: interface {} - IN 2: R{ I5 I6 } offset: -1 typ: interface {} - IN 3: R{ I7 I8 } offset: -1 typ: interface { () untyped string } - IN 4: R{ } offset: 0 typ: *interface {} - IN 5: R{ } offset: 8 typ: interface { () untyped string } - IN 6: R{ } offset: 24 typ: int16 - OUT 0: R{ I0 I1 } offset: -1 typ: interface {} - OUT 1: R{ I2 I3 } offset: -1 typ: interface { () untyped string } - OUT 2: R{ I4 } offset: -1 typ: *interface {} - intspill: 9 floatspill: 0 offsetToSpillArea: 32 + IN 0: R{ I0 I1 I2 } spilloffset: 0 typ: struct { int16; int16; bool } + IN 1: R{ I3 I4 } spilloffset: 8 typ: interface {} + IN 2: R{ I5 I6 } spilloffset: 24 typ: interface {} + IN 3: R{ I7 I8 } spilloffset: 40 typ: interface { () untyped string } + IN 4: R{ } offset: 0 typ: *interface {} + IN 5: R{ } offset: 8 typ: interface { () untyped string } + IN 6: R{ } offset: 24 typ: int16 + OUT 0: R{ I0 I1 } spilloffset: -1 typ: interface {} + OUT 1: R{ I2 I3 } spilloffset: -1 typ: interface { () untyped string } + OUT 2: R{ I4 } spilloffset: -1 typ: *interface {} + offsetToSpillArea: 32 spillAreaSize: 56 `) abitest(t, ft, exp) } + +func TestABINumParamRegs(t *testing.T) { + i8 := types.Types[types.TINT8] + i16 := types.Types[types.TINT16] + i32 := types.Types[types.TINT32] + i64 := types.Types[types.TINT64] + f32 := types.Types[types.TFLOAT32] + f64 := types.Types[types.TFLOAT64] + c64 := types.Types[types.TCOMPLEX64] + c128 := types.Types[types.TCOMPLEX128] + + s := mkstruct([]*types.Type{i8, i8, mkstruct([]*types.Type{}), i8, i16}) + a := types.NewArray(s, 3) + + nrtest(t, i8, 1) + nrtest(t, i16, 1) + nrtest(t, i32, 1) + nrtest(t, i64, 1) + nrtest(t, f32, 1) + nrtest(t, f64, 1) + nrtest(t, c64, 2) + nrtest(t, c128, 2) + nrtest(t, s, 4) + nrtest(t, a, 12) + +}
\ No newline at end of file diff --git a/src/cmd/compile/internal/test/abiutilsaux_test.go b/src/cmd/compile/internal/test/abiutilsaux_test.go index 10fb668745..19dd3a51fd 100644 --- a/src/cmd/compile/internal/test/abiutilsaux_test.go +++ b/src/cmd/compile/internal/test/abiutilsaux_test.go @@ -78,9 +78,9 @@ func tokenize(src string) []string { func verifyParamResultOffset(t *testing.T, f *types.Field, r abi.ABIParamAssignment, which string, idx int) int { n := ir.AsNode(f.Nname).(*ir.Name) - if n.FrameOffset() != int64(r.Offset) { + if n.FrameOffset() != int64(r.Offset()) { t.Errorf("%s %d: got offset %d wanted %d t=%v", - which, idx, r.Offset, n.Offset_, f.Type) + which, idx, r.Offset(), n.Offset_, f.Type) return 1 } return 0 @@ -106,12 +106,20 @@ func difftokens(atoks []string, etoks []string) string { return "" } +func nrtest(t *testing.T, ft *types.Type, expected int) { + types.CalcSize(ft) + got := configAMD64.NumParamRegs(ft) + if got != expected { + t.Errorf("]\nexpected num regs = %d, got %d, type %v", expected, got, ft) + } +} + func abitest(t *testing.T, ft *types.Type, exp expectedDump) { types.CalcSize(ft) // Analyze with full set of registers. - regRes := abi.ABIAnalyze(ft, configAMD64) + regRes := configAMD64.ABIAnalyze(ft) regResString := strings.TrimSpace(regRes.String()) // Check results. @@ -122,8 +130,8 @@ func abitest(t *testing.T, ft *types.Type, exp expectedDump) { } // Analyze again with empty register set. - empty := &abi.ABIConfig{} - emptyRes := abi.ABIAnalyze(ft, empty) + empty := abi.NewABIConfig(0, 0) + emptyRes := empty.ABIAnalyze(ft) emptyResString := emptyRes.String() // Walk the results and make sure the offsets assigned match diff --git a/src/cmd/compile/internal/typecheck/iexport.go b/src/cmd/compile/internal/typecheck/iexport.go index be4a689836..6fab74e61f 100644 --- a/src/cmd/compile/internal/typecheck/iexport.go +++ b/src/cmd/compile/internal/typecheck/iexport.go @@ -462,12 +462,16 @@ func (p *iexporter) doDecl(n *ir.Name) { } case ir.OLITERAL: + // TODO(mdempsky): Extend check to all declarations. + if n.Typecheck() == 0 { + base.FatalfAt(n.Pos(), "missed typecheck: %v", n) + } + // Constant. - // TODO(mdempsky): Do we still need this typecheck? If so, why? - n = Expr(n).(*ir.Name) w.tag('C') w.pos(n.Pos()) w.value(n.Type(), n.Val()) + w.constExt(n) case ir.OTYPE: if types.IsDotAlias(n.Sym()) { @@ -956,6 +960,17 @@ func (w *exportWriter) mpfloat(v constant.Value, typ *types.Type) { } } +func (w *exportWriter) mprat(v constant.Value) { + r, ok := constant.Val(v).(*big.Rat) + if !w.bool(ok) { + return + } + // TODO(mdempsky): Come up with a more efficient binary + // encoding before bumping iexportVersion to expose to + // gcimporter. + w.string(r.String()) +} + func (w *exportWriter) bool(b bool) bool { var x uint64 if b { @@ -971,7 +986,37 @@ func (w *exportWriter) string(s string) { w.uint64(w.p.stringOff(s)) } // Compiler-specific extensions. -func (w *exportWriter) varExt(n ir.Node) { +func (w *exportWriter) constExt(n *ir.Name) { + // Internally, we now represent untyped float and complex + // constants with infinite-precision rational numbers using + // go/constant, but the "public" export data format known to + // gcimporter only supports 512-bit floating point constants. 
+ // In case rationals turn out to be a bad idea and we want to + // switch back to fixed-precision constants, for now we + // continue writing out the 512-bit truncation in the public + // data section, and write the exact, rational constant in the + // compiler's extension data. Also, we only need to worry + // about exporting rationals for declared constants, because + // constants that appear in an expression will already have + // been coerced to a concrete, fixed-precision type. + // + // Eventually, assuming we stick with using rationals, we + // should bump iexportVersion to support rationals, and do the + // whole gcimporter update song-and-dance. + // + // TODO(mdempsky): Prepare vocals for that. + + switch n.Type() { + case types.UntypedFloat: + w.mprat(n.Val()) + case types.UntypedComplex: + v := n.Val() + w.mprat(constant.Real(v)) + w.mprat(constant.Imag(v)) + } +} + +func (w *exportWriter) varExt(n *ir.Name) { w.linkname(n.Sym()) w.symIdx(n.Sym()) } diff --git a/src/cmd/compile/internal/typecheck/iimport.go b/src/cmd/compile/internal/typecheck/iimport.go index f2682257f3..b73ef5176b 100644 --- a/src/cmd/compile/internal/typecheck/iimport.go +++ b/src/cmd/compile/internal/typecheck/iimport.go @@ -303,7 +303,9 @@ func (r *importReader) doDecl(sym *types.Sym) *ir.Name { typ := r.typ() val := r.value(typ) - return importconst(r.p.ipkg, pos, sym, typ, val) + n := importconst(r.p.ipkg, pos, sym, typ, val) + r.constExt(n) + return n case 'F': typ := r.signature(nil) @@ -440,6 +442,15 @@ func (p *importReader) float(typ *types.Type) constant.Value { return constant.Make(&f) } +func (p *importReader) mprat(orig constant.Value) constant.Value { + if !p.bool() { + return orig + } + var rat big.Rat + rat.SetString(p.string()) + return constant.Make(&rat) +} + func (r *importReader) ident(selector bool) *types.Sym { name := r.string() if name == "" { @@ -641,7 +652,19 @@ func (r *importReader) byte() byte { // Compiler-specific extensions. -func (r *importReader) varExt(n ir.Node) { +func (r *importReader) constExt(n *ir.Name) { + switch n.Type() { + case types.UntypedFloat: + n.SetVal(r.mprat(n.Val())) + case types.UntypedComplex: + v := n.Val() + re := r.mprat(constant.Real(v)) + im := r.mprat(constant.Imag(v)) + n.SetVal(makeComplex(re, im)) + } +} + +func (r *importReader) varExt(n *ir.Name) { r.linkname(n.Sym()) r.symIdx(n.Sym()) } diff --git a/src/cmd/compile/internal/walk/assign.go b/src/cmd/compile/internal/walk/assign.go index 6e8075a35f..230b544148 100644 --- a/src/cmd/compile/internal/walk/assign.go +++ b/src/cmd/compile/internal/walk/assign.go @@ -289,11 +289,14 @@ func ascompatee(op ir.Op, nl, nr []ir.Node) []ir.Node { } var assigned ir.NameSet - var memWrite bool + var memWrite, deferResultWrite bool // affected reports whether expression n could be affected by // the assignments applied so far. affected := func(n ir.Node) bool { + if deferResultWrite { + return true + } return ir.Any(n, func(n ir.Node) bool { if n.Op() == ir.ONAME && assigned.Has(n.(*ir.Name)) { return true @@ -369,21 +372,40 @@ func ascompatee(op ir.Op, nl, nr []ir.Node) []ir.Node { appendWalkStmt(&late, convas(ir.NewAssignStmt(base.Pos, lorig, r), &late)) - if name != nil && ir.IsBlank(name) { - // We can ignore assignments to blank. + // Check for reasons why we may need to compute later expressions + // before this assignment happens. + + if name == nil { + // Not a direct assignment to a declared variable. + // Conservatively assume any memory access might alias. 
+ memWrite = true continue } - if op == ir.ORETURN && types.OrigSym(name.Sym()) == nil { - // We can also ignore assignments to anonymous result - // parameters. These can't appear in expressions anyway. + + if name.Class == ir.PPARAMOUT && ir.CurFunc.HasDefer() { + // Assignments to a result parameter in a function with defers + // becomes visible early if evaluation of any later expression + // panics (#43835). + deferResultWrite = true continue } - if name != nil && name.OnStack() && !name.Addrtaken() { - assigned.Add(name) - } else { + if sym := types.OrigSym(name.Sym()); sym == nil || sym.IsBlank() { + // We can ignore assignments to blank or anonymous result parameters. + // These can't appear in expressions anyway. + continue + } + + if name.Addrtaken() || !name.OnStack() { + // Global variable, heap escaped, or just addrtaken. + // Conservatively assume any memory access might alias. memWrite = true + continue } + + // Local, non-addrtaken variable. + // Assignments can only alias with direct uses of this variable. + assigned.Add(name) } early.Append(late.Take()...) diff --git a/src/cmd/go/internal/work/action.go b/src/cmd/go/internal/work/action.go index b071ed1400..9d141ae233 100644 --- a/src/cmd/go/internal/work/action.go +++ b/src/cmd/go/internal/work/action.go @@ -57,9 +57,6 @@ type Builder struct { id sync.Mutex toolIDCache map[string]string // tool name -> tool ID buildIDCache map[string]string // file name -> build ID - - cgoEnvOnce sync.Once - cgoEnvCache []string } // NOTE: Much of Action would not need to be exported if not for test. diff --git a/src/cmd/go/internal/work/exec.go b/src/cmd/go/internal/work/exec.go index cacb4c05df..422e83c224 100644 --- a/src/cmd/go/internal/work/exec.go +++ b/src/cmd/go/internal/work/exec.go @@ -1165,7 +1165,10 @@ func (b *Builder) vet(ctx context.Context, a *Action) error { } // TODO(rsc): Why do we pass $GCCGO to go vet? - env := b.cgoEnv() + env := b.cCompilerEnv() + if cfg.BuildToolchainName == "gccgo" { + env = append(env, "GCCGO="+BuildToolchain.compiler()) + } p := a.Package tool := VetTool @@ -2111,24 +2114,6 @@ func (b *Builder) cCompilerEnv() []string { return []string{"TERM=dumb"} } -// cgoEnv returns environment variables to set when running cgo. -// Some of these pass through to cgo running the C compiler, -// so it includes cCompilerEnv. -func (b *Builder) cgoEnv() []string { - b.cgoEnvOnce.Do(func() { - cc, err := exec.LookPath(b.ccExe()[0]) - if err != nil || filepath.Base(cc) == cc { // reject relative path - cc = "/missing-cc" - } - gccgo := GccgoBin - if filepath.Base(gccgo) == gccgo { // reject relative path - gccgo = "/missing-gccgo" - } - b.cgoEnvCache = append(b.cCompilerEnv(), "CC="+cc, "GCCGO="+gccgo) - }) - return b.cgoEnvCache -} - // mkdir makes the named directory. func (b *Builder) Mkdir(dir string) error { // Make Mkdir(a.Objdir) a no-op instead of an error when a.Objdir == "". @@ -2729,7 +2714,7 @@ func (b *Builder) cgo(a *Action, cgoExe, objdir string, pcCFLAGS, pcLDFLAGS, cgo // along to the host linker. At this point in the code, cgoLDFLAGS // consists of the original $CGO_LDFLAGS (unchecked) and all the // flags put together from source code (checked). 
- cgoenv := b.cgoEnv() + cgoenv := b.cCompilerEnv() if len(cgoLDFLAGS) > 0 { flags := make([]string, len(cgoLDFLAGS)) for i, f := range cgoLDFLAGS { @@ -2966,7 +2951,7 @@ func (b *Builder) dynimport(a *Action, p *load.Package, objdir, importGo, cgoExe if p.Standard && p.ImportPath == "runtime/cgo" { cgoflags = []string{"-dynlinker"} // record path to dynamic linker } - return b.run(a, base.Cwd, p.ImportPath, b.cgoEnv(), cfg.BuildToolexec, cgoExe, "-dynpackage", p.Name, "-dynimport", dynobj, "-dynout", importGo, cgoflags) + return b.run(a, base.Cwd, p.ImportPath, b.cCompilerEnv(), cfg.BuildToolexec, cgoExe, "-dynpackage", p.Name, "-dynimport", dynobj, "-dynout", importGo, cgoflags) } // Run SWIG on all SWIG input files. diff --git a/src/cmd/go/internal/work/gc.go b/src/cmd/go/internal/work/gc.go index cc4e2b2b2b..3205fcbffc 100644 --- a/src/cmd/go/internal/work/gc.go +++ b/src/cmd/go/internal/work/gc.go @@ -129,7 +129,11 @@ func (gcToolchain) gc(b *Builder, a *Action, archive string, importcfg, embedcfg } } - args := []interface{}{cfg.BuildToolexec, base.Tool("compile"), "-o", ofile, "-trimpath", a.trimpath(), gcflags, gcargs, "-D", p.Internal.LocalPrefix} + args := []interface{}{cfg.BuildToolexec, base.Tool("compile"), "-o", ofile, "-trimpath", a.trimpath(), gcflags, gcargs} + if p.Internal.LocalPrefix != "" { + // Workaround #43883. + args = append(args, "-D", p.Internal.LocalPrefix) + } if importcfg != nil { if err := b.writeFile(objdir+"importcfg", importcfg); err != nil { return "", nil, err diff --git a/src/cmd/go/testdata/script/cgo_path.txt b/src/cmd/go/testdata/script/cgo_path.txt index 0d15998426..be9609e86f 100644 --- a/src/cmd/go/testdata/script/cgo_path.txt +++ b/src/cmd/go/testdata/script/cgo_path.txt @@ -1,12 +1,20 @@ [!cgo] skip +# Set CC explicitly to something that requires a PATH lookup. +# Normally, the default is gcc or clang, but if CC was set during make.bash, +# that becomes the default. +[exec:clang] env CC=clang +[exec:gcc] env CC=gcc +[!exec:clang] [!exec:gcc] skip 'Unknown C compiler' + env GOCACHE=$WORK/gocache # Looking for compile flags, so need a clean cache. [!windows] env PATH=.:$PATH -[!windows] chmod 0777 p/gcc p/clang +[!windows] chmod 0755 p/gcc p/clang [!windows] exists -exec p/gcc p/clang [windows] exists -exec p/gcc.bat p/clang.bat ! exists p/bug.txt -go build -x +! go build -x +stderr '^cgo: exec (clang|gcc): (clang|gcc) resolves to executable relative to current directory \(.[/\\](clang|gcc)(.bat)?\)$' ! exists p/bug.txt -- go.mod -- diff --git a/src/cmd/go/testdata/script/cgo_path_space.txt b/src/cmd/go/testdata/script/cgo_path_space.txt new file mode 100644 index 0000000000..654295dc69 --- /dev/null +++ b/src/cmd/go/testdata/script/cgo_path_space.txt @@ -0,0 +1,56 @@ +# Check that if the PATH directory containing the C compiler has a space, +# we can still use that compiler with cgo. +# Verifies #43808. +[!cgo] skip + +# Set CC explicitly to something that requires a PATH lookup. +# Normally, the default is gcc or clang, but if CC was set during make.bash, +# that becomes the default. 
+[exec:clang] env CC=clang +[exec:gcc] env CC=gcc +[!exec:clang] [!exec:gcc] skip 'Unknown C compiler' + +[!windows] chmod 0755 $WORK/'program files'/clang +[!windows] chmod 0755 $WORK/'program files'/gcc +[!windows] exists -exec $WORK/'program files'/clang +[!windows] exists -exec $WORK/'program files'/gcc +[!windows] env PATH=$WORK/'program files':$PATH +[windows] exists -exec $WORK/'program files'/gcc.bat +[windows] exists -exec $WORK/'program files'/clang.bat +[windows] env PATH=$WORK\'program files';%PATH% + +! exists $WORK/log.txt +? go build -x +exists $WORK/log.txt +rm $WORK/log.txt + +# TODO(#41400, #43078): when CC is set explicitly, it should be allowed to +# contain spaces separating arguments, and it should be possible to quote +# arguments with spaces (including the path), as in CGO_CFLAGS and other +# variables. For now, this doesn't work. +[!windows] env CC=$WORK/'program files'/gcc +[windows] env CC=$WORK\'program files'\gcc.bat +! go build -x +! exists $WORK/log.txt + +-- go.mod -- +module m + +-- m.go -- +package m + +// #define X 1 +import "C" + +-- $WORK/program files/gcc -- +#!/bin/sh + +echo ok >$WORK/log.txt +-- $WORK/program files/clang -- +#!/bin/sh + +echo ok >$WORK/log.txt +-- $WORK/program files/gcc.bat -- +echo ok >%WORK%\log.txt +-- $WORK/program files/clang.bat -- +echo ok >%WORK%\log.txt diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 1a359f1921..70072cfba4 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -280,7 +280,7 @@ func MOVCONST(d int64, s int, rt int) uint32 { const ( // Optab.flag LFROM = 1 << 0 // p.From uses constant pool - LFROM3 = 1 << 1 // p.From3 uses constant pool + LFROM128 = 1 << 1 // p.From3<<64+p.From forms a 128-bit constant in literal pool LTO = 1 << 2 // p.To uses constant pool NOTUSETMP = 1 << 3 // p expands to multiple instructions, but does NOT use REGTMP ) @@ -419,7 +419,7 @@ var optab = []Optab{ {AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0}, // Move a large constant to a vector register. - {AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, 101, 4, 0, LFROM | LFROM3, 0}, + {AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, 101, 4, 0, LFROM128, 0}, {AVMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, {AVMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, @@ -995,8 +995,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if o.flag&LFROM != 0 { c.addpool(p, &p.From) } - if o.flag&LFROM3 != 0 { - c.addpool(p, p.GetFrom3()) + if o.flag&LFROM128 != 0 { + c.addpool128(p, &p.From, p.GetFrom3()) } if o.flag<O != 0 { c.addpool(p, &p.To) @@ -1201,6 +1201,36 @@ func (c *ctxt7) flushpool(p *obj.Prog, skip int) { } } +// addpool128 adds a 128-bit constant to literal pool by two consecutive DWORD +// instructions, the 128-bit constant is formed by ah.Offset<<64+al.Offset. 
+func (c *ctxt7) addpool128(p *obj.Prog, al, ah *obj.Addr) { + lit := al.Offset + q := c.newprog() + q.As = ADWORD + q.To.Type = obj.TYPE_CONST + q.To.Offset = lit + q.Pc = int64(c.pool.size) + + lit = ah.Offset + t := c.newprog() + t.As = ADWORD + t.To.Type = obj.TYPE_CONST + t.To.Offset = lit + t.Pc = int64(c.pool.size + 8) + q.Link = t + + if c.blitrl == nil { + c.blitrl = q + c.pool.start = uint32(p.Pc) + } else { + c.elitrl.Link = q + } + + c.elitrl = t + c.pool.size += 16 + p.Pool = q +} + /* * MOVD foo(SB), R is actually * MOVD addr, REGTMP diff --git a/src/cmd/internal/obj/arm64/asm_test.go b/src/cmd/internal/obj/arm64/asm_arm64_test.go index 9efdb0217f..c6a00f5b94 100644 --- a/src/cmd/internal/obj/arm64/asm_test.go +++ b/src/cmd/internal/obj/arm64/asm_arm64_test.go @@ -47,7 +47,7 @@ func TestLarge(t *testing.T) { // assemble generated file cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-S", "-o", filepath.Join(dir, "test.o"), tmpfile) - cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") + cmd.Env = append(os.Environ(), "GOOS=linux") out, err := cmd.CombinedOutput() if err != nil { t.Errorf("Assemble failed: %v, output: %s", err, out) @@ -62,7 +62,7 @@ func TestLarge(t *testing.T) { // build generated file cmd = exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) - cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") + cmd.Env = append(os.Environ(), "GOOS=linux") out, err = cmd.CombinedOutput() if err != nil { t.Errorf("Build failed: %v, output: %s", err, out) @@ -96,7 +96,7 @@ func TestNoRet(t *testing.T) { t.Fatal(err) } cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) - cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") + cmd.Env = append(os.Environ(), "GOOS=linux") if out, err := cmd.CombinedOutput(); err != nil { t.Errorf("%v\n%s", err, out) } @@ -134,7 +134,7 @@ func TestPCALIGN(t *testing.T) { t.Fatal(err) } cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-S", "-o", tmpout, tmpfile) - cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") + cmd.Env = append(os.Environ(), "GOOS=linux") out, err := cmd.CombinedOutput() if err != nil { t.Errorf("The %s build failed: %v, output: %s", test.name, err, out) @@ -150,3 +150,13 @@ func TestPCALIGN(t *testing.T) { } } } + +func testvmovq() (r1, r2 uint64) + +// TestVMOVQ checks if the arm64 VMOVQ instruction is working properly. +func TestVMOVQ(t *testing.T) { + a, b := testvmovq() + if a != 0x7040201008040201 || b != 0x3040201008040201 { + t.Errorf("TestVMOVQ got: a=0x%x, b=0x%x, want: a=0x7040201008040201, b=0x3040201008040201", a, b) + } +} diff --git a/src/cmd/internal/obj/arm64/asm_arm64_test.s b/src/cmd/internal/obj/arm64/asm_arm64_test.s new file mode 100644 index 0000000000..9d337a4fd1 --- /dev/null +++ b/src/cmd/internal/obj/arm64/asm_arm64_test.s @@ -0,0 +1,14 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// testvmovq() (r1, r2 uint64) +TEXT ·testvmovq(SB), NOSPLIT, $0-16 + VMOVQ $0x7040201008040201, $0x3040201008040201, V1 + VMOV V1.D[0], R0 + VMOV V1.D[1], R1 + MOVD R0, r1+0(FP) + MOVD R1, r2+8(FP) + RET |
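For reference, the new addpool128 helper emits the VMOVQ literal as two consecutive 64-bit pool entries, the first operand first, and its comment describes the combined value as ah.Offset<<64 + al.Offset; the TestVMOVQ assembly above reads the first operand back from V1.D[0] and the second from V1.D[1]. The snippet below is a small standalone sketch, not part of the patch, that reassembles the two test constants the same way with math/big so the low/high split is easy to check.

// halves_sketch.go — illustrative only; recombines the two 64-bit
// halves used by the VMOVQ test into the single 128-bit constant
// described in the addpool128 comment (high<<64 + low).
package main

import (
	"fmt"
	"math/big"
)

func main() {
	lo := new(big.Int).SetUint64(0x7040201008040201) // first operand, read back from V1.D[0]
	hi := new(big.Int).SetUint64(0x3040201008040201) // second operand, read back from V1.D[1]

	full := new(big.Int).Lsh(hi, 64) // hi << 64
	full.Add(full, lo)               // + lo

	fmt.Printf("%#x\n", full) // 0x30402010080402017040201008040201
}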