author    | Matthew Dempsky <mdempsky@google.com> | 2021-01-25 17:53:08 -0800
committer | Matthew Dempsky <mdempsky@google.com> | 2021-01-25 17:53:50 -0800
commit    | 34704e374f08ea126786b7d454fc9b647663f95a (patch)
tree      | ca17b071c0b292023c5914fabd5de6576bf47ee0 /src/cmd
parent    | c97af0036b8cd8ab2a7ed3f68c3ba72968637e4d (diff)
parent    | 5e4a0cdde3672b9c774564c428058858d09795ea (diff)
download  | go-34704e374f08ea126786b7d454fc9b647663f95a.tar.gz, go-34704e374f08ea126786b7d454fc9b647663f95a.zip
[dev.typeparams] all: merge dev.regabi (5e4a0cd) into dev.typeparams
Merge List:
+ 2021-01-25 5e4a0cdde3 [dev.regabi] all: merge master (bf0f7c9) into dev.regabi
+ 2021-01-25 bf0f7c9d78 doc/go1.16: mention os.DirFS in os section
+ 2021-01-25 deaf29a8a8 cmd/compile: fix order-of-assignment issue w/ defers
+ 2021-01-25 ad2ca26a52 doc/go1.16: mention os.DirEntry and types moved from os to io/fs
+ 2021-01-25 a51921fa5b doc/go1.16: mention new testing/iotest functions
+ 2021-01-25 e6b6d107f7 doc/go1.16: mention deprecation of io/ioutil
+ 2021-01-25 7eaaf28cae [dev.regabi] cmd/compile: disallow taking address of SSA'd values
+ 2021-01-25 96a276363b doc/go1.16: mention go/build changes
+ 2021-01-25 3d85c69a0b html/template: revert "avoid race when escaping updates template"
+ 2021-01-25 54514c6b28 cmd/go: fix TestScript/cgo_path, cgo_path_space when CC set
+ 2021-01-25 6f5e79f470 [dev.regabi] cmd/compile/internal: specify memory layout
+ 2021-01-25 cabffc199d [dev.regabi] cmd/compile/internal: add internal ABI specification
+ 2021-01-25 6de8443f3b doc/asm: add a section on go_asm.h, clean up go_tls.h section
+ 2021-01-25 6a4739ccc5 [dev.regabi] cmd/compile: enable rational constant arithmetic
+ 2021-01-25 be9612a832 [dev.regabi] os: disable TestDirFS until #42637 is fixed
+ 2021-01-25 8ee3d39838 [dev.regabi] cmd/go: workaround -race issue on ppc64le
+ 2021-01-25 54b251f542 lib/time, time/tzdata: update tzdata to 2021a
+ 2021-01-25 5a76c3d548 [dev.regabi] cmd/compile: modify abiutils for recently updated ABI
+ 2021-01-25 ff82cc971a os: force consistent mtime before running fstest on directory on Windows
+ 2021-01-25 044f937a73 doc/go1.16: fix WalkDir and Walk links
+ 2021-01-23 b634f5d97a doc/go1.16: add crypto/x509 memory optimization
+ 2021-01-23 9897655c61 doc/go1.16: reword ambiguously parsable sentence
+ 2021-01-23 cd99385ff4 cmd/internal/obj/arm64: fix VMOVQ instruction encoding error
+ 2021-01-23 66ee8b158f runtime: restore cgo_import_dynamic for libc.so on openbsd
+ 2021-01-22 25c39e4fb5 io/ioutil: fix example test for WriteFile to allow it to run in the playground
+ 2021-01-22 eb21b31e48 runtime: define dummy msanmove
+ 2021-01-22 3a778ff50f runtime: check for g0 stack last in signal handler
+ 2021-01-22 a2cef9b544 cmd/go: don't lookup the path for CC when invoking cgo
Change-Id: Iede4f98ba5ddbee2e16075d20186f8a9c095e378
Diffstat (limited to 'src/cmd')
-rw-r--r-- | src/cmd/compile/internal-abi.md | 628
-rw-r--r-- | src/cmd/compile/internal/abi/abiutils.go | 146
-rw-r--r-- | src/cmd/compile/internal/noder/noder.go | 8
-rw-r--r-- | src/cmd/compile/internal/ssagen/ssa.go | 5
-rw-r--r-- | src/cmd/compile/internal/test/abiutils_test.go | 214
-rw-r--r-- | src/cmd/compile/internal/test/abiutilsaux_test.go | 18
-rw-r--r-- | src/cmd/compile/internal/typecheck/iexport.go | 51
-rw-r--r-- | src/cmd/compile/internal/typecheck/iimport.go | 27
-rw-r--r-- | src/cmd/compile/internal/walk/assign.go | 40
-rw-r--r-- | src/cmd/go/internal/work/action.go | 3
-rw-r--r-- | src/cmd/go/internal/work/exec.go | 27
-rw-r--r-- | src/cmd/go/internal/work/gc.go | 6
-rw-r--r-- | src/cmd/go/testdata/script/cgo_path.txt | 12
-rw-r--r-- | src/cmd/go/testdata/script/cgo_path_space.txt | 56
-rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 38
-rw-r--r-- | src/cmd/internal/obj/arm64/asm_arm64_test.go (renamed from src/cmd/internal/obj/arm64/asm_test.go) | 18
-rw-r--r-- | src/cmd/internal/obj/arm64/asm_arm64_test.s | 14
17 files changed, 1120 insertions, 191 deletions
diff --git a/src/cmd/compile/internal-abi.md b/src/cmd/compile/internal-abi.md new file mode 100644 index 0000000000..f4ef2cc869 --- /dev/null +++ b/src/cmd/compile/internal-abi.md @@ -0,0 +1,628 @@ +# Go internal ABI specification + +This document describes Go’s internal application binary interface +(ABI), known as ABIInternal. +Go's ABI defines the layout of data in memory and the conventions for +calling between Go functions. +This ABI is *unstable* and will change between Go versions. +If you’re writing assembly code, please instead refer to Go’s +[assembly documentation](/doc/asm.html), which describes Go’s stable +ABI, known as ABI0. + +All functions defined in Go source follow ABIInternal. +However, ABIInternal and ABI0 functions are able to call each other +through transparent *ABI wrappers*, described in the [internal calling +convention proposal](https://golang.org/design/27539-internal-abi). + +Go uses a common ABI design across all architectures. +We first describe the common ABI, and then cover per-architecture +specifics. + +*Rationale*: For the reasoning behind using a common ABI across +architectures instead of the platform ABI, see the [register-based Go +calling convention proposal](https://golang.org/design/40724-register-calling). + +## Memory layout + +Go's built-in types have the following sizes and alignments. +Many, though not all, of these sizes are guaranteed by the [language +specification](/doc/go_spec.html#Size_and_alignment_guarantees). +Those that aren't guaranteed may change in future versions of Go (for +example, we've considered changing the alignment of int64 on 32-bit). + +| Type | 64-bit | | 32-bit | | +| --- | --- | --- | --- | --- | +| | Size | Align | Size | Align | +| bool, uint8, int8 | 1 | 1 | 1 | 1 | +| uint16, int16 | 2 | 2 | 2 | 2 | +| uint32, int32 | 4 | 4 | 4 | 4 | +| uint64, int64 | 8 | 8 | 8 | 4 | +| int, uint | 8 | 8 | 4 | 4 | +| float32 | 4 | 4 | 4 | 4 | +| float64 | 8 | 8 | 8 | 4 | +| complex64 | 8 | 4 | 8 | 4 | +| complex128 | 16 | 8 | 16 | 4 | +| uintptr, *T, unsafe.Pointer | 8 | 8 | 4 | 4 | + +The types `byte` and `rune` are aliases for `uint8` and `int32`, +respectively, and hence have the same size and alignment as these +types. + +The layout of `map`, `chan`, and `func` types is equivalent to *T. + +To describe the layout of the remaining composite types, we first +define the layout of a *sequence* S of N fields with types +t<sub>1</sub>, t<sub>2</sub>, ..., t<sub>N</sub>. +We define the byte offset at which each field begins relative to a +base address of 0, as well as the size and alignment of the sequence +as follows: + +``` +offset(S, i) = 0 if i = 1 + = align(offset(S, i-1) + sizeof(t_(i-1)), alignof(t_i)) +alignof(S) = 1 if N = 0 + = max(alignof(t_i) | 1 <= i <= N) +sizeof(S) = 0 if N = 0 + = align(offset(S, N) + sizeof(t_N), alignof(S)) +``` + +Where sizeof(T) and alignof(T) are the size and alignment of type T, +respectively, and align(x, y) rounds x up to a multiple of y. + +The `interface{}` type is a sequence of 1. a pointer to the runtime type +description for the interface's dynamic type and 2. an `unsafe.Pointer` +data field. +Any other interface type (besides the empty interface) is a sequence +of 1. a pointer to the runtime "itab" that gives the method pointers and +the type of the data field and 2. an `unsafe.Pointer` data field. 
+An interface can be "direct" or "indirect" depending on the dynamic +type: a direct interface stores the value directly in the data field, +and an indirect interface stores a pointer to the value in the data +field. +An interface can only be direct if the value consists of a single +pointer word. + +An array type `[N]T` is a sequence of N fields of type T. + +The slice type `[]T` is a sequence of a `*[cap]T` pointer to the slice +backing store, an `int` giving the `len` of the slice, and an `int` +giving the `cap` of the slice. + +The `string` type is a sequence of a `*[len]byte` pointer to the +string backing store, and an `int` giving the `len` of the string. + +A struct type `struct { f1 t1; ...; fM tM }` is laid out as the +sequence t1, ..., tM, tP, where tP is either: + +- Type `byte` if sizeof(tM) = 0 and any of sizeof(t*i*) ≠ 0. +- Empty (size 0 and align 1) otherwise. + +The padding byte prevents creating a past-the-end pointer by taking +the address of the final, empty fN field. + +Note that user-written assembly code should generally not depend on Go +type layout and should instead use the constants defined in +[`go_asm.h`](/doc/asm.html#data-offsets). + +## Function call argument and result passing + +Function calls pass arguments and results using a combination of the +stack and machine registers. +Each argument or result is passed either entirely in registers or +entirely on the stack. +Because access to registers is generally faster than access to the +stack, arguments and results are preferentially passed in registers. +However, any argument or result that contains a non-trivial array or +does not fit entirely in the remaining available registers is passed +on the stack. + +Each architecture defines a sequence of integer registers and a +sequence of floating-point registers. +At a high level, arguments and results are recursively broken down +into values of base types and these base values are assigned to +registers from these sequences. + +Arguments and results can share the same registers, but do not share +the same stack space. +Beyond the arguments and results passed on the stack, the caller also +reserves spill space on the stack for all register-based arguments +(but does not populate this space). + +The receiver, arguments, and results of function or method F are +assigned to registers or the stack using the following algorithm: + +1. Let NI and NFP be the length of integer and floating-point register + sequences defined by the architecture. + Let I and FP be 0; these are the indexes of the next integer and + floating-pointer register. + Let S, the type sequence defining the stack frame, be empty. +1. If F is a method, assign F’s receiver. +1. For each argument A of F, assign A. +1. Add a pointer-alignment field to S. This has size 0 and the same + alignment as `uintptr`. +1. Reset I and FP to 0. +1. For each result R of F, assign R. +1. Add a pointer-alignment field to S. +1. For each register-assigned receiver and argument of F, let T be its + type and add T to the stack sequence S. + This is the argument's (or receiver's) spill space and will be + uninitialized at the call. +1. Add a pointer-alignment field to S. + +Assigning a receiver, argument, or result V of underlying type T works +as follows: + +1. Remember I and FP. +1. Try to register-assign V. +1. If step 2 failed, reset I and FP to the values from step 1, add T + to the stack sequence S, and assign V to this field in S. + +Register-assignment of a value V of underlying type T works as follows: + +1. 
If T is a boolean or integral type that fits in an integer + register, assign V to register I and increment I. +1. If T is an integral type that fits in two integer registers, assign + the least significant and most significant halves of V to registers + I and I+1, respectively, and increment I by 2 +1. If T is a floating-point type and can be represented without loss + of precision in a floating-point register, assign V to register FP + and increment FP. +1. If T is a complex type, recursively register-assign its real and + imaginary parts. +1. If T is a pointer type, map type, channel type, or function type, + assign V to register I and increment I. +1. If T is a string type, interface type, or slice type, recursively + register-assign V’s components (2 for strings and interfaces, 3 for + slices). +1. If T is a struct type, recursively register-assign each field of V. +1. If T is an array type of length 0, do nothing. +1. If T is an array type of length 1, recursively register-assign its + one element. +1. If T is an array type of length > 1, fail. +1. If I > NI or FP > NFP, fail. +1. If any recursive assignment above fails, fail. + +The above algorithm produces an assignment of each receiver, argument, +and result to registers or to a field in the stack sequence. +The final stack sequence looks like: stack-assigned receiver, +stack-assigned arguments, pointer-alignment, stack-assigned results, +pointer-alignment, spill space for each register-assigned argument, +pointer-alignment. +The following diagram shows what this stack frame looks like on the +stack, using the typical convention where address 0 is at the bottom: + + +------------------------------+ + | . . . | + | 2nd reg argument spill space | + | 1st reg argument spill space | + | <pointer-sized alignment> | + | . . . | + | 2nd stack-assigned result | + | 1st stack-assigned result | + | <pointer-sized alignment> | + | . . . | + | 2nd stack-assigned argument | + | 1st stack-assigned argument | + | stack-assigned receiver | + +------------------------------+ ↓ lower addresses + +To perform a call, the caller reserves space starting at the lowest +address in its stack frame for the call stack frame, stores arguments +in the registers and argument stack fields determined by the above +algorithm, and performs the call. +At the time of a call, spill space, result stack fields, and result +registers are left uninitialized. +Upon return, the callee must have stored results to all result +registers and result stack fields determined by the above algorithm. + +There are no callee-save registers, so a call may overwrite any +register that doesn’t have a fixed meaning, including argument +registers. + +### Example + +Consider the function `func f(a1 uint8, a2 [2]uintptr, a3 uint8) (r1 +struct { x uintptr; y [2]uintptr }, r2 string)` on a 64-bit +architecture with hypothetical integer registers R0–R9. + +On entry, `a1` is assigned to `R0`, `a3` is assigned to `R1` and the +stack frame is laid out in the following sequence: + + a2 [2]uintptr + r1.x uintptr + r1.y [2]uintptr + a1Spill uint8 + a2Spill uint8 + _ [6]uint8 // alignment padding + +In the stack frame, only the `a2` field is initialized on entry; the +rest of the frame is left uninitialized. + +On exit, `r2.base` is assigned to `R0`, `r2.len` is assigned to `R1`, +and `r1.x` and `r1.y` are initialized in the stack frame. + +There are several things to note in this example. +First, `a2` and `r1` are stack-assigned because they contain arrays. 
+The other arguments and results are register-assigned. +Result `r2` is decomposed into its components, which are individually +register-assigned. +On the stack, the stack-assigned arguments appear at lower addresses +than the stack-assigned results, which appear at lower addresses than +the argument spill area. +Only arguments, not results, are assigned a spill area on the stack. + +### Rationale + +Each base value is assigned to its own register to optimize +construction and access. +An alternative would be to pack multiple sub-word values into +registers, or to simply map an argument's in-memory layout to +registers (this is common in C ABIs), but this typically adds cost to +pack and unpack these values. +Modern architectures have more than enough registers to pass all +arguments and results this way for nearly all functions (see the +appendix), so there’s little downside to spreading base values across +registers. + +Arguments that can’t be fully assigned to registers are passed +entirely on the stack in case the callee takes the address of that +argument. +If an argument could be split across the stack and registers and the +callee took its address, it would need to be reconstructed in memory, +a process that would be proportional to the size of the argument. + +Non-trivial arrays are always passed on the stack because indexing +into an array typically requires a computed offset, which generally +isn’t possible with registers. +Arrays in general are rare in function signatures (only 0.7% of +functions in the Go 1.15 standard library and 0.2% in kubelet). +We considered allowing array fields to be passed on the stack while +the rest of an argument’s fields are passed in registers, but this +creates the same problems as other large structs if the callee takes +the address of an argument, and would benefit <0.1% of functions in +kubelet (and even these very little). + +We make exceptions for 0 and 1-element arrays because these don’t +require computed offsets, and 1-element arrays are already decomposed +in the compiler’s SSA representation. + +The ABI assignment algorithm above is equivalent to Go’s stack-based +ABI0 calling convention if there are zero architecture registers. +This is intended to ease the transition to the register-based internal +ABI and make it easy for the compiler to generate either calling +convention. +An architecture may still define register meanings that aren’t +compatible with ABI0, but these differences should be easy to account +for in the compiler. + +The algorithm reserves spill space for arguments in the caller’s frame +so that the compiler can generate a stack growth path that spills into +this reserved space. +If the callee has to grow the stack, it may not be able to reserve +enough additional stack space in its own frame to spill these, which +is why it’s important that the caller do so. +These slots also act as the home location if these arguments need to +be spilled for any other reason, which simplifies traceback printing. + +There are several options for how to lay out the argument spill space. +We chose to lay out each argument according to its type's usual memory +layout but to separate the spill space from the regular argument +space. +Using the usual memory layout simplifies the compiler because it +already understands this layout. 
+Also, if a function takes the address of a register-assigned argument, +the compiler must spill that argument to memory in its usual memory +layout and it's more convenient to use the argument spill space for +this purpose. + +Alternatively, the spill space could be structured around argument +registers. +In this approach, the stack growth spill path would spill each +argument register to a register-sized stack word. +However, if the function takes the address of a register-assigned +argument, the compiler would have to reconstruct it in memory layout +elsewhere on the stack. + +The spill space could also be interleaved with the stack-assigned +arguments so the arguments appear in order whether they are register- +or stack-assigned. +This would be close to ABI0, except that register-assigned arguments +would be uninitialized on the stack and there's no need to reserve +stack space for register-assigned results. +We expect separating the spill space to perform better because of +memory locality. +Separating the space is also potentially simpler for `reflect` calls +because this allows `reflect` to summarize the spill space as a single +number. +Finally, the long-term intent is to remove reserved spill slots +entirely – allowing most functions to be called without any stack +setup and easing the introduction of callee-save registers – and +separating the spill space makes that transition easier. + +## Closures + +A func value (e.g., `var x func()`) is a pointer to a closure object. +A closure object begins with a pointer-sized program counter +representing the entry point of the function, followed by zero or more +bytes containing the closed-over environment. + +Closure calls follow the same conventions as static function and +method calls, with one addition. Each architecture specifies a +*closure context pointer* register and calls to closures store the +address of the closure object in the closure context pointer register +prior to the call. + +## Software floating-point mode + +In "softfloat" mode, the ABI simply treats the hardware as having zero +floating-point registers. +As a result, any arguments containing floating-point values will be +passed on the stack. + +*Rationale*: Softfloat mode is about compatibility over performance +and is not commonly used. +Hence, we keep the ABI as simple as possible in this case, rather than +adding additional rules for passing floating-point values in integer +registers. + +## Architecture specifics + +This section describes per-architecture register mappings, as well as +other per-architecture special cases. + +### amd64 architecture + +The amd64 architecture uses the following sequence of 9 registers for +integer arguments and results: + + RAX, RBX, RCX, RDI, RSI, R8, R9, R10, R11 + +It uses X0 – X14 for floating-point arguments and results. + +*Rationale*: These sequences are chosen from the available registers +to be relatively easy to remember. + +Registers R12 and R13 are permanent scratch registers. +R15 is a scratch register except in dynamically linked binaries. + +*Rationale*: Some operations such as stack growth and reflection calls +need dedicated scratch registers in order to manipulate call frames +without corrupting arguments or results. 
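As a minimal, non-normative sketch of how these amd64 sequences get consumed, the following Go program applies the "one register per base value" counting described earlier to a hypothetical flattened signature. The signature, names, and the per-value stack fallback are illustrative assumptions only; the actual algorithm sends an entire argument to the stack if any of its base values fails to fit.

```
package main

import "fmt"

// amd64 ABIInternal integer argument registers, in the order listed above.
var intRegs = []string{"RAX", "RBX", "RCX", "RDI", "RSI", "R8", "R9", "R10", "R11"}

const numFloatRegs = 15 // X0 through X14

// baseValue is one register-sized piece of a hypothetical flattened argument.
type baseValue struct {
	name    string
	isFloat bool
}

func main() {
	// Hypothetical flattened signature: func f(a int, b float64, s string).
	// A string contributes two integer base values (data pointer and length).
	args := []baseValue{
		{"a", false},
		{"b", true},
		{"s.ptr", false},
		{"s.len", false},
	}

	nextInt, nextFloat := 0, 0
	for _, v := range args {
		switch {
		case v.isFloat && nextFloat < numFloatRegs:
			fmt.Printf("%-6s -> X%d\n", v.name, nextFloat)
			nextFloat++
		case !v.isFloat && nextInt < len(intRegs):
			fmt.Printf("%-6s -> %s\n", v.name, intRegs[nextInt])
			nextInt++
		default:
			// Simplification: the real rule would send the whole
			// containing argument to the stack, not just this value.
			fmt.Printf("%-6s -> stack\n", v.name)
		}
	}
}
```

Running this prints `a -> RAX`, `b -> X0`, `s.ptr -> RBX`, `s.len -> RCX`, which is the same assignment the earlier worked example implies once its hypothetical R0, R1, ... registers are replaced by the amd64 integer sequence.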
+ +Special-purpose registers are as follows: + +| Register | Call meaning | Body meaning | +| --- | --- | --- | +| RSP | Stack pointer | Fixed | +| RBP | Frame pointer | Fixed | +| RDX | Closure context pointer | Scratch | +| R12 | None | Scratch | +| R13 | None | Scratch | +| R14 | Current goroutine | Scratch | +| R15 | GOT reference temporary | Fixed if dynlink | +| X15 | Zero value | Fixed | + +TODO: We may start with the existing TLS-based g and move to R14 +later. + +*Rationale*: These register meanings are compatible with Go’s +stack-based calling convention except for R14 and X15, which will have +to be restored on transitions from ABI0 code to ABIInternal code. +In ABI0, these are undefined, so transitions from ABIInternal to ABI0 +can ignore these registers. + +*Rationale*: For the current goroutine pointer, we chose a register +that requires an additional REX byte. +While this adds one byte to every function prologue, it is hardly ever +accessed outside the function prologue and we expect making more +single-byte registers available to be a net win. + +*Rationale*: We designate X15 as a fixed zero register because +functions often have to bulk zero their stack frames, and this is more +efficient with a designated zero register. + +#### Stack layout + +The stack pointer, RSP, grows down and is always aligned to 8 bytes. + +The amd64 architecture does not use a link register. + +A function's stack frame is laid out as follows: + + +------------------------------+ + | return PC | + | RBP on entry | + | ... locals ... | + | ... outgoing arguments ... | + +------------------------------+ ↓ lower addresses + +The "return PC" is pushed as part of the standard amd64 `CALL` +operation. +On entry, a function subtracts from RSP to open its stack frame and +saves the value of RBP directly below the return PC. +A leaf function that does not require any stack space may omit the +saved RBP. + +The Go ABI's use of RBP as a frame pointer register is compatible with +amd64 platform conventions so that Go can inter-operate with platform +debuggers and profilers. + +#### Flags + +The direction flag (D) is always cleared (set to the “forward” +direction) at a call. +The arithmetic status flags are treated like scratch registers and not +preserved across calls. +All other bits in RFLAGS are system flags. + +The CPU is always in MMX technology state (not x87 mode). + +*Rationale*: Go on amd64 uses the XMM registers and never uses the x87 +registers, so it makes sense to assume the CPU is in MMX mode. +Otherwise, any function that used the XMM registers would have to +execute an EMMS instruction before calling another function or +returning (this is the case in the SysV ABI). + +At calls, the MXCSR control bits are always set as follows: + +| Flag | Bit | Value | Meaning | +| --- | --- | --- | --- | +| FZ | 15 | 0 | Do not flush to zero | +| RC | 14/13 | 0 (RN) | Round to nearest | +| PM | 12 | 1 | Precision masked | +| UM | 11 | 1 | Underflow masked | +| OM | 10 | 1 | Overflow masked | +| ZM | 9 | 1 | Divide-by-zero masked | +| DM | 8 | 1 | Denormal operations masked | +| IM | 7 | 1 | Invalid operations masked | +| DAZ | 6 | 0 | Do not zero de-normals | + +The MXCSR status bits are callee-save. + +*Rationale*: Having a fixed MXCSR control configuration allows Go +functions to use SSE operations without modifying or saving the MXCSR. +Functions are allowed to modify it between calls (as long as they +restore it), but as of this writing Go code never does. 
+The above fixed configuration matches the process initialization +control bits specified by the ELF AMD64 ABI. + +The x87 floating-point control word is not used by Go on amd64. + +## Future directions + +### Spill path improvements + +The ABI currently reserves spill space for argument registers so the +compiler can statically generate an argument spill path before calling +into `runtime.morestack` to grow the stack. +This ensures there will be sufficient spill space even when the stack +is nearly exhausted and keeps stack growth and stack scanning +essentially unchanged from ABI0. + +However, this wastes stack space (the median wastage is 16 bytes per +call), resulting in larger stacks and increased cache footprint. +A better approach would be to reserve stack space only when spilling. +One way to ensure enough space is available to spill would be for +every function to ensure there is enough space for the function's own +frame *as well as* the spill space of all functions it calls. +For most functions, this would change the threshold for the prologue +stack growth check. +For `nosplit` functions, this would change the threshold used in the +linker's static stack size check. + +Allocating spill space in the callee rather than the caller may also +allow for faster reflection calls in the common case where a function +takes only register arguments, since it would allow reflection to make +these calls directly without allocating any frame. + +The statically-generated spill path also increases code size. +It is possible to instead have a generic spill path in the runtime, as +part of `morestack`. +However, this complicates reserving the spill space, since spilling +all possible register arguments would, in most cases, take +significantly more space than spilling only those used by a particular +function. +Some options are to spill to a temporary space and copy back only the +registers used by the function, or to grow the stack if necessary +before spilling to it (using a temporary space if necessary), or to +use a heap-allocated space if insufficient stack space is available. +These options all add enough complexity that we will have to make this +decision based on the actual code size growth caused by the static +spill paths. + +### Clobber sets + +As defined, the ABI does not use callee-save registers. +This significantly simplifies the garbage collector and the compiler's +register allocator, but at some performance cost. +A potentially better balance for Go code would be to use *clobber +sets*: for each function, the compiler records the set of registers it +clobbers (including those clobbered by functions it calls) and any +register not clobbered by function F can remain live across calls to +F. + +This is generally a good fit for Go because Go's package DAG allows +function metadata like the clobber set to flow up the call graph, even +across package boundaries. +Clobber sets would require relatively little change to the garbage +collector, unlike general callee-save registers. +One disadvantage of clobber sets over callee-save registers is that +they don't help with indirect function calls or interface method +calls, since static information isn't available in these cases. + +### Large aggregates + +Go encourages passing composite values by value, and this simplifies +reasoning about mutation and races. +However, this comes at a performance cost for large composite values. 
+It may be possible to instead transparently pass large composite +values by reference and delay copying until it is actually necessary. + +## Appendix: Register usage analysis + +In order to understand the impacts of the above design on register +usage, we +[analyzed](https://github.com/aclements/go-misc/tree/master/abi) the +impact of the above ABI on a large code base: cmd/kubelet from +[Kubernetes](https://github.com/kubernetes/kubernetes) at tag v1.18.8. + +The following table shows the impact of different numbers of available +integer and floating-point registers on argument assignment: + +``` +| | | | stack args | spills | stack total | +| ints | floats | % fit | p50 | p95 | p99 | p50 | p95 | p99 | p50 | p95 | p99 | +| 0 | 0 | 6.3% | 32 | 152 | 256 | 0 | 0 | 0 | 32 | 152 | 256 | +| 0 | 8 | 6.4% | 32 | 152 | 256 | 0 | 0 | 0 | 32 | 152 | 256 | +| 1 | 8 | 21.3% | 24 | 144 | 248 | 8 | 8 | 8 | 32 | 152 | 256 | +| 2 | 8 | 38.9% | 16 | 128 | 224 | 8 | 16 | 16 | 24 | 136 | 240 | +| 3 | 8 | 57.0% | 0 | 120 | 224 | 16 | 24 | 24 | 24 | 136 | 240 | +| 4 | 8 | 73.0% | 0 | 120 | 216 | 16 | 32 | 32 | 24 | 136 | 232 | +| 5 | 8 | 83.3% | 0 | 112 | 216 | 16 | 40 | 40 | 24 | 136 | 232 | +| 6 | 8 | 87.5% | 0 | 112 | 208 | 16 | 48 | 48 | 24 | 136 | 232 | +| 7 | 8 | 89.8% | 0 | 112 | 208 | 16 | 48 | 56 | 24 | 136 | 232 | +| 8 | 8 | 91.3% | 0 | 112 | 200 | 16 | 56 | 64 | 24 | 136 | 232 | +| 9 | 8 | 92.1% | 0 | 112 | 192 | 16 | 56 | 72 | 24 | 136 | 232 | +| 10 | 8 | 92.6% | 0 | 104 | 192 | 16 | 56 | 72 | 24 | 136 | 232 | +| 11 | 8 | 93.1% | 0 | 104 | 184 | 16 | 56 | 80 | 24 | 128 | 232 | +| 12 | 8 | 93.4% | 0 | 104 | 176 | 16 | 56 | 88 | 24 | 128 | 232 | +| 13 | 8 | 94.0% | 0 | 88 | 176 | 16 | 56 | 96 | 24 | 128 | 232 | +| 14 | 8 | 94.4% | 0 | 80 | 152 | 16 | 64 | 104 | 24 | 128 | 232 | +| 15 | 8 | 94.6% | 0 | 80 | 152 | 16 | 64 | 112 | 24 | 128 | 232 | +| 16 | 8 | 94.9% | 0 | 16 | 152 | 16 | 64 | 112 | 24 | 128 | 232 | +| ∞ | 8 | 99.8% | 0 | 0 | 0 | 24 | 112 | 216 | 24 | 120 | 216 | +``` + +The first two columns show the number of available integer and +floating-point registers. +The first row shows the results for 0 integer and 0 floating-point +registers, which is equivalent to ABI0. +We found that any reasonable number of floating-point registers has +the same effect, so we fixed it at 8 for all other rows. + +The “% fit” column gives the fraction of functions where all arguments +and results are register-assigned and no arguments are passed on the +stack. +The three “stack args” columns give the median, 95th and 99th +percentile number of bytes of stack arguments. +The “spills” columns likewise summarize the number of bytes in +on-stack spill space. +And “stack total” summarizes the sum of stack arguments and on-stack +spill slots. +Note that these are three different distributions; for example, +there’s no single function that takes 0 stack argument bytes, 16 spill +bytes, and 24 total stack bytes. + +From this, we can see that the fraction of functions that fit entirely +in registers grows very slowly once it reaches about 90%, though +curiously there is a small minority of functions that could benefit +from a huge number of registers. +Making 9 integer registers available on amd64 puts it in this realm. +We also see that the stack space required for most functions is fairly +small. 
+While the increasing space required for spills largely balances out +the decreasing space required for stack arguments as the number of +available registers increases, there is a general reduction in the +total stack space required with more available registers. +This does, however, suggest that eliminating spill slots in the future +would noticeably reduce stack requirements. diff --git a/src/cmd/compile/internal/abi/abiutils.go b/src/cmd/compile/internal/abi/abiutils.go index 3ac59e6f75..e935821802 100644 --- a/src/cmd/compile/internal/abi/abiutils.go +++ b/src/cmd/compile/internal/abi/abiutils.go @@ -25,9 +25,8 @@ import ( type ABIParamResultInfo struct { inparams []ABIParamAssignment // Includes receiver for method calls. Does NOT include hidden closure pointer. outparams []ABIParamAssignment - intSpillSlots int - floatSpillSlots int offsetToSpillArea int64 + spillAreaSize int64 config *ABIConfig // to enable String() method } @@ -47,18 +46,14 @@ func (a *ABIParamResultInfo) OutParam(i int) ABIParamAssignment { return a.outparams[i] } -func (a *ABIParamResultInfo) IntSpillCount() int { - return a.intSpillSlots -} - -func (a *ABIParamResultInfo) FloatSpillCount() int { - return a.floatSpillSlots -} - func (a *ABIParamResultInfo) SpillAreaOffset() int64 { return a.offsetToSpillArea } +func (a *ABIParamResultInfo) SpillAreaSize() int64 { + return a.spillAreaSize +} + // RegIndex stores the index into the set of machine registers used by // the ABI on a specific architecture for parameter passing. RegIndex // values 0 through N-1 (where N is the number of integer registers @@ -78,7 +73,27 @@ type RegIndex uint8 type ABIParamAssignment struct { Type *types.Type Registers []RegIndex - Offset int32 + offset int32 +} + +// Offset returns the stack offset for addressing the parameter that "a" describes. +// This will panic if "a" describes a register-allocated parameter. +func (a *ABIParamAssignment) Offset() int32 { + if len(a.Registers) > 0 { + panic("Register allocated parameters have no offset") + } + return a.offset +} + +// SpillOffset returns the offset *within the spill area* for the parameter that "a" describes. +// Registers will be spilled here; if a memory home is needed (for a pointer method e.g.) +// then that will be the address. +// This will panic if "a" describes a stack-allocated parameter. +func (a *ABIParamAssignment) SpillOffset() int32 { + if len(a.Registers) == 0 { + panic("Stack-allocated parameters have no spill offset") + } + return a.offset } // RegAmounts holds a specified number of integer/float registers. @@ -91,20 +106,58 @@ type RegAmounts struct { // by the ABI rules for parameter passing and result returning. type ABIConfig struct { // Do we need anything more than this? - regAmounts RegAmounts + regAmounts RegAmounts + regsForTypeCache map[*types.Type]int } // NewABIConfig returns a new ABI configuration for an architecture with // iRegsCount integer/pointer registers and fRegsCount floating point registers. func NewABIConfig(iRegsCount, fRegsCount int) *ABIConfig { - return &ABIConfig{RegAmounts{iRegsCount, fRegsCount}} + return &ABIConfig{regAmounts: RegAmounts{iRegsCount, fRegsCount}, regsForTypeCache: make(map[*types.Type]int)} +} + +// NumParamRegs returns the number of parameter registers used for a given type, +// without regard for the number available. 
+func (a *ABIConfig) NumParamRegs(t *types.Type) int { + if n, ok := a.regsForTypeCache[t]; ok { + return n + } + + if t.IsScalar() || t.IsPtrShaped() { + var n int + if t.IsComplex() { + n = 2 + } else { + n = (int(t.Size()) + types.RegSize - 1) / types.RegSize + } + a.regsForTypeCache[t] = n + return n + } + typ := t.Kind() + n := 0 + switch typ { + case types.TARRAY: + n = a.NumParamRegs(t.Elem()) * int(t.NumElem()) + case types.TSTRUCT: + for _, f := range t.FieldSlice() { + n += a.NumParamRegs(f.Type) + } + case types.TSLICE: + n = a.NumParamRegs(synthSlice) + case types.TSTRING: + n = a.NumParamRegs(synthString) + case types.TINTER: + n = a.NumParamRegs(synthIface) + } + a.regsForTypeCache[t] = n + return n } // ABIAnalyze takes a function type 't' and an ABI rules description // 'config' and analyzes the function to determine how its parameters // and results will be passed (in registers or on the stack), returning // an ABIParamResultInfo object that holds the results of the analysis. -func ABIAnalyze(t *types.Type, config *ABIConfig) ABIParamResultInfo { +func (config *ABIConfig) ABIAnalyze(t *types.Type) ABIParamResultInfo { setup() s := assignState{ rTotal: config.regAmounts, @@ -116,28 +169,27 @@ func ABIAnalyze(t *types.Type, config *ABIConfig) ABIParamResultInfo { if t.NumRecvs() != 0 { rfsl := ft.Receiver.FieldSlice() result.inparams = append(result.inparams, - s.assignParamOrReturn(rfsl[0].Type)) + s.assignParamOrReturn(rfsl[0].Type, false)) } // Inputs ifsl := ft.Params.FieldSlice() for _, f := range ifsl { result.inparams = append(result.inparams, - s.assignParamOrReturn(f.Type)) + s.assignParamOrReturn(f.Type, false)) } s.stackOffset = types.Rnd(s.stackOffset, int64(types.RegSize)) - // Record number of spill slots needed. - result.intSpillSlots = s.rUsed.intRegs - result.floatSpillSlots = s.rUsed.floatRegs - // Outputs s.rUsed = RegAmounts{} ofsl := ft.Results.FieldSlice() for _, f := range ofsl { - result.outparams = append(result.outparams, s.assignParamOrReturn(f.Type)) + result.outparams = append(result.outparams, s.assignParamOrReturn(f.Type, true)) } - result.offsetToSpillArea = s.stackOffset + // The spill area is at a register-aligned offset and its size is rounded up to a register alignment. + // TODO in theory could align offset only to minimum required by spilled data types. + result.offsetToSpillArea = alignTo(s.stackOffset, types.RegSize) + result.spillAreaSize = alignTo(s.spillOffset, types.RegSize) return result } @@ -160,10 +212,14 @@ func (c *RegAmounts) regString(r RegIndex) string { // form, suitable for debugging or unit testing. 
func (ri *ABIParamAssignment) toString(config *ABIConfig) string { regs := "R{" + offname := "spilloffset" // offset is for spill for register(s) + if len(ri.Registers) == 0 { + offname = "offset" // offset is for memory arg + } for _, r := range ri.Registers { regs += " " + config.regAmounts.regString(r) } - return fmt.Sprintf("%s } offset: %d typ: %v", regs, ri.Offset, ri.Type) + return fmt.Sprintf("%s } %s: %d typ: %v", regs, offname, ri.offset, ri.Type) } // toString method renders an ABIParamResultInfo in human-readable @@ -176,8 +232,8 @@ func (ri *ABIParamResultInfo) String() string { for k, r := range ri.outparams { res += fmt.Sprintf("OUT %d: %s\n", k, r.toString(ri.config)) } - res += fmt.Sprintf("intspill: %d floatspill: %d offsetToSpillArea: %d", - ri.intSpillSlots, ri.floatSpillSlots, ri.offsetToSpillArea) + res += fmt.Sprintf("offsetToSpillArea: %d spillAreaSize: %d", + ri.offsetToSpillArea, ri.spillAreaSize) return res } @@ -188,16 +244,27 @@ type assignState struct { rUsed RegAmounts // regs used by params completely assigned so far pUsed RegAmounts // regs used by the current param (or pieces therein) stackOffset int64 // current stack offset + spillOffset int64 // current spill offset +} + +// align returns a rounded up to t's alignment +func align(a int64, t *types.Type) int64 { + return alignTo(a, int(t.Align)) +} + +// alignTo returns a rounded up to t, where t must be 0 or a power of 2. +func alignTo(a int64, t int) int64 { + if t == 0 { + return a + } + return types.Rnd(a, int64(t)) } // stackSlot returns a stack offset for a param or result of the // specified type. func (state *assignState) stackSlot(t *types.Type) int64 { - if t.Align > 0 { - state.stackOffset = types.Rnd(state.stackOffset, int64(t.Align)) - } - rv := state.stackOffset - state.stackOffset += t.Width + rv := align(state.stackOffset, t) + state.stackOffset = rv + t.Width return rv } @@ -225,11 +292,17 @@ func (state *assignState) allocateRegs() []RegIndex { // regAllocate creates a register ABIParamAssignment object for a param // or result with the specified type, as a final step (this assumes // that all of the safety/suitability analysis is complete). -func (state *assignState) regAllocate(t *types.Type) ABIParamAssignment { +func (state *assignState) regAllocate(t *types.Type, isReturn bool) ABIParamAssignment { + spillLoc := int64(-1) + if !isReturn { + // Spill for register-resident t must be aligned for storage of a t. + spillLoc = align(state.spillOffset, t) + state.spillOffset = spillLoc + t.Size() + } return ABIParamAssignment{ Type: t, Registers: state.allocateRegs(), - Offset: -1, + offset: int32(spillLoc), } } @@ -239,7 +312,7 @@ func (state *assignState) regAllocate(t *types.Type) ABIParamAssignment { func (state *assignState) stackAllocate(t *types.Type) ABIParamAssignment { return ABIParamAssignment{ Type: t, - Offset: int32(state.stackSlot(t)), + offset: int32(state.stackSlot(t)), } } @@ -261,6 +334,9 @@ func (state *assignState) floatUsed() int { // accordingly). func (state *assignState) regassignIntegral(t *types.Type) bool { regsNeeded := int(types.Rnd(t.Width, int64(types.PtrSize)) / int64(types.PtrSize)) + if t.IsComplex() { + regsNeeded = 2 + } // Floating point and complex. if t.IsFloat() || t.IsComplex() { @@ -371,14 +447,14 @@ func (state *assignState) regassign(pt *types.Type) bool { // of type 'pt' to determine whether it can be register assigned. // The result of the analysis is recorded in the result // ABIParamResultInfo held in 'state'. 
-func (state *assignState) assignParamOrReturn(pt *types.Type) ABIParamAssignment { +func (state *assignState) assignParamOrReturn(pt *types.Type, isReturn bool) ABIParamAssignment { state.pUsed = RegAmounts{} if pt.Width == types.BADWIDTH { panic("should never happen") } else if pt.Width == 0 { return state.stackAllocate(pt) } else if state.regassign(pt) { - return state.regAllocate(pt) + return state.regAllocate(pt, isReturn) } else { return state.stackAllocate(pt) } diff --git a/src/cmd/compile/internal/noder/noder.go b/src/cmd/compile/internal/noder/noder.go index 887205b9fb..1c38f1a934 100644 --- a/src/cmd/compile/internal/noder/noder.go +++ b/src/cmd/compile/internal/noder/noder.go @@ -1463,14 +1463,6 @@ func (p *noder) basicLit(lit *syntax.BasicLit) constant.Value { p.errorAt(lit.Pos(), "malformed constant: %s", lit.Value) } - // go/constant uses big.Rat by default, which is more precise, but - // causes toolstash -cmp and some tests to fail. For now, convert - // to big.Float to match cmd/compile's historical precision. - // TODO(mdempsky): Remove. - if v.Kind() == constant.Float { - v = constant.Make(ir.BigFloat(v)) - } - return v } diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index ecf3294082..e49a9716fe 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -434,6 +434,7 @@ func buildssa(fn *ir.Func, worker int) *ssa.Func { // bitmask showing which of the open-coded defers in this function // have been activated. deferBitsTemp := typecheck.TempAt(src.NoXPos, s.curfn, types.Types[types.TUINT8]) + deferBitsTemp.SetAddrtaken(true) s.deferBitsTemp = deferBitsTemp // For this value, AuxInt is initialized to zero by default startDeferBits := s.entryNewValue0(ssa.OpConst8, types.Types[types.TUINT8]) @@ -5086,6 +5087,10 @@ func (s *state) addr(n ir.Node) *ssa.Value { defer s.popLine() } + if s.canSSA(n) { + s.Fatalf("addr of canSSA expression: %+v", n) + } + t := types.NewPtr(n.Type()) linksymOffset := func(lsym *obj.LSym, offset int64) *ssa.Value { v := s.entryNewValue1A(ssa.OpAddr, t, lsym, s.sb) diff --git a/src/cmd/compile/internal/test/abiutils_test.go b/src/cmd/compile/internal/test/abiutils_test.go index ae7d484062..decc29667e 100644 --- a/src/cmd/compile/internal/test/abiutils_test.go +++ b/src/cmd/compile/internal/test/abiutils_test.go @@ -21,7 +21,7 @@ import ( // AMD64 registers available: // - integer: RAX, RBX, RCX, RDI, RSI, R8, R9, r10, R11 // - floating point: X0 - X14 -var configAMD64 = abi.NewABIConfig(9,15) +var configAMD64 = abi.NewABIConfig(9, 15) func TestMain(m *testing.M) { ssagen.Arch.LinkArch = &x86.Linkamd64 @@ -46,9 +46,9 @@ func TestABIUtilsBasic1(t *testing.T) { // expected results exp := makeExpectedDump(` - IN 0: R{ I0 } offset: -1 typ: int32 - OUT 0: R{ I0 } offset: -1 typ: int32 - intspill: 1 floatspill: 0 offsetToSpillArea: 0 + IN 0: R{ I0 } spilloffset: 0 typ: int32 + OUT 0: R{ I0 } spilloffset: -1 typ: int32 + offsetToSpillArea: 0 spillAreaSize: 8 `) abitest(t, ft, exp) @@ -75,39 +75,39 @@ func TestABIUtilsBasic2(t *testing.T) { i8, i16, i32, i64}, []*types.Type{i32, f64, f64}) exp := makeExpectedDump(` - IN 0: R{ I0 } offset: -1 typ: int8 - IN 1: R{ I1 } offset: -1 typ: int16 - IN 2: R{ I2 } offset: -1 typ: int32 - IN 3: R{ I3 } offset: -1 typ: int64 - IN 4: R{ F0 } offset: -1 typ: float32 - IN 5: R{ F1 } offset: -1 typ: float32 - IN 6: R{ F2 } offset: -1 typ: float64 - IN 7: R{ F3 } offset: -1 typ: float64 - IN 8: R{ I4 } offset: -1 typ: int8 - IN 9: R{ I5 
} offset: -1 typ: int16 - IN 10: R{ I6 } offset: -1 typ: int32 - IN 11: R{ I7 } offset: -1 typ: int64 - IN 12: R{ F4 } offset: -1 typ: float32 - IN 13: R{ F5 } offset: -1 typ: float32 - IN 14: R{ F6 } offset: -1 typ: float64 - IN 15: R{ F7 } offset: -1 typ: float64 - IN 16: R{ F8 F9 } offset: -1 typ: complex128 - IN 17: R{ F10 F11 } offset: -1 typ: complex128 - IN 18: R{ F12 F13 } offset: -1 typ: complex128 - IN 19: R{ } offset: 0 typ: complex128 - IN 20: R{ F14 } offset: -1 typ: complex64 - IN 21: R{ I8 } offset: -1 typ: int8 - IN 22: R{ } offset: 16 typ: int16 - IN 23: R{ } offset: 20 typ: int32 - IN 24: R{ } offset: 24 typ: int64 - IN 25: R{ } offset: 32 typ: int8 - IN 26: R{ } offset: 34 typ: int16 - IN 27: R{ } offset: 36 typ: int32 - IN 28: R{ } offset: 40 typ: int64 - OUT 0: R{ I0 } offset: -1 typ: int32 - OUT 1: R{ F0 } offset: -1 typ: float64 - OUT 2: R{ F1 } offset: -1 typ: float64 - intspill: 9 floatspill: 15 offsetToSpillArea: 48 + IN 0: R{ I0 } spilloffset: 0 typ: int8 + IN 1: R{ I1 } spilloffset: 2 typ: int16 + IN 2: R{ I2 } spilloffset: 4 typ: int32 + IN 3: R{ I3 } spilloffset: 8 typ: int64 + IN 4: R{ F0 } spilloffset: 16 typ: float32 + IN 5: R{ F1 } spilloffset: 20 typ: float32 + IN 6: R{ F2 } spilloffset: 24 typ: float64 + IN 7: R{ F3 } spilloffset: 32 typ: float64 + IN 8: R{ I4 } spilloffset: 40 typ: int8 + IN 9: R{ I5 } spilloffset: 42 typ: int16 + IN 10: R{ I6 } spilloffset: 44 typ: int32 + IN 11: R{ I7 } spilloffset: 48 typ: int64 + IN 12: R{ F4 } spilloffset: 56 typ: float32 + IN 13: R{ F5 } spilloffset: 60 typ: float32 + IN 14: R{ F6 } spilloffset: 64 typ: float64 + IN 15: R{ F7 } spilloffset: 72 typ: float64 + IN 16: R{ F8 F9 } spilloffset: 80 typ: complex128 + IN 17: R{ F10 F11 } spilloffset: 96 typ: complex128 + IN 18: R{ F12 F13 } spilloffset: 112 typ: complex128 + IN 19: R{ } offset: 0 typ: complex128 + IN 20: R{ } offset: 16 typ: complex64 + IN 21: R{ I8 } spilloffset: 128 typ: int8 + IN 22: R{ } offset: 24 typ: int16 + IN 23: R{ } offset: 28 typ: int32 + IN 24: R{ } offset: 32 typ: int64 + IN 25: R{ } offset: 40 typ: int8 + IN 26: R{ } offset: 42 typ: int16 + IN 27: R{ } offset: 44 typ: int32 + IN 28: R{ } offset: 48 typ: int64 + OUT 0: R{ I0 } spilloffset: -1 typ: int32 + OUT 1: R{ F0 } spilloffset: -1 typ: float64 + OUT 2: R{ F1 } spilloffset: -1 typ: float64 + offsetToSpillArea: 56 spillAreaSize: 136 `) abitest(t, ft, exp) @@ -123,15 +123,15 @@ func TestABIUtilsArrays(t *testing.T) { []*types.Type{a2, a1, ae, aa1}) exp := makeExpectedDump(` - IN 0: R{ I0 } offset: -1 typ: [1]int32 - IN 1: R{ } offset: 0 typ: [0]int32 - IN 2: R{ I1 } offset: -1 typ: [1][1]int32 - IN 3: R{ } offset: 0 typ: [2]int32 - OUT 0: R{ } offset: 8 typ: [2]int32 - OUT 1: R{ I0 } offset: -1 typ: [1]int32 - OUT 2: R{ } offset: 16 typ: [0]int32 - OUT 3: R{ I1 } offset: -1 typ: [1][1]int32 - intspill: 2 floatspill: 0 offsetToSpillArea: 16 + IN 0: R{ I0 } spilloffset: 0 typ: [1]int32 + IN 1: R{ } offset: 0 typ: [0]int32 + IN 2: R{ I1 } spilloffset: 4 typ: [1][1]int32 + IN 3: R{ } offset: 0 typ: [2]int32 + OUT 0: R{ } offset: 8 typ: [2]int32 + OUT 1: R{ I0 } spilloffset: -1 typ: [1]int32 + OUT 2: R{ } offset: 16 typ: [0]int32 + OUT 3: R{ I1 } spilloffset: -1 typ: [1][1]int32 + offsetToSpillArea: 16 spillAreaSize: 8 `) abitest(t, ft, exp) @@ -147,13 +147,13 @@ func TestABIUtilsStruct1(t *testing.T) { []*types.Type{s, i8, i32}) exp := makeExpectedDump(` - IN 0: R{ I0 } offset: -1 typ: int8 - IN 1: R{ I1 I2 I3 I4 } offset: -1 typ: struct { int8; int8; struct {}; int8; int16 } - IN 2: R{ I5 } 
offset: -1 typ: int64 - OUT 0: R{ I0 I1 I2 I3 } offset: -1 typ: struct { int8; int8; struct {}; int8; int16 } - OUT 1: R{ I4 } offset: -1 typ: int8 - OUT 2: R{ I5 } offset: -1 typ: int32 - intspill: 6 floatspill: 0 offsetToSpillArea: 0 + IN 0: R{ I0 } spilloffset: 0 typ: int8 + IN 1: R{ I1 I2 I3 I4 } spilloffset: 2 typ: struct { int8; int8; struct {}; int8; int16 } + IN 2: R{ I5 } spilloffset: 8 typ: int64 + OUT 0: R{ I0 I1 I2 I3 } spilloffset: -1 typ: struct { int8; int8; struct {}; int8; int16 } + OUT 1: R{ I4 } spilloffset: -1 typ: int8 + OUT 2: R{ I5 } spilloffset: -1 typ: int32 + offsetToSpillArea: 0 spillAreaSize: 16 `) abitest(t, ft, exp) @@ -168,12 +168,12 @@ func TestABIUtilsStruct2(t *testing.T) { []*types.Type{fs, fs}) exp := makeExpectedDump(` - IN 0: R{ I0 } offset: -1 typ: struct { int64; struct {} } - IN 1: R{ I1 } offset: -1 typ: struct { int64; struct {} } - IN 2: R{ I2 F0 } offset: -1 typ: struct { float64; struct { int64; struct {} }; struct {} } - OUT 0: R{ I0 F0 } offset: -1 typ: struct { float64; struct { int64; struct {} }; struct {} } - OUT 1: R{ I1 F1 } offset: -1 typ: struct { float64; struct { int64; struct {} }; struct {} } - intspill: 3 floatspill: 1 offsetToSpillArea: 0 + IN 0: R{ I0 } spilloffset: 0 typ: struct { int64; struct {} } + IN 1: R{ I1 } spilloffset: 16 typ: struct { int64; struct {} } + IN 2: R{ I2 F0 } spilloffset: 32 typ: struct { float64; struct { int64; struct {} }; struct {} } + OUT 0: R{ I0 F0 } spilloffset: -1 typ: struct { float64; struct { int64; struct {} }; struct {} } + OUT 1: R{ I1 F1 } spilloffset: -1 typ: struct { float64; struct { int64; struct {} }; struct {} } + offsetToSpillArea: 0 spillAreaSize: 64 `) abitest(t, ft, exp) @@ -189,19 +189,19 @@ func TestABIUtilsSliceString(t *testing.T) { []*types.Type{str, i64, str, sli32}) exp := makeExpectedDump(` - IN 0: R{ I0 I1 I2 } offset: -1 typ: []int32 - IN 1: R{ I3 } offset: -1 typ: int8 - IN 2: R{ I4 I5 I6 } offset: -1 typ: []int32 - IN 3: R{ I7 } offset: -1 typ: int8 - IN 4: R{ } offset: 0 typ: string - IN 5: R{ I8 } offset: -1 typ: int8 - IN 6: R{ } offset: 16 typ: int64 - IN 7: R{ } offset: 24 typ: []int32 - OUT 0: R{ I0 I1 } offset: -1 typ: string - OUT 1: R{ I2 } offset: -1 typ: int64 - OUT 2: R{ I3 I4 } offset: -1 typ: string - OUT 3: R{ I5 I6 I7 } offset: -1 typ: []int32 - intspill: 9 floatspill: 0 offsetToSpillArea: 48 + IN 0: R{ I0 I1 I2 } spilloffset: 0 typ: []int32 + IN 1: R{ I3 } spilloffset: 24 typ: int8 + IN 2: R{ I4 I5 I6 } spilloffset: 32 typ: []int32 + IN 3: R{ I7 } spilloffset: 56 typ: int8 + IN 4: R{ } offset: 0 typ: string + IN 5: R{ I8 } spilloffset: 57 typ: int8 + IN 6: R{ } offset: 16 typ: int64 + IN 7: R{ } offset: 24 typ: []int32 + OUT 0: R{ I0 I1 } spilloffset: -1 typ: string + OUT 1: R{ I2 } spilloffset: -1 typ: int64 + OUT 2: R{ I3 I4 } spilloffset: -1 typ: string + OUT 3: R{ I5 I6 I7 } spilloffset: -1 typ: []int32 + offsetToSpillArea: 48 spillAreaSize: 64 `) abitest(t, ft, exp) @@ -219,17 +219,17 @@ func TestABIUtilsMethod(t *testing.T) { []*types.Type{a7, f64, i64}) exp := makeExpectedDump(` - IN 0: R{ I0 I1 I2 } offset: -1 typ: struct { int16; int16; int16 } - IN 1: R{ I3 } offset: -1 typ: *struct { int16; int16; int16 } - IN 2: R{ } offset: 0 typ: [7]*struct { int16; int16; int16 } - IN 3: R{ F0 } offset: -1 typ: float64 - IN 4: R{ I4 } offset: -1 typ: int16 - IN 5: R{ I5 } offset: -1 typ: int16 - IN 6: R{ I6 } offset: -1 typ: int16 - OUT 0: R{ } offset: 56 typ: [7]*struct { int16; int16; int16 } - OUT 1: R{ F0 } offset: -1 typ: float64 - OUT 2: R{ I0 } 
offset: -1 typ: int64 - intspill: 7 floatspill: 1 offsetToSpillArea: 112 + IN 0: R{ I0 I1 I2 } spilloffset: 0 typ: struct { int16; int16; int16 } + IN 1: R{ I3 } spilloffset: 8 typ: *struct { int16; int16; int16 } + IN 2: R{ } offset: 0 typ: [7]*struct { int16; int16; int16 } + IN 3: R{ F0 } spilloffset: 16 typ: float64 + IN 4: R{ I4 } spilloffset: 24 typ: int16 + IN 5: R{ I5 } spilloffset: 26 typ: int16 + IN 6: R{ I6 } spilloffset: 28 typ: int16 + OUT 0: R{ } offset: 56 typ: [7]*struct { int16; int16; int16 } + OUT 1: R{ F0 } spilloffset: -1 typ: float64 + OUT 2: R{ I0 } spilloffset: -1 typ: int64 + offsetToSpillArea: 112 spillAreaSize: 32 `) abitest(t, ft, exp) @@ -252,18 +252,44 @@ func TestABIUtilsInterfaces(t *testing.T) { []*types.Type{ei, nei, pei}) exp := makeExpectedDump(` - IN 0: R{ I0 I1 I2 } offset: -1 typ: struct { int16; int16; bool } - IN 1: R{ I3 I4 } offset: -1 typ: interface {} - IN 2: R{ I5 I6 } offset: -1 typ: interface {} - IN 3: R{ I7 I8 } offset: -1 typ: interface { () untyped string } - IN 4: R{ } offset: 0 typ: *interface {} - IN 5: R{ } offset: 8 typ: interface { () untyped string } - IN 6: R{ } offset: 24 typ: int16 - OUT 0: R{ I0 I1 } offset: -1 typ: interface {} - OUT 1: R{ I2 I3 } offset: -1 typ: interface { () untyped string } - OUT 2: R{ I4 } offset: -1 typ: *interface {} - intspill: 9 floatspill: 0 offsetToSpillArea: 32 + IN 0: R{ I0 I1 I2 } spilloffset: 0 typ: struct { int16; int16; bool } + IN 1: R{ I3 I4 } spilloffset: 8 typ: interface {} + IN 2: R{ I5 I6 } spilloffset: 24 typ: interface {} + IN 3: R{ I7 I8 } spilloffset: 40 typ: interface { () untyped string } + IN 4: R{ } offset: 0 typ: *interface {} + IN 5: R{ } offset: 8 typ: interface { () untyped string } + IN 6: R{ } offset: 24 typ: int16 + OUT 0: R{ I0 I1 } spilloffset: -1 typ: interface {} + OUT 1: R{ I2 I3 } spilloffset: -1 typ: interface { () untyped string } + OUT 2: R{ I4 } spilloffset: -1 typ: *interface {} + offsetToSpillArea: 32 spillAreaSize: 56 `) abitest(t, ft, exp) } + +func TestABINumParamRegs(t *testing.T) { + i8 := types.Types[types.TINT8] + i16 := types.Types[types.TINT16] + i32 := types.Types[types.TINT32] + i64 := types.Types[types.TINT64] + f32 := types.Types[types.TFLOAT32] + f64 := types.Types[types.TFLOAT64] + c64 := types.Types[types.TCOMPLEX64] + c128 := types.Types[types.TCOMPLEX128] + + s := mkstruct([]*types.Type{i8, i8, mkstruct([]*types.Type{}), i8, i16}) + a := types.NewArray(s, 3) + + nrtest(t, i8, 1) + nrtest(t, i16, 1) + nrtest(t, i32, 1) + nrtest(t, i64, 1) + nrtest(t, f32, 1) + nrtest(t, f64, 1) + nrtest(t, c64, 2) + nrtest(t, c128, 2) + nrtest(t, s, 4) + nrtest(t, a, 12) + +}
\ No newline at end of file diff --git a/src/cmd/compile/internal/test/abiutilsaux_test.go b/src/cmd/compile/internal/test/abiutilsaux_test.go index 10fb668745..19dd3a51fd 100644 --- a/src/cmd/compile/internal/test/abiutilsaux_test.go +++ b/src/cmd/compile/internal/test/abiutilsaux_test.go @@ -78,9 +78,9 @@ func tokenize(src string) []string { func verifyParamResultOffset(t *testing.T, f *types.Field, r abi.ABIParamAssignment, which string, idx int) int { n := ir.AsNode(f.Nname).(*ir.Name) - if n.FrameOffset() != int64(r.Offset) { + if n.FrameOffset() != int64(r.Offset()) { t.Errorf("%s %d: got offset %d wanted %d t=%v", - which, idx, r.Offset, n.Offset_, f.Type) + which, idx, r.Offset(), n.Offset_, f.Type) return 1 } return 0 @@ -106,12 +106,20 @@ func difftokens(atoks []string, etoks []string) string { return "" } +func nrtest(t *testing.T, ft *types.Type, expected int) { + types.CalcSize(ft) + got := configAMD64.NumParamRegs(ft) + if got != expected { + t.Errorf("]\nexpected num regs = %d, got %d, type %v", expected, got, ft) + } +} + func abitest(t *testing.T, ft *types.Type, exp expectedDump) { types.CalcSize(ft) // Analyze with full set of registers. - regRes := abi.ABIAnalyze(ft, configAMD64) + regRes := configAMD64.ABIAnalyze(ft) regResString := strings.TrimSpace(regRes.String()) // Check results. @@ -122,8 +130,8 @@ func abitest(t *testing.T, ft *types.Type, exp expectedDump) { } // Analyze again with empty register set. - empty := &abi.ABIConfig{} - emptyRes := abi.ABIAnalyze(ft, empty) + empty := abi.NewABIConfig(0, 0) + emptyRes := empty.ABIAnalyze(ft) emptyResString := emptyRes.String() // Walk the results and make sure the offsets assigned match diff --git a/src/cmd/compile/internal/typecheck/iexport.go b/src/cmd/compile/internal/typecheck/iexport.go index be4a689836..6fab74e61f 100644 --- a/src/cmd/compile/internal/typecheck/iexport.go +++ b/src/cmd/compile/internal/typecheck/iexport.go @@ -462,12 +462,16 @@ func (p *iexporter) doDecl(n *ir.Name) { } case ir.OLITERAL: + // TODO(mdempsky): Extend check to all declarations. + if n.Typecheck() == 0 { + base.FatalfAt(n.Pos(), "missed typecheck: %v", n) + } + // Constant. - // TODO(mdempsky): Do we still need this typecheck? If so, why? - n = Expr(n).(*ir.Name) w.tag('C') w.pos(n.Pos()) w.value(n.Type(), n.Val()) + w.constExt(n) case ir.OTYPE: if types.IsDotAlias(n.Sym()) { @@ -956,6 +960,17 @@ func (w *exportWriter) mpfloat(v constant.Value, typ *types.Type) { } } +func (w *exportWriter) mprat(v constant.Value) { + r, ok := constant.Val(v).(*big.Rat) + if !w.bool(ok) { + return + } + // TODO(mdempsky): Come up with a more efficient binary + // encoding before bumping iexportVersion to expose to + // gcimporter. + w.string(r.String()) +} + func (w *exportWriter) bool(b bool) bool { var x uint64 if b { @@ -971,7 +986,37 @@ func (w *exportWriter) string(s string) { w.uint64(w.p.stringOff(s)) } // Compiler-specific extensions. -func (w *exportWriter) varExt(n ir.Node) { +func (w *exportWriter) constExt(n *ir.Name) { + // Internally, we now represent untyped float and complex + // constants with infinite-precision rational numbers using + // go/constant, but the "public" export data format known to + // gcimporter only supports 512-bit floating point constants. 
+ // In case rationals turn out to be a bad idea and we want to + // switch back to fixed-precision constants, for now we + // continue writing out the 512-bit truncation in the public + // data section, and write the exact, rational constant in the + // compiler's extension data. Also, we only need to worry + // about exporting rationals for declared constants, because + // constants that appear in an expression will already have + // been coerced to a concrete, fixed-precision type. + // + // Eventually, assuming we stick with using rationals, we + // should bump iexportVersion to support rationals, and do the + // whole gcimporter update song-and-dance. + // + // TODO(mdempsky): Prepare vocals for that. + + switch n.Type() { + case types.UntypedFloat: + w.mprat(n.Val()) + case types.UntypedComplex: + v := n.Val() + w.mprat(constant.Real(v)) + w.mprat(constant.Imag(v)) + } +} + +func (w *exportWriter) varExt(n *ir.Name) { w.linkname(n.Sym()) w.symIdx(n.Sym()) } diff --git a/src/cmd/compile/internal/typecheck/iimport.go b/src/cmd/compile/internal/typecheck/iimport.go index f2682257f3..b73ef5176b 100644 --- a/src/cmd/compile/internal/typecheck/iimport.go +++ b/src/cmd/compile/internal/typecheck/iimport.go @@ -303,7 +303,9 @@ func (r *importReader) doDecl(sym *types.Sym) *ir.Name { typ := r.typ() val := r.value(typ) - return importconst(r.p.ipkg, pos, sym, typ, val) + n := importconst(r.p.ipkg, pos, sym, typ, val) + r.constExt(n) + return n case 'F': typ := r.signature(nil) @@ -440,6 +442,15 @@ func (p *importReader) float(typ *types.Type) constant.Value { return constant.Make(&f) } +func (p *importReader) mprat(orig constant.Value) constant.Value { + if !p.bool() { + return orig + } + var rat big.Rat + rat.SetString(p.string()) + return constant.Make(&rat) +} + func (r *importReader) ident(selector bool) *types.Sym { name := r.string() if name == "" { @@ -641,7 +652,19 @@ func (r *importReader) byte() byte { // Compiler-specific extensions. -func (r *importReader) varExt(n ir.Node) { +func (r *importReader) constExt(n *ir.Name) { + switch n.Type() { + case types.UntypedFloat: + n.SetVal(r.mprat(n.Val())) + case types.UntypedComplex: + v := n.Val() + re := r.mprat(constant.Real(v)) + im := r.mprat(constant.Imag(v)) + n.SetVal(makeComplex(re, im)) + } +} + +func (r *importReader) varExt(n *ir.Name) { r.linkname(n.Sym()) r.symIdx(n.Sym()) } diff --git a/src/cmd/compile/internal/walk/assign.go b/src/cmd/compile/internal/walk/assign.go index 6e8075a35f..230b544148 100644 --- a/src/cmd/compile/internal/walk/assign.go +++ b/src/cmd/compile/internal/walk/assign.go @@ -289,11 +289,14 @@ func ascompatee(op ir.Op, nl, nr []ir.Node) []ir.Node { } var assigned ir.NameSet - var memWrite bool + var memWrite, deferResultWrite bool // affected reports whether expression n could be affected by // the assignments applied so far. affected := func(n ir.Node) bool { + if deferResultWrite { + return true + } return ir.Any(n, func(n ir.Node) bool { if n.Op() == ir.ONAME && assigned.Has(n.(*ir.Name)) { return true @@ -369,21 +372,40 @@ func ascompatee(op ir.Op, nl, nr []ir.Node) []ir.Node { appendWalkStmt(&late, convas(ir.NewAssignStmt(base.Pos, lorig, r), &late)) - if name != nil && ir.IsBlank(name) { - // We can ignore assignments to blank. + // Check for reasons why we may need to compute later expressions + // before this assignment happens. + + if name == nil { + // Not a direct assignment to a declared variable. + // Conservatively assume any memory access might alias. 
+ memWrite = true continue } - if op == ir.ORETURN && types.OrigSym(name.Sym()) == nil { - // We can also ignore assignments to anonymous result - // parameters. These can't appear in expressions anyway. + + if name.Class == ir.PPARAMOUT && ir.CurFunc.HasDefer() { + // Assignments to a result parameter in a function with defers + // becomes visible early if evaluation of any later expression + // panics (#43835). + deferResultWrite = true continue } - if name != nil && name.OnStack() && !name.Addrtaken() { - assigned.Add(name) - } else { + if sym := types.OrigSym(name.Sym()); sym == nil || sym.IsBlank() { + // We can ignore assignments to blank or anonymous result parameters. + // These can't appear in expressions anyway. + continue + } + + if name.Addrtaken() || !name.OnStack() { + // Global variable, heap escaped, or just addrtaken. + // Conservatively assume any memory access might alias. memWrite = true + continue } + + // Local, non-addrtaken variable. + // Assignments can only alias with direct uses of this variable. + assigned.Add(name) } early.Append(late.Take()...) diff --git a/src/cmd/go/internal/work/action.go b/src/cmd/go/internal/work/action.go index b071ed1400..9d141ae233 100644 --- a/src/cmd/go/internal/work/action.go +++ b/src/cmd/go/internal/work/action.go @@ -57,9 +57,6 @@ type Builder struct { id sync.Mutex toolIDCache map[string]string // tool name -> tool ID buildIDCache map[string]string // file name -> build ID - - cgoEnvOnce sync.Once - cgoEnvCache []string } // NOTE: Much of Action would not need to be exported if not for test. diff --git a/src/cmd/go/internal/work/exec.go b/src/cmd/go/internal/work/exec.go index cacb4c05df..422e83c224 100644 --- a/src/cmd/go/internal/work/exec.go +++ b/src/cmd/go/internal/work/exec.go @@ -1165,7 +1165,10 @@ func (b *Builder) vet(ctx context.Context, a *Action) error { } // TODO(rsc): Why do we pass $GCCGO to go vet? - env := b.cgoEnv() + env := b.cCompilerEnv() + if cfg.BuildToolchainName == "gccgo" { + env = append(env, "GCCGO="+BuildToolchain.compiler()) + } p := a.Package tool := VetTool @@ -2111,24 +2114,6 @@ func (b *Builder) cCompilerEnv() []string { return []string{"TERM=dumb"} } -// cgoEnv returns environment variables to set when running cgo. -// Some of these pass through to cgo running the C compiler, -// so it includes cCompilerEnv. -func (b *Builder) cgoEnv() []string { - b.cgoEnvOnce.Do(func() { - cc, err := exec.LookPath(b.ccExe()[0]) - if err != nil || filepath.Base(cc) == cc { // reject relative path - cc = "/missing-cc" - } - gccgo := GccgoBin - if filepath.Base(gccgo) == gccgo { // reject relative path - gccgo = "/missing-gccgo" - } - b.cgoEnvCache = append(b.cCompilerEnv(), "CC="+cc, "GCCGO="+gccgo) - }) - return b.cgoEnvCache -} - // mkdir makes the named directory. func (b *Builder) Mkdir(dir string) error { // Make Mkdir(a.Objdir) a no-op instead of an error when a.Objdir == "". @@ -2729,7 +2714,7 @@ func (b *Builder) cgo(a *Action, cgoExe, objdir string, pcCFLAGS, pcLDFLAGS, cgo // along to the host linker. At this point in the code, cgoLDFLAGS // consists of the original $CGO_LDFLAGS (unchecked) and all the // flags put together from source code (checked). 
- cgoenv := b.cgoEnv() + cgoenv := b.cCompilerEnv() if len(cgoLDFLAGS) > 0 { flags := make([]string, len(cgoLDFLAGS)) for i, f := range cgoLDFLAGS { @@ -2966,7 +2951,7 @@ func (b *Builder) dynimport(a *Action, p *load.Package, objdir, importGo, cgoExe if p.Standard && p.ImportPath == "runtime/cgo" { cgoflags = []string{"-dynlinker"} // record path to dynamic linker } - return b.run(a, base.Cwd, p.ImportPath, b.cgoEnv(), cfg.BuildToolexec, cgoExe, "-dynpackage", p.Name, "-dynimport", dynobj, "-dynout", importGo, cgoflags) + return b.run(a, base.Cwd, p.ImportPath, b.cCompilerEnv(), cfg.BuildToolexec, cgoExe, "-dynpackage", p.Name, "-dynimport", dynobj, "-dynout", importGo, cgoflags) } // Run SWIG on all SWIG input files. diff --git a/src/cmd/go/internal/work/gc.go b/src/cmd/go/internal/work/gc.go index cc4e2b2b2b..3205fcbffc 100644 --- a/src/cmd/go/internal/work/gc.go +++ b/src/cmd/go/internal/work/gc.go @@ -129,7 +129,11 @@ func (gcToolchain) gc(b *Builder, a *Action, archive string, importcfg, embedcfg } } - args := []interface{}{cfg.BuildToolexec, base.Tool("compile"), "-o", ofile, "-trimpath", a.trimpath(), gcflags, gcargs, "-D", p.Internal.LocalPrefix} + args := []interface{}{cfg.BuildToolexec, base.Tool("compile"), "-o", ofile, "-trimpath", a.trimpath(), gcflags, gcargs} + if p.Internal.LocalPrefix != "" { + // Workaround #43883. + args = append(args, "-D", p.Internal.LocalPrefix) + } if importcfg != nil { if err := b.writeFile(objdir+"importcfg", importcfg); err != nil { return "", nil, err diff --git a/src/cmd/go/testdata/script/cgo_path.txt b/src/cmd/go/testdata/script/cgo_path.txt index 0d15998426..be9609e86f 100644 --- a/src/cmd/go/testdata/script/cgo_path.txt +++ b/src/cmd/go/testdata/script/cgo_path.txt @@ -1,12 +1,20 @@ [!cgo] skip +# Set CC explicitly to something that requires a PATH lookup. +# Normally, the default is gcc or clang, but if CC was set during make.bash, +# that becomes the default. +[exec:clang] env CC=clang +[exec:gcc] env CC=gcc +[!exec:clang] [!exec:gcc] skip 'Unknown C compiler' + env GOCACHE=$WORK/gocache # Looking for compile flags, so need a clean cache. [!windows] env PATH=.:$PATH -[!windows] chmod 0777 p/gcc p/clang +[!windows] chmod 0755 p/gcc p/clang [!windows] exists -exec p/gcc p/clang [windows] exists -exec p/gcc.bat p/clang.bat ! exists p/bug.txt -go build -x +! go build -x +stderr '^cgo: exec (clang|gcc): (clang|gcc) resolves to executable relative to current directory \(.[/\\](clang|gcc)(.bat)?\)$' ! exists p/bug.txt -- go.mod -- diff --git a/src/cmd/go/testdata/script/cgo_path_space.txt b/src/cmd/go/testdata/script/cgo_path_space.txt new file mode 100644 index 0000000000..654295dc69 --- /dev/null +++ b/src/cmd/go/testdata/script/cgo_path_space.txt @@ -0,0 +1,56 @@ +# Check that if the PATH directory containing the C compiler has a space, +# we can still use that compiler with cgo. +# Verifies #43808. +[!cgo] skip + +# Set CC explicitly to something that requires a PATH lookup. +# Normally, the default is gcc or clang, but if CC was set during make.bash, +# that becomes the default. 
+[exec:clang] env CC=clang +[exec:gcc] env CC=gcc +[!exec:clang] [!exec:gcc] skip 'Unknown C compiler' + +[!windows] chmod 0755 $WORK/'program files'/clang +[!windows] chmod 0755 $WORK/'program files'/gcc +[!windows] exists -exec $WORK/'program files'/clang +[!windows] exists -exec $WORK/'program files'/gcc +[!windows] env PATH=$WORK/'program files':$PATH +[windows] exists -exec $WORK/'program files'/gcc.bat +[windows] exists -exec $WORK/'program files'/clang.bat +[windows] env PATH=$WORK\'program files';%PATH% + +! exists $WORK/log.txt +? go build -x +exists $WORK/log.txt +rm $WORK/log.txt + +# TODO(#41400, #43078): when CC is set explicitly, it should be allowed to +# contain spaces separating arguments, and it should be possible to quote +# arguments with spaces (including the path), as in CGO_CFLAGS and other +# variables. For now, this doesn't work. +[!windows] env CC=$WORK/'program files'/gcc +[windows] env CC=$WORK\'program files'\gcc.bat +! go build -x +! exists $WORK/log.txt + +-- go.mod -- +module m + +-- m.go -- +package m + +// #define X 1 +import "C" + +-- $WORK/program files/gcc -- +#!/bin/sh + +echo ok >$WORK/log.txt +-- $WORK/program files/clang -- +#!/bin/sh + +echo ok >$WORK/log.txt +-- $WORK/program files/gcc.bat -- +echo ok >%WORK%\log.txt +-- $WORK/program files/clang.bat -- +echo ok >%WORK%\log.txt diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 1a359f1921..70072cfba4 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -280,7 +280,7 @@ func MOVCONST(d int64, s int, rt int) uint32 { const ( // Optab.flag LFROM = 1 << 0 // p.From uses constant pool - LFROM3 = 1 << 1 // p.From3 uses constant pool + LFROM128 = 1 << 1 // p.From3<<64+p.From forms a 128-bit constant in literal pool LTO = 1 << 2 // p.To uses constant pool NOTUSETMP = 1 << 3 // p expands to multiple instructions, but does NOT use REGTMP ) @@ -419,7 +419,7 @@ var optab = []Optab{ {AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0}, // Move a large constant to a vector register. - {AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, 101, 4, 0, LFROM | LFROM3, 0}, + {AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, 101, 4, 0, LFROM128, 0}, {AVMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, {AVMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, @@ -995,8 +995,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if o.flag&LFROM != 0 { c.addpool(p, &p.From) } - if o.flag&LFROM3 != 0 { - c.addpool(p, p.GetFrom3()) + if o.flag&LFROM128 != 0 { + c.addpool128(p, &p.From, p.GetFrom3()) } if o.flag<O != 0 { c.addpool(p, &p.To) @@ -1201,6 +1201,36 @@ func (c *ctxt7) flushpool(p *obj.Prog, skip int) { } } +// addpool128 adds a 128-bit constant to literal pool by two consecutive DWORD +// instructions, the 128-bit constant is formed by ah.Offset<<64+al.Offset. 
+func (c *ctxt7) addpool128(p *obj.Prog, al, ah *obj.Addr) { + lit := al.Offset + q := c.newprog() + q.As = ADWORD + q.To.Type = obj.TYPE_CONST + q.To.Offset = lit + q.Pc = int64(c.pool.size) + + lit = ah.Offset + t := c.newprog() + t.As = ADWORD + t.To.Type = obj.TYPE_CONST + t.To.Offset = lit + t.Pc = int64(c.pool.size + 8) + q.Link = t + + if c.blitrl == nil { + c.blitrl = q + c.pool.start = uint32(p.Pc) + } else { + c.elitrl.Link = q + } + + c.elitrl = t + c.pool.size += 16 + p.Pool = q +} + /* * MOVD foo(SB), R is actually * MOVD addr, REGTMP diff --git a/src/cmd/internal/obj/arm64/asm_test.go b/src/cmd/internal/obj/arm64/asm_arm64_test.go index 9efdb0217f..c6a00f5b94 100644 --- a/src/cmd/internal/obj/arm64/asm_test.go +++ b/src/cmd/internal/obj/arm64/asm_arm64_test.go @@ -47,7 +47,7 @@ func TestLarge(t *testing.T) { // assemble generated file cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-S", "-o", filepath.Join(dir, "test.o"), tmpfile) - cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") + cmd.Env = append(os.Environ(), "GOOS=linux") out, err := cmd.CombinedOutput() if err != nil { t.Errorf("Assemble failed: %v, output: %s", err, out) @@ -62,7 +62,7 @@ func TestLarge(t *testing.T) { // build generated file cmd = exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) - cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") + cmd.Env = append(os.Environ(), "GOOS=linux") out, err = cmd.CombinedOutput() if err != nil { t.Errorf("Build failed: %v, output: %s", err, out) @@ -96,7 +96,7 @@ func TestNoRet(t *testing.T) { t.Fatal(err) } cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) - cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") + cmd.Env = append(os.Environ(), "GOOS=linux") if out, err := cmd.CombinedOutput(); err != nil { t.Errorf("%v\n%s", err, out) } @@ -134,7 +134,7 @@ func TestPCALIGN(t *testing.T) { t.Fatal(err) } cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-S", "-o", tmpout, tmpfile) - cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") + cmd.Env = append(os.Environ(), "GOOS=linux") out, err := cmd.CombinedOutput() if err != nil { t.Errorf("The %s build failed: %v, output: %s", test.name, err, out) @@ -150,3 +150,13 @@ func TestPCALIGN(t *testing.T) { } } } + +func testvmovq() (r1, r2 uint64) + +// TestVMOVQ checks if the arm64 VMOVQ instruction is working properly. +func TestVMOVQ(t *testing.T) { + a, b := testvmovq() + if a != 0x7040201008040201 || b != 0x3040201008040201 { + t.Errorf("TestVMOVQ got: a=0x%x, b=0x%x, want: a=0x7040201008040201, b=0x3040201008040201", a, b) + } +} diff --git a/src/cmd/internal/obj/arm64/asm_arm64_test.s b/src/cmd/internal/obj/arm64/asm_arm64_test.s new file mode 100644 index 0000000000..9d337a4fd1 --- /dev/null +++ b/src/cmd/internal/obj/arm64/asm_arm64_test.s @@ -0,0 +1,14 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// testvmovq() (r1, r2 uint64) +TEXT ·testvmovq(SB), NOSPLIT, $0-16 + VMOVQ $0x7040201008040201, $0x3040201008040201, V1 + VMOV V1.D[0], R0 + VMOV V1.D[1], R1 + MOVD R0, r1+0(FP) + MOVD R1, r2+8(FP) + RET |
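For reference, the new addpool128 helper emits the VMOVQ literal as two consecutive 64-bit pool entries, the first operand first, and its comment describes the combined value as ah.Offset<<64 + al.Offset; the TestVMOVQ assembly above reads the first operand back from V1.D[0] and the second from V1.D[1]. The snippet below is a small standalone sketch, not part of the patch, that reassembles the two test constants the same way with math/big so the low/high split is easy to check.

// halves_sketch.go — illustrative only; recombines the two 64-bit
// halves used by the VMOVQ test into the single 128-bit constant
// described in the addpool128 comment (high<<64 + low).
package main

import (
	"fmt"
	"math/big"
)

func main() {
	lo := new(big.Int).SetUint64(0x7040201008040201) // first operand, read back from V1.D[0]
	hi := new(big.Int).SetUint64(0x3040201008040201) // second operand, read back from V1.D[1]

	full := new(big.Int).Lsh(hi, 64) // hi << 64
	full.Add(full, lo)               // + lo

	fmt.Printf("%#x\n", full) // 0x30402010080402017040201008040201
}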