aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLynn Boger <laboger@linux.vnet.ibm.com>2017-06-08 08:26:19 -0400
committerRuss Cox <rsc@golang.org>2017-10-25 18:57:11 +0000
commit2eac89d5c82d1d8ac7969d13f328f9fd2317b8dc (patch)
treefa64b4eef0a64b2b9ebbe6e50c50418f26aa9f14
parent142d4490813ec7e72dfc4c248bf71e2c2f53ffa1 (diff)
downloadgo-2eac89d5c82d1d8ac7969d13f328f9fd2317b8dc.tar.gz
go-2eac89d5c82d1d8ac7969d13f328f9fd2317b8dc.zip
[release-branch.go1.8] cmd/link: implement trampolines for ppc64le with ext linking
When using golang on ppc64le there have been issues when building executables that generate extremely large text sections. This is due to the call instruction and the limitation on the offset field, which is smaller than most platforms. If the size of the call target offset is too big for the offset field in the call instruction, then link errors can occur. The original solution to this problem in golang was to split the text section when it became too large, allowing the external (GNU) linker to insert the necessary stub to handle the long call. That worked fine until the another size limit for the program size was hit, where a plt_branch was created instead of a long branch. In that case the plt_branch code sequence expects r2 to contain the address of the TOC, but when golang creates dynamic executables by default (-buildmode=exe) r2 does not always contain the address of the TOC and as a result when building programs that reach this extremely large size, a runtime SEGV or SIGILL can occur due to branching to a bad address. When using internal linking, trampolines are generated to handle the long calls but the text sections are not split. With this change, text sections will still be split approrpriately with external linking but if the buildmode being used does not maintain r2 as the TOC addresses, then trampolines will be created for those calls. Fixes #20497 Change-Id: If5400b0f86c2c08e106b332be6db0b259b07d93d Reviewed-on: https://go-review.googlesource.com/45130 Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com> Reviewed-on: https://go-review.googlesource.com/70837 Run-TryBot: Russ Cox <rsc@golang.org> Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
-rw-r--r--src/cmd/link/internal/ld/data.go38
-rw-r--r--src/cmd/link/internal/ppc64/asm.go73
2 files changed, 84 insertions, 27 deletions
diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go
index aca8973a85..d007d2d180 100644
--- a/src/cmd/link/internal/ld/data.go
+++ b/src/cmd/link/internal/ld/data.go
@@ -324,18 +324,36 @@ func isRuntimeDepPkg(pkg string) bool {
return strings.HasPrefix(pkg, "runtime/internal/") && !strings.HasSuffix(pkg, "_test")
}
+// Estimate the max size needed to hold any new trampolines created for this function. This
+// is used to determine when the section can be split if it becomes too large, to ensure that
+// the trampolines are in the same section as the function that uses them.
+func maxSizeTrampolinesPPC64(s *Symbol, isTramp bool) uint64 {
+ // If Thearch.Trampoline is nil, then trampoline support is not available on this arch.
+ // A trampoline does not need any dependent trampolines.
+ if Thearch.Trampoline == nil || isTramp {
+ return 0
+ }
+
+ n := uint64(0)
+ for ri := range s.R {
+ r := &s.R[ri]
+ if r.Type.IsDirectJump() {
+ n++
+ }
+ }
+ // Trampolines in ppc64 are 4 instructions.
+ return n * 16
+}
+
// detect too-far jumps in function s, and add trampolines if necessary
-// ARM supports trampoline insertion for internal and external linking
-// PPC64 & PPC64LE support trampoline insertion for internal linking only
+// ARM, PPC64 & PPC64LE support trampoline insertion for internal and external linking
+// On PPC64 & PPC64LE the text sections might be split but will still insert trampolines
+// where necessary.
func trampoline(ctxt *Link, s *Symbol) {
if Thearch.Trampoline == nil {
return // no need or no support of trampolines on this arch
}
- if Linkmode == LinkExternal && SysArch.Family == sys.PPC64 {
- return
- }
-
for ri := range s.R {
r := &s.R[ri]
if !r.Type.IsDirectJump() {
@@ -2044,14 +2062,14 @@ func (ctxt *Link) textaddress() {
sect.Vaddr = va
ntramps := 0
for _, sym := range ctxt.Textp {
- sect, n, va = assignAddress(ctxt, sect, n, sym, va)
+ sect, n, va = assignAddress(ctxt, sect, n, sym, va, false)
trampoline(ctxt, sym) // resolve jumps, may add trampolines if jump too far
// lay down trampolines after each function
for ; ntramps < len(ctxt.tramps); ntramps++ {
tramp := ctxt.tramps[ntramps]
- sect, n, va = assignAddress(ctxt, sect, n, tramp, va)
+ sect, n, va = assignAddress(ctxt, sect, n, tramp, va, true)
}
}
@@ -2077,7 +2095,7 @@ func (ctxt *Link) textaddress() {
// assigns address for a text symbol, returns (possibly new) section, its number, and the address
// Note: once we have trampoline insertion support for external linking, this function
// will not need to create new text sections, and so no need to return sect and n.
-func assignAddress(ctxt *Link, sect *Section, n int, sym *Symbol, va uint64) (*Section, int, uint64) {
+func assignAddress(ctxt *Link, sect *Section, n int, sym *Symbol, va uint64, isTramp bool) (*Section, int, uint64) {
sym.Sect = sect
if sym.Type&obj.SSUB != 0 {
return sect, n, va
@@ -2106,7 +2124,7 @@ func assignAddress(ctxt *Link, sect *Section, n int, sym *Symbol, va uint64) (*S
// Only break at outermost syms.
- if SysArch.InFamily(sys.PPC64) && sym.Outer == nil && Iself && Linkmode == LinkExternal && va-sect.Vaddr+funcsize > 0x1c00000 {
+ if SysArch.InFamily(sys.PPC64) && sym.Outer == nil && Iself && Linkmode == LinkExternal && va-sect.Vaddr+funcsize+maxSizeTrampolinesPPC64(sym, isTramp) > 0x1c00000 {
// Set the length for the previous text section
sect.Length = va - sect.Vaddr
diff --git a/src/cmd/link/internal/ppc64/asm.go b/src/cmd/link/internal/ppc64/asm.go
index cf2c532f9e..9f2925fc51 100644
--- a/src/cmd/link/internal/ppc64/asm.go
+++ b/src/cmd/link/internal/ppc64/asm.go
@@ -522,13 +522,22 @@ func archrelocaddr(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol, val *int64) int {
// resolve direct jump relocation r in s, and add trampoline if necessary
func trampoline(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol) {
+ // Trampolines are created if the branch offset is too large and the linker cannot insert a call stub to handle it.
+ // For internal linking, trampolines are always created for long calls.
+ // For external linking, the linker can insert a call stub to handle a long call, but depends on having the TOC address in
+ // r2. For those build modes with external linking where the TOC address is not maintained in r2, trampolines must be created.
+ if ld.Linkmode == ld.LinkExternal && (ctxt.DynlinkingGo() || ld.Buildmode == ld.BuildmodeCArchive || ld.Buildmode == ld.BuildmodeCShared || ld.Buildmode == ld.BuildmodePIE) {
+ // No trampolines needed since r2 contains the TOC
+ return
+ }
+
t := ld.Symaddr(r.Sym) + r.Add - (s.Value + int64(r.Off))
switch r.Type {
case obj.R_CALLPOWER:
// If branch offset is too far then create a trampoline.
- if int64(int32(t<<6)>>6) != t || (*ld.FlagDebugTramp > 1 && s.File != r.Sym.File) {
+ if (ld.Linkmode == ld.LinkExternal && s.Sect != r.Sym.Sect) || (ld.Linkmode == ld.LinkInternal && int64(int32(t<<6)>>6) != t) || (*ld.FlagDebugTramp > 1 && s.File != r.Sym.File) {
var tramp *ld.Symbol
for i := 0; ; i++ {
@@ -552,26 +561,20 @@ func trampoline(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol) {
t = ld.Symaddr(tramp) + r.Add - (s.Value + int64(r.Off))
- // If the offset of the trampoline that has been found is within range, use it.
- if int64(int32(t<<6)>>6) == t {
+ // With internal linking, the trampoline can be used if it is not too far.
+ // With external linking, the trampoline must be in this section for it to be reused.
+ if (ld.Linkmode == ld.LinkInternal && int64(int32(t<<6)>>6) == t) || (ld.Linkmode == ld.LinkExternal && s.Sect == tramp.Sect) {
break
}
}
if tramp.Type == 0 {
- ctxt.AddTramp(tramp)
- tramp.Size = 16 // 4 instructions
- tramp.P = make([]byte, tramp.Size)
- t = ld.Symaddr(r.Sym) + r.Add
- f := t & 0xffff0000
- o1 := uint32(0x3fe00000 | (f >> 16)) // lis r31,trampaddr hi (r31 is temp reg)
- f = t & 0xffff
- o2 := uint32(0x63ff0000 | f) // ori r31,trampaddr lo
- o3 := uint32(0x7fe903a6) // mtctr
- o4 := uint32(0x4e800420) // bctr
- ld.SysArch.ByteOrder.PutUint32(tramp.P, o1)
- ld.SysArch.ByteOrder.PutUint32(tramp.P[4:], o2)
- ld.SysArch.ByteOrder.PutUint32(tramp.P[8:], o3)
- ld.SysArch.ByteOrder.PutUint32(tramp.P[12:], o4)
+ if ctxt.DynlinkingGo() || ld.Buildmode == ld.BuildmodeCArchive || ld.Buildmode == ld.BuildmodeCShared || ld.Buildmode == ld.BuildmodePIE {
+ // Should have returned for above cases
+ ld.Errorf(s, "unexpected trampoline for shared or dynamic linking\n")
+ } else {
+ ctxt.AddTramp(tramp)
+ gentramp(tramp, r.Sym, int64(r.Add))
+ }
}
r.Sym = tramp
r.Add = 0 // This was folded into the trampoline target address
@@ -582,6 +585,42 @@ func trampoline(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol) {
}
}
+func gentramp(tramp, target *ld.Symbol, offset int64) {
+ // Used for default build mode for an executable
+ // Address of the call target is generated using
+ // relocation and doesn't depend on r2 (TOC).
+ tramp.Size = 16 // 4 instructions
+ tramp.P = make([]byte, tramp.Size)
+ t := ld.Symaddr(target) + offset
+ o1 := uint32(0x3fe00000) // lis r31,targetaddr hi
+ o2 := uint32(0x3bff0000) // addi r31,targetaddr lo
+ // With external linking, the target address must be
+ // relocated using LO and HA
+ if ld.Linkmode == ld.LinkExternal {
+ tr := ld.Addrel(tramp)
+ tr.Off = 0
+ tr.Type = obj.R_ADDRPOWER
+ tr.Siz = 8 // generates 2 relocations: HA + LO
+ tr.Sym = target
+ tr.Add = offset
+ } else {
+ // adjustment needed if lo has sign bit set
+ // when using addi to compute address
+ val := uint32((t & 0xffff0000) >> 16)
+ if t&0x8000 != 0 {
+ val += 1
+ }
+ o1 |= val // hi part of addr
+ o2 |= uint32(t & 0xffff) // lo part of addr
+ }
+ o3 := uint32(0x7fe903a6) // mtctr r31
+ o4 := uint32(0x4e800420) // bctr
+ ld.SysArch.ByteOrder.PutUint32(tramp.P, o1)
+ ld.SysArch.ByteOrder.PutUint32(tramp.P[4:], o2)
+ ld.SysArch.ByteOrder.PutUint32(tramp.P[8:], o3)
+ ld.SysArch.ByteOrder.PutUint32(tramp.P[12:], o4)
+}
+
func archreloc(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol, val *int64) int {
if ld.Linkmode == ld.LinkExternal {
switch r.Type {