cmd/go: make module index loading O(1)

For a large module, opening the index was populating tables with entries for every package in the module. If we are only using a small number of those packages, this is wasted work that can dwarf the benefit from the index. This CL changes the index reader to avoid loading all packages at module index open time. It also refactors the code somewhat for clarity. It also removes some duplication by defining that a per-package index is a per-module index containing a single package, rather than having two different formats and two different decoders. It also changes the string table to use uvarint-prefixed data instead of having to scan for a NUL byte. This makes random access to long strings more efficient - O(1) instead of O(n) - and can significantly speed up the strings.Compare operation in the binary search looking for a given package. Also add a direct test of the indexing code. For #53577. Change-Id: I7428d28133e4e7fe2d2993fa014896cd15af48af Reviewed-on: https://go-review.googlesource.com/c/go/+/416178 Reviewed-by: Bryan Mills <bcmills@google.com>
author: Russ Cox <rsc@golang.org> 2022-07-06 09:49:32 -0400
committer: Russ Cox <rsc@golang.org> 2022-07-11 19:09:00 +0000
commit: 7510e597def68cee77e8ba280fc0f04d3cfd2a22 (patch)
tree: d89617bccd7cea595b88390c9e105dbdb4041940
parent: b8bf820d5d4602f7e83ff89c0f8d0f2bd3a220d4 (diff)
download: go-7510e597def68cee77e8ba280fc0f04d3cfd2a22.tar.gz
go-7510e597def68cee77e8ba280fc0f04d3cfd2a22.zip
4 files changed, 327 insertions, 275 deletions
diff --git a/src/cmd/go/internal/modindex/index_test.go b/src/cmd/go/internal/modindex/index_test.go
new file mode 100644
index 0000000000..2c072f909d
--- /dev/null
+++ b/src/cmd/go/internal/modindex/index_test.go
@@ -0,0 +1,87 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package modindex
+
+import (
+	"encoding/hex"
+	"encoding/json"
+	"go/build"
+	"internal/diff"
+	"path/filepath"
+	"reflect"
+	"runtime"
+	"testing"
+)
+
+func init() {
+	isTest = true
+	enabled = true // to allow GODEBUG=goindex=0 go test, when things are very broken
+}
+
+func TestIndex(t *testing.T) {
+	src := filepath.Join(runtime.GOROOT(), "src")
+	checkPkg := func(t *testing.T, m *Module, pkg string, data []byte) {
+		p := m.Package(pkg)
+		bp, err := p.Import(build.Default, build.ImportComment)
+		if err != nil {
+			t.Fatal(err)
+		}
+		bp1, err := build.Default.Import(pkg, filepath.Join(src, pkg), build.ImportComment)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if !reflect.DeepEqual(bp, bp1) {
+			t.Errorf("mismatch")
+			t.Logf("index:\n%s", hex.Dump(data))
+
+			js, err := json.MarshalIndent(bp, "", "\t")
+			if err != nil {
+				t.Fatal(err)
+			}
+			js1, err := json.MarshalIndent(bp1, "", "\t")
+			if err != nil {
+				t.Fatal(err)
+			}
+			t.Logf("diff:\n%s", diff.Diff("index", js, "correct", js1))
+			t.FailNow()
+		}
+	}
+
+	// Check packages in increasing complexity, one at a time.
+	pkgs := []string{
+		"crypto",
+		"encoding",
+		"unsafe",
+		"encoding/json",
+		"runtime",
+		"net",
+	}
+	var raws []*rawPackage
+	for _, pkg := range pkgs {
+		raw := importRaw(src, pkg)
+		raws = append(raws, raw)
+		t.Run(pkg, func(t *testing.T) {
+			data := encodeModuleBytes([]*rawPackage{raw})
+			m, err := fromBytes(src, data)
+			if err != nil {
+				t.Fatal(err)
+			}
+			checkPkg(t, m, pkg, data)
+		})
+	}
+
+	// Check that a multi-package index works too.
+	t.Run("all", func(t *testing.T) {
+		data := encodeModuleBytes(raws)
+		m, err := fromBytes(src, data)
+		if err != nil {
+			t.Fatal(err)
+		}
+		for _, pkg := range pkgs {
+			checkPkg(t, m, pkg, data)
+		}
+	})
+}
diff --git a/src/cmd/go/internal/modindex/read.go b/src/cmd/go/internal/modindex/read.go
index 436bbebb39..38ddfec70f 100644
--- a/src/cmd/go/internal/modindex/read.go
+++ b/src/cmd/go/internal/modindex/read.go
@@ -15,7 +15,6 @@ import (
 	"internal/godebug"
 	"internal/goroot"
 	"internal/unsafeheader"
-	"math"
 	"path"
 	"path/filepath"
 	"runtime"
@@ -45,10 +44,9 @@ var enabled bool = godebug.Get("goindex") != "0"
 // do the equivalent of build.Import of packages in the module and answer other
 // questions based on the index file's data.
 type Module struct {
-	modroot      string
-	od           offsetDecoder
-	packages     map[string]int // offsets of each package
-	packagePaths []string       // paths to package directories relative to modroot; these are the keys of packages
+	modroot string
+	d       *decoder
+	n       int // number of packages
 }
 
 // moduleHash returns an ActionID corresponding to the state of the module
@@ -236,12 +234,30 @@ func openIndexPackage(modroot, pkgdir string) (*IndexPackage, error) {
 	return r.pkg, r.err
 }
 
-// fromBytes returns a *Module given the encoded representation.
-func fromBytes(moddir string, data []byte) (mi *Module, err error) {
-	if !enabled {
-		panic("use of index")
-	}
+var errCorrupt = errors.New("corrupt index")
+
+// protect marks the start of a large section of code that accesses the index.
+// It should be used as:
+//
+//	defer unprotect(protect, &err)
+//
+// It should not be used for trivial accesses which would be
+// dwarfed by the overhead of the defer.
+func protect() bool {
+	return debug.SetPanicOnFault(true)
+}
 
+var isTest = false
+
+// unprotect marks the end of a large section of code that accesses the index.
+// It should be used as:
+//
+//	defer unprotect(protect, &err)
+//
+// end looks for panics due to errCorrupt or bad mmap accesses.
+// When it finds them, it adds explanatory text, consumes the panic, and sets *errp instead.
+// If errp is nil, end adds the explanatory text but then calls base.Fatalf.
+func unprotect(old bool, errp *error) {
 	// SetPanicOnFault's errors _may_ satisfy this interface. Even though it's not guaranteed
 	// that all its errors satisfy this interface, we'll only check for these errors so that
 	// we don't suppress panics that could have been produced from other sources.
@@ -249,97 +265,100 @@ func fromBytes(moddir string, data []byte) (mi *Module, err error) {
 		Addr() uintptr
 	}
 
-	// set PanicOnFault to true so that we can catch errors on the initial reads of the slice,
-	// in case it's mmapped (the common case).
-	old := debug.SetPanicOnFault(true)
-	defer func() {
-		debug.SetPanicOnFault(old)
-		if e := recover(); e != nil {
-			if _, ok := e.(addrer); ok {
-				// This panic was almost certainly caused by SetPanicOnFault.
-				err = fmt.Errorf("error reading module index: %v", e)
+	debug.SetPanicOnFault(old)
+
+	if e := recover(); e != nil {
+		if _, ok := e.(addrer); ok || e == errCorrupt {
+			// This panic was almost certainly caused by SetPanicOnFault or our panic(errCorrupt).
+			err := fmt.Errorf("error reading module index: %v", e)
+			if errp != nil {
+				*errp = err
 				return
 			}
-			// The panic was likely not caused by SetPanicOnFault.
-			panic(e)
+			if isTest {
+				panic(err)
+			}
+			base.Fatalf("%v", err)
 		}
-	}()
+		// The panic was likely not caused by SetPanicOnFault.
+		panic(e)
+	}
+}
 
-	gotVersion, unread, _ := bytes.Cut(data, []byte{'\n'})
-	if string(gotVersion) != indexVersion {
-		return nil, fmt.Errorf("bad index version string: %q", gotVersion)
+// fromBytes returns a *Module given the encoded representation.
+func fromBytes(moddir string, data []byte) (m *Module, err error) {
+	if !enabled {
+		panic("use of index")
+	}
+
+	defer unprotect(protect(), &err)
+
+	if !bytes.HasPrefix(data, []byte(indexVersion+"\n")) {
+		return nil, errCorrupt
 	}
-	stringTableOffset, unread := binary.LittleEndian.Uint32(unread[:4]), unread[4:]
-	st := newStringTable(data[stringTableOffset:])
-	d := decoder{unread, st}
-	numPackages := d.int()
-
-	packagePaths := make([]string, numPackages)
-	for i := range packagePaths {
-		packagePaths[i] = d.string()
+
+	const hdr = len(indexVersion + "\n")
+	d := &decoder{data: data}
+	str := d.intAt(hdr)
+	if str < hdr+8 || len(d.data) < str {
+		return nil, errCorrupt
 	}
-	packageOffsets := make([]int, numPackages)
-	for i := range packageOffsets {
-		packageOffsets[i] = d.int()
+	d.data, d.str = data[:str], d.data[str:]
+	// Check that string table looks valid.
+	// First string is empty string (length 0),
+	// and we leave a marker byte 0xFF at the end
+	// just to make sure that the file is not truncated.
+	if len(d.str) == 0 || d.str[0] != 0 || d.str[len(d.str)-1] != 0xFF {
+		return nil, errCorrupt
 	}
-	packages := make(map[string]int, numPackages)
-	for i := range packagePaths {
-		packages[packagePaths[i]] = packageOffsets[i]
+
+	n := d.intAt(hdr + 4)
+	if n < 0 || n > (len(d.data)-8)/8 {
+		return nil, errCorrupt
 	}
 
-	return &Module{
+	m = &Module{
 		moddir,
-		offsetDecoder{data, st},
-		packages,
-		packagePaths,
-	}, nil
+		d,
+		n,
+	}
+	return m, nil
 }
 
 // packageFromBytes returns a *IndexPackage given the encoded representation.
 func packageFromBytes(modroot string, data []byte) (p *IndexPackage, err error) {
-	if !enabled {
-		panic("use of package index when not enabled")
+	m, err := fromBytes(modroot, data)
+	if err != nil {
+		return nil, err
 	}
-
-	// SetPanicOnFault's errors _may_ satisfy this interface. Even though it's not guaranteed
-	// that all its errors satisfy this interface, we'll only check for these errors so that
-	// we don't suppress panics that could have been produced from other sources.
-	type addrer interface {
-		Addr() uintptr
+	if m.n != 1 {
+		return nil, fmt.Errorf("corrupt single-package index")
 	}
+	return m.pkg(0), nil
+}
 
-	// set PanicOnFault to true so that we can catch errors on the initial reads of the slice,
-	// in case it's mmapped (the common case).
-	old := debug.SetPanicOnFault(true)
-	defer func() {
-		debug.SetPanicOnFault(old)
-		if e := recover(); e != nil {
-			if _, ok := e.(addrer); ok {
-				// This panic was almost certainly caused by SetPanicOnFault.
-				err = fmt.Errorf("error reading module index: %v", e)
-				return
-			}
-			// The panic was likely not caused by SetPanicOnFault.
-			panic(e)
-		}
-	}()
+// pkgDir returns the dir string of the i'th package in the index.
+func (m *Module) pkgDir(i int) string {
+	if i < 0 || i >= m.n {
+		panic(errCorrupt)
+	}
+	return m.d.stringAt(12 + 8 + 8*i)
+}
 
-	gotVersion, unread, _ := bytes.Cut(data, []byte{'\n'})
-	if string(gotVersion) != indexVersion {
-		return nil, fmt.Errorf("bad index version string: %q", gotVersion)
+// pkgOff returns the offset of the data for the i'th package in the index.
+func (m *Module) pkgOff(i int) int {
+	if i < 0 || i >= m.n {
+		panic(errCorrupt)
 	}
-	stringTableOffset, unread := binary.LittleEndian.Uint32(unread[:4]), unread[4:]
-	st := newStringTable(data[stringTableOffset:])
-	d := &decoder{unread, st}
-	p = decodePackage(d, offsetDecoder{data, st})
-	p.modroot = modroot
-	return p, nil
+	return m.d.intAt(12 + 8 + 8*i + 4)
 }
 
-// Returns a list of directory paths, relative to the modroot, for
-// packages contained in the module index.
-func (mi *Module) Packages() []string {
-	return mi.packagePaths
+// Walk calls f for each package in the index, passing the path to that package relative to the module root.
+func (m *Module) Walk(f func(path string)) {
+	defer unprotect(protect(), nil)
+	for i := 0; i < m.n; i++ {
+		f(m.pkgDir(i))
+	}
 }
 
 // relPath returns the path relative to the module's root.
@@ -349,11 +368,7 @@ func relPath(path, modroot string) string {
 
 // Import is the equivalent of build.Import given the information in Module.
 func (rp *IndexPackage) Import(bctxt build.Context, mode build.ImportMode) (p *build.Package, err error) {
-	defer func() {
-		if e := recover(); e != nil {
-			err = fmt.Errorf("error reading module index: %v", e)
-		}
-	}()
+	defer unprotect(protect(), &err)
 
 	ctxt := (*Context)(&bctxt)
 
@@ -794,46 +809,44 @@ type IndexPackage struct {
 
 var errCannotFindPackage = errors.New("cannot find package")
 
-// Package returns an IndexPackage constructed using the information in the Module.
-func (mi *Module) Package(path string) *IndexPackage {
-	defer func() {
-		if e := recover(); e != nil {
-			base.Fatalf("error reading module index: %v", e)
-		}
-	}()
-	offset, ok := mi.packages[path]
+// Package and returns finds the package with the given path (relative to the module root).
+// If the package does not exist, Package returns an IndexPackage that will return an
+// appropriate error from its methods.
+func (m *Module) Package(path string) *IndexPackage {
+	defer unprotect(protect(), nil)
+
+	i, ok := sort.Find(m.n, func(i int) int {
+		return strings.Compare(path, m.pkgDir(i))
+	})
 	if !ok {
-		return &IndexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(mi.modroot, path))}
+		return &IndexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(m.modroot, path))}
 	}
-
-	// TODO(matloob): do we want to lock on the module index?
-	d := mi.od.decoderAt(offset)
-	p := decodePackage(d, mi.od)
-	p.modroot = mi.modroot
-	return p
+	return m.pkg(i)
 }
 
-func decodePackage(d *decoder, od offsetDecoder) *IndexPackage {
-	rp := new(IndexPackage)
-	if errstr := d.string(); errstr != "" {
-		rp.error = errors.New(errstr)
+// pkgAt returns the i'th IndexPackage in m.
+func (m *Module) pkg(i int) *IndexPackage {
+	r := m.d.readAt(m.pkgOff(i))
+	p := new(IndexPackage)
+	if errstr := r.string(); errstr != "" {
+		p.error = errors.New(errstr)
 	}
-	rp.dir = d.string()
-	numSourceFiles := d.uint32()
-	rp.sourceFiles = make([]*sourceFile, numSourceFiles)
-	for i := uint32(0); i < numSourceFiles; i++ {
-		offset := d.uint32()
-		rp.sourceFiles[i] = &sourceFile{
-			od: od.offsetDecoderAt(offset),
+	p.dir = r.string()
+	p.sourceFiles = make([]*sourceFile, r.int())
+	for i := range p.sourceFiles {
+		p.sourceFiles[i] = &sourceFile{
+			d:   m.d,
+			pos: r.int(),
 		}
 	}
-	return rp
+	p.modroot = m.modroot
+	return p
 }
 
 // sourceFile represents the information of a given source file in the module index.
 type sourceFile struct {
-	od offsetDecoder // od interprets all offsets relative to the start of the source file's data
-
+	d               *decoder // encoding of this source file
+	pos             int      // start of sourceFile encoding in d
 	onceReadImports sync.Once
 	savedImports    []rawImport // saved imports so that they're only read once
 }
@@ -853,73 +866,67 @@ const (
 )
 
 func (sf *sourceFile) error() string {
-	return sf.od.stringAt(sourceFileError)
+	return sf.d.stringAt(sf.pos + sourceFileError)
 }
 func (sf *sourceFile) parseError() string {
-	return sf.od.stringAt(sourceFileParseError)
+	return sf.d.stringAt(sf.pos + sourceFileParseError)
 }
 func (sf *sourceFile) synopsis() string {
-	return sf.od.stringAt(sourceFileSynopsis)
+	return sf.d.stringAt(sf.pos + sourceFileSynopsis)
 }
 func (sf *sourceFile) name() string {
-	return sf.od.stringAt(sourceFileName)
+	return sf.d.stringAt(sf.pos + sourceFileName)
 }
 func (sf *sourceFile) pkgName() string {
-	return sf.od.stringAt(sourceFilePkgName)
+	return sf.d.stringAt(sf.pos + sourceFilePkgName)
 }
 func (sf *sourceFile) ignoreFile() bool {
-	return sf.od.boolAt(sourceFileIgnoreFile)
+	return sf.d.boolAt(sf.pos + sourceFileIgnoreFile)
 }
 func (sf *sourceFile) binaryOnly() bool {
-	return sf.od.boolAt(sourceFileBinaryOnly)
+	return sf.d.boolAt(sf.pos + sourceFileBinaryOnly)
 }
 func (sf *sourceFile) cgoDirectives() string {
-	return sf.od.stringAt(sourceFileCgoDirectives)
+	return sf.d.stringAt(sf.pos + sourceFileCgoDirectives)
 }
 func (sf *sourceFile) goBuildConstraint() string {
-	return sf.od.stringAt(sourceFileGoBuildConstraint)
+	return sf.d.stringAt(sf.pos + sourceFileGoBuildConstraint)
 }
 
 func (sf *sourceFile) plusBuildConstraints() []string {
-	d := sf.od.decoderAt(sourceFileNumPlusBuildConstraints)
-	n := d.int()
+	pos := sf.pos + sourceFileNumPlusBuildConstraints
+	n := sf.d.intAt(pos)
+	pos += 4
 	ret := make([]string, n)
 	for i := 0; i < n; i++ {
-		ret[i] = d.string()
+		ret[i] = sf.d.stringAt(pos)
+		pos += 4
 	}
 	return ret
 }
 
-func importsOffset(numPlusBuildConstraints int) int {
-	// 4 bytes per uin32, add one to advance past numPlusBuildConstraints itself
-	return sourceFileNumPlusBuildConstraints + 4*(numPlusBuildConstraints+1)
-}
-
 func (sf *sourceFile) importsOffset() int {
-	numPlusBuildConstraints := sf.od.intAt(sourceFileNumPlusBuildConstraints)
-	return importsOffset(numPlusBuildConstraints)
-}
-
-func embedsOffset(importsOffset, numImports int) int {
-	// 4 bytes per uint32; 1 to advance past numImports itself, and 5 uint32s per import
-	return importsOffset + 4*(1+(5*numImports))
+	pos := sf.pos + sourceFileNumPlusBuildConstraints
+	n := sf.d.intAt(pos)
+	// each build constraint is 1 uint32
+	return pos + 4 + n*4
 }
 
 func (sf *sourceFile) embedsOffset() int {
-	importsOffset := sf.importsOffset()
-	numImports := sf.od.intAt(importsOffset)
-	return embedsOffset(importsOffset, numImports)
+	pos := sf.importsOffset()
+	n := sf.d.intAt(pos)
+	// each import is 5 uint32s (string + tokpos)
+	return pos + 4 + n*(4*5)
 }
 
 func (sf *sourceFile) imports() []rawImport {
 	sf.onceReadImports.Do(func() {
 		importsOffset := sf.importsOffset()
-		d := sf.od.decoderAt(importsOffset)
-		numImports := d.int()
+		r := sf.d.readAt(importsOffset)
+		numImports := r.int()
 		ret := make([]rawImport, numImports)
 		for i := 0; i < numImports; i++ {
-			ret[i].path = d.string()
-			ret[i].position = d.tokpos()
+			ret[i] = rawImport{r.string(), r.tokpos()}
 		}
 		sf.savedImports = ret
 	})
@@ -928,132 +935,101 @@ func (sf *sourceFile) imports() []rawImport {
 
 func (sf *sourceFile) embeds() []embed {
 	embedsOffset := sf.embedsOffset()
-	d := sf.od.decoderAt(embedsOffset)
-	numEmbeds := d.int()
+	r := sf.d.readAt(embedsOffset)
+	numEmbeds := r.int()
 	ret := make([]embed, numEmbeds)
 	for i := range ret {
-		pattern := d.string()
-		pos := d.tokpos()
-		ret[i] = embed{pattern, pos}
+		ret[i] = embed{r.string(), r.tokpos()}
 	}
 	return ret
 }
 
-// A decoder reads from the current position of the file and advances its position as it
-// reads.
-type decoder struct {
-	b  []byte
-	st *stringTable
-}
+func asString(b []byte) string {
+	p := (*unsafeheader.Slice)(unsafe.Pointer(&b)).Data
 
-func (d *decoder) uint32() uint32 {
-	n := binary.LittleEndian.Uint32(d.b[:4])
-	d.b = d.b[4:]
-	return n
-}
+	var s string
+	hdr := (*unsafeheader.String)(unsafe.Pointer(&s))
+	hdr.Data = p
+	hdr.Len = len(b)
 
-func (d *decoder) int() int {
-	n := d.uint32()
-	if int64(n) > math.MaxInt {
-		base.Fatalf("go: attempting to read a uint32 from the index that overflows int")
-	}
-	return int(n)
+	return s
 }
 
-func (d *decoder) tokpos() token.Position {
-	file := d.string()
-	offset := d.int()
-	line := d.int()
-	column := d.int()
-	return token.Position{
-		Filename: file,
-		Offset:   offset,
-		Line:     line,
-		Column:   column,
-	}
+// A decoder helps decode the index format.
+type decoder struct {
+	data []byte // data after header
+	str  []byte // string table
 }
 
-func (d *decoder) string() string {
-	return d.st.string(d.int())
+// intAt returns the int at the given offset in d.data.
+func (d *decoder) intAt(off int) int {
+	if off < 0 || len(d.data)-off < 4 {
+		panic(errCorrupt)
+	}
+	i := binary.LittleEndian.Uint32(d.data[off : off+4])
+	if int32(i)>>31 != 0 {
+		panic(errCorrupt)
+	}
+	return int(i)
 }
 
-// And offset decoder reads information offset from its position in the file.
-// It's either offset from the beginning of the index, or the beginning of a sourceFile's data.
-type offsetDecoder struct {
-	b  []byte
-	st *stringTable
+// boolAt returns the bool at the given offset in d.data.
+func (d *decoder) boolAt(off int) bool {
+	return d.intAt(off) != 0
 }
 
-func (od *offsetDecoder) uint32At(offset int) uint32 {
-	if offset > len(od.b) {
-		base.Fatalf("go: trying to read from index file at offset higher than file length. This indicates a corrupt offset file in the cache.")
-	}
-	return binary.LittleEndian.Uint32(od.b[offset:])
+// stringTableAt returns the string pointed at by the int at the given offset in d.data.
+func (d *decoder) stringAt(off int) string {
+	return d.stringTableAt(d.intAt(off))
 }
 
-func (od *offsetDecoder) intAt(offset int) int {
-	n := od.uint32At(offset)
-	if int64(n) > math.MaxInt {
-		base.Fatalf("go: attempting to read a uint32 from the index that overflows int")
+// stringTableAt returns the string at the given offset in the string table d.str.
+func (d *decoder) stringTableAt(off int) string {
+	if off < 0 || off >= len(d.str) {
+		panic(errCorrupt)
 	}
-	return int(n)
-}
-
-func (od *offsetDecoder) boolAt(offset int) bool {
-	switch v := od.uint32At(offset); v {
-	case 0:
-		return false
-	case 1:
-		return true
-	default:
-		base.Fatalf("go: invalid bool value in index file encoding: %v", v)
+	s := d.str[off:]
+	v, n := binary.Uvarint(s)
+	if n <= 0 || v > uint64(len(s[n:])) {
+		panic(errCorrupt)
 	}
-	panic("unreachable")
+	return asString(s[n : n+int(v)])
 }
 
-func (od *offsetDecoder) stringAt(offset int) string {
-	return od.st.string(od.intAt(offset))
+// A reader reads sequential fields from a section of the index format.
+type reader struct {
+	d   *decoder
+	pos int
 }
 
-func (od *offsetDecoder) decoderAt(offset int) *decoder {
-	return &decoder{od.b[offset:], od.st}
+// readAt returns a reader starting at the given position in d.
+func (d *decoder) readAt(pos int) *reader {
+	return &reader{d, pos}
 }
 
-func (od *offsetDecoder) offsetDecoderAt(offset uint32) offsetDecoder {
-	return offsetDecoder{od.b[offset:], od.st}
+// int reads the next int.
+func (r *reader) int() int {
+	i := r.d.intAt(r.pos)
+	r.pos += 4
+	return i
 }
 
-type stringTable struct {
-	b []byte
+// string reads the next string.
+func (r *reader) string() string {
+	return r.d.stringTableAt(r.int())
 }
 
-func newStringTable(b []byte) *stringTable {
-	return &stringTable{b: b}
+// bool reads the next bool.
+func (r *reader) bool() bool {
+	return r.int() != 0
 }
 
-func (st *stringTable) string(pos int) string {
-	if pos == 0 {
-		return ""
-	}
-
-	bb := st.b[pos:]
-	i := bytes.IndexByte(bb, 0)
-
-	if i == -1 {
-		panic("reached end of string table trying to read string")
+// tokpos reads the next token.Position.
+func (r *reader) tokpos() token.Position {
+	return token.Position{
+		Filename: r.string(),
+		Offset:   r.int(),
+		Line:     r.int(),
+		Column:   r.int(),
 	}
-	s := asString(bb[:i])
-
-	return s
-}
-
-func asString(b []byte) string {
-	p := (*unsafeheader.Slice)(unsafe.Pointer(&b)).Data
-
-	var s string
-	hdr := (*unsafeheader.String)(unsafe.Pointer(&s))
-	hdr.Data = p
-	hdr.Len = len(b)
-
-	return s
 }
diff --git a/src/cmd/go/internal/modindex/write.go b/src/cmd/go/internal/modindex/write.go
index 3408248bd9..7db1fb0870 100644
--- a/src/cmd/go/internal/modindex/write.go
+++ b/src/cmd/go/internal/modindex/write.go
@@ -1,54 +1,46 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
 package modindex
 
 import (
 	"cmd/go/internal/base"
 	"encoding/binary"
 	"go/token"
-	"math"
 	"sort"
-	"strings"
 )
 
-const indexVersion = "go index v0"
+const indexVersion = "go index v1" // 11 bytes (plus \n), to align uint32s in index
 
 // encodeModuleBytes produces the encoded representation of the module index.
 // encodeModuleBytes may modify the packages slice.
 func encodeModuleBytes(packages []*rawPackage) []byte {
 	e := newEncoder()
-	e.Bytes([]byte(indexVersion))
-	e.Bytes([]byte{'\n'})
+	e.Bytes([]byte(indexVersion + "\n"))
 	stringTableOffsetPos := e.Pos() // fill this at the end
 	e.Uint32(0)                     // string table offset
-	e.Int(len(packages))
 	sort.Slice(packages, func(i, j int) bool {
 		return packages[i].dir < packages[j].dir
 	})
+	e.Int(len(packages))
+	packagesPos := e.Pos()
 	for _, p := range packages {
 		e.String(p.dir)
-	}
-	packagesOffsetPos := e.Pos()
-	for range packages {
 		e.Int(0)
 	}
 	for i, p := range packages {
-		e.IntAt(e.Pos(), packagesOffsetPos+4*i)
+		e.IntAt(e.Pos(), packagesPos+8*i+4)
 		encodePackage(e, p)
 	}
 	e.IntAt(e.Pos(), stringTableOffsetPos)
 	e.Bytes(e.stringTable)
+	e.Bytes([]byte{0xFF}) // end of string table marker
 	return e.b
 }
 
 func encodePackageBytes(p *rawPackage) []byte {
-	e := newEncoder()
-	e.Bytes([]byte(indexVersion))
-	e.Bytes([]byte{'\n'})
-	stringTableOffsetPos := e.Pos() // fill this at the end
-	e.Uint32(0)                     // string table offset
-	encodePackage(e, p)
-	e.IntAt(e.Pos(), stringTableOffsetPos)
-	e.Bytes(e.stringTable)
-	return e.b
+	return encodeModuleBytes([]*rawPackage{p})
 }
 
 func encodePackage(e *encoder, p *rawPackage) {
@@ -126,9 +118,6 @@ func (e *encoder) Bytes(b []byte) {
 }
 
 func (e *encoder) String(s string) {
-	if strings.IndexByte(s, 0) >= 0 {
-		base.Fatalf("go: attempting to encode a string containing a null byte")
-	}
 	if n, ok := e.strings[s]; ok {
 		e.Int(n)
 		return
@@ -136,8 +125,8 @@ func (e *encoder) String(s string) {
 	pos := len(e.stringTable)
 	e.strings[s] = pos
 	e.Int(pos)
+	e.stringTable = binary.AppendUvarint(e.stringTable, uint64(len(s)))
 	e.stringTable = append(e.stringTable, []byte(s)...)
-	e.stringTable = append(e.stringTable, 0)
 }
 
 func (e *encoder) Bool(b bool) {
@@ -152,17 +141,18 @@ func (e *encoder) Uint32(n uint32) {
 	e.b = binary.LittleEndian.AppendUint32(e.b, n)
 }
 
-// Int encodes n. Note that all ints are written to the index as uint32s.
+// Int encodes n. Note that all ints are written to the index as uint32s,
+// and to avoid problems on 32-bit systems we require fitting into a 32-bit int.
 func (e *encoder) Int(n int) {
-	if n < 0 || int64(n) > math.MaxUint32 {
-		base.Fatalf("go: attempting to write an int to the index that overflows uint32")
+	if n < 0 || int(int32(n)) != n {
+		base.Fatalf("go: attempting to write an int to the index that overflows int32")
 	}
 	e.Uint32(uint32(n))
 }
 
 func (e *encoder) IntAt(n int, at int) {
-	if n < 0 || int64(n) > math.MaxUint32 {
-		base.Fatalf("go: attempting to write an int to the index that overflows uint32")
+	if n < 0 || int(int32(n)) != n {
+		base.Fatalf("go: attempting to write an int to the index that overflows int32")
 	}
 	binary.LittleEndian.PutUint32(e.b[at:], uint32(n))
 }
diff --git a/src/cmd/go/internal/modload/search.go b/src/cmd/go/internal/modload/search.go
index 856390a0f2..b2ac7f22b1 100644
--- a/src/cmd/go/internal/modload/search.go
+++ b/src/cmd/go/internal/modload/search.go
@@ -216,21 +216,20 @@ func matchPackages(ctx context.Context, m *search.Match, tags map[string]bool, f
 // is the module's root directory on disk, index is the modindex.Module for the
 // module, and importPathRoot is the module's path prefix.
 func walkFromIndex(index *modindex.Module, importPathRoot string, isMatch, treeCanMatch func(string) bool, tags, have map[string]bool, addPkg func(string)) {
-loopPackages:
-	for _, reldir := range index.Packages() {
+	index.Walk(func(reldir string) {
 		// Avoid .foo, _foo, and testdata subdirectory trees.
 		p := reldir
 		for {
 			elem, rest, found := strings.Cut(p, string(filepath.Separator))
 			if strings.HasPrefix(elem, ".") || strings.HasPrefix(elem, "_") || elem == "testdata" {
-				continue loopPackages
+				return
 			}
 			if found && elem == "vendor" {
 				// Ignore this path if it contains the element "vendor" anywhere
 				// except for the last element (packages named vendor are allowed
 				// for historical reasons). Note that found is true when this
 				// isn't the last path element.
-				continue loopPackages
+				return
 			}
 			if !found {
 				// Didn't find the separator, so we're considering the last element.
@@ -241,12 +240,12 @@ loopPackages:
 
 		// Don't use GOROOT/src.
 		if reldir == "" && importPathRoot == "" {
-			continue
+			return
 		}
 
 		name := path.Join(importPathRoot, filepath.ToSlash(reldir))
 		if !treeCanMatch(name) {
-			continue
+			return
 		}
 
 		if !have[name] {
@@ -257,7 +256,7 @@ loopPackages:
 				}
 			}
 		}
-	}
+	})
 }
 
 // MatchInModule identifies the packages matching the given pattern within the
author	Russ Cox <rsc@golang.org>	2022-07-06 09:49:32 -0400
committer	Russ Cox <rsc@golang.org>	2022-07-11 19:09:00 +0000
commit	7510e597def68cee77e8ba280fc0f04d3cfd2a22 (patch)
tree	d89617bccd7cea595b88390c9e105dbdb4041940
parent	b8bf820d5d4602f7e83ff89c0f8d0f2bd3a220d4 (diff)
download	go-7510e597def68cee77e8ba280fc0f04d3cfd2a22.tar.gz go-7510e597def68cee77e8ba280fc0f04d3cfd2a22.zip