aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Griesemer <gri@golang.org>2011-08-29 17:22:20 -0700
committerRobert Griesemer <gri@golang.org>2011-08-29 17:22:20 -0700
commitd01ee38fb09bfc9f03f5930fa2054cc767d611d2 (patch)
treead395f910838f186d3cb8c5822e3e0448bcaabc1
parent6b902628709f694967d54045de949d2db20ddadd (diff)
downloadgo-d01ee38fb09bfc9f03f5930fa2054cc767d611d2.tar.gz
go-d01ee38fb09bfc9f03f5930fa2054cc767d611d2.zip
godoc: support for reading/writing (splitted) index files.
This CL implements a new godoc feature to save the search index on disk. Use -write_index to create the search index file named with -index_files. Use -index_files to provide a glob pattern specifying index file(s) when starting godoc; in this case the run-time indexer is not run. Known issues: - saving/restoring full text index is not yet supported - the list of flags and overall usage logic could use a cleanup R=rsc, dsymonds CC=golang-dev https://golang.org/cl/4974045
-rw-r--r--src/cmd/godoc/appconfig.go13
-rw-r--r--src/cmd/godoc/appinit.go27
-rw-r--r--src/cmd/godoc/doc.go6
-rw-r--r--src/cmd/godoc/godoc.go80
-rw-r--r--src/cmd/godoc/index.go33
-rw-r--r--src/cmd/godoc/main.go46
6 files changed, 170 insertions, 35 deletions
diff --git a/src/cmd/godoc/appconfig.go b/src/cmd/godoc/appconfig.go
index 9cbe7a4434..1f420fc6cf 100644
--- a/src/cmd/godoc/appconfig.go
+++ b/src/cmd/godoc/appconfig.go
@@ -11,9 +11,18 @@ package main
const (
// zipFilename is the name of the .zip file
// containing the file system served by godoc.
- zipFilename = "go.zip"
+ zipFilename = "godoc.zip"
// zipGoroot is the path of the goroot directory
// in the .zip file.
- zipGoroot = "/home/username/go"
+ zipGoroot = "/home/user/go"
+
+ // indexFilenames is a glob pattern specifying
+ // files containing the search index served by
+ // godoc. The files are concatenated in sorted
+ // order (by filename).
+ // app-engine limit: file sizes must be <= 10MB;
+ // use "split -b8m indexfile index.split." to get
+ // smaller files.
+ indexFilenames = "index.split.*"
)
diff --git a/src/cmd/godoc/appinit.go b/src/cmd/godoc/appinit.go
index 20e0fdc30a..96f8d5e2ac 100644
--- a/src/cmd/godoc/appinit.go
+++ b/src/cmd/godoc/appinit.go
@@ -23,11 +23,12 @@
// strings // never version of the strings package
// ... //
// app.yaml // app engine control file
-// go.zip // zip file containing the file system to serve
+// godoc.zip // .zip file containing the file system to serve
// godoc // contains godoc sources
// appinit.go // this file instead of godoc/main.go
// appconfig.go // godoc for app engine configuration
// ... //
+// index.split.* // index file(s) containing the search index to serve
//
// To run app the engine emulator locally:
//
@@ -43,6 +44,7 @@ import (
"http"
"log"
"os"
+ "path"
)
func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.Error) {
@@ -53,7 +55,16 @@ func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.E
func init() {
log.Println("initializing godoc ...")
+ log.Printf(".zip file = %s", zipFilename)
+ log.Printf(".zip GOROOT = %s", zipGoroot)
+ log.Printf("index files = %s", indexFilenames)
+
+ // initialize flags for app engine
*goroot = path.Join("/", zipGoroot) // fsHttp paths are relative to '/'
+ *indexEnabled = true
+ *indexFiles = indexFilenames
+ *maxResults = 0 // save space for now
+ *indexThrottle = 0.3 // in case *indexFiles is empty (and thus the indexer is run)
// read .zip file and set up file systems
const zipfile = zipFilename
@@ -65,8 +76,8 @@ func init() {
fsHttp = NewHttpZipFS(rc, *goroot)
// initialize http handlers
- initHandlers()
readTemplates()
+ initHandlers()
registerPublicHandlers(http.DefaultServeMux)
// initialize default directory tree with corresponding timestamp.
@@ -75,12 +86,12 @@ func init() {
// initialize directory trees for user-defined file systems (-path flag).
initDirTrees()
- // create search index
- // TODO(gri) Disabled for now as it takes too long. Find a solution for this.
- /*
- *indexEnabled = true
- go indexer()
- */
+ // initialize search index
+ if *indexEnabled {
+ if err := initIndex(); err != nil {
+ log.Fatalf("error initializing index: %s", err)
+ }
+ }
log.Println("godoc initialization complete")
}
diff --git a/src/cmd/godoc/doc.go b/src/cmd/godoc/doc.go
index 57073ffb1f..813527d280 100644
--- a/src/cmd/godoc/doc.go
+++ b/src/cmd/godoc/doc.go
@@ -50,11 +50,17 @@ The flags are:
-index
enable identifier and full text search index
(no search box is shown if -index is not set)
+ -index_files=""
+ glob pattern specifying index files; if not empty,
+ the index is read from these files in sorted order
-index_throttle=0.75
index throttle value; a value of 0 means no time is allocated
to the indexer (the indexer will never finish), a value of 1.0
means that index creation is running at full throttle (other
goroutines may get no time while the index is built)
+ -write_index=false
+ write index to a file; the file name must be specified with
+ -index_files
-maxresults=10000
maximum number of full text search results shown
(no full text index is built if maxresults <= 0)
diff --git a/src/cmd/godoc/godoc.go b/src/cmd/godoc/godoc.go
index 9554d47b77..c172235168 100644
--- a/src/cmd/godoc/godoc.go
+++ b/src/cmd/godoc/godoc.go
@@ -63,7 +63,9 @@ var (
templateDir = flag.String("templates", "", "directory containing alternate template files")
// search index
- indexEnabled = flag.Bool("index", false, "enable search index")
+ indexEnabled = flag.Bool("index", false, "enable search index")
+ indexFiles = flag.String("index_files", "", "glob pattern specifying index files;"+
+ "if not empty, the index is read from these files in sorted order")
maxResults = flag.Int("maxresults", 10000, "maximum number of full text search results shown")
indexThrottle = flag.Float64("index_throttle", 0.75, "index throttle value; 0.0 = no time allocated, 1.0 = full throttle")
@@ -1062,10 +1064,12 @@ func lookup(query string) (result SearchResult) {
// is the result accurate?
if *indexEnabled {
if _, ts := fsModified.get(); timestamp < ts {
- // The index is older than the latest file system change
- // under godoc's observation. Indexing may be in progress
- // or start shortly (see indexer()).
- result.Alert = "Indexing in progress: result may be inaccurate"
+ // The index is older than the latest file system change under godoc's observation.
+ if *indexFiles != "" {
+ result.Alert = "Index not automatically updated: result may be inaccurate"
+ } else {
+ result.Alert = "Indexing in progress: result may be inaccurate"
+ }
}
} else {
result.Alert = "Search index disabled: no results available"
@@ -1141,26 +1145,30 @@ func fsDirnames() <-chan string {
return c
}
+func updateIndex() {
+ if *verbose {
+ log.Printf("updating index...")
+ }
+ start := time.Nanoseconds()
+ index := NewIndex(fsDirnames(), *maxResults > 0, *indexThrottle)
+ stop := time.Nanoseconds()
+ searchIndex.set(index)
+ if *verbose {
+ secs := float64((stop-start)/1e6) / 1e3
+ stats := index.Stats()
+ log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
+ secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
+ }
+ log.Printf("before GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
+ runtime.GC()
+ log.Printf("after GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
+}
+
func indexer() {
for {
if !indexUpToDate() {
// index possibly out of date - make a new one
- if *verbose {
- log.Printf("updating index...")
- }
- start := time.Nanoseconds()
- index := NewIndex(fsDirnames(), *maxResults > 0, *indexThrottle)
- stop := time.Nanoseconds()
- searchIndex.set(index)
- if *verbose {
- secs := float64((stop-start)/1e6) / 1e3
- stats := index.Stats()
- log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
- secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
- }
- log.Printf("before GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
- runtime.GC()
- log.Printf("after GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
+ updateIndex()
}
var delay int64 = 60 * 1e9 // by default, try every 60s
if *testDir != "" {
@@ -1170,3 +1178,33 @@ func indexer() {
time.Sleep(delay)
}
}
+
+func initIndex() os.Error {
+ if *indexFiles == "" {
+ // run periodic indexer
+ go indexer()
+ return nil
+ }
+
+ // get search index from files
+ matches, err := filepath.Glob(*indexFiles)
+ if err != nil {
+ return err
+ }
+ sort.Strings(matches) // make sure files are in the right order
+ files := make([]io.Reader, 0, len(matches))
+ for _, filename := range matches {
+ f, err := os.Open(filename)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ files = append(files, f)
+ }
+ x := new(Index)
+ if err := x.Read(io.MultiReader(files...)); err != nil {
+ return err
+ }
+ searchIndex.set(x)
+ return nil
+}
diff --git a/src/cmd/godoc/index.go b/src/cmd/godoc/index.go
index f33ca05730..8bf1a9eb38 100644
--- a/src/cmd/godoc/index.go
+++ b/src/cmd/godoc/index.go
@@ -43,7 +43,9 @@ import (
"go/parser"
"go/token"
"go/scanner"
+ "gob"
"index/suffixarray"
+ "io"
"os"
"path/filepath"
"regexp"
@@ -804,6 +806,37 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Ind
return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats}
}
+type FileIndex struct {
+ Words map[string]*LookupResult
+ Alts map[string]*AltWords
+ Snippets []*Snippet
+}
+
+// Write writes the index x to w.
+func (x *Index) Write(w io.Writer) os.Error {
+ if x.suffixes != nil {
+ panic("no support for writing full text index yet")
+ }
+ fx := FileIndex{
+ x.words,
+ x.alts,
+ x.snippets,
+ }
+ return gob.NewEncoder(w).Encode(fx)
+}
+
+// Read reads the index from r into x; x must not be nil.
+func (x *Index) Read(r io.Reader) os.Error {
+ var fx FileIndex
+ if err := gob.NewDecoder(r).Decode(&fx); err != nil {
+ return err
+ }
+ x.words = fx.Words
+ x.alts = fx.Alts
+ x.snippets = fx.Snippets
+ return nil
+}
+
// Stats() returns index statistics.
func (x *Index) Stats() Statistics {
return x.stats
diff --git a/src/cmd/godoc/main.go b/src/cmd/godoc/main.go
index 48bfa2477e..20eff6dd3a 100644
--- a/src/cmd/godoc/main.go
+++ b/src/cmd/godoc/main.go
@@ -54,6 +54,9 @@ var (
// (with e.g.: zip -r go.zip $GOROOT -i \*.go -i \*.html -i \*.css -i \*.js -i \*.txt -i \*.c -i \*.h -i \*.s -i \*.png -i \*.jpg -i \*.sh -i favicon.ico)
zipfile = flag.String("zip", "", "zip file providing the file system to serve; disabled if empty")
+ // file-based index
+ writeIndex = flag.Bool("write_index", false, "write index to a file; the file name must be specified with -index_files")
+
// periodic sync
syncCmd = flag.String("sync", "", "sync command; disabled if empty")
syncMin = flag.Int("sync_minutes", 0, "sync interval in minutes; disabled if <= 0")
@@ -221,8 +224,8 @@ func main() {
flag.Usage = usage
flag.Parse()
- // Check usage: either server and no args, or command line and args
- if (*httpAddr != "") != (flag.NArg() == 0) {
+ // Check usage: either server and no args, command line and args, or index creation mode
+ if (*httpAddr != "") != (flag.NArg() == 0) && !*writeIndex {
usage()
}
@@ -253,6 +256,39 @@ func main() {
readTemplates()
initHandlers()
+ if (*indexEnabled || *writeIndex) && *indexFiles != "" && *maxResults > 0 {
+ log.Println("warning: no support for full-text index yet (setting -maxresults to 0)")
+ *maxResults = 0
+ }
+
+ if *writeIndex {
+ if *indexFiles == "" {
+ log.Fatal("no index files specified")
+ }
+
+ log.Println("initialize file systems")
+ *verbose = true // want to see what happens
+ initFSTree()
+ initDirTrees()
+
+ *indexThrottle = 1
+ updateIndex()
+
+ log.Println("writing index file", *indexFiles)
+ f, err := os.Create(*indexFiles)
+ if err != nil {
+ log.Fatal(err)
+ }
+ index, _ := searchIndex.get()
+ err = index.(*Index).Write(f)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ log.Println("done")
+ return
+ }
+
if *httpAddr != "" {
// HTTP server mode.
var handler http.Handler = http.DefaultServeMux
@@ -304,9 +340,11 @@ func main() {
}()
}
- // Start indexing goroutine.
+ // Initialize search index.
if *indexEnabled {
- go indexer()
+ if err := initIndex(); err != nil {
+ log.Fatalf("error initializing index: %s", err)
+ }
}
// Start http server.