diff options
author | Robert Griesemer <gri@golang.org> | 2010-03-19 13:01:45 -0700 |
---|---|---|
committer | Robert Griesemer <gri@golang.org> | 2010-03-19 13:01:45 -0700 |
commit | 90f7209548ee73dd1e918a4a03a375e6818081ad (patch) | |
tree | 98c4d83ccdc132295b633b1aa26f9b442886a792 | |
parent | 17e03514043c2a23cc6b51e838eda02d0adc8e15 (diff) | |
download | go-90f7209548ee73dd1e918a4a03a375e6818081ad.tar.gz go-90f7209548ee73dd1e918a4a03a375e6818081ad.zip |
godoc: improved comment formatting: recognize URLs
and highlight special words, if provided. Also:
- related cleanups in src/pkg/go/doc/comment.go
- fix typos in src/cmd/goinstall/doc.go
Fixes #672.
R=rsc
CC=adg, golang-dev
https://golang.org/cl/601042
-rw-r--r-- | src/cmd/godoc/godoc.go | 4 | ||||
-rw-r--r-- | src/cmd/goinstall/doc.go | 2 | ||||
-rw-r--r-- | src/pkg/go/doc/comment.go | 174 |
3 files changed, 139 insertions, 41 deletions
diff --git a/src/cmd/godoc/godoc.go b/src/cmd/godoc/godoc.go index 8a8cd420ab..4a625311f4 100644 --- a/src/cmd/godoc/godoc.go +++ b/src/cmd/godoc/godoc.go @@ -665,7 +665,9 @@ func htmlEscFmt(w io.Writer, x interface{}, format string) { func htmlCommentFmt(w io.Writer, x interface{}, format string) { var buf bytes.Buffer writeAny(&buf, x, false) - doc.ToHTML(w, buf.Bytes()) // does html-escaping + // TODO(gri) Provide list of words (e.g. function parameters) + // to be emphasized by ToHTML. + doc.ToHTML(w, buf.Bytes(), nil) // does html-escaping } diff --git a/src/cmd/goinstall/doc.go b/src/cmd/goinstall/doc.go index c35e9e043b..c5f93f9290 100644 --- a/src/cmd/goinstall/doc.go +++ b/src/cmd/goinstall/doc.go @@ -6,7 +6,7 @@ Goinstall is an experiment in automatic package installation. It installs packages, possibly downloading them from the internet. -It maintains a list of public Go packages at http://godashboard.appspot.com/packages. +It maintains a list of public Go packages at http://godashboard.appspot.com/package. Usage: goinstall [flags] importpath... diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go index 6e9ad0b04c..3fc6396637 100644 --- a/src/pkg/go/doc/comment.go +++ b/src/pkg/go/doc/comment.go @@ -8,12 +8,25 @@ package doc import ( "go/ast" + "http" // for URLEscape "io" + "regexp" "strings" "template" // for htmlEscape ) -// Comment extraction + +func isWhitespace(ch byte) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' } + + +func stripTrailingWhitespace(s string) string { + i := len(s) + for i > 0 && isWhitespace(s[i-1]) { + i-- + } + return s[0:i] +} + // CommentText returns the text of comment, // with the comment markers - //, /*, and */ - removed. @@ -26,19 +39,23 @@ func CommentText(comment *ast.CommentGroup) string { comments[i] = string(c.Text) } - lines := make([]string, 0, 20) + lines := make([]string, 0, 10) // most comments are less than 10 lines for _, c := range comments { // Remove comment markers. // The parser has given us exactly the comment text. - switch n := len(c); { - case n >= 4 && c[0:2] == "/*" && c[n-2:n] == "*/": - c = c[2 : n-2] - case n >= 2 && c[0:2] == "//": - c = c[2:n] + switch c[1] { + case '/': + //-style comment + c = c[2:] // Remove leading space after //, if there is one. + // TODO(gri) This appears to be necessary in isolated + // cases (bignum.RatFromString) - why? if len(c) > 0 && c[0] == ' ' { c = c[1:] } + case '*': + /*-style comment */ + c = c[2 : len(c)-2] } // Split on newlines. @@ -46,20 +63,12 @@ func CommentText(comment *ast.CommentGroup) string { // Walk lines, stripping trailing white space and adding to list. for _, l := range cl { - // Strip trailing white space - m := len(l) - for m > 0 && (l[m-1] == ' ' || l[m-1] == '\n' || l[m-1] == '\t' || l[m-1] == '\r') { - m-- - } - l = l[0:m] - + l = stripTrailingWhitespace(l) // Add to list. n := len(lines) if n+1 >= cap(lines) { newlines := make([]string, n, 2*cap(lines)) - for k := range newlines { - newlines[k] = lines[k] - } + copy(newlines, lines) lines = newlines } lines = lines[0 : n+1] @@ -88,6 +97,7 @@ func CommentText(comment *ast.CommentGroup) string { return strings.Join(lines, "\n") } + // Split bytes into lines. func split(text []byte) [][]byte { // count lines @@ -127,28 +137,51 @@ var ( rdquo = []byte("”") ) -// Escape comment text for HTML. -// Also, turn `` into “ and '' into ”. -func commentEscape(w io.Writer, s []byte) { +// Escape comment text for HTML. If nice is set, +// also turn `` into “ and '' into ”. +func commentEscape(w io.Writer, s []byte, nice bool) { last := 0 - for i := 0; i < len(s)-1; i++ { - if s[i] == s[i+1] && (s[i] == '`' || s[i] == '\'') { - template.HTMLEscape(w, s[last:i]) - last = i + 2 - switch s[i] { - case '`': - w.Write(ldquo) - case '\'': - w.Write(rdquo) + if nice { + for i := 0; i < len(s)-1; i++ { + ch := s[i] + if ch == s[i+1] && (ch == '`' || ch == '\'') { + template.HTMLEscape(w, s[last:i]) + last = i + 2 + switch ch { + case '`': + w.Write(ldquo) + case '\'': + w.Write(rdquo) + } + i++ // loop will add one more } - i++ // loop will add one more } } template.HTMLEscape(w, s[last:]) } +const ( + // Regexp for Go identifiers + identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this + + // Regexp for URLs + protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):` + hostPart = `[a-zA-Z0-9_@\-]+` + filePart = `[a-zA-Z0-9_?%#~&/\-+=]+` + urlRx = protocol + `//` + // http:// + hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/ + filePart + `([:.,]` + filePart + `)*` +) + +var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`) + var ( + html_a = []byte(`<a href="`) + html_aq = []byte(`">`) + html_enda = []byte("</a>") + html_i = []byte("<i>") + html_endi = []byte("</i>") html_p = []byte("<p>\n") html_endp = []byte("</p>\n") html_pre = []byte("<pre>") @@ -156,6 +189,66 @@ var ( ) +// Emphasize and escape a line of text for HTML. URLs are converted into links; +// if the URL also appears in the words map, the link is taken from the map (if +// the corresponding map value is the empty string, the URL is not converted +// into a link). Go identifiers that appear in the words map are italicized; if +// the corresponding map value is not the empty string, it is considered a URL +// and the word is converted into a link. If nice is set, the remaining text's +// appearance is improved where is makes sense (e.g., `` is turned into “ +// and '' into ”). +func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) { + for { + m := matchRx.Execute(line) + if len(m) == 0 { + break + } + // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx) + + // write text before match + commentEscape(w, line[0:m[0]], nice) + + // analyze match + match := line[m[0]:m[1]] + url := "" + italics := false + if words != nil { + url, italics = words[string(match)] + } + if m[2] < 0 { + // didn't match against first parenthesized sub-regexp; must be match against urlRx + if !italics { + // no alternative URL in words list, use match instead + url = string(match) + } + italics = false // don't italicize URLs + } + + + // write match + if len(url) > 0 { + w.Write(html_a) + w.Write([]byte(http.URLEscape(url))) + w.Write(html_aq) + } + if italics { + w.Write(html_i) + } + commentEscape(w, match, nice) + if italics { + w.Write(html_endi) + } + if len(url) > 0 { + w.Write(html_enda) + } + + // advance + line = line[m[1]:] + } + commentEscape(w, line, nice) +} + + func indentLen(s []byte) int { i := 0 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { @@ -207,11 +300,16 @@ func unindent(block [][]byte) { // The comment markers have already been removed. // // Turn each run of multiple \n into </p><p> -// Turn each run of indented lines into <pre> without indent. +// Turn each run of indented lines into a <pre> block without indent. +// +// URLs in the comment text are converted into links; if the URL also appears +// in the words map, the link is taken from the map (if the corresponding map +// value is the empty string, the URL is not converted into a link). // -// TODO(rsc): I'd like to pass in an array of variable names []string -// and then italicize those strings when they appear as words. -func ToHTML(w io.Writer, s []byte) { +// Go identifiers that appear in the words map are italicized; if the corresponding +// map value is not the empty string, it is considered a URL and the word is converted +// into a link. +func ToHTML(w io.Writer, s []byte, words map[string]string) { inpara := false close := func() { @@ -255,19 +353,17 @@ func ToHTML(w io.Writer, s []byte) { unindent(block) - // put those lines in a pre block. - // they don't get the nice text formatting, - // just html escaping + // put those lines in a pre block w.Write(html_pre) for _, line := range block { - template.HTMLEscape(w, line) + emphasize(w, line, nil, false) // no nice text formatting } w.Write(html_endpre) continue } // open paragraph open() - commentEscape(w, lines[i]) + emphasize(w, lines[i], words, true) // nice text formatting i++ } close() |