aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Griesemer <gri@golang.org>2010-03-19 13:01:45 -0700
committerRobert Griesemer <gri@golang.org>2010-03-19 13:01:45 -0700
commit90f7209548ee73dd1e918a4a03a375e6818081ad (patch)
tree98c4d83ccdc132295b633b1aa26f9b442886a792
parent17e03514043c2a23cc6b51e838eda02d0adc8e15 (diff)
downloadgo-90f7209548ee73dd1e918a4a03a375e6818081ad.tar.gz
go-90f7209548ee73dd1e918a4a03a375e6818081ad.zip
godoc: improved comment formatting: recognize URLs
and highlight special words, if provided. Also: - related cleanups in src/pkg/go/doc/comment.go - fix typos in src/cmd/goinstall/doc.go Fixes #672. R=rsc CC=adg, golang-dev https://golang.org/cl/601042
-rw-r--r--src/cmd/godoc/godoc.go4
-rw-r--r--src/cmd/goinstall/doc.go2
-rw-r--r--src/pkg/go/doc/comment.go174
3 files changed, 139 insertions, 41 deletions
diff --git a/src/cmd/godoc/godoc.go b/src/cmd/godoc/godoc.go
index 8a8cd420ab..4a625311f4 100644
--- a/src/cmd/godoc/godoc.go
+++ b/src/cmd/godoc/godoc.go
@@ -665,7 +665,9 @@ func htmlEscFmt(w io.Writer, x interface{}, format string) {
func htmlCommentFmt(w io.Writer, x interface{}, format string) {
var buf bytes.Buffer
writeAny(&buf, x, false)
- doc.ToHTML(w, buf.Bytes()) // does html-escaping
+ // TODO(gri) Provide list of words (e.g. function parameters)
+ // to be emphasized by ToHTML.
+ doc.ToHTML(w, buf.Bytes(), nil) // does html-escaping
}
diff --git a/src/cmd/goinstall/doc.go b/src/cmd/goinstall/doc.go
index c35e9e043b..c5f93f9290 100644
--- a/src/cmd/goinstall/doc.go
+++ b/src/cmd/goinstall/doc.go
@@ -6,7 +6,7 @@
Goinstall is an experiment in automatic package installation.
It installs packages, possibly downloading them from the internet.
-It maintains a list of public Go packages at http://godashboard.appspot.com/packages.
+It maintains a list of public Go packages at http://godashboard.appspot.com/package.
Usage:
goinstall [flags] importpath...
diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go
index 6e9ad0b04c..3fc6396637 100644
--- a/src/pkg/go/doc/comment.go
+++ b/src/pkg/go/doc/comment.go
@@ -8,12 +8,25 @@ package doc
import (
"go/ast"
+ "http" // for URLEscape
"io"
+ "regexp"
"strings"
"template" // for htmlEscape
)
-// Comment extraction
+
+func isWhitespace(ch byte) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' }
+
+
+func stripTrailingWhitespace(s string) string {
+ i := len(s)
+ for i > 0 && isWhitespace(s[i-1]) {
+ i--
+ }
+ return s[0:i]
+}
+
// CommentText returns the text of comment,
// with the comment markers - //, /*, and */ - removed.
@@ -26,19 +39,23 @@ func CommentText(comment *ast.CommentGroup) string {
comments[i] = string(c.Text)
}
- lines := make([]string, 0, 20)
+ lines := make([]string, 0, 10) // most comments are less than 10 lines
for _, c := range comments {
// Remove comment markers.
// The parser has given us exactly the comment text.
- switch n := len(c); {
- case n >= 4 && c[0:2] == "/*" && c[n-2:n] == "*/":
- c = c[2 : n-2]
- case n >= 2 && c[0:2] == "//":
- c = c[2:n]
+ switch c[1] {
+ case '/':
+ //-style comment
+ c = c[2:]
// Remove leading space after //, if there is one.
+ // TODO(gri) This appears to be necessary in isolated
+ // cases (bignum.RatFromString) - why?
if len(c) > 0 && c[0] == ' ' {
c = c[1:]
}
+ case '*':
+ /*-style comment */
+ c = c[2 : len(c)-2]
}
// Split on newlines.
@@ -46,20 +63,12 @@ func CommentText(comment *ast.CommentGroup) string {
// Walk lines, stripping trailing white space and adding to list.
for _, l := range cl {
- // Strip trailing white space
- m := len(l)
- for m > 0 && (l[m-1] == ' ' || l[m-1] == '\n' || l[m-1] == '\t' || l[m-1] == '\r') {
- m--
- }
- l = l[0:m]
-
+ l = stripTrailingWhitespace(l)
// Add to list.
n := len(lines)
if n+1 >= cap(lines) {
newlines := make([]string, n, 2*cap(lines))
- for k := range newlines {
- newlines[k] = lines[k]
- }
+ copy(newlines, lines)
lines = newlines
}
lines = lines[0 : n+1]
@@ -88,6 +97,7 @@ func CommentText(comment *ast.CommentGroup) string {
return strings.Join(lines, "\n")
}
+
// Split bytes into lines.
func split(text []byte) [][]byte {
// count lines
@@ -127,28 +137,51 @@ var (
rdquo = []byte("&rdquo;")
)
-// Escape comment text for HTML.
-// Also, turn `` into &ldquo; and '' into &rdquo;.
-func commentEscape(w io.Writer, s []byte) {
+// Escape comment text for HTML. If nice is set,
+// also turn `` into &ldquo; and '' into &rdquo;.
+func commentEscape(w io.Writer, s []byte, nice bool) {
last := 0
- for i := 0; i < len(s)-1; i++ {
- if s[i] == s[i+1] && (s[i] == '`' || s[i] == '\'') {
- template.HTMLEscape(w, s[last:i])
- last = i + 2
- switch s[i] {
- case '`':
- w.Write(ldquo)
- case '\'':
- w.Write(rdquo)
+ if nice {
+ for i := 0; i < len(s)-1; i++ {
+ ch := s[i]
+ if ch == s[i+1] && (ch == '`' || ch == '\'') {
+ template.HTMLEscape(w, s[last:i])
+ last = i + 2
+ switch ch {
+ case '`':
+ w.Write(ldquo)
+ case '\'':
+ w.Write(rdquo)
+ }
+ i++ // loop will add one more
}
- i++ // loop will add one more
}
}
template.HTMLEscape(w, s[last:])
}
+const (
+ // Regexp for Go identifiers
+ identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this
+
+ // Regexp for URLs
+ protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
+ hostPart = `[a-zA-Z0-9_@\-]+`
+ filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
+ urlRx = protocol + `//` + // http://
+ hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
+ filePart + `([:.,]` + filePart + `)*`
+)
+
+var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`)
+
var (
+ html_a = []byte(`<a href="`)
+ html_aq = []byte(`">`)
+ html_enda = []byte("</a>")
+ html_i = []byte("<i>")
+ html_endi = []byte("</i>")
html_p = []byte("<p>\n")
html_endp = []byte("</p>\n")
html_pre = []byte("<pre>")
@@ -156,6 +189,66 @@ var (
)
+// Emphasize and escape a line of text for HTML. URLs are converted into links;
+// if the URL also appears in the words map, the link is taken from the map (if
+// the corresponding map value is the empty string, the URL is not converted
+// into a link). Go identifiers that appear in the words map are italicized; if
+// the corresponding map value is not the empty string, it is considered a URL
+// and the word is converted into a link. If nice is set, the remaining text's
+// appearance is improved where is makes sense (e.g., `` is turned into &ldquo;
+// and '' into &rdquo;).
+func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
+ for {
+ m := matchRx.Execute(line)
+ if len(m) == 0 {
+ break
+ }
+ // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx)
+
+ // write text before match
+ commentEscape(w, line[0:m[0]], nice)
+
+ // analyze match
+ match := line[m[0]:m[1]]
+ url := ""
+ italics := false
+ if words != nil {
+ url, italics = words[string(match)]
+ }
+ if m[2] < 0 {
+ // didn't match against first parenthesized sub-regexp; must be match against urlRx
+ if !italics {
+ // no alternative URL in words list, use match instead
+ url = string(match)
+ }
+ italics = false // don't italicize URLs
+ }
+
+
+ // write match
+ if len(url) > 0 {
+ w.Write(html_a)
+ w.Write([]byte(http.URLEscape(url)))
+ w.Write(html_aq)
+ }
+ if italics {
+ w.Write(html_i)
+ }
+ commentEscape(w, match, nice)
+ if italics {
+ w.Write(html_endi)
+ }
+ if len(url) > 0 {
+ w.Write(html_enda)
+ }
+
+ // advance
+ line = line[m[1]:]
+ }
+ commentEscape(w, line, nice)
+}
+
+
func indentLen(s []byte) int {
i := 0
for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
@@ -207,11 +300,16 @@ func unindent(block [][]byte) {
// The comment markers have already been removed.
//
// Turn each run of multiple \n into </p><p>
-// Turn each run of indented lines into <pre> without indent.
+// Turn each run of indented lines into a <pre> block without indent.
+//
+// URLs in the comment text are converted into links; if the URL also appears
+// in the words map, the link is taken from the map (if the corresponding map
+// value is the empty string, the URL is not converted into a link).
//
-// TODO(rsc): I'd like to pass in an array of variable names []string
-// and then italicize those strings when they appear as words.
-func ToHTML(w io.Writer, s []byte) {
+// Go identifiers that appear in the words map are italicized; if the corresponding
+// map value is not the empty string, it is considered a URL and the word is converted
+// into a link.
+func ToHTML(w io.Writer, s []byte, words map[string]string) {
inpara := false
close := func() {
@@ -255,19 +353,17 @@ func ToHTML(w io.Writer, s []byte) {
unindent(block)
- // put those lines in a pre block.
- // they don't get the nice text formatting,
- // just html escaping
+ // put those lines in a pre block
w.Write(html_pre)
for _, line := range block {
- template.HTMLEscape(w, line)
+ emphasize(w, line, nil, false) // no nice text formatting
}
w.Write(html_endpre)
continue
}
// open paragraph
open()
- commentEscape(w, lines[i])
+ emphasize(w, lines[i], words, true) // nice text formatting
i++
}
close()