diff options
author | Robert Griesemer <gri@golang.org> | 2009-11-08 21:48:51 -0800 |
---|---|---|
committer | Robert Griesemer <gri@golang.org> | 2009-11-08 21:48:51 -0800 |
commit | c532940ecf612b59cb71901fbad7bdfbb8d63111 (patch) | |
tree | 64fee715c1505a3d5cd2596e15eef5a6479bbc9e | |
parent | ed6eb5b57736af09d75e224e36d22b1a50053136 (diff) | |
download | go-c532940ecf612b59cb71901fbad7bdfbb8d63111.tar.gz go-c532940ecf612b59cb71901fbad7bdfbb8d63111.zip |
improved sentence extraction:
- don't forget first periods
- look at capitalization of last char before periods
R=rsc
http://go/go-review/1024027
-rw-r--r-- | src/cmd/godoc/godoc.go | 38 |
1 files changed, 28 insertions, 10 deletions
diff --git a/src/cmd/godoc/godoc.go b/src/cmd/godoc/godoc.go index 2acaa7cfe3..eb97253508 100644 --- a/src/cmd/godoc/godoc.go +++ b/src/cmd/godoc/godoc.go @@ -23,6 +23,7 @@ import ( "sync"; "template"; "time"; + "unicode"; "utf8"; ) @@ -137,21 +138,38 @@ func htmlEscape(s string) string { func firstSentence(s string) string { - // find first period followed by whitespace, or just the first period - i := -1; - for j, ch := range s { + i := -1; // index+1 of first period + j := -1; // index+1 of first period that is followed by white space + prev := 'A'; + for k, ch := range s { + k1 := k+1; if ch == '.' { - i = j+1; // include period - if i < len(s) && s[i] <= ' ' { - break; + if i < 0 { + i = k1; // first period + } + if k1 < len(s) && s[k1] <= ' ' { + if j < 0 { + j = k1; // first period followed by white space + } + if !unicode.IsUpper(prev) { + j = k1; + break; + } } } + prev = ch; } - if i < 0 { - // no period found, use the enire string - i = len(s); + + if j < 0 { + // use the next best period + j = i; + if j < 0 { + // no period at all, use the entire string + j = len(s); + } } - return s[0:i]; + + return s[0:j]; } |