Diffstat (limited to 'src/cmd/vendor/rsc.io/markdown/link.go')
-rw-r--r--	src/cmd/vendor/rsc.io/markdown/link.go	861
1 file changed, 861 insertions, 0 deletions
diff --git a/src/cmd/vendor/rsc.io/markdown/link.go b/src/cmd/vendor/rsc.io/markdown/link.go
new file mode 100644
index 0000000000..94fa798f0a
--- /dev/null
+++ b/src/cmd/vendor/rsc.io/markdown/link.go
@@ -0,0 +1,861 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package markdown
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+ "unicode/utf8"
+
+ "golang.org/x/text/cases"
+)
+
+func parseLinkRefDef(p buildState, s string) (int, bool) {
+ // “A link reference definition consists of a link label,
+ // optionally preceded by up to three spaces of indentation,
+ // followed by a colon (:),
+ // optional spaces or tabs (including up to one line ending),
+ // a link destination,
+ // optional spaces or tabs (including up to one line ending),
+ // and an optional link title,
+ // which if it is present must be separated from the link destination
+ // by spaces or tabs. No further character may occur.”
+ i := skipSpace(s, 0)
+ label, i, ok := parseLinkLabel(p.(*parseState), s, i)
+ if !ok || i >= len(s) || s[i] != ':' {
+ return 0, false
+ }
+ i = skipSpace(s, i+1)
+ suf := s[i:]
+ dest, i, ok := parseLinkDest(s, i)
+ if !ok {
+ if suf != "" && suf[0] == '<' {
+ // Goldmark treats <<> as a link definition.
+ p.(*parseState).corner = true
+ }
+ return 0, false
+ }
+ moved := false
+ for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
+ moved = true
+ i++
+ }
+
+ // Take the title, if present, as long as it doesn't break the parse.
+ j := i
+ if j >= len(s) || s[j] == '\n' {
+ moved = true
+ if j < len(s) {
+ j++
+ }
+ }
+
+ var title string
+ var titleChar byte
+ var corner bool
+ if moved {
+ for j < len(s) && (s[j] == ' ' || s[j] == '\t') {
+ j++
+ }
+ if t, c, j, ok := parseLinkTitle(s, j); ok {
+ for j < len(s) && (s[j] == ' ' || s[j] == '\t') {
+ j++
+ }
+ if j >= len(s) || s[j] == '\n' {
+ i = j
+ if t == "" {
+ // Goldmark adds title="" in this case.
+ // We do not, nor does the Dingus.
+ corner = true
+ }
+ title = t
+ titleChar = c
+ }
+ }
+ }
+
+ // Must end line. Already trimmed spaces.
+ if i < len(s) && s[i] != '\n' {
+ return 0, false
+ }
+ if i < len(s) {
+ i++
+ }
+
+ label = normalizeLabel(label)
+ if p.link(label) == nil {
+ p.defineLink(label, &Link{URL: dest, Title: title, TitleChar: titleChar, corner: corner})
+ }
+ return i, true
+}
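+
+// For example, given the input line
+//
+//	[godoc]: https://go.dev/doc "Go docs"
+//
+// parseLinkRefDef consumes the whole line and defines the label “godoc”
+// as &Link{URL: "https://go.dev/doc", Title: "Go docs", TitleChar: '"'},
+// so that later references like [godoc] resolve to that URL.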
+
+func parseLinkTitle(s string, i int) (title string, char byte, next int, found bool) {
+ if i < len(s) && (s[i] == '"' || s[i] == '\'' || s[i] == '(') {
+ want := s[i]
+ if want == '(' {
+ want = ')'
+ }
+ j := i + 1
+ for ; j < len(s); j++ {
+ if s[j] == want {
+ title := s[i+1 : j]
+ // TODO: Validate title?
+ return mdUnescaper.Replace(title), want, j + 1, true
+ }
+ if s[j] == '(' && want == ')' {
+ break
+ }
+ if s[j] == '\\' && j+1 < len(s) {
+ j++
+ }
+ }
+ }
+ return "", 0, 0, false
+}
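+
+// For example, parseLinkTitle(`"Go docs"`, 0) returns ("Go docs", '"', 9, true),
+// while parseLinkTitle("(a (b))", 0) reports no title, because of the
+// unescaped '(' inside the parenthesized form.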
+
+func parseLinkLabel(p *parseState, s string, i int) (string, int, bool) {
+ // “A link label begins with a left bracket ([) and ends with
+ // the first right bracket (]) that is not backslash-escaped.
+ // Between these brackets there must be at least one character
+ // that is not a space, tab, or line ending.
+ // Unescaped square bracket characters are not allowed
+ // inside the opening and closing square brackets of link labels.
+ // A link label can have at most 999 characters inside the square brackets.”
+ if i >= len(s) || s[i] != '[' {
+ return "", 0, false
+ }
+ j := i + 1
+ for ; j < len(s); j++ {
+ if s[j] == ']' {
+ if j-(i+1) > 999 {
+ // Goldmark does not apply 999 limit.
+ p.corner = true
+ break
+ }
+ if label := trimSpaceTabNewline(s[i+1 : j]); label != "" {
+ // Note: CommonMark Dingus does not escape.
+ return label, j + 1, true
+ }
+ break
+ }
+ if s[j] == '[' {
+ break
+ }
+ if s[j] == '\\' && j+1 < len(s) {
+ j++
+ }
+ }
+ return "", 0, false
+}
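+
+// For example, parseLinkLabel(p, "[foo]: /url", 0) returns ("foo", 5, true),
+// while "[a[b]]" fails at the unescaped inner '[' and "[ \t]" fails
+// because the label has no non-space content.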
+
+func normalizeLabel(s string) string {
+ if strings.Contains(s, "[") || strings.Contains(s, "]") {
+ // Labels cannot have [ ] so avoid the work of translating.
+ // This is especially important for pathological cases like
+ // [[[[[[[[[[a]]]]]]]]]] which would otherwise generate quadratic
+ // amounts of garbage.
+ return ""
+ }
+
+ // “To normalize a label, strip off the opening and closing brackets,
+ // perform the Unicode case fold, strip leading and trailing spaces, tabs, and line endings,
+ // and collapse consecutive internal spaces, tabs, and line endings to a single space.”
+ s = trimSpaceTabNewline(s)
+ var b strings.Builder
+ space := false
+ hi := false
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ switch c {
+ case ' ', '\t', '\n':
+ space = true
+ continue
+ default:
+ if space {
+ b.WriteByte(' ')
+ space = false
+ }
+ if 'A' <= c && c <= 'Z' {
+ c += 'a' - 'A'
+ }
+ if c >= 0x80 {
+ hi = true
+ }
+ b.WriteByte(c)
+ }
+ }
+ s = b.String()
+ if hi {
+ s = cases.Fold().String(s)
+ }
+ return s
+}
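+
+// For example, normalizeLabel("  Foo \t BAR ") is "foo bar": leading and
+// trailing space is trimmed, the internal run of spaces and tabs collapses
+// to a single space, and ASCII letters are lowercased directly.
+// A non-ASCII label such as "ΑΒΓ" takes the slower cases.Fold path,
+// yielding "αβγ".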
+
+func parseLinkDest(s string, i int) (string, int, bool) {
+ if i >= len(s) {
+ return "", 0, false
+ }
+
+ // “A sequence of zero or more characters between an opening < and a closing >
+ // that contains no line endings or unescaped < or > characters,”
+ if s[i] == '<' {
+ for j := i + 1; ; j++ {
+ if j >= len(s) || s[j] == '\n' || s[j] == '<' {
+ return "", 0, false
+ }
+ if s[j] == '>' {
+ // TODO unescape?
+ return mdUnescape(s[i+1 : j]), j + 1, true
+ }
+ if s[j] == '\\' {
+ j++
+ }
+ }
+ }
+
+ // “or a nonempty sequence of characters that does not start with <,
+ // does not include ASCII control characters or space character,
+ // and includes parentheses only if (a) they are backslash-escaped
+ // or (b) they are part of a balanced pair of unescaped parentheses.”
+ depth := 0
+ j := i
+Loop:
+ for ; j < len(s); j++ {
+ switch s[j] {
+ case '(':
+ depth++
+ if depth > 32 {
+ // Avoid quadratic behavior on pathological inputs by stopping if too deep.
+ // This is the same depth that cmark-gfm uses.
+ return "", 0, false
+ }
+ case ')':
+ if depth == 0 {
+ break Loop
+ }
+ depth--
+ case '\\':
+ if j+1 < len(s) {
+ if s[j+1] == ' ' || s[j+1] == '\t' {
+ return "", 0, false
+ }
+ j++
+ }
+ case ' ', '\t', '\n':
+ break Loop
+ }
+ }
+
+ dest := s[i:j]
+ // TODO: Validate dest?
+ // TODO: Unescape?
+ // NOTE: CommonMark Dingus does not reject control characters.
+ return mdUnescape(dest), j, true
+}
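+
+// For example, parseLinkDest("<my url>", 0) returns ("my url", 8, true),
+// and parseLinkDest("foo(bar) baz", 0) returns ("foo(bar)", 8, true):
+// the balanced parentheses are included, and the space ends the destination.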
+
+func parseAutoLinkURI(s string, i int) (Inline, int, bool) {
+ // CommonMark 0.30:
+ //
+ // For purposes of this spec, a scheme is any sequence of 2–32 characters
+ // beginning with an ASCII letter and followed by any combination of
+ // ASCII letters, digits, or the symbols plus (“+”), period (“.”), or
+ // hyphen (“-”).
+ //
+ // An absolute URI, for these purposes, consists of a scheme followed by
+ // a colon (:) followed by zero or more characters other than ASCII control
+ // characters, space, <, and >. If the URI includes these characters,
+ // they must be percent-encoded (e.g. %20 for a space).
+
+ j := i
+ if j+1 >= len(s) || s[j] != '<' || !isLetter(s[j+1]) {
+ return nil, 0, false
+ }
+ j++
+ for j < len(s) && isScheme(s[j]) && j-(i+1) <= 32 {
+ j++
+ }
+ if j-(i+1) < 2 || j-(i+1) > 32 || j >= len(s) || s[j] != ':' {
+ return nil, 0, false
+ }
+ j++
+ for j < len(s) && isURL(s[j]) {
+ j++
+ }
+ if j >= len(s) || s[j] != '>' {
+ return nil, 0, false
+ }
+ link := s[i+1 : j]
+ // link = mdUnescaper.Replace(link)
+ return &AutoLink{link, link}, j + 1, true
+}
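+
+// For example, parseAutoLinkURI("<https://go.dev>", 0) returns
+// (&AutoLink{Text: "https://go.dev", URL: "https://go.dev"}, 16, true),
+// while "<https://go dev>" fails at the space, which is not a URL character.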
+
+func parseAutoLinkEmail(s string, i int) (Inline, int, bool) {
+ // CommonMark 0.30:
+ //
+ // An email address, for these purposes, is anything that matches
+ // the non-normative regex from the HTML5 spec:
+ //
+ // /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
+
+ j := i
+ if j+1 >= len(s) || s[j] != '<' || !isUser(s[j+1]) {
+ return nil, 0, false
+ }
+ j++
+ for j < len(s) && isUser(s[j]) {
+ j++
+ }
+ if j >= len(s) || s[j] != '@' {
+ return nil, 0, false
+ }
+ for {
+ j++
+ n, ok := skipDomainElem(s[j:])
+ if !ok {
+ return nil, 0, false
+ }
+ j += n
+ if j >= len(s) || s[j] != '.' && s[j] != '>' {
+ return nil, 0, false
+ }
+ if s[j] == '>' {
+ break
+ }
+ }
+ email := s[i+1 : j]
+ return &AutoLink{email, "mailto:" + email}, j + 1, true
+}
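+
+// For example, parseAutoLinkEmail("<rsc@example.com>", 0) returns
+// (&AutoLink{Text: "rsc@example.com", URL: "mailto:rsc@example.com"}, 17, true).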
+
+func isUser(c byte) bool {
+ if isLetterDigit(c) {
+ return true
+ }
+ s := ".!#$%&'*+/=?^_`{|}~-"
+ for i := 0; i < len(s); i++ {
+ if c == s[i] {
+ return true
+ }
+ }
+ return false
+}
+
+func isHexDigit(c byte) bool {
+ return 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' || '0' <= c && c <= '9'
+}
+
+func isDigit(c byte) bool {
+ return '0' <= c && c <= '9'
+}
+
+func skipDomainElem(s string) (int, bool) {
+ // String of LDH, up to 63 in length, with LetterDigit
+ // at both ends (1-letter/digit names are OK).
+ // Aka /[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?/.
+ if len(s) < 1 || !isLetterDigit(s[0]) {
+ return 0, false
+ }
+ i := 1
+ for i < len(s) && isLDH(s[i]) && i <= 63 {
+ i++
+ }
+ if i > 63 || !isLetterDigit(s[i-1]) {
+ return 0, false
+ }
+ return i, true
+}
+
+func isScheme(c byte) bool {
+ return isLetterDigit(c) || c == '+' || c == '.' || c == '-'
+}
+
+func isURL(c byte) bool {
+ return c > ' ' && c != '<' && c != '>'
+}
+
+type AutoLink struct {
+ Text string
+ URL string
+}
+
+func (*AutoLink) Inline() {}
+
+func (x *AutoLink) PrintHTML(buf *bytes.Buffer) {
+ fmt.Fprintf(buf, "<a href=\"%s\">%s</a>", htmlLinkEscaper.Replace(x.URL), htmlEscaper.Replace(x.Text))
+}
+
+func (x *AutoLink) printMarkdown(buf *bytes.Buffer) {
+ fmt.Fprintf(buf, "<%s>", x.Text)
+}
+
+func (x *AutoLink) PrintText(buf *bytes.Buffer) {
+ fmt.Fprintf(buf, "%s", htmlEscaper.Replace(x.Text))
+}
+
+type Link struct {
+ Inner []Inline
+ URL string
+ Title string
+ TitleChar byte // ', " or )
+ corner bool
+}
+
+func (*Link) Inline() {}
+
+func (x *Link) PrintHTML(buf *bytes.Buffer) {
+ fmt.Fprintf(buf, "<a href=\"%s\"", htmlLinkEscaper.Replace(x.URL))
+ if x.Title != "" {
+ fmt.Fprintf(buf, " title=\"%s\"", htmlQuoteEscaper.Replace(x.Title))
+ }
+ buf.WriteString(">")
+ for _, c := range x.Inner {
+ c.PrintHTML(buf)
+ }
+ buf.WriteString("</a>")
+}
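+
+// For example, a Link for [Go](https://go.dev "docs") prints as
+//
+//	<a href="https://go.dev" title="docs">Go</a>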
+
+func (x *Link) printMarkdown(buf *bytes.Buffer) {
+ buf.WriteByte('[')
+ x.printRemainingMarkdown(buf)
+}
+
+func (x *Link) printRemainingMarkdown(buf *bytes.Buffer) {
+ for _, c := range x.Inner {
+ c.printMarkdown(buf)
+ }
+ buf.WriteString("](")
+ buf.WriteString(x.URL)
+ printLinkTitleMarkdown(buf, x.Title, x.TitleChar)
+ buf.WriteByte(')')
+}
+
+func printLinkTitleMarkdown(buf *bytes.Buffer, title string, titleChar byte) {
+ if title == "" {
+ return
+ }
+ closeChar := titleChar
+ openChar := closeChar
+ if openChar == ')' {
+ openChar = '('
+ }
+ fmt.Fprintf(buf, " %c%s%c", openChar, title /*TODO(jba): escape*/, closeChar)
+}
+
+func (x *Link) PrintText(buf *bytes.Buffer) {
+ for _, c := range x.Inner {
+ c.PrintText(buf)
+ }
+}
+
+type Image struct {
+ Inner []Inline
+ URL string
+ Title string
+ TitleChar byte
+ corner bool
+}
+
+func (*Image) Inline() {}
+
+func (x *Image) PrintHTML(buf *bytes.Buffer) {
+ fmt.Fprintf(buf, "<img src=\"%s\"", htmlLinkEscaper.Replace(x.URL))
+ fmt.Fprintf(buf, " alt=\"")
+ i := buf.Len()
+ for _, c := range x.Inner {
+ c.PrintText(buf)
+ }
+ // GitHub and Goldmark both rewrite \n to space
+ // but the Dingus does not.
+ // The spec says title can be split across lines but not
+ // what happens at that point.
+ out := buf.Bytes()
+ for ; i < len(out); i++ {
+ if out[i] == '\n' {
+ out[i] = ' '
+ }
+ }
+ fmt.Fprintf(buf, "\"")
+ if x.Title != "" {
+ fmt.Fprintf(buf, " title=\"%s\"", htmlQuoteEscaper.Replace(x.Title))
+ }
+ buf.WriteString(" />")
+}
+
+func (x *Image) printMarkdown(buf *bytes.Buffer) {
+ buf.WriteString("![")
+ (*Link)(x).printRemainingMarkdown(buf)
+}
+
+func (x *Image) PrintText(buf *bytes.Buffer) {
+ for _, c := range x.Inner {
+ c.PrintText(buf)
+ }
+}
+
+// GitHub Flavored Markdown autolinks extension
+// https://github.github.com/gfm/#autolinks-extension-
+
+// autoLinkMore rewrites any extended autolinks in the body
+// and returns the result.
+//
+// body is a list of Plain, Emph, Strong, and Del nodes.
+// Two Plains only appear consecutively when one is a
+// potential emphasis marker that ended up being plain after all, like "_" or "**".
+// There are no Link nodes.
+//
+// The GitHub “spec” declares that “autolinks can only come at the
+// beginning of a line, after whitespace, or any of the delimiting
+// characters *, _, ~, and (”. However, the GitHub web site does not
+// enforce this rule: text like "$abc@def.ghi is my email" links the
+// text following the $ as an email address. It appears the actual rule
+// is that autolinks cannot come after ASCII letters, although they can
+// come after numbers or Unicode letters.
+// Since the only point of implementing GitHub Flavored Markdown
+// is to match GitHub's behavior, we do what they do, not what they say,
+// at least for now.
+func (p *parseState) autoLinkText(list []Inline) []Inline {
+ if !p.AutoLinkText {
+ return list
+ }
+
+ var out []Inline // allocated lazily when we first change list
+ for i, x := range list {
+ switch x := x.(type) {
+ case *Plain:
+ if rewrite := p.autoLinkPlain(x.Text); rewrite != nil {
+ if out == nil {
+ out = append(out, list[:i]...)
+ }
+ out = append(out, rewrite...)
+ continue
+ }
+ case *Strong:
+ x.Inner = p.autoLinkText(x.Inner)
+ case *Del:
+ x.Inner = p.autoLinkText(x.Inner)
+ case *Emph:
+ x.Inner = p.autoLinkText(x.Inner)
+ }
+ if out != nil {
+ out = append(out, x)
+ }
+ }
+ if out == nil {
+ return list
+ }
+ return out
+}
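+
+// For example, applied to the single inline Plain{Text: "visit www.example.com today"},
+// autoLinkText returns three inlines: Plain{Text: "visit "}, a Link with
+// URL "https://www.example.com" and link text "www.example.com", and
+// Plain{Text: " today"}.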
+
+func (p *parseState) autoLinkPlain(s string) []Inline {
+ vd := &validDomainChecker{s: s}
+ var out []Inline
+Restart:
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if c == '@' {
+ if before, link, after, ok := p.parseAutoEmail(s, i); ok {
+ if before != "" {
+ out = append(out, &Plain{Text: before})
+ }
+ out = append(out, link)
+ vd.skip(len(s) - len(after))
+ s = after
+ goto Restart
+ }
+ }
+
+ if (c == 'h' || c == 'm' || c == 'x' || c == 'w') && (i == 0 || !isLetter(s[i-1])) {
+ if link, after, ok := p.parseAutoProto(s, i, vd); ok {
+ if i > 0 {
+ out = append(out, &Plain{Text: s[:i]})
+ }
+ out = append(out, link)
+ vd.skip(len(s) - len(after))
+ s = after
+ goto Restart
+ }
+ }
+ }
+ if out == nil {
+ return nil
+ }
+ out = append(out, &Plain{Text: s})
+ return out
+}
+
+func (p *parseState) parseAutoProto(s string, i int, vd *validDomainChecker) (link *Link, after string, found bool) {
+ if s == "" {
+ return
+ }
+ switch s[i] {
+ case 'h':
+ var n int
+ if strings.HasPrefix(s[i:], "https://") {
+ n = len("https://")
+ } else if strings.HasPrefix(s[i:], "http://") {
+ n = len("http://")
+ } else {
+ return
+ }
+ return p.parseAutoHTTP(s[i:i+n], s, i, i+n, i+n+1, vd)
+ case 'w':
+ if !strings.HasPrefix(s[i:], "www.") {
+ return
+ }
+ // GitHub Flavored Markdown says to use http://,
+ // but it's not 1985 anymore. We live in the https:// future
+ // (unless the parser is explicitly configured otherwise).
+ // People who really care in their docs can write http:// themselves.
+ scheme := "https://"
+ if p.AutoLinkAssumeHTTP {
+ scheme = "http://"
+ }
+ return p.parseAutoHTTP(scheme, s, i, i, i+3, vd)
+ case 'm':
+ if !strings.HasPrefix(s[i:], "mailto:") {
+ return
+ }
+ return p.parseAutoMailto(s, i)
+ case 'x':
+ if !strings.HasPrefix(s[i:], "xmpp:") {
+ return
+ }
+ return p.parseAutoXmpp(s, i)
+ }
+ return
+}
+
+// parseAutoHTTP parses an extended www or http(s) autolink.
+// https://github.github.com/gfm/#extended-www-autolink
+func (p *parseState) parseAutoHTTP(scheme, s string, textstart, start, min int, vd *validDomainChecker) (link *Link, after string, found bool) {
+ n, ok := vd.parseValidDomain(start)
+ if !ok {
+ return
+ }
+ i := start + n
+ domEnd := i
+
+ // “After a valid domain, zero or more non-space non-< characters may follow.”
+ paren := 0
+ for i < len(s) {
+ r, n := utf8.DecodeRuneInString(s[i:])
+ if isUnicodeSpace(r) || r == '<' {
+ break
+ }
+ if r == '(' {
+ paren++
+ }
+ if r == ')' {
+ paren--
+ }
+ i += n
+ }
+
+ // https://github.github.com/gfm/#extended-autolink-path-validation
+Trim:
+ for i > min {
+ switch s[i-1] {
+ case '?', '!', '.', ',', ':', '@', '_', '~':
+ // Trim certain trailing punctuation.
+ i--
+ continue Trim
+
+ case ')':
+ // Trim trailing unmatched (by count only) parens.
+ if paren < 0 {
+ for s[i-1] == ')' && paren < 0 {
+ paren++
+ i--
+ }
+ continue Trim
+ }
+
+ case ';':
+ // Trim entity reference.
+ // After doing the work of the scan, we either cut that part off the string
+ // or we stop the trimming entirely, so there's no chance of repeating
+ // the scan on a future iteration and going accidentally quadratic.
+ // Even though the Markdown spec already requires having a complete
+ // list of all the HTML entities, the GitHub definition here only requires
+ // something that "looks like" an entity: an ampersand, letters/digits, and a semicolon.
+ for j := i - 2; j > start; j-- {
+ if j < i-2 && s[j] == '&' {
+ i = j
+ continue Trim
+ }
+ if !isLetterDigit(s[j]) {
+ break Trim
+ }
+ }
+ }
+ break Trim
+ }
+
+ // According to the literal text of the GitHub Flavored Markdown spec
+ // and the actual behavior on GitHub,
+ // www.example.com$foo turns into <a href="https://www.example.com$foo">,
+ // but that makes the character restrictions in the valid-domain check
+ // almost meaningless. So we insist that when all is said and done,
+ // if the domain is followed by anything, that thing must be a slash,
+ // even though GitHub is not that picky.
+ // People might complain about www.example.com:1234 not working,
+ // but if you want to get fancy with that kind of thing, just write http:// in front.
+ if textstart == start && i > domEnd && s[domEnd] != '/' {
+ i = domEnd
+ }
+
+ if i < min {
+ return
+ }
+
+ link = &Link{
+ Inner: []Inline{&Plain{Text: s[textstart:i]}},
+ URL: scheme + s[start:i],
+ }
+ return link, s[i:], true
+}
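+
+// For example, in the text “see https://go.dev/doc/cmd, or run go help”
+// the autolink stops before the comma, linking https://go.dev/doc/cmd,
+// and in “(www.example.com)” the trailing unmatched ')' is trimmed,
+// so only www.example.com is linked.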
+
+type validDomainChecker struct {
+ s string
+ cut int // before this index, no valid domains
+}
+
+func (v *validDomainChecker) skip(i int) {
+ v.s = v.s[i:]
+ v.cut -= i
+}
+
+// parseValidDomain parses a valid domain.
+// https://github.github.com/gfm/#valid-domain
+//
+// If the text starting at start begins a valid domain,
+// parseValidDomain returns the length of that domain and true.
+// Otherwise it returns 0, false, after advancing v.cut past the
+// scanned prefix, so that future calls starting within that
+// prefix can fail immediately.
+//
+// “A valid domain consists of segments of alphanumeric characters,
+// underscores (_) and hyphens (-) separated by periods (.).
+// There must be at least one period, and no underscores may be
+// present in the last two segments of the domain.”
+//
+// The spec does not spell out whether segments can be empty.
+// Empirically, in GitHub's implementation they can.
+func (v *validDomainChecker) parseValidDomain(start int) (n int, found bool) {
+ if start < v.cut {
+ return 0, false
+ }
+ i := start
+ dots := 0
+ for ; i < len(v.s); i++ {
+ c := v.s[i]
+ if c == '_' {
+ dots = -2
+ continue
+ }
+ if c == '.' {
+ dots++
+ continue
+ }
+ if !isLDH(c) {
+ break
+ }
+ }
+ if dots >= 0 && i > start {
+ return i - start, true
+ }
+ v.cut = i
+ return 0, false
+}
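+
+// For example, with v.s = "img_1.example.com", parseValidDomain(0)
+// returns (17, true): the underscore is followed by two periods, so it is
+// not in the last two segments. With v.s = "example.img_1" it returns
+// (0, false) and advances v.cut, since the underscore falls in the
+// final segment.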
+
+func (p *parseState) parseAutoEmail(s string, i int) (before string, link *Link, after string, ok bool) {
+ if s[i] != '@' {
+ return
+ }
+
+ // “One or more characters which are alphanumeric, or ., -, _, or +.”
+ j := i
+ for j > 0 && (isLDH(s[j-1]) || s[j-1] == '_' || s[j-1] == '+' || s[j-1] == '.') {
+ j--
+ }
+ if i-j < 1 {
+ return
+ }
+
+ // “One or more characters which are alphanumeric, or - or _, separated by periods (.).
+ // There must be at least one period. The last character must not be one of - or _.”
+ dots := 0
+ k := i + 1
+ for k < len(s) && (isLDH(s[k]) || s[k] == '_' || s[k] == '.') {
+ if s[k] == '.' {
+ if s[k-1] == '.' {
+ // Empirically, .. stops the scan but foo@.bar is fine.
+ break
+ }
+ dots++
+ }
+ k++
+ }
+
+ // “., -, and _ can occur on both sides of the @, but only . may occur at the end
+ // of the email address, in which case it will not be considered part of the address”
+ if s[k-1] == '.' {
+ dots--
+ k--
+ }
+ if s[k-1] == '-' || s[k-1] == '_' {
+ return
+ }
+ if k-(i+1)-dots < 2 || dots < 1 {
+ return
+ }
+
+ link = &Link{
+ Inner: []Inline{&Plain{Text: s[j:k]}},
+ URL: "mailto:" + s[j:k],
+ }
+ return s[:j], link, s[k:], true
+}
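+
+// For example, given "mail rsc@swtch.com." and the index of the '@',
+// parseAutoEmail scans back over "rsc" and forward over "swtch.com",
+// drops the trailing period, and returns before "mail ", after ".",
+// and a link with URL "mailto:rsc@swtch.com".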
+
+func (p *parseState) parseAutoMailto(s string, i int) (link *Link, after string, ok bool) {
+ j := i + len("mailto:")
+ for j < len(s) && (isLDH(s[j]) || s[j] == '_' || s[j] == '+' || s[j] == '.') {
+ j++
+ }
+ if j >= len(s) || s[j] != '@' {
+ return
+ }
+ before, link, after, ok := p.parseAutoEmail(s[i:], j-i)
+ if before != "mailto:" || !ok {
+ return nil, "", false
+ }
+ link.Inner[0] = &Plain{Text: s[i : len(s)-len(after)]}
+ return link, after, true
+}
+
+func (p *parseState) parseAutoXmpp(s string, i int) (link *Link, after string, ok bool) {
+ j := i + len("xmpp:")
+ for j < len(s) && (isLDH(s[j]) || s[j] == '_' || s[j] == '+' || s[j] == '.') {
+ j++
+ }
+ if j >= len(s) || s[j] != '@' {
+ return
+ }
+ before, link, after, ok := p.parseAutoEmail(s[i:], j-i)
+ if before != "xmpp:" || !ok {
+ return nil, "", false
+ }
+ if after != "" && after[0] == '/' {
+ k := 1
+ for k < len(after) && (isLetterDigit(after[k]) || after[k] == '@' || after[k] == '.') {
+ k++
+ }
+ after = after[k:]
+ }
+ url := s[i : len(s)-len(after)]
+ link.Inner[0] = &Plain{Text: url}
+ link.URL = url
+ return link, after, true
+}
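+
+// For example, given "xmpp:rsc@example.com/chat now" and i = 0,
+// parseAutoXmpp links the text "xmpp:rsc@example.com/chat", with the URL
+// equal to that same text: the optional /chat resource is kept,
+// and " now" is returned as the remaining text.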