1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"io"
"unicode/utf8"
)
// Layout of source.buf (s is the sentinel byte stored at buf[w]):
//
// buf [...read...|...|...unread...|s|...free...]
//         ^      ^   ^            ^
//         |      |   |            |
//        suf     r0  r            w

// source is a buffered rune reader over an io.Reader. It tracks the
// current line, supports undoing the most recent getr call, and can
// collect the bytes of a literal while it is being scanned, even when
// the literal spans several buffer refills.
//
// Errors (NUL bytes, misplaced BOMs, invalid UTF-8, read errors other
// than io.EOF) are reported by panicking.
type source struct {
	src io.Reader

	// source buffer
	buf         [4 << 10]byte
	offs        int   // source offset of buf
	r0, r, w    int   // previous/current read and write buf positions, excluding sentinel
	line0, line int   // previous/current line
	err         error // pending io error

	// literal buffer
	lit []byte // literal prefix
	suf int    // literal suffix; suf >= 0 means we are scanning a literal
}

// init prepares s for reading from src. All positions are reset, line
// counting starts at 1, any pending error is cleared, and no literal is
// being collected. The literal buffer's storage is kept for reuse.
func (s *source) init(src io.Reader) {
	s.src = src

	s.buf[0] = utf8.RuneSelf // terminate with sentinel
	s.offs = 0
	s.r0, s.r, s.w = 0, 0, 0
	s.line0, s.line = 1, 1
	s.err = nil

	s.lit = s.lit[:0]
	s.suf = -1
}

// pos returns the byte offset, relative to the start of the source,
// of the current read position.
func (s *source) pos() int {
	return s.offs + s.r
}

// ungetr undoes the most recent getr call by restoring the read
// position and line number that getr recorded before reading.
func (s *source) ungetr() {
	s.r, s.line = s.r0, s.line0
}

// getr returns the next rune of the source, refilling the buffer as
// needed, or -1 once the underlying reader has reported io.EOF and all
// buffered bytes have been consumed.
//
// getr panics with a descriptive string for a NUL byte, for a BOM that
// is not the very first character, and for invalid UTF-8 (including a
// multi-byte sequence cut short by the end of the input). It panics
// with the reader's error if reading failed with anything but io.EOF.
// In the string cases the offending bytes have already been consumed
// when the panic is raised.
func (s *source) getr() rune {
	for {
		s.r0, s.line0 = s.r, s.line

		// common case: ASCII and enough bytes
		// (buf[w] holds the sentinel utf8.RuneSelf, so this test
		// fails when there are no unread bytes left)
		if b := s.buf[s.r]; b < utf8.RuneSelf {
			s.r++
			if b == 0 {
				panic("invalid NUL character")
			}
			if b == '\n' {
				s.line++
			}
			return rune(b)
		}

		// uncommon case: not ASCII or not enough bytes
		r, w := utf8.DecodeRune(s.buf[s.r:s.w]) // optimistically assume valid rune
		if r != utf8.RuneError || w > 1 {
			s.r += w
			// BOM's are only allowed as the first character in a file
			const BOM = 0xfeff
			if r == BOM && s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1)
				panic("invalid BOM in the middle of the file")
			}
			return r
		}

		// no unread bytes and nothing more to read
		if w == 0 && s.err != nil {
			if s.err != io.EOF {
				panic(s.err)
			}
			return -1
		}

		// The bytes at s.r do not decode. That is an encoding error if
		// they already form a complete (but invalid) sequence, or if
		// no more bytes can arrive to complete them because a read
		// error or EOF is pending. Without the s.err test a truncated
		// sequence at the end of the input would call fill forever.
		if w == 1 && (s.err != nil || s.r+utf8.UTFMax <= s.w || utf8.FullRune(s.buf[s.r:s.w])) {
			s.r++
			panic("invalid UTF-8 encoding")
		}

		// not enough bytes to decide: get more and try again
		s.fill()
	}
}

// fill makes room in the buffer and reads more bytes from the
// underlying reader. It returns once at least one byte was read or the
// reader reported an error, which is recorded in s.err. It panics if
// the reader returns a negative count or makes no progress in 100
// consecutive reads.
func (s *source) fill() {
	// Slide unread bytes to beginning but preserve last read char
	// (for one ungetr call) plus one extra byte (for a 2nd ungetr
	// call, only for ".." character sequence).
	if s.r0 > 1 {
		// save literal prefix, if any
		// (We see at most one ungetr call while reading
		// a literal, so make sure s.r0 remains in buf.)
		if s.suf >= 0 {
			s.lit = append(s.lit, s.buf[s.suf:s.r0]...)
			s.suf = 1 // == s.r0 after slide below
		}
		// Move everything from the byte before r0 onward to the
		// start of buf, so that buf[0] is the extra byte, buf[1:r]
		// the last read char, and buf[r:w] the unread bytes.
		n := s.r0 - 1
		copy(s.buf[:], s.buf[n:s.w])
		s.offs += n
		s.r0 = 1 // == s.r0 - n
		s.r -= n
		s.w -= n
	}

	// read more data: try a limited number of times
	for i := 100; i > 0; i-- {
		n, err := s.src.Read(s.buf[s.w : len(s.buf)-1]) // -1 to leave space for sentinel
		if n < 0 {
			panic("negative read")
		}
		s.w += n
		if n > 0 || err != nil {
			s.buf[s.w] = utf8.RuneSelf // sentinel
			if err != nil {
				s.err = err
			}
			return
		}
	}

	panic("no progress")
}

// startLit begins collecting a literal that starts with the most
// recently read character (the one a call to ungetr would give back).
func (s *source) startLit() {
	s.suf = s.r0
	s.lit = s.lit[:0] // reuse lit
}

// stopLit ends literal collection and returns the literal's text: the
// prefix saved by fill (if the buffer was refilled meanwhile) followed
// by the buffered bytes up to the current read position.
func (s *source) stopLit() string {
	lit := s.buf[s.suf:s.r]
	if len(s.lit) > 0 {
		lit = append(s.lit, lit...)
	}
	s.suf = -1 // no pending literal
	return string(lit)
}
|