1
2
3
4
5 package syntax
6
7 import (
8 "bytes"
9 "strconv"
10 "unicode"
11 )
12
13
14
15
16
17 type Prog struct {
18 Inst []Inst
19 Start int
20 NumCap int
21 }
22
23
24 type InstOp uint8
25
26 const (
27 InstAlt InstOp = iota
28 InstAltMatch
29 InstCapture
30 InstEmptyWidth
31 InstMatch
32 InstFail
33 InstNop
34 InstRune
35 InstRune1
36 InstRuneAny
37 InstRuneAnyNotNL
38 )
39
40 var instOpNames = []string{
41 "InstAlt",
42 "InstAltMatch",
43 "InstCapture",
44 "InstEmptyWidth",
45 "InstMatch",
46 "InstFail",
47 "InstNop",
48 "InstRune",
49 "InstRune1",
50 "InstRuneAny",
51 "InstRuneAnyNotNL",
52 }
53
54 func (i InstOp) String() string {
55 if uint(i) >= uint(len(instOpNames)) {
56 return ""
57 }
58 return instOpNames[i]
59 }
60
61
62 type EmptyOp uint8
63
64 const (
65 EmptyBeginLine EmptyOp = 1 << iota
66 EmptyEndLine
67 EmptyBeginText
68 EmptyEndText
69 EmptyWordBoundary
70 EmptyNoWordBoundary
71 )
72
73
74
75
76
77
78
79 func EmptyOpContext(r1, r2 rune) EmptyOp {
80 var op EmptyOp = EmptyNoWordBoundary
81 var boundary byte
82 switch {
83 case IsWordChar(r1):
84 boundary = 1
85 case r1 == '\n':
86 op |= EmptyBeginLine
87 case r1 < 0:
88 op |= EmptyBeginText | EmptyBeginLine
89 }
90 switch {
91 case IsWordChar(r2):
92 boundary ^= 1
93 case r2 == '\n':
94 op |= EmptyEndLine
95 case r2 < 0:
96 op |= EmptyEndText | EmptyEndLine
97 }
98 if boundary != 0 {
99 op ^= (EmptyWordBoundary | EmptyNoWordBoundary)
100 }
101 return op
102 }
103
104
105
106
107 func IsWordChar(r rune) bool {
108 return 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '_'
109 }
110
111
112 type Inst struct {
113 Op InstOp
114 Out uint32
115 Arg uint32
116 Rune []rune
117 }
118
119 func (p *Prog) String() string {
120 var b bytes.Buffer
121 dumpProg(&b, p)
122 return b.String()
123 }
124
125
126
127 func (p *Prog) skipNop(pc uint32) (*Inst, uint32) {
128 i := &p.Inst[pc]
129 for i.Op == InstNop || i.Op == InstCapture {
130 pc = i.Out
131 i = &p.Inst[pc]
132 }
133 return i, pc
134 }
135
136
137 func (i *Inst) op() InstOp {
138 op := i.Op
139 switch op {
140 case InstRune1, InstRuneAny, InstRuneAnyNotNL:
141 op = InstRune
142 }
143 return op
144 }
145
146
147
148
149 func (p *Prog) Prefix() (prefix string, complete bool) {
150 i, _ := p.skipNop(uint32(p.Start))
151
152
153 if i.op() != InstRune || len(i.Rune) != 1 {
154 return "", i.Op == InstMatch
155 }
156
157
158 var buf bytes.Buffer
159 for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 {
160 buf.WriteRune(i.Rune[0])
161 i, _ = p.skipNop(i.Out)
162 }
163 return buf.String(), i.Op == InstMatch
164 }
165
166
167
168 func (p *Prog) StartCond() EmptyOp {
169 var flag EmptyOp
170 pc := uint32(p.Start)
171 i := &p.Inst[pc]
172 Loop:
173 for {
174 switch i.Op {
175 case InstEmptyWidth:
176 flag |= EmptyOp(i.Arg)
177 case InstFail:
178 return ^EmptyOp(0)
179 case InstCapture, InstNop:
180
181 default:
182 break Loop
183 }
184 pc = i.Out
185 i = &p.Inst[pc]
186 }
187 return flag
188 }
189
190 const noMatch = -1
191
192
193
194 func (i *Inst) MatchRune(r rune) bool {
195 return i.MatchRunePos(r) != noMatch
196 }
197
198
199
200
201
202
203 func (i *Inst) MatchRunePos(r rune) int {
204 rune := i.Rune
205
206
207 if len(rune) == 1 {
208 r0 := rune[0]
209 if r == r0 {
210 return 0
211 }
212 if Flags(i.Arg)&FoldCase != 0 {
213 for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
214 if r == r1 {
215 return 0
216 }
217 }
218 }
219 return noMatch
220 }
221
222
223
224 for j := 0; j < len(rune) && j <= 8; j += 2 {
225 if r < rune[j] {
226 return noMatch
227 }
228 if r <= rune[j+1] {
229 return j / 2
230 }
231 }
232
233
234 lo := 0
235 hi := len(rune) / 2
236 for lo < hi {
237 m := lo + (hi-lo)/2
238 if c := rune[2*m]; c <= r {
239 if r <= rune[2*m+1] {
240 return m
241 }
242 lo = m + 1
243 } else {
244 hi = m
245 }
246 }
247 return noMatch
248 }
249
250
251
252
253 func (i *Inst) MatchEmptyWidth(before rune, after rune) bool {
254 switch EmptyOp(i.Arg) {
255 case EmptyBeginLine:
256 return before == '\n' || before == -1
257 case EmptyEndLine:
258 return after == '\n' || after == -1
259 case EmptyBeginText:
260 return before == -1
261 case EmptyEndText:
262 return after == -1
263 case EmptyWordBoundary:
264 return IsWordChar(before) != IsWordChar(after)
265 case EmptyNoWordBoundary:
266 return IsWordChar(before) == IsWordChar(after)
267 }
268 panic("unknown empty width arg")
269 }
270
271 func (i *Inst) String() string {
272 var b bytes.Buffer
273 dumpInst(&b, i)
274 return b.String()
275 }
276
277 func bw(b *bytes.Buffer, args ...string) {
278 for _, s := range args {
279 b.WriteString(s)
280 }
281 }
282
283 func dumpProg(b *bytes.Buffer, p *Prog) {
284 for j := range p.Inst {
285 i := &p.Inst[j]
286 pc := strconv.Itoa(j)
287 if len(pc) < 3 {
288 b.WriteString(" "[len(pc):])
289 }
290 if j == p.Start {
291 pc += "*"
292 }
293 bw(b, pc, "\t")
294 dumpInst(b, i)
295 bw(b, "\n")
296 }
297 }
298
299 func u32(i uint32) string {
300 return strconv.FormatUint(uint64(i), 10)
301 }
302
303 func dumpInst(b *bytes.Buffer, i *Inst) {
304 switch i.Op {
305 case InstAlt:
306 bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg))
307 case InstAltMatch:
308 bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg))
309 case InstCapture:
310 bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out))
311 case InstEmptyWidth:
312 bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out))
313 case InstMatch:
314 bw(b, "match")
315 case InstFail:
316 bw(b, "fail")
317 case InstNop:
318 bw(b, "nop -> ", u32(i.Out))
319 case InstRune:
320 if i.Rune == nil {
321
322 bw(b, "rune <nil>")
323 }
324 bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune)))
325 if Flags(i.Arg)&FoldCase != 0 {
326 bw(b, "/i")
327 }
328 bw(b, " -> ", u32(i.Out))
329 case InstRune1:
330 bw(b, "rune1 ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out))
331 case InstRuneAny:
332 bw(b, "any -> ", u32(i.Out))
333 case InstRuneAnyNotNL:
334 bw(b, "anynotnl -> ", u32(i.Out))
335 }
336 }
337
View as plain text