Source file
src/regexp/backtrack.go
Documentation: regexp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package regexp
16
17 import "regexp/syntax"
18
19
20
21 type job struct {
22 pc uint32
23 arg bool
24 pos int
25 }
26
27 const (
28 visitedBits = 32
29 maxBacktrackProg = 500
30 maxBacktrackVector = 256 * 1024
31 )
32
33
34 type bitState struct {
35 prog *syntax.Prog
36
37 end int
38 cap []int
39 jobs []job
40 visited []uint32
41 }
42
43 var notBacktrack *bitState = nil
44
45
46
47 func maxBitStateLen(prog *syntax.Prog) int {
48 if !shouldBacktrack(prog) {
49 return 0
50 }
51 return maxBacktrackVector / len(prog.Inst)
52 }
53
54
55
56
57 func newBitState(prog *syntax.Prog) *bitState {
58 if !shouldBacktrack(prog) {
59 return notBacktrack
60 }
61 return &bitState{
62 prog: prog,
63 }
64 }
65
66
67
68 func shouldBacktrack(prog *syntax.Prog) bool {
69 return len(prog.Inst) <= maxBacktrackProg
70 }
71
72
73
74
75 func (b *bitState) reset(end int, ncap int) {
76 b.end = end
77
78 if cap(b.jobs) == 0 {
79 b.jobs = make([]job, 0, 256)
80 } else {
81 b.jobs = b.jobs[:0]
82 }
83
84 visitedSize := (len(b.prog.Inst)*(end+1) + visitedBits - 1) / visitedBits
85 if cap(b.visited) < visitedSize {
86 b.visited = make([]uint32, visitedSize, maxBacktrackVector/visitedBits)
87 } else {
88 b.visited = b.visited[:visitedSize]
89 for i := range b.visited {
90 b.visited[i] = 0
91 }
92 }
93
94 if cap(b.cap) < ncap {
95 b.cap = make([]int, ncap)
96 } else {
97 b.cap = b.cap[:ncap]
98 }
99 for i := range b.cap {
100 b.cap[i] = -1
101 }
102 }
103
104
105
106 func (b *bitState) shouldVisit(pc uint32, pos int) bool {
107 n := uint(int(pc)*(b.end+1) + pos)
108 if b.visited[n/visitedBits]&(1<<(n&(visitedBits-1))) != 0 {
109 return false
110 }
111 b.visited[n/visitedBits] |= 1 << (n & (visitedBits - 1))
112 return true
113 }
114
115
116
117 func (b *bitState) push(pc uint32, pos int, arg bool) {
118
119
120 if b.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) {
121 b.jobs = append(b.jobs, job{pc: pc, arg: arg, pos: pos})
122 }
123 }
124
125
126 func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
127 longest := m.re.longest
128 m.matched = false
129
130 b.push(pc, pos, false)
131 for len(b.jobs) > 0 {
132 l := len(b.jobs) - 1
133
134 pc := b.jobs[l].pc
135 pos := b.jobs[l].pos
136 arg := b.jobs[l].arg
137 b.jobs = b.jobs[:l]
138
139
140
141
142
143
144
145
146 goto Skip
147 CheckAndLoop:
148 if !b.shouldVisit(pc, pos) {
149 continue
150 }
151 Skip:
152
153 inst := b.prog.Inst[pc]
154
155 switch inst.Op {
156 default:
157 panic("bad inst")
158 case syntax.InstFail:
159 panic("unexpected InstFail")
160 case syntax.InstAlt:
161
162
163
164
165
166
167
168
169 if arg {
170
171 arg = false
172 pc = inst.Arg
173 goto CheckAndLoop
174 } else {
175 b.push(pc, pos, true)
176 pc = inst.Out
177 goto CheckAndLoop
178 }
179
180 case syntax.InstAltMatch:
181
182 switch b.prog.Inst[inst.Out].Op {
183 case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
184
185 b.push(inst.Arg, pos, false)
186 pc = inst.Arg
187 pos = b.end
188 goto CheckAndLoop
189 }
190
191 b.push(inst.Out, b.end, false)
192 pc = inst.Out
193 goto CheckAndLoop
194
195 case syntax.InstRune:
196 r, width := i.step(pos)
197 if !inst.MatchRune(r) {
198 continue
199 }
200 pos += width
201 pc = inst.Out
202 goto CheckAndLoop
203
204 case syntax.InstRune1:
205 r, width := i.step(pos)
206 if r != inst.Rune[0] {
207 continue
208 }
209 pos += width
210 pc = inst.Out
211 goto CheckAndLoop
212
213 case syntax.InstRuneAnyNotNL:
214 r, width := i.step(pos)
215 if r == '\n' || r == endOfText {
216 continue
217 }
218 pos += width
219 pc = inst.Out
220 goto CheckAndLoop
221
222 case syntax.InstRuneAny:
223 r, width := i.step(pos)
224 if r == endOfText {
225 continue
226 }
227 pos += width
228 pc = inst.Out
229 goto CheckAndLoop
230
231 case syntax.InstCapture:
232 if arg {
233
234 b.cap[inst.Arg] = pos
235 continue
236 } else {
237 if 0 <= inst.Arg && inst.Arg < uint32(len(b.cap)) {
238
239 b.push(pc, b.cap[inst.Arg], true)
240 b.cap[inst.Arg] = pos
241 }
242 pc = inst.Out
243 goto CheckAndLoop
244 }
245
246 case syntax.InstEmptyWidth:
247 if syntax.EmptyOp(inst.Arg)&^i.context(pos) != 0 {
248 continue
249 }
250 pc = inst.Out
251 goto CheckAndLoop
252
253 case syntax.InstNop:
254 pc = inst.Out
255 goto CheckAndLoop
256
257 case syntax.InstMatch:
258
259
260 if len(b.cap) == 0 {
261 m.matched = true
262 return m.matched
263 }
264
265
266
267
268 if len(b.cap) > 1 {
269 b.cap[1] = pos
270 }
271 if !m.matched || (longest && pos > 0 && pos > m.matchcap[1]) {
272 copy(m.matchcap, b.cap)
273 }
274 m.matched = true
275
276
277 if !longest {
278 return m.matched
279 }
280
281
282 if pos == b.end {
283 return m.matched
284 }
285
286
287 continue
288 }
289 }
290
291 return m.matched
292 }
293
294
295 func (m *machine) backtrack(i input, pos int, end int, ncap int) bool {
296 if !i.canCheckPrefix() {
297 panic("backtrack called for a RuneReader")
298 }
299
300 startCond := m.re.cond
301 if startCond == ^syntax.EmptyOp(0) {
302 return false
303 }
304 if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
305
306 return false
307 }
308
309 b := m.b
310 b.reset(end, ncap)
311
312 m.matchcap = m.matchcap[:ncap]
313 for i := range m.matchcap {
314 m.matchcap[i] = -1
315 }
316
317
318 if startCond&syntax.EmptyBeginText != 0 {
319 if len(b.cap) > 0 {
320 b.cap[0] = pos
321 }
322 return m.tryBacktrack(b, i, uint32(m.p.Start), pos)
323 }
324
325
326
327
328
329
330
331 width := -1
332 for ; pos <= end && width != 0; pos += width {
333 if len(m.re.prefix) > 0 {
334
335 advance := i.index(m.re, pos)
336 if advance < 0 {
337 return false
338 }
339 pos += advance
340 }
341
342 if len(b.cap) > 0 {
343 b.cap[0] = pos
344 }
345 if m.tryBacktrack(b, i, uint32(m.p.Start), pos) {
346
347 return true
348 }
349 _, width = i.step(pos)
350 }
351 return false
352 }
353
View as plain text