1
2
3
4
5 package parse
6
7 import (
8 "fmt"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
15 type item struct {
16 typ itemType
17 pos Pos
18 val string
19 line int
20 }
21
22 func (i item) String() string {
23 switch {
24 case i.typ == itemEOF:
25 return "EOF"
26 case i.typ == itemError:
27 return i.val
28 case i.typ > itemKeyword:
29 return fmt.Sprintf("<%s>", i.val)
30 case len(i.val) > 10:
31 return fmt.Sprintf("%.10q...", i.val)
32 }
33 return fmt.Sprintf("%q", i.val)
34 }
35
36
// itemType identifies the kind of a lexed item.
type itemType int

// The order of these constants matters: every keyword type is declared
// after itemKeyword so that "typ > itemKeyword" identifies keywords.
const (
	itemError        itemType = iota // lexing error; the item value is the message
	itemBool                         // the words "true" and "false"
	itemChar                         // any other printable ASCII character inside an action
	itemCharConstant                 // single-quoted character constant
	itemComplex                      // number followed by a signed part ending in 'i'
	itemColonEquals                  // ":="
	itemEOF                          // end of input
	itemField                        // '.' followed by an alphanumeric name
	itemIdentifier                   // alphanumeric word that is not a keyword or bool
	itemLeftDelim                    // opening action delimiter
	itemLeftParen                    // '('
	itemNumber                       // numeric literal
	itemPipe                         // '|'
	itemRawString                    // back-quoted string, quotes included
	itemRightDelim                   // closing action delimiter
	itemRightParen                   // ')'
	itemSpace                        // run of spaces and tabs inside an action
	itemString                       // double-quoted string, quotes included
	itemText                         // text outside of actions
	itemVariable                     // '$' optionally followed by an alphanumeric name

	itemKeyword  // marker only: separates the keyword types below from the rest
	itemBlock    // "block"
	itemDot      // "."
	itemDefine   // "define"
	itemElse     // "else"
	itemEnd      // "end"
	itemIf       // "if"
	itemNil      // "nil"
	itemRange    // "range"
	itemTemplate // "template"
	itemWith     // "with"
)
73
74 var key = map[string]itemType{
75 ".": itemDot,
76 "block": itemBlock,
77 "define": itemDefine,
78 "else": itemElse,
79 "end": itemEnd,
80 "if": itemIf,
81 "range": itemRange,
82 "nil": itemNil,
83 "template": itemTemplate,
84 "with": itemWith,
85 }
86
// eof is the sentinel that next returns once the input is exhausted.
const eof = -1
88
89
90
91
92
93
94
95
96
97 const (
98 spaceChars = " \t\r\n"
99 leftTrimMarker = "- "
100 rightTrimMarker = " -"
101 trimMarkerLen = Pos(len(leftTrimMarker))
102 )
103
104
105 type stateFn func(*lexer) stateFn
106
107
108 type lexer struct {
109 name string
110 input string
111 leftDelim string
112 rightDelim string
113 pos Pos
114 start Pos
115 width Pos
116 items chan item
117 parenDepth int
118 line int
119 }
120
121
122 func (l *lexer) next() rune {
123 if int(l.pos) >= len(l.input) {
124 l.width = 0
125 return eof
126 }
127 r, w := utf8.DecodeRuneInString(l.input[l.pos:])
128 l.width = Pos(w)
129 l.pos += l.width
130 if r == '\n' {
131 l.line++
132 }
133 return r
134 }
135
136
137 func (l *lexer) peek() rune {
138 r := l.next()
139 l.backup()
140 return r
141 }
142
143
144 func (l *lexer) backup() {
145 l.pos -= l.width
146
147 if l.width == 1 && l.input[l.pos] == '\n' {
148 l.line--
149 }
150 }
151
152
153 func (l *lexer) emit(t itemType) {
154 l.items <- item{t, l.start, l.input[l.start:l.pos], l.line}
155
156 switch t {
157 case itemText, itemRawString, itemLeftDelim, itemRightDelim:
158 l.line += strings.Count(l.input[l.start:l.pos], "\n")
159 }
160 l.start = l.pos
161 }
162
163
164 func (l *lexer) ignore() {
165 l.line += strings.Count(l.input[l.start:l.pos], "\n")
166 l.start = l.pos
167 }
168
169
170 func (l *lexer) accept(valid string) bool {
171 if strings.ContainsRune(valid, l.next()) {
172 return true
173 }
174 l.backup()
175 return false
176 }
177
178
179 func (l *lexer) acceptRun(valid string) {
180 for strings.ContainsRune(valid, l.next()) {
181 }
182 l.backup()
183 }
184
185
186
187 func (l *lexer) errorf(format string, args ...interface{}) stateFn {
188 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.line}
189 return nil
190 }
191
192
193
194 func (l *lexer) nextItem() item {
195 return <-l.items
196 }
197
198
199
200 func (l *lexer) drain() {
201 for range l.items {
202 }
203 }
204
205
206 func lex(name, input, left, right string) *lexer {
207 if left == "" {
208 left = leftDelim
209 }
210 if right == "" {
211 right = rightDelim
212 }
213 l := &lexer{
214 name: name,
215 input: input,
216 leftDelim: left,
217 rightDelim: right,
218 items: make(chan item),
219 line: 1,
220 }
221 go l.run()
222 return l
223 }
224
225
226 func (l *lexer) run() {
227 for state := lexText; state != nil; {
228 state = state(l)
229 }
230 close(l.items)
231 }
232
233
234
// Default action delimiters and the comment markers recognised directly
// inside an action.
const (
	leftDelim    = "{{" // used by lex when no left delimiter is given
	rightDelim   = "}}" // used by lex when no right delimiter is given
	leftComment  = "/*"
	rightComment = "*/"
)
241
242
243 func lexText(l *lexer) stateFn {
244 l.width = 0
245 if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
246 ldn := Pos(len(l.leftDelim))
247 l.pos += Pos(x)
248 trimLength := Pos(0)
249 if strings.HasPrefix(l.input[l.pos+ldn:], leftTrimMarker) {
250 trimLength = rightTrimLength(l.input[l.start:l.pos])
251 }
252 l.pos -= trimLength
253 if l.pos > l.start {
254 l.emit(itemText)
255 }
256 l.pos += trimLength
257 l.ignore()
258 return lexLeftDelim
259 } else {
260 l.pos = Pos(len(l.input))
261 }
262
263 if l.pos > l.start {
264 l.emit(itemText)
265 }
266 l.emit(itemEOF)
267 return nil
268 }
269
270
271 func rightTrimLength(s string) Pos {
272 return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
273 }
274
275
276 func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
277 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
278 return true, false
279 }
280
281 if strings.HasPrefix(l.input[l.pos:], rightTrimMarker) &&
282 strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) {
283 return true, true
284 }
285 return false, false
286 }
287
288
289 func leftTrimLength(s string) Pos {
290 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
291 }
292
293
294 func lexLeftDelim(l *lexer) stateFn {
295 l.pos += Pos(len(l.leftDelim))
296 trimSpace := strings.HasPrefix(l.input[l.pos:], leftTrimMarker)
297 afterMarker := Pos(0)
298 if trimSpace {
299 afterMarker = trimMarkerLen
300 }
301 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
302 l.pos += afterMarker
303 l.ignore()
304 return lexComment
305 }
306 l.emit(itemLeftDelim)
307 l.pos += afterMarker
308 l.ignore()
309 l.parenDepth = 0
310 return lexInsideAction
311 }
312
313
314 func lexComment(l *lexer) stateFn {
315 l.pos += Pos(len(leftComment))
316 i := strings.Index(l.input[l.pos:], rightComment)
317 if i < 0 {
318 return l.errorf("unclosed comment")
319 }
320 l.pos += Pos(i + len(rightComment))
321 delim, trimSpace := l.atRightDelim()
322 if !delim {
323 return l.errorf("comment ends before closing delimiter")
324 }
325 if trimSpace {
326 l.pos += trimMarkerLen
327 }
328 l.pos += Pos(len(l.rightDelim))
329 if trimSpace {
330 l.pos += leftTrimLength(l.input[l.pos:])
331 }
332 l.ignore()
333 return lexText
334 }
335
336
337 func lexRightDelim(l *lexer) stateFn {
338 trimSpace := strings.HasPrefix(l.input[l.pos:], rightTrimMarker)
339 if trimSpace {
340 l.pos += trimMarkerLen
341 l.ignore()
342 }
343 l.pos += Pos(len(l.rightDelim))
344 l.emit(itemRightDelim)
345 if trimSpace {
346 l.pos += leftTrimLength(l.input[l.pos:])
347 l.ignore()
348 }
349 return lexText
350 }
351
352
353 func lexInsideAction(l *lexer) stateFn {
354
355
356
357 delim, _ := l.atRightDelim()
358 if delim {
359 if l.parenDepth == 0 {
360 return lexRightDelim
361 }
362 return l.errorf("unclosed left paren")
363 }
364 switch r := l.next(); {
365 case r == eof || isEndOfLine(r):
366 return l.errorf("unclosed action")
367 case isSpace(r):
368 return lexSpace
369 case r == ':':
370 if l.next() != '=' {
371 return l.errorf("expected :=")
372 }
373 l.emit(itemColonEquals)
374 case r == '|':
375 l.emit(itemPipe)
376 case r == '"':
377 return lexQuote
378 case r == '`':
379 return lexRawQuote
380 case r == '$':
381 return lexVariable
382 case r == '\'':
383 return lexChar
384 case r == '.':
385
386 if l.pos < Pos(len(l.input)) {
387 r := l.input[l.pos]
388 if r < '0' || '9' < r {
389 return lexField
390 }
391 }
392 fallthrough
393 case r == '+' || r == '-' || ('0' <= r && r <= '9'):
394 l.backup()
395 return lexNumber
396 case isAlphaNumeric(r):
397 l.backup()
398 return lexIdentifier
399 case r == '(':
400 l.emit(itemLeftParen)
401 l.parenDepth++
402 case r == ')':
403 l.emit(itemRightParen)
404 l.parenDepth--
405 if l.parenDepth < 0 {
406 return l.errorf("unexpected right paren %#U", r)
407 }
408 case r <= unicode.MaxASCII && unicode.IsPrint(r):
409 l.emit(itemChar)
410 return lexInsideAction
411 default:
412 return l.errorf("unrecognized character in action: %#U", r)
413 }
414 return lexInsideAction
415 }
416
417
418
419 func lexSpace(l *lexer) stateFn {
420 for isSpace(l.peek()) {
421 l.next()
422 }
423 l.emit(itemSpace)
424 return lexInsideAction
425 }
426
427
428 func lexIdentifier(l *lexer) stateFn {
429 Loop:
430 for {
431 switch r := l.next(); {
432 case isAlphaNumeric(r):
433
434 default:
435 l.backup()
436 word := l.input[l.start:l.pos]
437 if !l.atTerminator() {
438 return l.errorf("bad character %#U", r)
439 }
440 switch {
441 case key[word] > itemKeyword:
442 l.emit(key[word])
443 case word[0] == '.':
444 l.emit(itemField)
445 case word == "true", word == "false":
446 l.emit(itemBool)
447 default:
448 l.emit(itemIdentifier)
449 }
450 break Loop
451 }
452 }
453 return lexInsideAction
454 }
455
456
457
458 func lexField(l *lexer) stateFn {
459 return lexFieldOrVariable(l, itemField)
460 }
461
462
463
464 func lexVariable(l *lexer) stateFn {
465 if l.atTerminator() {
466 l.emit(itemVariable)
467 return lexInsideAction
468 }
469 return lexFieldOrVariable(l, itemVariable)
470 }
471
472
473
474 func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
475 if l.atTerminator() {
476 if typ == itemVariable {
477 l.emit(itemVariable)
478 } else {
479 l.emit(itemDot)
480 }
481 return lexInsideAction
482 }
483 var r rune
484 for {
485 r = l.next()
486 if !isAlphaNumeric(r) {
487 l.backup()
488 break
489 }
490 }
491 if !l.atTerminator() {
492 return l.errorf("bad character %#U", r)
493 }
494 l.emit(typ)
495 return lexInsideAction
496 }
497
498
499
500
501
502 func (l *lexer) atTerminator() bool {
503 r := l.peek()
504 if isSpace(r) || isEndOfLine(r) {
505 return true
506 }
507 switch r {
508 case eof, '.', ',', '|', ':', ')', '(':
509 return true
510 }
511
512
513
514 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
515 return true
516 }
517 return false
518 }
519
520
521
522 func lexChar(l *lexer) stateFn {
523 Loop:
524 for {
525 switch l.next() {
526 case '\\':
527 if r := l.next(); r != eof && r != '\n' {
528 break
529 }
530 fallthrough
531 case eof, '\n':
532 return l.errorf("unterminated character constant")
533 case '\'':
534 break Loop
535 }
536 }
537 l.emit(itemCharConstant)
538 return lexInsideAction
539 }
540
541
542
543
544
545 func lexNumber(l *lexer) stateFn {
546 if !l.scanNumber() {
547 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
548 }
549 if sign := l.peek(); sign == '+' || sign == '-' {
550
551 if !l.scanNumber() || l.input[l.pos-1] != 'i' {
552 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
553 }
554 l.emit(itemComplex)
555 } else {
556 l.emit(itemNumber)
557 }
558 return lexInsideAction
559 }
560
561 func (l *lexer) scanNumber() bool {
562
563 l.accept("+-")
564
565 digits := "0123456789"
566 if l.accept("0") && l.accept("xX") {
567 digits = "0123456789abcdefABCDEF"
568 }
569 l.acceptRun(digits)
570 if l.accept(".") {
571 l.acceptRun(digits)
572 }
573 if l.accept("eE") {
574 l.accept("+-")
575 l.acceptRun("0123456789")
576 }
577
578 l.accept("i")
579
580 if isAlphaNumeric(l.peek()) {
581 l.next()
582 return false
583 }
584 return true
585 }
586
587
588 func lexQuote(l *lexer) stateFn {
589 Loop:
590 for {
591 switch l.next() {
592 case '\\':
593 if r := l.next(); r != eof && r != '\n' {
594 break
595 }
596 fallthrough
597 case eof, '\n':
598 return l.errorf("unterminated quoted string")
599 case '"':
600 break Loop
601 }
602 }
603 l.emit(itemString)
604 return lexInsideAction
605 }
606
607
608 func lexRawQuote(l *lexer) stateFn {
609 startLine := l.line
610 Loop:
611 for {
612 switch l.next() {
613 case eof:
614
615
616 l.line = startLine
617 return l.errorf("unterminated raw quoted string")
618 case '`':
619 break Loop
620 }
621 }
622 l.emit(itemRawString)
623 return lexInsideAction
624 }
625
626
// isSpace reports whether r is a space or a tab. Line terminators do not
// count as space.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
630
631
// isEndOfLine reports whether r is a carriage return or a line feed.
func isEndOfLine(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	}
	return false
}
635
636
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
640
View as plain text