1
2
3
4
5
6
7 package utf8
8
9
10
11
12
13
// Numbers fundamental to the encoding.
const (
	RuneError = '\uFFFD'     // the "error" rune (U+FFFD) that the decoders return for invalid or short input
	RuneSelf  = 0x80         // bytes below RuneSelf are single-byte runes that stand for themselves
	MaxRune   = '\U0010FFFF' // largest code point accepted by ValidRune, RuneLen and EncodeRune
	UTFMax    = 4            // maximum number of bytes in one encoded rune
)
20
21
// Code points in the surrogate range [surrogateMin, surrogateMax] are not
// valid runes: ValidRune and RuneLen reject them and EncodeRune replaces
// them with RuneError.
const (
	surrogateMin = 0xD800
	surrogateMax = 0xDFFF
)
26
// Bit patterns, limits and table codes used by the encoder and decoders.
const (
	// Tag bits: tx marks a continuation byte, t2/t3/t4 mark the lead byte of
	// a 2-, 3- and 4-byte sequence (see EncodeRune). t1 and t5 are not
	// referenced by the code in this file.
	t1 = 0x00 // 0000 0000
	tx = 0x80 // 1000 0000
	t2 = 0xC0 // 1100 0000
	t3 = 0xE0 // 1110 0000
	t4 = 0xF0 // 1111 0000
	t5 = 0xF8 // 1111 1000

	// Payload masks: maskx keeps the 6 data bits of a continuation byte,
	// mask2/mask3/mask4 keep the data bits of a 2-, 3- and 4-byte lead byte.
	maskx = 0x3F // 0011 1111
	mask2 = 0x1F // 0001 1111
	mask3 = 0x0F // 0000 1111
	mask4 = 0x07 // 0000 0111

	// Largest code point that fits in a 1-, 2- and 3-byte encoding.
	rune1Max = 1<<7 - 1
	rune2Max = 1<<11 - 1
	rune3Max = 1<<16 - 1

	// The default lowest and highest continuation byte.
	locb = 0x80 // 1000 0000
	hicb = 0xBF // 1011 1111

	// Codes stored in the first table. The names are short so the table
	// lines up. The high nibble is an index into acceptRanges, or F for the
	// two single-byte cases. The low three bits (x&7) give the sequence
	// length for s1..s7; for the single-byte cases the lowest bit tells
	// valid ASCII (0) from an invalid lead byte (1), which DecodeRune turns
	// into a mask.
	xx = 0xF1 // invalid lead byte: decodes as RuneError, width 1
	as = 0xF0 // ASCII: decodes as itself, width 1
	s1 = 0x02 // accept 0, size 2
	s2 = 0x13 // accept 1, size 3
	s3 = 0x03 // accept 0, size 3
	s4 = 0x23 // accept 2, size 3
	s5 = 0x34 // accept 3, size 4
	s6 = 0x04 // accept 0, size 4
	s7 = 0x44 // accept 4, size 4
)
62
63
// first holds, for every possible first byte of a sequence, one of the codes
// as, xx or s1..s7: whether the byte is ASCII, an invalid lead byte, or the
// start of a multi-byte sequence, and in the last case the sequence length
// and which acceptRanges entry constrains the second byte.
var first = [256]uint8{
	//   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F
	//   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F (continuation bytes cannot start a rune)
	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F
	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF
	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF
	xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF (0xC0, 0xC1 are rejected)
	s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF
	s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF (0xE0 and 0xED use narrowed ranges)
	s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF (0xF5 and above are rejected)
}
84
85
86
// acceptRange gives the inclusive range [lo, hi] of values permitted for the
// second byte of a multi-byte UTF-8 sequence.
type acceptRange struct {
	lo, hi uint8 // lowest and highest acceptable second byte
}
91
92 var acceptRanges = [...]acceptRange{
93 0: {locb, hicb},
94 1: {0xA0, hicb},
95 2: {locb, 0x9F},
96 3: {0x90, hicb},
97 4: {locb, 0x8F},
98 }
99
100
101
102 func FullRune(p []byte) bool {
103 n := len(p)
104 if n == 0 {
105 return false
106 }
107 x := first[p[0]]
108 if n >= int(x&7) {
109 return true
110 }
111
112 accept := acceptRanges[x>>4]
113 if n > 1 && (p[1] < accept.lo || accept.hi < p[1]) {
114 return true
115 } else if n > 2 && (p[2] < locb || hicb < p[2]) {
116 return true
117 }
118 return false
119 }
120
121
122 func FullRuneInString(s string) bool {
123 n := len(s)
124 if n == 0 {
125 return false
126 }
127 x := first[s[0]]
128 if n >= int(x&7) {
129 return true
130 }
131
132 accept := acceptRanges[x>>4]
133 if n > 1 && (s[1] < accept.lo || accept.hi < s[1]) {
134 return true
135 } else if n > 2 && (s[2] < locb || hicb < s[2]) {
136 return true
137 }
138 return false
139 }
140
141
142
143
144
145
146
147
148
149 func DecodeRune(p []byte) (r rune, size int) {
150 n := len(p)
151 if n < 1 {
152 return RuneError, 0
153 }
154 p0 := p[0]
155 x := first[p0]
156 if x >= as {
157
158
159
160 mask := rune(x) << 31 >> 31
161 return rune(p[0])&^mask | RuneError&mask, 1
162 }
163 sz := x & 7
164 accept := acceptRanges[x>>4]
165 if n < int(sz) {
166 return RuneError, 1
167 }
168 b1 := p[1]
169 if b1 < accept.lo || accept.hi < b1 {
170 return RuneError, 1
171 }
172 if sz == 2 {
173 return rune(p0&mask2)<<6 | rune(b1&maskx), 2
174 }
175 b2 := p[2]
176 if b2 < locb || hicb < b2 {
177 return RuneError, 1
178 }
179 if sz == 3 {
180 return rune(p0&mask3)<<12 | rune(b1&maskx)<<6 | rune(b2&maskx), 3
181 }
182 b3 := p[3]
183 if b3 < locb || hicb < b3 {
184 return RuneError, 1
185 }
186 return rune(p0&mask4)<<18 | rune(b1&maskx)<<12 | rune(b2&maskx)<<6 | rune(b3&maskx), 4
187 }
188
189
190
191
192
193
194
195
196
197 func DecodeRuneInString(s string) (r rune, size int) {
198 n := len(s)
199 if n < 1 {
200 return RuneError, 0
201 }
202 s0 := s[0]
203 x := first[s0]
204 if x >= as {
205
206
207
208 mask := rune(x) << 31 >> 31
209 return rune(s[0])&^mask | RuneError&mask, 1
210 }
211 sz := x & 7
212 accept := acceptRanges[x>>4]
213 if n < int(sz) {
214 return RuneError, 1
215 }
216 s1 := s[1]
217 if s1 < accept.lo || accept.hi < s1 {
218 return RuneError, 1
219 }
220 if sz == 2 {
221 return rune(s0&mask2)<<6 | rune(s1&maskx), 2
222 }
223 s2 := s[2]
224 if s2 < locb || hicb < s2 {
225 return RuneError, 1
226 }
227 if sz == 3 {
228 return rune(s0&mask3)<<12 | rune(s1&maskx)<<6 | rune(s2&maskx), 3
229 }
230 s3 := s[3]
231 if s3 < locb || hicb < s3 {
232 return RuneError, 1
233 }
234 return rune(s0&mask4)<<18 | rune(s1&maskx)<<12 | rune(s2&maskx)<<6 | rune(s3&maskx), 4
235 }
236
237
238
239
240
241
242
243
244
245 func DecodeLastRune(p []byte) (r rune, size int) {
246 end := len(p)
247 if end == 0 {
248 return RuneError, 0
249 }
250 start := end - 1
251 r = rune(p[start])
252 if r < RuneSelf {
253 return r, 1
254 }
255
256
257
258 lim := end - UTFMax
259 if lim < 0 {
260 lim = 0
261 }
262 for start--; start >= lim; start-- {
263 if RuneStart(p[start]) {
264 break
265 }
266 }
267 if start < 0 {
268 start = 0
269 }
270 r, size = DecodeRune(p[start:end])
271 if start+size != end {
272 return RuneError, 1
273 }
274 return r, size
275 }
276
277
278
279
280
281
282
283
284
285 func DecodeLastRuneInString(s string) (r rune, size int) {
286 end := len(s)
287 if end == 0 {
288 return RuneError, 0
289 }
290 start := end - 1
291 r = rune(s[start])
292 if r < RuneSelf {
293 return r, 1
294 }
295
296
297
298 lim := end - UTFMax
299 if lim < 0 {
300 lim = 0
301 }
302 for start--; start >= lim; start-- {
303 if RuneStart(s[start]) {
304 break
305 }
306 }
307 if start < 0 {
308 start = 0
309 }
310 r, size = DecodeRuneInString(s[start:end])
311 if start+size != end {
312 return RuneError, 1
313 }
314 return r, size
315 }
316
317
318
319 func RuneLen(r rune) int {
320 switch {
321 case r < 0:
322 return -1
323 case r <= rune1Max:
324 return 1
325 case r <= rune2Max:
326 return 2
327 case surrogateMin <= r && r <= surrogateMax:
328 return -1
329 case r <= rune3Max:
330 return 3
331 case r <= MaxRune:
332 return 4
333 }
334 return -1
335 }
336
337
338
339 func EncodeRune(p []byte, r rune) int {
340
341 switch i := uint32(r); {
342 case i <= rune1Max:
343 p[0] = byte(r)
344 return 1
345 case i <= rune2Max:
346 _ = p[1]
347 p[0] = t2 | byte(r>>6)
348 p[1] = tx | byte(r)&maskx
349 return 2
350 case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
351 r = RuneError
352 fallthrough
353 case i <= rune3Max:
354 _ = p[2]
355 p[0] = t3 | byte(r>>12)
356 p[1] = tx | byte(r>>6)&maskx
357 p[2] = tx | byte(r)&maskx
358 return 3
359 default:
360 _ = p[3]
361 p[0] = t4 | byte(r>>18)
362 p[1] = tx | byte(r>>12)&maskx
363 p[2] = tx | byte(r>>6)&maskx
364 p[3] = tx | byte(r)&maskx
365 return 4
366 }
367 }
368
369
370
371 func RuneCount(p []byte) int {
372 np := len(p)
373 var n int
374 for i := 0; i < np; {
375 n++
376 c := p[i]
377 if c < RuneSelf {
378
379 i++
380 continue
381 }
382 x := first[c]
383 if x == xx {
384 i++
385 continue
386 }
387 size := int(x & 7)
388 if i+size > np {
389 i++
390 continue
391 }
392 accept := acceptRanges[x>>4]
393 if c := p[i+1]; c < accept.lo || accept.hi < c {
394 size = 1
395 } else if size == 2 {
396 } else if c := p[i+2]; c < locb || hicb < c {
397 size = 1
398 } else if size == 3 {
399 } else if c := p[i+3]; c < locb || hicb < c {
400 size = 1
401 }
402 i += size
403 }
404 return n
405 }
406
407
408 func RuneCountInString(s string) (n int) {
409 ns := len(s)
410 for i := 0; i < ns; n++ {
411 c := s[i]
412 if c < RuneSelf {
413
414 i++
415 continue
416 }
417 x := first[c]
418 if x == xx {
419 i++
420 continue
421 }
422 size := int(x & 7)
423 if i+size > ns {
424 i++
425 continue
426 }
427 accept := acceptRanges[x>>4]
428 if c := s[i+1]; c < accept.lo || accept.hi < c {
429 size = 1
430 } else if size == 2 {
431 } else if c := s[i+2]; c < locb || hicb < c {
432 size = 1
433 } else if size == 3 {
434 } else if c := s[i+3]; c < locb || hicb < c {
435 size = 1
436 }
437 i += size
438 }
439 return n
440 }
441
442
443
444
// RuneStart reports whether the byte could be the first byte of an encoded,
// possibly invalid rune. Second and subsequent bytes always have the top two
// bits set to 10, that is, they lie in 0x80..0xBF.
func RuneStart(b byte) bool {
	isContinuation := b >= 0x80 && b <= 0xBF
	return !isContinuation
}
446
447
448 func Valid(p []byte) bool {
449 n := len(p)
450 for i := 0; i < n; {
451 pi := p[i]
452 if pi < RuneSelf {
453 i++
454 continue
455 }
456 x := first[pi]
457 if x == xx {
458 return false
459 }
460 size := int(x & 7)
461 if i+size > n {
462 return false
463 }
464 accept := acceptRanges[x>>4]
465 if c := p[i+1]; c < accept.lo || accept.hi < c {
466 return false
467 } else if size == 2 {
468 } else if c := p[i+2]; c < locb || hicb < c {
469 return false
470 } else if size == 3 {
471 } else if c := p[i+3]; c < locb || hicb < c {
472 return false
473 }
474 i += size
475 }
476 return true
477 }
478
479
480 func ValidString(s string) bool {
481 n := len(s)
482 for i := 0; i < n; {
483 si := s[i]
484 if si < RuneSelf {
485 i++
486 continue
487 }
488 x := first[si]
489 if x == xx {
490 return false
491 }
492 size := int(x & 7)
493 if i+size > n {
494 return false
495 }
496 accept := acceptRanges[x>>4]
497 if c := s[i+1]; c < accept.lo || accept.hi < c {
498 return false
499 } else if size == 2 {
500 } else if c := s[i+2]; c < locb || hicb < c {
501 return false
502 } else if size == 3 {
503 } else if c := s[i+3]; c < locb || hicb < c {
504 return false
505 }
506 i += size
507 }
508 return true
509 }
510
511
512
513 func ValidRune(r rune) bool {
514 switch {
515 case 0 <= r && r < surrogateMin:
516 return true
517 case surrogateMax < r && r <= MaxRune:
518 return true
519 }
520 return false
521 }
522
View as plain text