Source file
src/bytes/bytes.go
Documentation: bytes
1
2
3
4
5
6
7 package bytes
8
9 import (
10 "unicode"
11 "unicode/utf8"
12 )
13
// equalPortable reports whether a and b have the same length and hold the
// same bytes. It is a plain byte-by-byte comparison with no assembly support.
func equalPortable(a, b []byte) bool {
	if len(a) != len(b) {
		return false
	}
	for i := 0; i < len(a); i++ {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
25
26
27
// explode splits s into at most n pieces, one per rune as decoded by
// utf8.DecodeRune; bytes that do not decode are emitted one byte at a time
// because DecodeRune reports width 1 for them. If more runes remain when the
// n-th piece is reached, that last piece holds the whole unsplit remainder.
// A non-positive n means "no limit".
//
// Every piece except a trailing remainder has its capacity clipped to its
// length, so appending to a piece cannot overwrite the bytes that follow it.
func explode(s []byte, n int) [][]byte {
	// There can never be more pieces than bytes in s. Clamping n from above
	// as well as from below keeps a large caller-supplied limit from sizing
	// the allocation below, which would otherwise panic or exhaust memory.
	if n <= 0 || n > len(s) {
		n = len(s)
	}
	a := make([][]byte, n)
	var size int
	na := 0
	for len(s) > 0 {
		if na+1 >= n {
			// Last permitted piece: it takes everything that is left.
			a[na] = s
			na++
			break
		}
		_, size = utf8.DecodeRune(s)
		a[na] = s[0:size:size]
		s = s[size:]
		na++
	}
	return a[0:na]
}
48
49
50 func countGeneric(s, sep []byte) int {
51
52 if len(sep) == 0 {
53 return utf8.RuneCount(s) + 1
54 }
55 n := 0
56 for {
57 i := Index(s, sep)
58 if i == -1 {
59 return n
60 }
61 n++
62 s = s[i+len(sep):]
63 }
64 }
65
66
67 func Contains(b, subslice []byte) bool {
68 return Index(b, subslice) != -1
69 }
70
71
72 func ContainsAny(b []byte, chars string) bool {
73 return IndexAny(b, chars) >= 0
74 }
75
76
77 func ContainsRune(b []byte, r rune) bool {
78 return IndexRune(b, r) >= 0
79 }
80
// indexBytePortable returns the index of the first occurrence of c in s,
// or -1 if c is not present. It is a plain linear scan.
func indexBytePortable(s []byte, c byte) int {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return i
		}
	}
	return -1
}
89
90
91 func LastIndex(s, sep []byte) int {
92 n := len(sep)
93 if n == 0 {
94 return len(s)
95 }
96 c := sep[0]
97 for i := len(s) - n; i >= 0; i-- {
98 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) {
99 return i
100 }
101 }
102 return -1
103 }
104
105
// LastIndexByte returns the index of the last occurrence of c in s, or -1
// if c is not present.
func LastIndexByte(s []byte, c byte) int {
	i := len(s)
	for i > 0 {
		i--
		if s[i] == c {
			return i
		}
	}
	return -1
}
114
115
116
117
118
119
120 func IndexRune(s []byte, r rune) int {
121 switch {
122 case 0 <= r && r < utf8.RuneSelf:
123 return IndexByte(s, byte(r))
124 case r == utf8.RuneError:
125 for i := 0; i < len(s); {
126 r1, n := utf8.DecodeRune(s[i:])
127 if r1 == utf8.RuneError {
128 return i
129 }
130 i += n
131 }
132 return -1
133 case !utf8.ValidRune(r):
134 return -1
135 default:
136 var b [utf8.UTFMax]byte
137 n := utf8.EncodeRune(b[:], r)
138 return Index(s, b[:n])
139 }
140 }
141
142
143
144
145
146 func IndexAny(s []byte, chars string) int {
147 if chars == "" {
148
149 return -1
150 }
151 if len(s) > 8 {
152 if as, isASCII := makeASCIISet(chars); isASCII {
153 for i, c := range s {
154 if as.contains(c) {
155 return i
156 }
157 }
158 return -1
159 }
160 }
161 var width int
162 for i := 0; i < len(s); i += width {
163 r := rune(s[i])
164 if r < utf8.RuneSelf {
165 width = 1
166 } else {
167 r, width = utf8.DecodeRune(s[i:])
168 }
169 for _, ch := range chars {
170 if r == ch {
171 return i
172 }
173 }
174 }
175 return -1
176 }
177
178
179
180
181
182 func LastIndexAny(s []byte, chars string) int {
183 if chars == "" {
184
185 return -1
186 }
187 if len(s) > 8 {
188 if as, isASCII := makeASCIISet(chars); isASCII {
189 for i := len(s) - 1; i >= 0; i-- {
190 if as.contains(s[i]) {
191 return i
192 }
193 }
194 return -1
195 }
196 }
197 for i := len(s); i > 0; {
198 r, size := utf8.DecodeLastRune(s[:i])
199 i -= size
200 for _, c := range chars {
201 if r == c {
202 return i
203 }
204 }
205 }
206 return -1
207 }
208
209
210
211 func genSplit(s, sep []byte, sepSave, n int) [][]byte {
212 if n == 0 {
213 return nil
214 }
215 if len(sep) == 0 {
216 return explode(s, n)
217 }
218 if n < 0 {
219 n = Count(s, sep) + 1
220 }
221
222 a := make([][]byte, n)
223 n--
224 i := 0
225 for i < n {
226 m := Index(s, sep)
227 if m < 0 {
228 break
229 }
230 a[i] = s[: m+sepSave : m+sepSave]
231 s = s[m+len(sep):]
232 i++
233 }
234 a[i] = s
235 return a[:i+1]
236 }
237
238
239
240
241
242
243
244
245 func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
246
247
248
249
250
251
252
253
254 func SplitAfterN(s, sep []byte, n int) [][]byte {
255 return genSplit(s, sep, len(sep), n)
256 }
257
258
259
260
261
262 func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) }
263
264
265
266
267
268 func SplitAfter(s, sep []byte) [][]byte {
269 return genSplit(s, sep, len(sep), -1)
270 }
271
// asciiSpace is a lookup table indexed by byte value. The entry is 1 for the
// six ASCII whitespace bytes (space, tab, newline, vertical tab, form feed,
// carriage return) and 0 for every other byte.
var asciiSpace = [256]uint8{
	' ':  1,
	'\t': 1,
	'\n': 1,
	'\v': 1,
	'\f': 1,
	'\r': 1,
}
273
274
275
276
277
278 func Fields(s []byte) [][]byte {
279
280
281 n := 0
282 wasSpace := 1
283
284 setBits := uint8(0)
285 for i := 0; i < len(s); i++ {
286 r := s[i]
287 setBits |= r
288 isSpace := int(asciiSpace[r])
289 n += wasSpace & ^isSpace
290 wasSpace = isSpace
291 }
292
293 if setBits >= utf8.RuneSelf {
294
295 return FieldsFunc(s, unicode.IsSpace)
296 }
297
298
299 a := make([][]byte, n)
300 na := 0
301 fieldStart := 0
302 i := 0
303
304 for i < len(s) && asciiSpace[s[i]] != 0 {
305 i++
306 }
307 fieldStart = i
308 for i < len(s) {
309 if asciiSpace[s[i]] == 0 {
310 i++
311 continue
312 }
313 a[na] = s[fieldStart:i:i]
314 na++
315 i++
316
317 for i < len(s) && asciiSpace[s[i]] != 0 {
318 i++
319 }
320 fieldStart = i
321 }
322 if fieldStart < len(s) {
323 a[na] = s[fieldStart:len(s):len(s)]
324 }
325 return a
326 }
327
328
329
330
331
332
333
// FieldsFunc interprets s as UTF-8 and splits it at every run of runes for
// which f returns true, returning the non-empty pieces in between. f is
// called once per decoded rune, in order. If every rune satisfies f, or s is
// empty, the result is an empty slice.
//
// Each returned field has its capacity clipped to its length.
func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
	// bounds records one field as the half-open byte range s[lo:hi].
	type bounds struct {
		lo, hi int
	}
	found := make([]bounds, 0, 32)

	// Scan once, noting where each field begins and ends.
	inField := false
	fieldLo := 0
	i := 0
	for i < len(s) {
		r, width := rune(s[i]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(s[i:])
		}
		switch isSep := f(r); {
		case isSep && inField:
			found = append(found, bounds{fieldLo, i})
			inField = false
		case !isSep && !inField:
			fieldLo = i
			inField = true
		}
		i += width
	}
	if inField {
		// The last field is terminated by the end of s.
		found = append(found, bounds{fieldLo, len(s)})
	}

	// Turn the recorded ranges into subslices of s.
	out := make([][]byte, len(found))
	for k := range found {
		out[k] = s[found[k].lo:found[k].hi:found[k].hi]
	}
	return out
}
379
380
381
// Join concatenates the elements of s into a newly allocated byte slice,
// placing sep between consecutive elements. With no elements it returns an
// empty non-nil slice; with one element it returns a copy of that element.
func Join(s [][]byte, sep []byte) []byte {
	switch len(s) {
	case 0:
		return []byte{}
	case 1:
		// Copy so the caller never gets a slice aliasing s[0].
		return append([]byte(nil), s[0]...)
	}

	// Size the result exactly: all elements plus one separator per gap.
	total := len(sep) * (len(s) - 1)
	for i := range s {
		total += len(s[i])
	}

	out := make([]byte, total)
	w := copy(out, s[0])
	for _, part := range s[1:] {
		w += copy(out[w:], sep)
		w += copy(out[w:], part)
	}
	return out
}
403
404
405 func HasPrefix(s, prefix []byte) bool {
406 return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
407 }
408
409
410 func HasSuffix(s, suffix []byte) bool {
411 return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
412 }
413
414
415
416
417
// Map returns a new byte slice built by decoding s as UTF-8, passing each
// rune through mapping, and encoding the results. A rune for which mapping
// returns a negative value is dropped from the output. A mapped value that
// utf8.RuneLen cannot size is encoded as utf8.RuneError.
func Map(mapping func(r rune) rune, s []byte) []byte {
	// The output usually fits in len(s) bytes; the buffer is grown only if
	// the mapped runes turn out to need more room.
	limit := len(s) // current length of out
	out := make([]byte, limit)
	written := 0 // bytes of out filled so far
	for pos := 0; pos < len(s); {
		r, width := rune(s[pos]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(s[pos:])
		}
		pos += width

		r = mapping(r)
		if r < 0 {
			continue // negative result: drop this rune
		}
		need := utf8.RuneLen(r)
		if need < 0 {
			need = len(string(utf8.RuneError))
		}
		if written+need > limit {
			// Not enough room: move to a buffer at least twice as large.
			limit = limit*2 + utf8.UTFMax
			grown := make([]byte, limit)
			copy(grown, out[:written])
			out = grown
		}
		written += utf8.EncodeRune(out[written:limit], r)
	}
	return out[:written]
}
450
451
452
453
454
// Repeat returns a new byte slice holding count copies of b.
//
// It panics if count is negative or if len(b)*count overflows int.
func Repeat(b []byte, count int) []byte {
	switch {
	case count < 0:
		panic("bytes: negative Repeat count")
	case count > 0 && len(b)*count/count != len(b):
		// Dividing the product back out detects a wrapped multiplication.
		panic("bytes: Repeat count causes overflow")
	}

	out := make([]byte, len(b)*count)
	// Seed with one copy of b, then keep doubling the filled prefix; the
	// final copy is truncated by the length of out.
	for filled := copy(out, b); filled < len(out); filled *= 2 {
		copy(out[filled:], out[:filled])
	}
	return out
}
474
475
476 func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
477
478
479 func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
480
481
482 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
483
484
485
486 func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte {
487 return Map(func(r rune) rune { return c.ToUpper(r) }, s)
488 }
489
490
491
492 func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte {
493 return Map(func(r rune) rune { return c.ToLower(r) }, s)
494 }
495
496
497
498 func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
499 return Map(func(r rune) rune { return c.ToTitle(r) }, s)
500 }
501
502
503
// isSeparator reports whether r can mark a word boundary. Within ASCII,
// everything except letters, digits and underscore is a separator. Outside
// ASCII, letters and digits are not separators and anything else is a
// separator only if unicode.IsSpace says it is a space.
func isSeparator(r rune) bool {
	if r <= 0x7F {
		alnum := ('0' <= r && r <= '9') ||
			('a' <= r && r <= 'z') ||
			('A' <= r && r <= 'Z')
		return !(alnum || r == '_')
	}
	if unicode.IsLetter(r) || unicode.IsDigit(r) {
		return false
	}
	return unicode.IsSpace(r)
}
526
527
528
529
530
531 func Title(s []byte) []byte {
532
533
534
535 prev := ' '
536 return Map(
537 func(r rune) rune {
538 if isSeparator(prev) {
539 prev = r
540 return unicode.ToTitle(r)
541 }
542 prev = r
543 return r
544 },
545 s)
546 }
547
548
549
550 func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
551 i := indexFunc(s, f, false)
552 if i == -1 {
553 return nil
554 }
555 return s[i:]
556 }
557
558
559
560 func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
561 i := lastIndexFunc(s, f, false)
562 if i >= 0 && s[i] >= utf8.RuneSelf {
563 _, wid := utf8.DecodeRune(s[i:])
564 i += wid
565 } else {
566 i++
567 }
568 return s[0:i]
569 }
570
571
572
573 func TrimFunc(s []byte, f func(r rune) bool) []byte {
574 return TrimRightFunc(TrimLeftFunc(s, f), f)
575 }
576
577
578
579 func TrimPrefix(s, prefix []byte) []byte {
580 if HasPrefix(s, prefix) {
581 return s[len(prefix):]
582 }
583 return s
584 }
585
586
587
588 func TrimSuffix(s, suffix []byte) []byte {
589 if HasSuffix(s, suffix) {
590 return s[:len(s)-len(suffix)]
591 }
592 return s
593 }
594
595
596
597
598 func IndexFunc(s []byte, f func(r rune) bool) int {
599 return indexFunc(s, f, true)
600 }
601
602
603
604
605 func LastIndexFunc(s []byte, f func(r rune) bool) int {
606 return lastIndexFunc(s, f, true)
607 }
608
609
610
611
// indexFunc scans s rune by rune from the front and returns the byte index
// of the first rune r for which f(r) == truth, or -1 if there is none.
// Passing truth == false therefore searches for the first rune that f rejects.
func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
	for pos := 0; pos < len(s); {
		r, width := rune(s[pos]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(s[pos:])
		}
		if f(r) == truth {
			return pos
		}
		pos += width
	}
	return -1
}
627
628
629
630
// lastIndexFunc scans s rune by rune from the back and returns the byte index
// at which the last rune r with f(r) == truth starts, or -1 if there is none.
// Passing truth == false therefore searches for the last rune that f rejects.
func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
	end := len(s)
	for end > 0 {
		r, width := rune(s[end-1]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeLastRune(s[:end])
		}
		end -= width
		if f(r) == truth {
			return end
		}
	}
	return -1
}
644
645
646
647
648
649
650
// asciiSet is a 256-bit set stored as eight 32-bit words. Bit (c % 32) of
// word (c / 32) represents byte value c. makeASCIISet only ever sets bits
// for bytes below utf8.RuneSelf, so the upper four words stay zero and any
// byte at or above utf8.RuneSelf is reported as absent.
type asciiSet [8]uint32

// makeASCIISet builds the set of bytes in chars. ok is false if chars
// contains a byte at or above utf8.RuneSelf; the returned set then holds
// only the bytes seen before that point.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for _, c := range []byte(chars) {
		if c >= utf8.RuneSelf {
			return as, false
		}
		as[c/32] |= 1 << (c % 32)
	}
	return as, true
}

// contains reports whether the bit for c is set in as.
func (as *asciiSet) contains(c byte) bool {
	word := as[c>>5]
	return word&(1<<(c&31)) != 0
}
670
671 func makeCutsetFunc(cutset string) func(r rune) bool {
672 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
673 return func(r rune) bool {
674 return r == rune(cutset[0])
675 }
676 }
677 if as, isASCII := makeASCIISet(cutset); isASCII {
678 return func(r rune) bool {
679 return r < utf8.RuneSelf && as.contains(byte(r))
680 }
681 }
682 return func(r rune) bool {
683 for _, c := range cutset {
684 if c == r {
685 return true
686 }
687 }
688 return false
689 }
690 }
691
692
693
694 func Trim(s []byte, cutset string) []byte {
695 return TrimFunc(s, makeCutsetFunc(cutset))
696 }
697
698
699
700 func TrimLeft(s []byte, cutset string) []byte {
701 return TrimLeftFunc(s, makeCutsetFunc(cutset))
702 }
703
704
705
706 func TrimRight(s []byte, cutset string) []byte {
707 return TrimRightFunc(s, makeCutsetFunc(cutset))
708 }
709
710
711
712 func TrimSpace(s []byte) []byte {
713 return TrimFunc(s, unicode.IsSpace)
714 }
715
716
717
// Runes decodes s as UTF-8 and returns the resulting runes, one per code
// point, in order.
func Runes(s []byte) []rune {
	// utf8.RuneCount and utf8.DecodeRune agree on how s divides into runes,
	// so filling every slot of out consumes s exactly.
	out := make([]rune, utf8.RuneCount(s))
	for i := range out {
		r, width := utf8.DecodeRune(s)
		out[i] = r
		s = s[width:]
	}
	return out
}
729
730
731
732
733
734
735
736 func Replace(s, old, new []byte, n int) []byte {
737 m := 0
738 if n != 0 {
739
740 m = Count(s, old)
741 }
742 if m == 0 {
743
744 return append([]byte(nil), s...)
745 }
746 if n < 0 || m < n {
747 n = m
748 }
749
750
751 t := make([]byte, len(s)+n*(len(new)-len(old)))
752 w := 0
753 start := 0
754 for i := 0; i < n; i++ {
755 j := start
756 if len(old) == 0 {
757 if i > 0 {
758 _, wid := utf8.DecodeRune(s[start:])
759 j += wid
760 }
761 } else {
762 j += Index(s[start:], old)
763 }
764 w += copy(t[w:], s[start:j])
765 w += copy(t[w:], new)
766 start = j + len(old)
767 }
768 w += copy(t[w:], s[start:])
769 return t[0:w]
770 }
771
772
773
// EqualFold reports whether s and t, interpreted as UTF-8, are equal under
// Unicode simple case folding as implemented by unicode.SimpleFold.
func EqualFold(s, t []byte) bool {
	// next splits off the first rune of a non-empty p, taking the one-byte
	// shortcut for ASCII.
	next := func(p []byte) (rune, []byte) {
		if p[0] < utf8.RuneSelf {
			return rune(p[0]), p[1:]
		}
		r, size := utf8.DecodeRune(p)
		return r, p[size:]
	}

	for len(s) != 0 && len(t) != 0 {
		var lo, hi rune
		lo, s = next(s)
		hi, t = next(t)

		if lo == hi {
			continue
		}
		// Order the pair so that lo < hi; the checks below rely on it.
		if hi < lo {
			lo, hi = hi, lo
		}

		if hi < utf8.RuneSelf && 'A' <= lo && lo <= 'Z' {
			// Both are ASCII and lo is upper case: they match only if hi
			// is the lower-case form of lo.
			if hi != lo+'a'-'A' {
				return false
			}
			continue
		}

		// General case: follow lo's fold orbit upwards until it either
		// wraps back to lo or reaches or passes hi.
		folded := unicode.SimpleFold(lo)
		for folded != lo && folded < hi {
			folded = unicode.SimpleFold(folded)
		}
		if folded != hi {
			return false
		}
	}

	// At least one input is used up; they match only if both are.
	return len(s) == len(t)
}
826
827 func indexRabinKarp(s, sep []byte) int {
828
829 hashsep, pow := hashStr(sep)
830 n := len(sep)
831 var h uint32
832 for i := 0; i < n; i++ {
833 h = h*primeRK + uint32(s[i])
834 }
835 if h == hashsep && Equal(s[:n], sep) {
836 return 0
837 }
838 for i := n; i < len(s); {
839 h *= primeRK
840 h += uint32(s[i])
841 h -= pow * uint32(s[i-n])
842 i++
843 if h == hashsep && Equal(s[i-n:i], sep) {
844 return i - n
845 }
846 }
847 return -1
848 }
849
850
// primeRK is the multiplier used by the rolling hash in hashStr.
const primeRK = 16777619

// hashStr returns the hash of sep together with primeRK raised to the power
// len(sep). Both values are computed in wrapping uint32 arithmetic. The
// second value is the factor a rolling hash needs to cancel the byte that
// leaves a window of len(sep) bytes.
func hashStr(sep []byte) (uint32, uint32) {
	var hash uint32
	for _, c := range sep {
		hash = hash*primeRK + uint32(c)
	}

	// Exponentiation by squaring over the bits of len(sep).
	pow, sq := uint32(1), uint32(primeRK)
	for bits := len(sep); bits > 0; bits >>= 1 {
		if bits&1 == 1 {
			pow *= sq
		}
		sq *= sq
	}
	return hash, pow
}
869
View as plain text