Source file
src/strings/strings.go
Documentation: strings
1
2
3
4
5
6
7
8 package strings
9
10 import (
11 "unicode"
12 "unicode/utf8"
13 )
14
15
16
17
// explode splits s into a slice of strings, one per decoded rune, producing
// at most n elements. A negative n, or an n larger than the number of runes
// in s, means "one element per rune". When the limit cuts the split short,
// the final element holds the unsplit remainder of s. Every element except
// the final one that decodes as utf8.RuneError is stored as the encoding of
// U+FFFD rather than as the original bytes.
func explode(s string, n int) []string {
	if total := utf8.RuneCountInString(s); n < 0 || n > total {
		n = total
	}
	parts := make([]string, n)
	if n == 0 {
		return parts
	}
	last := n - 1
	for i := range parts[:last] {
		r, width := utf8.DecodeRuneInString(s)
		if r == utf8.RuneError {
			parts[i] = string(utf8.RuneError)
		} else {
			parts[i] = s[:width]
		}
		s = s[width:]
	}
	// Whatever is left goes, unsplit, into the final slot.
	parts[last] = s
	return parts
}
37
38
// primeRK is the multiplier used by the rolling hashes in this file
// (hashStr, hashStrRev, LastIndex and indexRabinKarp).
const primeRK = 16777619
40
41
42
43 func hashStr(sep string) (uint32, uint32) {
44 hash := uint32(0)
45 for i := 0; i < len(sep); i++ {
46 hash = hash*primeRK + uint32(sep[i])
47 }
48 var pow, sq uint32 = 1, primeRK
49 for i := len(sep); i > 0; i >>= 1 {
50 if i&1 != 0 {
51 pow *= sq
52 }
53 sq *= sq
54 }
55 return hash, pow
56 }
57
58
59
60 func hashStrRev(sep string) (uint32, uint32) {
61 hash := uint32(0)
62 for i := len(sep) - 1; i >= 0; i-- {
63 hash = hash*primeRK + uint32(sep[i])
64 }
65 var pow, sq uint32 = 1, primeRK
66 for i := len(sep); i > 0; i >>= 1 {
67 if i&1 != 0 {
68 pow *= sq
69 }
70 sq *= sq
71 }
72 return hash, pow
73 }
74
75
76 func countGeneric(s, substr string) int {
77
78 if len(substr) == 0 {
79 return utf8.RuneCountInString(s) + 1
80 }
81 n := 0
82 for {
83 i := Index(s, substr)
84 if i == -1 {
85 return n
86 }
87 n++
88 s = s[i+len(substr):]
89 }
90 }
91
92
93 func Contains(s, substr string) bool {
94 return Index(s, substr) >= 0
95 }
96
97
98 func ContainsAny(s, chars string) bool {
99 return IndexAny(s, chars) >= 0
100 }
101
102
103 func ContainsRune(s string, r rune) bool {
104 return IndexRune(s, r) >= 0
105 }
106
107
108 func LastIndex(s, substr string) int {
109 n := len(substr)
110 switch {
111 case n == 0:
112 return len(s)
113 case n == 1:
114 return LastIndexByte(s, substr[0])
115 case n == len(s):
116 if substr == s {
117 return 0
118 }
119 return -1
120 case n > len(s):
121 return -1
122 }
123
124 hashss, pow := hashStrRev(substr)
125 last := len(s) - n
126 var h uint32
127 for i := len(s) - 1; i >= last; i-- {
128 h = h*primeRK + uint32(s[i])
129 }
130 if h == hashss && s[last:] == substr {
131 return last
132 }
133 for i := last - 1; i >= 0; i-- {
134 h *= primeRK
135 h += uint32(s[i])
136 h -= pow * uint32(s[i+n])
137 if h == hashss && s[i:i+n] == substr {
138 return i
139 }
140 }
141 return -1
142 }
143
144
145
146
147
148 func IndexRune(s string, r rune) int {
149 switch {
150 case 0 <= r && r < utf8.RuneSelf:
151 return IndexByte(s, byte(r))
152 case r == utf8.RuneError:
153 for i, r := range s {
154 if r == utf8.RuneError {
155 return i
156 }
157 }
158 return -1
159 case !utf8.ValidRune(r):
160 return -1
161 default:
162 return Index(s, string(r))
163 }
164 }
165
166
167
168 func IndexAny(s, chars string) int {
169 if chars == "" {
170
171 return -1
172 }
173 if len(s) > 8 {
174 if as, isASCII := makeASCIISet(chars); isASCII {
175 for i := 0; i < len(s); i++ {
176 if as.contains(s[i]) {
177 return i
178 }
179 }
180 return -1
181 }
182 }
183 for i, c := range s {
184 for _, m := range chars {
185 if c == m {
186 return i
187 }
188 }
189 }
190 return -1
191 }
192
193
194
195
196 func LastIndexAny(s, chars string) int {
197 if chars == "" {
198
199 return -1
200 }
201 if len(s) > 8 {
202 if as, isASCII := makeASCIISet(chars); isASCII {
203 for i := len(s) - 1; i >= 0; i-- {
204 if as.contains(s[i]) {
205 return i
206 }
207 }
208 return -1
209 }
210 }
211 for i := len(s); i > 0; {
212 r, size := utf8.DecodeLastRuneInString(s[:i])
213 i -= size
214 for _, c := range chars {
215 if r == c {
216 return i
217 }
218 }
219 }
220 return -1
221 }
222
223
// LastIndexByte returns the index of the last instance of the byte c in s,
// or -1 if c is not present in s.
func LastIndexByte(s string, c byte) int {
	i := len(s) - 1
	// Walk backwards; falling off the front leaves i at -1, which is
	// exactly the "not found" result.
	for i >= 0 && s[i] != c {
		i--
	}
	return i
}
232
233
234
235 func genSplit(s, sep string, sepSave, n int) []string {
236 if n == 0 {
237 return nil
238 }
239 if sep == "" {
240 return explode(s, n)
241 }
242 if n < 0 {
243 n = Count(s, sep) + 1
244 }
245
246 a := make([]string, n)
247 n--
248 i := 0
249 for i < n {
250 m := Index(s, sep)
251 if m < 0 {
252 break
253 }
254 a[i] = s[:m+sepSave]
255 s = s[m+len(sep):]
256 i++
257 }
258 a[i] = s
259 return a[:i+1]
260 }
261
262
263
264
265
266
267
268
269
270
271
272 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
273
274
275
276
277
278
279
280
281
282
283
284 func SplitAfterN(s, sep string, n int) []string {
285 return genSplit(s, sep, len(sep), n)
286 }
287
288
289
290
291
292
293
294
295
296
297
298 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
299
300
301
302
303
304
305
306
307
308
309
310 func SplitAfter(s, sep string) []string {
311 return genSplit(s, sep, len(sep), -1)
312 }
313
// asciiSpace is a lookup table indexed by byte value: the entries for
// '\t', '\n', '\v', '\f', '\r' and ' ' are 1 and every other entry is 0.
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
315
316
317
318
319 func Fields(s string) []string {
320
321
322 n := 0
323 wasSpace := 1
324
325 setBits := uint8(0)
326 for i := 0; i < len(s); i++ {
327 r := s[i]
328 setBits |= r
329 isSpace := int(asciiSpace[r])
330 n += wasSpace & ^isSpace
331 wasSpace = isSpace
332 }
333
334 if setBits < utf8.RuneSelf {
335 a := make([]string, n)
336 na := 0
337 fieldStart := 0
338 i := 0
339
340 for i < len(s) && asciiSpace[s[i]] != 0 {
341 i++
342 }
343 fieldStart = i
344 for i < len(s) {
345 if asciiSpace[s[i]] == 0 {
346 i++
347 continue
348 }
349 a[na] = s[fieldStart:i]
350 na++
351 i++
352
353 for i < len(s) && asciiSpace[s[i]] != 0 {
354 i++
355 }
356 fieldStart = i
357 }
358 if fieldStart < len(s) {
359 a[na] = s[fieldStart:]
360 }
361 return a
362 }
363
364
365 return FieldsFunc(s, unicode.IsSpace)
366 }
367
368
369
370
371
372
// FieldsFunc splits s at each run of code points c for which f(c) is true
// and returns the substrings between those runs. The result has length 0 if
// every code point in s satisfies f or if s is empty. f is called exactly
// once per code point, in order.
func FieldsFunc(s string, f func(rune) bool) []string {
	// interval records the byte range [lo, hi) of one field within s.
	type interval struct{ lo, hi int }
	found := make([]interval, 0, 32)

	begin := -1 // start of the field being scanned, or -1 between fields
	for pos, r := range s {
		sep := f(r)
		switch {
		case sep && begin >= 0:
			found = append(found, interval{begin, pos})
			begin = -1
		case !sep && begin < 0:
			begin = pos
		}
	}
	if begin >= 0 {
		// The last field runs to the end of the string.
		found = append(found, interval{begin, len(s)})
	}

	fields := make([]string, len(found))
	for k := range found {
		fields[k] = s[found[k].lo:found[k].hi]
	}
	return fields
}
412
413
414
// Join concatenates the elements of a into a single string, placing sep
// between consecutive elements. An empty a yields "".
func Join(a []string, sep string) string {
	// Up to three elements, plain concatenation does the job.
	if len(a) == 0 {
		return ""
	}
	if len(a) == 1 {
		return a[0]
	}
	if len(a) == 2 {
		return a[0] + sep + a[1]
	}
	if len(a) == 3 {
		return a[0] + sep + a[1] + sep + a[2]
	}

	// Size the buffer exactly: all elements plus one sep per gap.
	total := len(sep) * (len(a) - 1)
	for _, elem := range a {
		total += len(elem)
	}
	buf := make([]byte, 0, total)
	buf = append(buf, a[0]...)
	for _, elem := range a[1:] {
		buf = append(buf, sep...)
		buf = append(buf, elem...)
	}
	return string(buf)
}
443
444
// HasPrefix reports whether the string s begins with prefix.
func HasPrefix(s, prefix string) bool {
	if len(prefix) > len(s) {
		return false
	}
	return s[:len(prefix)] == prefix
}
448
449
// HasSuffix reports whether the string s ends with suffix.
func HasSuffix(s, suffix string) bool {
	if len(suffix) > len(s) {
		return false
	}
	tail := s[len(s)-len(suffix):]
	return tail == suffix
}
453
454
455
456
457 func Map(mapping func(rune) rune, s string) string {
458
459
460
461
462
463
464 var b []byte
465
466 var nbytes int
467
468 for i, c := range s {
469 r := mapping(c)
470 if r == c {
471 continue
472 }
473
474 b = make([]byte, len(s)+utf8.UTFMax)
475 nbytes = copy(b, s[:i])
476 if r >= 0 {
477 if r <= utf8.RuneSelf {
478 b[nbytes] = byte(r)
479 nbytes++
480 } else {
481 nbytes += utf8.EncodeRune(b[nbytes:], r)
482 }
483 }
484
485 if c == utf8.RuneError {
486
487
488
489 _, w := utf8.DecodeRuneInString(s[i:])
490 i += w
491 } else {
492 i += utf8.RuneLen(c)
493 }
494
495 s = s[i:]
496 break
497 }
498
499 if b == nil {
500 return s
501 }
502
503 for _, c := range s {
504 r := mapping(c)
505
506
507 if (0 <= r && r <= utf8.RuneSelf) && nbytes < len(b) {
508 b[nbytes] = byte(r)
509 nbytes++
510 continue
511 }
512
513
514 if r >= 0 {
515 if nbytes+utf8.UTFMax >= len(b) {
516
517 nb := make([]byte, 2*len(b))
518 copy(nb, b[:nbytes])
519 b = nb
520 }
521 nbytes += utf8.EncodeRune(b[nbytes:], r)
522 }
523 }
524
525 return string(b[:nbytes])
526 }
527
528
529
530
531
// Repeat returns a new string consisting of count copies of the string s.
//
// It panics if count is negative or if the result of len(s) * count
// overflows, since there is no error return to report either condition.
func Repeat(s string, count int) string {
	if count < 0 {
		panic("strings: negative Repeat count")
	}
	total := len(s) * count
	// Dividing the product back out detects a wrapped multiplication.
	if count > 0 && total/count != len(s) {
		panic("strings: Repeat count causes overflow")
	}

	out := make([]byte, total)
	// Seed with one copy of s, then keep doubling the filled prefix.
	for filled := copy(out, s); filled < len(out); filled *= 2 {
		copy(out[filled:], out[:filled])
	}
	return string(out)
}
551
552
553 func ToUpper(s string) string {
554 isASCII, hasLower := true, false
555 for i := 0; i < len(s); i++ {
556 c := s[i]
557 if c >= utf8.RuneSelf {
558 isASCII = false
559 break
560 }
561 hasLower = hasLower || (c >= 'a' && c <= 'z')
562 }
563
564 if isASCII {
565 if !hasLower {
566 return s
567 }
568 b := make([]byte, len(s))
569 for i := 0; i < len(s); i++ {
570 c := s[i]
571 if c >= 'a' && c <= 'z' {
572 c -= 'a' - 'A'
573 }
574 b[i] = c
575 }
576 return string(b)
577 }
578 return Map(unicode.ToUpper, s)
579 }
580
581
582 func ToLower(s string) string {
583 isASCII, hasUpper := true, false
584 for i := 0; i < len(s); i++ {
585 c := s[i]
586 if c >= utf8.RuneSelf {
587 isASCII = false
588 break
589 }
590 hasUpper = hasUpper || (c >= 'A' && c <= 'Z')
591 }
592
593 if isASCII {
594 if !hasUpper {
595 return s
596 }
597 b := make([]byte, len(s))
598 for i := 0; i < len(s); i++ {
599 c := s[i]
600 if c >= 'A' && c <= 'Z' {
601 c += 'a' - 'A'
602 }
603 b[i] = c
604 }
605 return string(b)
606 }
607 return Map(unicode.ToLower, s)
608 }
609
610
611 func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
612
613
614
615 func ToUpperSpecial(c unicode.SpecialCase, s string) string {
616 return Map(func(r rune) rune { return c.ToUpper(r) }, s)
617 }
618
619
620
621 func ToLowerSpecial(c unicode.SpecialCase, s string) string {
622 return Map(func(r rune) rune { return c.ToLower(r) }, s)
623 }
624
625
626
627 func ToTitleSpecial(c unicode.SpecialCase, s string) string {
628 return Map(func(r rune) rune { return c.ToTitle(r) }, s)
629 }
630
631
632
// isSeparator reports whether the rune r is treated as a word separator.
// For runes up to 0x7F, everything except ASCII letters, ASCII digits and
// underscore is a separator. Above that range, letters and digits are not
// separators, and of the remaining runes only those for which
// unicode.IsSpace is true are.
func isSeparator(r rune) bool {
	if r > 0x7F {
		if unicode.IsLetter(r) || unicode.IsDigit(r) {
			return false
		}
		return unicode.IsSpace(r)
	}
	wordChar := ('0' <= r && r <= '9') ||
		('a' <= r && r <= 'z') ||
		('A' <= r && r <= 'Z') ||
		r == '_'
	return !wordChar
}
655
656
657
658
659
660 func Title(s string) string {
661
662
663
664 prev := ' '
665 return Map(
666 func(r rune) rune {
667 if isSeparator(prev) {
668 prev = r
669 return unicode.ToTitle(r)
670 }
671 prev = r
672 return r
673 },
674 s)
675 }
676
677
678
679 func TrimLeftFunc(s string, f func(rune) bool) string {
680 i := indexFunc(s, f, false)
681 if i == -1 {
682 return ""
683 }
684 return s[i:]
685 }
686
687
688
689 func TrimRightFunc(s string, f func(rune) bool) string {
690 i := lastIndexFunc(s, f, false)
691 if i >= 0 && s[i] >= utf8.RuneSelf {
692 _, wid := utf8.DecodeRuneInString(s[i:])
693 i += wid
694 } else {
695 i++
696 }
697 return s[0:i]
698 }
699
700
701
702 func TrimFunc(s string, f func(rune) bool) string {
703 return TrimRightFunc(TrimLeftFunc(s, f), f)
704 }
705
706
707
708 func IndexFunc(s string, f func(rune) bool) int {
709 return indexFunc(s, f, true)
710 }
711
712
713
714 func LastIndexFunc(s string, f func(rune) bool) int {
715 return lastIndexFunc(s, f, true)
716 }
717
718
719
720
// indexFunc returns the byte index in s of the first code point c for which
// f(c) == truth, or -1 if there is none. Passing truth == false inverts the
// sense of the predicate.
func indexFunc(s string, f func(rune) bool, truth bool) int {
	found := -1
	for pos, c := range s {
		if f(c) == truth {
			found = pos
			break
		}
	}
	return found
}
729
730
731
732
// lastIndexFunc returns the byte index in s of the last code point c for
// which f(c) == truth, or -1 if there is none. Passing truth == false
// inverts the sense of the predicate.
func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
	// Peel runes off the end; after each peel, len(rest) is the byte index
	// at which the peeled rune started.
	rest := s
	for len(rest) > 0 {
		r, width := utf8.DecodeLastRuneInString(rest)
		rest = rest[:len(rest)-width]
		if f(r) == truth {
			return len(rest)
		}
	}
	return -1
}
743
744
745
746
747
748
749
// asciiSet is a 256-bit set stored as eight 32-bit words. Bit (c % 32) of
// word (c / 32) represents the byte value c. makeASCIISet only ever sets
// bits for bytes below utf8.RuneSelf, so only the low four words are used.
type asciiSet [8]uint32

// makeASCIISet builds the set of bytes occurring in chars. ok is false if
// chars contains a byte that is not ASCII; in that case the returned set
// holds only the bytes seen before it.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for i, n := 0, len(chars); i < n; i++ {
		c := chars[i]
		if c >= utf8.RuneSelf {
			return as, false
		}
		as[c/32] |= uint32(1) << (c % 32)
	}
	return as, true
}

// contains reports whether the byte c is a member of the set.
func (as *asciiSet) contains(c byte) bool {
	word, bit := c/32, c%32
	return (as[word]>>bit)&1 == 1
}
769
770 func makeCutsetFunc(cutset string) func(rune) bool {
771 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
772 return func(r rune) bool {
773 return r == rune(cutset[0])
774 }
775 }
776 if as, isASCII := makeASCIISet(cutset); isASCII {
777 return func(r rune) bool {
778 return r < utf8.RuneSelf && as.contains(byte(r))
779 }
780 }
781 return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
782 }
783
784
785
786 func Trim(s string, cutset string) string {
787 if s == "" || cutset == "" {
788 return s
789 }
790 return TrimFunc(s, makeCutsetFunc(cutset))
791 }
792
793
794
795 func TrimLeft(s string, cutset string) string {
796 if s == "" || cutset == "" {
797 return s
798 }
799 return TrimLeftFunc(s, makeCutsetFunc(cutset))
800 }
801
802
803
804 func TrimRight(s string, cutset string) string {
805 if s == "" || cutset == "" {
806 return s
807 }
808 return TrimRightFunc(s, makeCutsetFunc(cutset))
809 }
810
811
812
813 func TrimSpace(s string) string {
814 return TrimFunc(s, unicode.IsSpace)
815 }
816
817
818
819 func TrimPrefix(s, prefix string) string {
820 if HasPrefix(s, prefix) {
821 return s[len(prefix):]
822 }
823 return s
824 }
825
826
827
828 func TrimSuffix(s, suffix string) string {
829 if HasSuffix(s, suffix) {
830 return s[:len(s)-len(suffix)]
831 }
832 return s
833 }
834
835
836
837
838
839
840
841 func Replace(s, old, new string, n int) string {
842 if old == new || n == 0 {
843 return s
844 }
845
846
847 if m := Count(s, old); m == 0 {
848 return s
849 } else if n < 0 || m < n {
850 n = m
851 }
852
853
854 t := make([]byte, len(s)+n*(len(new)-len(old)))
855 w := 0
856 start := 0
857 for i := 0; i < n; i++ {
858 j := start
859 if len(old) == 0 {
860 if i > 0 {
861 _, wid := utf8.DecodeRuneInString(s[start:])
862 j += wid
863 }
864 } else {
865 j += Index(s[start:], old)
866 }
867 w += copy(t[w:], s[start:j])
868 w += copy(t[w:], new)
869 start = j + len(old)
870 }
871 w += copy(t[w:], s[start:])
872 return string(t[0:w])
873 }
874
875
876
// EqualFold reports whether s and t, interpreted as UTF-8 strings, are
// equal under Unicode simple case folding as computed by unicode.SimpleFold.
func EqualFold(s, t string) bool {
	// next splits off the first rune of a non-empty string, with a fast
	// path for single-byte runes.
	next := func(str string) (rune, string) {
		if str[0] < utf8.RuneSelf {
			return rune(str[0]), str[1:]
		}
		r, size := utf8.DecodeRuneInString(str)
		return r, str[size:]
	}

	for len(s) > 0 && len(t) > 0 {
		var lo, hi rune
		lo, s = next(s)
		hi, t = next(t)

		if lo == hi {
			continue
		}
		// Order the pair so that lo < hi; this halves the cases below.
		if hi < lo {
			lo, hi = hi, lo
		}

		if hi < utf8.RuneSelf && 'A' <= lo && lo <= 'Z' {
			// Both ASCII with lo an upper-case letter: hi has to be the
			// matching lower-case letter.
			if hi != lo+'a'-'A' {
				return false
			}
			continue
		}

		// General case: SimpleFold steps to the next larger equivalent
		// rune, wrapping around to the smallest. Walk lo's orbit until it
		// wraps or reaches hi.
		folded := unicode.SimpleFold(lo)
		for folded != lo && folded < hi {
			folded = unicode.SimpleFold(folded)
		}
		if folded != hi {
			return false
		}
	}

	// At least one string is exhausted; they match only if both are.
	return s == t
}
929
930 func indexRabinKarp(s, substr string) int {
931
932 hashss, pow := hashStr(substr)
933 n := len(substr)
934 var h uint32
935 for i := 0; i < n; i++ {
936 h = h*primeRK + uint32(s[i])
937 }
938 if h == hashss && s[:n] == substr {
939 return 0
940 }
941 for i := n; i < len(s); {
942 h *= primeRK
943 h += uint32(s[i])
944 h -= pow * uint32(s[i-n])
945 i++
946 if h == hashss && s[i-n:i] == substr {
947 return i - n
948 }
949 }
950 return -1
951
952 }
953
View as plain text