...
Source file
src/unicode/letter.go
Documentation: unicode
1
2
3
4
5
6
7 package unicode
8
9
10
11
// Notable code point boundaries, listed in ascending order.
const (
	// MaxASCII is U+007F, the largest ASCII value.
	MaxASCII = '\u007F'
	// MaxLatin1 is U+00FF, the largest Latin-1 value.
	MaxLatin1 = '\u00FF'
	// ReplacementChar is U+FFFD. The case-mapping helper in this file
	// returns it when asked for a case index outside [0, MaxCase).
	ReplacementChar = '\uFFFD'
	// MaxRune is U+10FFFF, the largest code point this package treats as valid.
	MaxRune = '\U0010FFFF'
)
18
19
20
21
22
23
24 type RangeTable struct {
25 R16 []Range16
26 R32 []Range32
27 LatinOffset int
28 }
29
30
31
// Range16 is a range of 16-bit code points. It covers Lo through Hi
// inclusive; a code point r in that interval belongs to the range when
// Stride is 1 or when (r-Lo) is a multiple of Stride.
type Range16 struct {
	Lo, Hi, Stride uint16
}
37
38
39
40
// Range32 is a range of 32-bit code points. It covers Lo through Hi
// inclusive; a code point r in that interval belongs to the range when
// Stride is 1 or when (r-Lo) is a multiple of Stride.
type Range32 struct {
	Lo, Hi, Stride uint32
}
46
47
48
49
50
51
52
53
54
55
56
57 type CaseRange struct {
58 Lo uint32
59 Hi uint32
60 Delta d
61 }
62
63
64
65 type SpecialCase []CaseRange
66
67
68
69
70
// Indices into a CaseRange's Delta array, also accepted as the _case
// argument of To.
//
// The alternating-sequence mapping in this file uses the low bit of the
// index, so it matters that UpperCase and TitleCase are even and LowerCase
// is odd.
const (
	UpperCase = iota // 0
	LowerCase        // 1
	TitleCase        // 2
	MaxCase          // 3: number of cases, and the length of a Delta array
)
77
78 type d [MaxCase]rune
79
80
81
82
83 const (
84 UpperLower = MaxRune + 1
85 )
86
87
88
// linearMax is the largest table length for which is16 and is32 scan the
// ranges one by one instead of using binary search.
const linearMax = 18
90
91
92 func is16(ranges []Range16, r uint16) bool {
93 if len(ranges) <= linearMax || r <= MaxLatin1 {
94 for i := range ranges {
95 range_ := &ranges[i]
96 if r < range_.Lo {
97 return false
98 }
99 if r <= range_.Hi {
100 return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
101 }
102 }
103 return false
104 }
105
106
107 lo := 0
108 hi := len(ranges)
109 for lo < hi {
110 m := lo + (hi-lo)/2
111 range_ := &ranges[m]
112 if range_.Lo <= r && r <= range_.Hi {
113 return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
114 }
115 if r < range_.Lo {
116 hi = m
117 } else {
118 lo = m + 1
119 }
120 }
121 return false
122 }
123
124
125 func is32(ranges []Range32, r uint32) bool {
126 if len(ranges) <= linearMax {
127 for i := range ranges {
128 range_ := &ranges[i]
129 if r < range_.Lo {
130 return false
131 }
132 if r <= range_.Hi {
133 return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
134 }
135 }
136 return false
137 }
138
139
140 lo := 0
141 hi := len(ranges)
142 for lo < hi {
143 m := lo + (hi-lo)/2
144 range_ := ranges[m]
145 if range_.Lo <= r && r <= range_.Hi {
146 return range_.Stride == 1 || (r-range_.Lo)%range_.Stride == 0
147 }
148 if r < range_.Lo {
149 hi = m
150 } else {
151 lo = m + 1
152 }
153 }
154 return false
155 }
156
157
158 func Is(rangeTab *RangeTable, r rune) bool {
159 r16 := rangeTab.R16
160 if len(r16) > 0 && r <= rune(r16[len(r16)-1].Hi) {
161 return is16(r16, uint16(r))
162 }
163 r32 := rangeTab.R32
164 if len(r32) > 0 && r >= rune(r32[0].Lo) {
165 return is32(r32, uint32(r))
166 }
167 return false
168 }
169
170 func isExcludingLatin(rangeTab *RangeTable, r rune) bool {
171 r16 := rangeTab.R16
172 if off := rangeTab.LatinOffset; len(r16) > off && r <= rune(r16[len(r16)-1].Hi) {
173 return is16(r16[off:], uint16(r))
174 }
175 r32 := rangeTab.R32
176 if len(r32) > 0 && r >= rune(r32[0].Lo) {
177 return is32(r32, uint32(r))
178 }
179 return false
180 }
181
182
183 func IsUpper(r rune) bool {
184
185 if uint32(r) <= MaxLatin1 {
186 return properties[uint8(r)]&pLmask == pLu
187 }
188 return isExcludingLatin(Upper, r)
189 }
190
191
192 func IsLower(r rune) bool {
193
194 if uint32(r) <= MaxLatin1 {
195 return properties[uint8(r)]&pLmask == pLl
196 }
197 return isExcludingLatin(Lower, r)
198 }
199
200
201 func IsTitle(r rune) bool {
202 if r <= MaxLatin1 {
203 return false
204 }
205 return isExcludingLatin(Title, r)
206 }
207
208
209 func to(_case int, r rune, caseRange []CaseRange) rune {
210 if _case < 0 || MaxCase <= _case {
211 return ReplacementChar
212 }
213
214 lo := 0
215 hi := len(caseRange)
216 for lo < hi {
217 m := lo + (hi-lo)/2
218 cr := caseRange[m]
219 if rune(cr.Lo) <= r && r <= rune(cr.Hi) {
220 delta := cr.Delta[_case]
221 if delta > MaxRune {
222
223
224
225
226
227
228
229
230
231
232 return rune(cr.Lo) + ((r-rune(cr.Lo))&^1 | rune(_case&1))
233 }
234 return r + delta
235 }
236 if r < rune(cr.Lo) {
237 hi = m
238 } else {
239 lo = m + 1
240 }
241 }
242 return r
243 }
244
245
246 func To(_case int, r rune) rune {
247 return to(_case, r, CaseRanges)
248 }
249
250
251 func ToUpper(r rune) rune {
252 if r <= MaxASCII {
253 if 'a' <= r && r <= 'z' {
254 r -= 'a' - 'A'
255 }
256 return r
257 }
258 return To(UpperCase, r)
259 }
260
261
262 func ToLower(r rune) rune {
263 if r <= MaxASCII {
264 if 'A' <= r && r <= 'Z' {
265 r += 'a' - 'A'
266 }
267 return r
268 }
269 return To(LowerCase, r)
270 }
271
272
273 func ToTitle(r rune) rune {
274 if r <= MaxASCII {
275 if 'a' <= r && r <= 'z' {
276 r -= 'a' - 'A'
277 }
278 return r
279 }
280 return To(TitleCase, r)
281 }
282
283
284 func (special SpecialCase) ToUpper(r rune) rune {
285 r1 := to(UpperCase, r, []CaseRange(special))
286 if r1 == r {
287 r1 = ToUpper(r)
288 }
289 return r1
290 }
291
292
293 func (special SpecialCase) ToTitle(r rune) rune {
294 r1 := to(TitleCase, r, []CaseRange(special))
295 if r1 == r {
296 r1 = ToTitle(r)
297 }
298 return r1
299 }
300
301
302 func (special SpecialCase) ToLower(r rune) rune {
303 r1 := to(LowerCase, r, []CaseRange(special))
304 if r1 == r {
305 r1 = ToLower(r)
306 }
307 return r1
308 }
309
310
311
312
313
// foldPair is one entry of a folding table: SimpleFold searches the table by
// From and, on an exact match, returns To.
type foldPair struct {
	From, To uint16
}
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337 func SimpleFold(r rune) rune {
338 if r < 0 || r > MaxRune {
339 return r
340 }
341
342 if int(r) < len(asciiFold) {
343 return rune(asciiFold[r])
344 }
345
346
347 lo := 0
348 hi := len(caseOrbit)
349 for lo < hi {
350 m := lo + (hi-lo)/2
351 if rune(caseOrbit[m].From) < r {
352 lo = m + 1
353 } else {
354 hi = m
355 }
356 }
357 if lo < len(caseOrbit) && rune(caseOrbit[lo].From) == r {
358 return rune(caseOrbit[lo].To)
359 }
360
361
362
363
364 if l := ToLower(r); l != r {
365 return l
366 }
367 return ToUpper(r)
368 }
369
View as plain text