00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
/*
 * Named collating elements.
 *
 * Each entry pairs a symbolic name with the single character code it stands
 * for.  Several codes have more than one name (for example "BEL" and
 * "alert").  The table is terminated by an entry whose name is NULL;
 * element() scans it linearly, so the first matching name wins.
 */
static const struct cname
{
	const char *name;
	const char	code;
}	cnames[] =
{
	/* control characters */
	{"NUL", '\0'},
	{"SOH", '\001'},
	{"STX", '\002'},
	{"ETX", '\003'},
	{"EOT", '\004'},
	{"ENQ", '\005'},
	{"ACK", '\006'},
	{"BEL", '\007'},
	{"alert", '\007'},
	{"BS", '\010'},
	{"backspace", '\b'},
	{"HT", '\011'},
	{"tab", '\t'},
	{"LF", '\012'},
	{"newline", '\n'},
	{"VT", '\013'},
	{"vertical-tab", '\v'},
	{"FF", '\014'},
	{"form-feed", '\f'},
	{"CR", '\015'},
	{"carriage-return", '\r'},
	{"SO", '\016'},
	{"SI", '\017'},
	{"DLE", '\020'},
	{"DC1", '\021'},
	{"DC2", '\022'},
	{"DC3", '\023'},
	{"DC4", '\024'},
	{"NAK", '\025'},
	{"SYN", '\026'},
	{"ETB", '\027'},
	{"CAN", '\030'},
	{"EM", '\031'},
	{"SUB", '\032'},
	{"ESC", '\033'},
	{"IS4", '\034'},
	{"FS", '\034'},
	{"IS3", '\035'},
	{"GS", '\035'},
	{"IS2", '\036'},
	{"RS", '\036'},
	{"IS1", '\037'},
	{"US", '\037'},

	/* space and punctuation below the digits */
	{"space", ' '},
	{"exclamation-mark", '!'},
	{"quotation-mark", '"'},
	{"number-sign", '#'},
	{"dollar-sign", '$'},
	{"percent-sign", '%'},
	{"ampersand", '&'},
	{"apostrophe", '\''},
	{"left-parenthesis", '('},
	{"right-parenthesis", ')'},
	{"asterisk", '*'},
	{"plus-sign", '+'},
	{"comma", ','},
	{"hyphen", '-'},
	{"hyphen-minus", '-'},
	{"period", '.'},
	{"full-stop", '.'},
	{"slash", '/'},
	{"solidus", '/'},

	/* digits */
	{"zero", '0'},
	{"one", '1'},
	{"two", '2'},
	{"three", '3'},
	{"four", '4'},
	{"five", '5'},
	{"six", '6'},
	{"seven", '7'},
	{"eight", '8'},
	{"nine", '9'},

	/* remaining punctuation */
	{"colon", ':'},
	{"semicolon", ';'},
	{"less-than-sign", '<'},
	{"equals-sign", '='},
	{"greater-than-sign", '>'},
	{"question-mark", '?'},
	{"commercial-at", '@'},
	{"left-square-bracket", '['},
	{"backslash", '\\'},
	{"reverse-solidus", '\\'},
	{"right-square-bracket", ']'},
	{"circumflex", '^'},
	{"circumflex-accent", '^'},
	{"underscore", '_'},
	{"low-line", '_'},
	{"grave-accent", '`'},
	{"left-brace", '{'},
	{"left-curly-bracket", '{'},
	{"vertical-line", '|'},
	{"right-brace", '}'},
	{"right-curly-bracket", '}'},
	{"tilde", '~'},
	{"DEL", '\177'},

	/* end-of-table sentinel */
	{NULL, 0}
};
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366 static celt
00367 element(struct vars * v,
00368 const chr *startp,
00369 const chr *endp)
00370 {
00371 const struct cname *cn;
00372 size_t len;
00373
00374
00375 assert(startp < endp);
00376 len = endp - startp;
00377 if (len == 1)
00378 return *startp;
00379
00380 NOTE(REG_ULOCALE);
00381
00382
00383 for (cn = cnames; cn->name != NULL; cn++)
00384 {
00385 if (strlen(cn->name) == len &&
00386 pg_char_and_wchar_strncmp(cn->name, startp, len) == 0)
00387 {
00388 break;
00389 }
00390 }
00391 if (cn->name != NULL)
00392 return CHR(cn->code);
00393
00394
00395 ERR(REG_ECOLLATE);
00396 return 0;
00397 }
00398
00399
00400
00401
00402 static struct cvec *
00403 range(struct vars * v,
00404 celt a,
00405 celt b,
00406 int cases)
00407 {
00408 int nchrs;
00409 struct cvec *cv;
00410 celt c,
00411 lc,
00412 uc;
00413
00414 if (a != b && !before(a, b))
00415 {
00416 ERR(REG_ERANGE);
00417 return NULL;
00418 }
00419
00420 if (!cases)
00421 {
00422 cv = getcvec(v, 0, 1);
00423 NOERRN();
00424 addrange(cv, a, b);
00425 return cv;
00426 }
00427
00428
00429
00430
00431
00432
00433
00434 nchrs = (b - a + 1) * 2 + 4;
00435
00436 cv = getcvec(v, nchrs, 0);
00437 NOERRN();
00438
00439 for (c = a; c <= b; c++)
00440 {
00441 addchr(cv, c);
00442 lc = pg_wc_tolower((chr) c);
00443 if (c != lc)
00444 addchr(cv, lc);
00445 uc = pg_wc_toupper((chr) c);
00446 if (c != uc)
00447 addchr(cv, uc);
00448 }
00449
00450 return cv;
00451 }
00452
00453
00454
00455
00456 static int
00457 before(celt x, celt y)
00458 {
00459 if (x < y)
00460 return 1;
00461 return 0;
00462 }
00463
00464
00465
00466
00467
00468 static struct cvec *
00469 eclass(struct vars * v,
00470 celt c,
00471
00472 int cases)
00473 {
00474 struct cvec *cv;
00475
00476
00477 if ((v->cflags & REG_FAKE) && c == 'x')
00478 {
00479 cv = getcvec(v, 4, 0);
00480 addchr(cv, (chr) 'x');
00481 addchr(cv, (chr) 'y');
00482 if (cases)
00483 {
00484 addchr(cv, (chr) 'X');
00485 addchr(cv, (chr) 'Y');
00486 }
00487 return cv;
00488 }
00489
00490
00491 if (cases)
00492 return allcases(v, c);
00493 cv = getcvec(v, 1, 0);
00494 assert(cv != NULL);
00495 addchr(cv, (chr) c);
00496 return cv;
00497 }
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507
00508 static struct cvec *
00509 cclass(struct vars * v,
00510 const chr *startp,
00511 const chr *endp,
00512 int cases)
00513 {
00514 size_t len;
00515 struct cvec *cv = NULL;
00516 const char *const * namePtr;
00517 int i,
00518 index;
00519
00520
00521
00522
00523
00524 static const char *const classNames[] = {
00525 "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
00526 "lower", "print", "punct", "space", "upper", "xdigit", NULL
00527 };
00528
00529 enum classes
00530 {
00531 CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
00532 CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
00533 };
00534
00535
00536
00537
00538 len = endp - startp;
00539 index = -1;
00540 for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++)
00541 {
00542 if (strlen(*namePtr) == len &&
00543 pg_char_and_wchar_strncmp(*namePtr, startp, len) == 0)
00544 {
00545 index = i;
00546 break;
00547 }
00548 }
00549 if (index == -1)
00550 {
00551 ERR(REG_ECTYPE);
00552 return NULL;
00553 }
00554
00555
00556
00557
00558
00559 if (cases &&
00560 ((enum classes) index == CC_LOWER ||
00561 (enum classes) index == CC_UPPER))
00562 index = (int) CC_ALPHA;
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572 switch ((enum classes) index)
00573 {
00574 case CC_PRINT:
00575 cv = pg_ctype_get_cache(pg_wc_isprint);
00576 break;
00577 case CC_ALNUM:
00578 cv = pg_ctype_get_cache(pg_wc_isalnum);
00579 break;
00580 case CC_ALPHA:
00581 cv = pg_ctype_get_cache(pg_wc_isalpha);
00582 break;
00583 case CC_ASCII:
00584
00585 cv = getcvec(v, 0, 1);
00586 if (cv)
00587 addrange(cv, 0, 0x7f);
00588 break;
00589 case CC_BLANK:
00590
00591 cv = getcvec(v, 2, 0);
00592 addchr(cv, '\t');
00593 addchr(cv, ' ');
00594 break;
00595 case CC_CNTRL:
00596
00597 cv = getcvec(v, 0, 2);
00598 addrange(cv, 0x0, 0x1f);
00599 addrange(cv, 0x7f, 0x9f);
00600 break;
00601 case CC_DIGIT:
00602 cv = pg_ctype_get_cache(pg_wc_isdigit);
00603 break;
00604 case CC_PUNCT:
00605 cv = pg_ctype_get_cache(pg_wc_ispunct);
00606 break;
00607 case CC_XDIGIT:
00608
00609
00610
00611
00612
00613
00614 cv = getcvec(v, 0, 3);
00615 if (cv)
00616 {
00617 addrange(cv, '0', '9');
00618 addrange(cv, 'a', 'f');
00619 addrange(cv, 'A', 'F');
00620 }
00621 break;
00622 case CC_SPACE:
00623 cv = pg_ctype_get_cache(pg_wc_isspace);
00624 break;
00625 case CC_LOWER:
00626 cv = pg_ctype_get_cache(pg_wc_islower);
00627 break;
00628 case CC_UPPER:
00629 cv = pg_ctype_get_cache(pg_wc_isupper);
00630 break;
00631 case CC_GRAPH:
00632 cv = pg_ctype_get_cache(pg_wc_isgraph);
00633 break;
00634 }
00635
00636
00637 if (cv == NULL)
00638 ERR(REG_ESPACE);
00639 return cv;
00640 }
00641
00642
00643
00644
00645
00646
00647
00648 static struct cvec *
00649 allcases(struct vars * v,
00650 chr pc)
00651 {
00652 struct cvec *cv;
00653 chr c = (chr) pc;
00654 chr lc,
00655 uc;
00656
00657 lc = pg_wc_tolower((chr) c);
00658 uc = pg_wc_toupper((chr) c);
00659
00660 cv = getcvec(v, 2, 0);
00661 addchr(cv, lc);
00662 if (lc != uc)
00663 addchr(cv, uc);
00664 return cv;
00665 }
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675 static int
00676 cmp(const chr *x, const chr *y,
00677 size_t len)
00678 {
00679 return memcmp(VS(x), VS(y), len * sizeof(chr));
00680 }
00681
00682
00683
00684
00685
00686
00687
00688
00689
00690 static int
00691 casecmp(const chr *x, const chr *y,
00692 size_t len)
00693 {
00694 for (; len > 0; len--, x++, y++)
00695 {
00696 if ((*x != *y) && (pg_wc_tolower(*x) != pg_wc_tolower(*y)))
00697 return 1;
00698 }
00699 return 0;
00700 }