00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
/*
 * Scanning macros.  All of them expect a "struct vars *v" to be in scope;
 * v->now is the next unread chr and v->stop is one past the last chr.
 */
#define ATEOS()			(v->now >= v->stop)			/* nothing left to read? */
#define HAVE(n)			(v->stop - v->now >= (n))	/* at least n chrs left? */
#define NEXT1(c)		(!ATEOS() && v->now[0] == CHR(c))
#define NEXT2(a,b)		(HAVE(2) && v->now[0] == CHR(a) && v->now[1] == CHR(b))
#define NEXT3(a,b,c)	(HAVE(3) && v->now[0] == CHR(a) && \
						 v->now[1] == CHR(b) && \
						 v->now[2] == CHR(c))

/* token-producing macros: record the token, then return from the caller */
#define SET(c)			(v->nexttype = (c))
#define SETV(c, n)		(v->nexttype = (c), v->nextvalue = (n))
#define RET(c)			return (SET(c), 1)			/* token without a value */
#define RETV(c, n)		return (SETV(c, n), 1)		/* token with a value */
#define FAILW(e)		return (ERR(e), 0)			/* report error e, return 0 */
#define LASTTYPE(t)		(v->lasttype == (t))		/* was previous token t? */


/* lexical contexts; the current one is kept in v->lexcon */
#define L_ERE	1				/* mainline ERE/ARE */
#define L_BRE	2				/* mainline BRE */
#define L_Q		3				/* quoted (literal) text, see REG_QUOTE */
#define L_EBND	4				/* bound opened by "{" in ERE context */
#define L_BBND	5				/* bound opened by "\{" in BRE context */
#define L_BRACK 6				/* bracket expression */
#define L_CEL	7				/* inside "[." ... ".]" */
#define L_ECL	8				/* inside "[=" ... "=]" */
#define L_CCL	9				/* inside "[:" ... ":]" */
#define INTOCON(c)		(v->lexcon = (c))			/* switch context */
#define INCON(con)		(v->lexcon == (con))		/* test context */
00062
00063
/*
 * ENDOF - pointer just past the last element of a true array.
 *
 * The element count is derived from the array's own element size, so the
 * result is right for any element type.  Only valid for real arrays, not
 * for pointers or array-typed function parameters.
 */
#define ENDOF(array)	((array) + sizeof(array) / sizeof((array)[0]))
00065
00066
00067
00068
00069 static void
00070 lexstart(struct vars * v)
00071 {
00072 prefixes(v);
00073 NOERR();
00074
00075 if (v->cflags & REG_QUOTE)
00076 {
00077 assert(!(v->cflags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE)));
00078 INTOCON(L_Q);
00079 }
00080 else if (v->cflags & REG_EXTENDED)
00081 {
00082 assert(!(v->cflags & REG_QUOTE));
00083 INTOCON(L_ERE);
00084 }
00085 else
00086 {
00087 assert(!(v->cflags & (REG_QUOTE | REG_ADVF)));
00088 INTOCON(L_BRE);
00089 }
00090
00091 v->nexttype = EMPTY;
00092 next(v);
00093 }
00094
00095
00096
00097
00098 static void
00099 prefixes(struct vars * v)
00100 {
00101
00102 if (v->cflags & REG_QUOTE)
00103 return;
00104
00105
00106 if (HAVE(4) && NEXT3('*', '*', '*'))
00107 switch (*(v->now + 3))
00108 {
00109 case CHR('?'):
00110 ERR(REG_BADPAT);
00111 return;
00112 break;
00113 case CHR('='):
00114 NOTE(REG_UNONPOSIX);
00115 v->cflags |= REG_QUOTE;
00116 v->cflags &= ~(REG_ADVANCED | REG_EXPANDED | REG_NEWLINE);
00117 v->now += 4;
00118 return;
00119 break;
00120 case CHR(':'):
00121 NOTE(REG_UNONPOSIX);
00122 v->cflags |= REG_ADVANCED;
00123 v->now += 4;
00124 break;
00125 default:
00126 ERR(REG_BADRPT);
00127 return;
00128 break;
00129 }
00130
00131
00132 if ((v->cflags & REG_ADVANCED) != REG_ADVANCED)
00133 return;
00134
00135
00136 if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2)))
00137 {
00138 NOTE(REG_UNONPOSIX);
00139 v->now += 2;
00140 for (; !ATEOS() && iscalpha(*v->now); v->now++)
00141 switch (*v->now)
00142 {
00143 case CHR('b'):
00144 v->cflags &= ~(REG_ADVANCED | REG_QUOTE);
00145 break;
00146 case CHR('c'):
00147 v->cflags &= ~REG_ICASE;
00148 break;
00149 case CHR('e'):
00150 v->cflags |= REG_EXTENDED;
00151 v->cflags &= ~(REG_ADVF | REG_QUOTE);
00152 break;
00153 case CHR('i'):
00154 v->cflags |= REG_ICASE;
00155 break;
00156 case CHR('m'):
00157 case CHR('n'):
00158 v->cflags |= REG_NEWLINE;
00159 break;
00160 case CHR('p'):
00161 v->cflags |= REG_NLSTOP;
00162 v->cflags &= ~REG_NLANCH;
00163 break;
00164 case CHR('q'):
00165 v->cflags |= REG_QUOTE;
00166 v->cflags &= ~REG_ADVANCED;
00167 break;
00168 case CHR('s'):
00169 v->cflags &= ~REG_NEWLINE;
00170 break;
00171 case CHR('t'):
00172 v->cflags &= ~REG_EXPANDED;
00173 break;
00174 case CHR('w'):
00175 v->cflags &= ~REG_NLSTOP;
00176 v->cflags |= REG_NLANCH;
00177 break;
00178 case CHR('x'):
00179 v->cflags |= REG_EXPANDED;
00180 break;
00181 default:
00182 ERR(REG_BADOPT);
00183 return;
00184 }
00185 if (!NEXT1(')'))
00186 {
00187 ERR(REG_BADOPT);
00188 return;
00189 }
00190 v->now++;
00191 if (v->cflags & REG_QUOTE)
00192 v->cflags &= ~(REG_EXPANDED | REG_NEWLINE);
00193 }
00194 }
00195
00196
00197
00198
00199
00200
00201
00202 static void
00203 lexnest(struct vars * v,
00204 const chr *beginp,
00205 const chr *endp)
00206 {
00207 assert(v->savenow == NULL);
00208 v->savenow = v->now;
00209 v->savestop = v->stop;
00210 v->now = beginp;
00211 v->stop = endp;
00212 }
00213
00214
00215
00216
00217 static const chr backd[] = {
00218 CHR('['), CHR('['), CHR(':'),
00219 CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
00220 CHR(':'), CHR(']'), CHR(']')
00221 };
00222 static const chr backD[] = {
00223 CHR('['), CHR('^'), CHR('['), CHR(':'),
00224 CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
00225 CHR(':'), CHR(']'), CHR(']')
00226 };
00227 static const chr brbackd[] = {
00228 CHR('['), CHR(':'),
00229 CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
00230 CHR(':'), CHR(']')
00231 };
00232 static const chr backs[] = {
00233 CHR('['), CHR('['), CHR(':'),
00234 CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
00235 CHR(':'), CHR(']'), CHR(']')
00236 };
00237 static const chr backS[] = {
00238 CHR('['), CHR('^'), CHR('['), CHR(':'),
00239 CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
00240 CHR(':'), CHR(']'), CHR(']')
00241 };
00242 static const chr brbacks[] = {
00243 CHR('['), CHR(':'),
00244 CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
00245 CHR(':'), CHR(']')
00246 };
00247 static const chr backw[] = {
00248 CHR('['), CHR('['), CHR(':'),
00249 CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
00250 CHR(':'), CHR(']'), CHR('_'), CHR(']')
00251 };
00252 static const chr backW[] = {
00253 CHR('['), CHR('^'), CHR('['), CHR(':'),
00254 CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
00255 CHR(':'), CHR(']'), CHR('_'), CHR(']')
00256 };
00257 static const chr brbackw[] = {
00258 CHR('['), CHR(':'),
00259 CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
00260 CHR(':'), CHR(']'), CHR('_')
00261 };
00262
00263
00264
00265
00266
00267 static void
00268 lexword(struct vars * v)
00269 {
00270 lexnest(v, backw, ENDOF(backw));
00271 }
00272
00273
00274
00275
00276 static int
00277 next(struct vars * v)
00278 {
00279 chr c;
00280
00281
00282 if (ISERR())
00283 return 0;
00284
00285
00286 v->lasttype = v->nexttype;
00287
00288
00289 if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY))
00290 {
00291
00292 RETV(SBEGIN, 0);
00293 }
00294
00295
00296 if (v->savenow != NULL && ATEOS())
00297 {
00298 v->now = v->savenow;
00299 v->stop = v->savestop;
00300 v->savenow = v->savestop = NULL;
00301 }
00302
00303
00304 if (v->cflags & REG_EXPANDED)
00305 switch (v->lexcon)
00306 {
00307 case L_ERE:
00308 case L_BRE:
00309 case L_EBND:
00310 case L_BBND:
00311 skip(v);
00312 break;
00313 }
00314
00315
00316 if (ATEOS())
00317 {
00318 switch (v->lexcon)
00319 {
00320 case L_ERE:
00321 case L_BRE:
00322 case L_Q:
00323 RET(EOS);
00324 break;
00325 case L_EBND:
00326 case L_BBND:
00327 FAILW(REG_EBRACE);
00328 break;
00329 case L_BRACK:
00330 case L_CEL:
00331 case L_ECL:
00332 case L_CCL:
00333 FAILW(REG_EBRACK);
00334 break;
00335 }
00336 assert(NOTREACHED);
00337 }
00338
00339
00340 c = *v->now++;
00341
00342
00343 switch (v->lexcon)
00344 {
00345 case L_BRE:
00346 return brenext(v, c);
00347 break;
00348 case L_ERE:
00349 break;
00350 case L_Q:
00351 RETV(PLAIN, c);
00352 break;
00353 case L_BBND:
00354 case L_EBND:
00355 switch (c)
00356 {
00357 case CHR('0'):
00358 case CHR('1'):
00359 case CHR('2'):
00360 case CHR('3'):
00361 case CHR('4'):
00362 case CHR('5'):
00363 case CHR('6'):
00364 case CHR('7'):
00365 case CHR('8'):
00366 case CHR('9'):
00367 RETV(DIGIT, (chr) DIGITVAL(c));
00368 break;
00369 case CHR(','):
00370 RET(',');
00371 break;
00372 case CHR('}'):
00373 if (INCON(L_EBND))
00374 {
00375 INTOCON(L_ERE);
00376 if ((v->cflags & REG_ADVF) && NEXT1('?'))
00377 {
00378 v->now++;
00379 NOTE(REG_UNONPOSIX);
00380 RETV('}', 0);
00381 }
00382 RETV('}', 1);
00383 }
00384 else
00385 FAILW(REG_BADBR);
00386 break;
00387 case CHR('\\'):
00388 if (INCON(L_BBND) && NEXT1('}'))
00389 {
00390 v->now++;
00391 INTOCON(L_BRE);
00392 RET('}');
00393 }
00394 else
00395 FAILW(REG_BADBR);
00396 break;
00397 default:
00398 FAILW(REG_BADBR);
00399 break;
00400 }
00401 assert(NOTREACHED);
00402 break;
00403 case L_BRACK:
00404 switch (c)
00405 {
00406 case CHR(']'):
00407 if (LASTTYPE('['))
00408 RETV(PLAIN, c);
00409 else
00410 {
00411 INTOCON((v->cflags & REG_EXTENDED) ?
00412 L_ERE : L_BRE);
00413 RET(']');
00414 }
00415 break;
00416 case CHR('\\'):
00417 NOTE(REG_UBBS);
00418 if (!(v->cflags & REG_ADVF))
00419 RETV(PLAIN, c);
00420 NOTE(REG_UNONPOSIX);
00421 if (ATEOS())
00422 FAILW(REG_EESCAPE);
00423 (DISCARD) lexescape(v);
00424 switch (v->nexttype)
00425 {
00426 case PLAIN:
00427 return 1;
00428 break;
00429 case CCLASS:
00430 switch (v->nextvalue)
00431 {
00432 case 'd':
00433 lexnest(v, brbackd, ENDOF(brbackd));
00434 break;
00435 case 's':
00436 lexnest(v, brbacks, ENDOF(brbacks));
00437 break;
00438 case 'w':
00439 lexnest(v, brbackw, ENDOF(brbackw));
00440 break;
00441 default:
00442 FAILW(REG_EESCAPE);
00443 break;
00444 }
00445
00446 v->nexttype = v->lasttype;
00447 return next(v);
00448 break;
00449 }
00450
00451 FAILW(REG_EESCAPE);
00452 break;
00453 case CHR('-'):
00454 if (LASTTYPE('[') || NEXT1(']'))
00455 RETV(PLAIN, c);
00456 else
00457 RETV(RANGE, c);
00458 break;
00459 case CHR('['):
00460 if (ATEOS())
00461 FAILW(REG_EBRACK);
00462 switch (*v->now++)
00463 {
00464 case CHR('.'):
00465 INTOCON(L_CEL);
00466
00467 RET(COLLEL);
00468 break;
00469 case CHR('='):
00470 INTOCON(L_ECL);
00471 NOTE(REG_ULOCALE);
00472 RET(ECLASS);
00473 break;
00474 case CHR(':'):
00475 INTOCON(L_CCL);
00476 NOTE(REG_ULOCALE);
00477 RET(CCLASS);
00478 break;
00479 default:
00480 v->now--;
00481 RETV(PLAIN, c);
00482 break;
00483 }
00484 assert(NOTREACHED);
00485 break;
00486 default:
00487 RETV(PLAIN, c);
00488 break;
00489 }
00490 assert(NOTREACHED);
00491 break;
00492 case L_CEL:
00493 if (c == CHR('.') && NEXT1(']'))
00494 {
00495 v->now++;
00496 INTOCON(L_BRACK);
00497 RETV(END, '.');
00498 }
00499 else
00500 RETV(PLAIN, c);
00501 break;
00502 case L_ECL:
00503 if (c == CHR('=') && NEXT1(']'))
00504 {
00505 v->now++;
00506 INTOCON(L_BRACK);
00507 RETV(END, '=');
00508 }
00509 else
00510 RETV(PLAIN, c);
00511 break;
00512 case L_CCL:
00513 if (c == CHR(':') && NEXT1(']'))
00514 {
00515 v->now++;
00516 INTOCON(L_BRACK);
00517 RETV(END, ':');
00518 }
00519 else
00520 RETV(PLAIN, c);
00521 break;
00522 default:
00523 assert(NOTREACHED);
00524 break;
00525 }
00526
00527
00528 assert(INCON(L_ERE));
00529
00530
00531 switch (c)
00532 {
00533 case CHR('|'):
00534 RET('|');
00535 break;
00536 case CHR('*'):
00537 if ((v->cflags & REG_ADVF) && NEXT1('?'))
00538 {
00539 v->now++;
00540 NOTE(REG_UNONPOSIX);
00541 RETV('*', 0);
00542 }
00543 RETV('*', 1);
00544 break;
00545 case CHR('+'):
00546 if ((v->cflags & REG_ADVF) && NEXT1('?'))
00547 {
00548 v->now++;
00549 NOTE(REG_UNONPOSIX);
00550 RETV('+', 0);
00551 }
00552 RETV('+', 1);
00553 break;
00554 case CHR('?'):
00555 if ((v->cflags & REG_ADVF) && NEXT1('?'))
00556 {
00557 v->now++;
00558 NOTE(REG_UNONPOSIX);
00559 RETV('?', 0);
00560 }
00561 RETV('?', 1);
00562 break;
00563 case CHR('{'):
00564 if (v->cflags & REG_EXPANDED)
00565 skip(v);
00566 if (ATEOS() || !iscdigit(*v->now))
00567 {
00568 NOTE(REG_UBRACES);
00569 NOTE(REG_UUNSPEC);
00570 RETV(PLAIN, c);
00571 }
00572 else
00573 {
00574 NOTE(REG_UBOUNDS);
00575 INTOCON(L_EBND);
00576 RET('{');
00577 }
00578 assert(NOTREACHED);
00579 break;
00580 case CHR('('):
00581 if ((v->cflags & REG_ADVF) && NEXT1('?'))
00582 {
00583 NOTE(REG_UNONPOSIX);
00584 v->now++;
00585 switch (*v->now++)
00586 {
00587 case CHR(':'):
00588 RETV('(', 0);
00589 break;
00590 case CHR('#'):
00591 while (!ATEOS() && *v->now != CHR(')'))
00592 v->now++;
00593 if (!ATEOS())
00594 v->now++;
00595 assert(v->nexttype == v->lasttype);
00596 return next(v);
00597 break;
00598 case CHR('='):
00599 NOTE(REG_ULOOKAHEAD);
00600 RETV(LACON, 1);
00601 break;
00602 case CHR('!'):
00603 NOTE(REG_ULOOKAHEAD);
00604 RETV(LACON, 0);
00605 break;
00606 default:
00607 FAILW(REG_BADRPT);
00608 break;
00609 }
00610 assert(NOTREACHED);
00611 }
00612 if (v->cflags & REG_NOSUB)
00613 RETV('(', 0);
00614 else
00615 RETV('(', 1);
00616 break;
00617 case CHR(')'):
00618 if (LASTTYPE('('))
00619 NOTE(REG_UUNSPEC);
00620 RETV(')', c);
00621 break;
00622 case CHR('['):
00623 if (HAVE(6) && *(v->now + 0) == CHR('[') &&
00624 *(v->now + 1) == CHR(':') &&
00625 (*(v->now + 2) == CHR('<') ||
00626 *(v->now + 2) == CHR('>')) &&
00627 *(v->now + 3) == CHR(':') &&
00628 *(v->now + 4) == CHR(']') &&
00629 *(v->now + 5) == CHR(']'))
00630 {
00631 c = *(v->now + 2);
00632 v->now += 6;
00633 NOTE(REG_UNONPOSIX);
00634 RET((c == CHR('<')) ? '<' : '>');
00635 }
00636 INTOCON(L_BRACK);
00637 if (NEXT1('^'))
00638 {
00639 v->now++;
00640 RETV('[', 0);
00641 }
00642 RETV('[', 1);
00643 break;
00644 case CHR('.'):
00645 RET('.');
00646 break;
00647 case CHR('^'):
00648 RET('^');
00649 break;
00650 case CHR('$'):
00651 RET('$');
00652 break;
00653 case CHR('\\'):
00654 if (ATEOS())
00655 FAILW(REG_EESCAPE);
00656 break;
00657 default:
00658 RETV(PLAIN, c);
00659 break;
00660 }
00661
00662
00663 assert(!ATEOS());
00664 if (!(v->cflags & REG_ADVF))
00665 {
00666 if (iscalnum(*v->now))
00667 {
00668 NOTE(REG_UBSALNUM);
00669 NOTE(REG_UUNSPEC);
00670 }
00671 RETV(PLAIN, *v->now++);
00672 }
00673 (DISCARD) lexescape(v);
00674 if (ISERR())
00675 FAILW(REG_EESCAPE);
00676 if (v->nexttype == CCLASS)
00677 {
00678 switch (v->nextvalue)
00679 {
00680 case 'd':
00681 lexnest(v, backd, ENDOF(backd));
00682 break;
00683 case 'D':
00684 lexnest(v, backD, ENDOF(backD));
00685 break;
00686 case 's':
00687 lexnest(v, backs, ENDOF(backs));
00688 break;
00689 case 'S':
00690 lexnest(v, backS, ENDOF(backS));
00691 break;
00692 case 'w':
00693 lexnest(v, backw, ENDOF(backw));
00694 break;
00695 case 'W':
00696 lexnest(v, backW, ENDOF(backW));
00697 break;
00698 default:
00699 assert(NOTREACHED);
00700 FAILW(REG_ASSERT);
00701 break;
00702 }
00703
00704 v->nexttype = v->lasttype;
00705 return next(v);
00706 }
00707
00708 return !ISERR();
00709 }
00710
00711
00712
00713
00714
00715 static int
00716 lexescape(struct vars * v)
00717 {
00718 chr c;
00719 static chr alert[] = {
00720 CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
00721 };
00722 static chr esc[] = {
00723 CHR('E'), CHR('S'), CHR('C')
00724 };
00725 const chr *save;
00726
00727 assert(v->cflags & REG_ADVF);
00728
00729 assert(!ATEOS());
00730 c = *v->now++;
00731 if (!iscalnum(c))
00732 RETV(PLAIN, c);
00733
00734 NOTE(REG_UNONPOSIX);
00735 switch (c)
00736 {
00737 case CHR('a'):
00738 RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
00739 break;
00740 case CHR('A'):
00741 RETV(SBEGIN, 0);
00742 break;
00743 case CHR('b'):
00744 RETV(PLAIN, CHR('\b'));
00745 break;
00746 case CHR('B'):
00747 RETV(PLAIN, CHR('\\'));
00748 break;
00749 case CHR('c'):
00750 NOTE(REG_UUNPORT);
00751 if (ATEOS())
00752 FAILW(REG_EESCAPE);
00753 RETV(PLAIN, (chr) (*v->now++ & 037));
00754 break;
00755 case CHR('d'):
00756 NOTE(REG_ULOCALE);
00757 RETV(CCLASS, 'd');
00758 break;
00759 case CHR('D'):
00760 NOTE(REG_ULOCALE);
00761 RETV(CCLASS, 'D');
00762 break;
00763 case CHR('e'):
00764 NOTE(REG_UUNPORT);
00765 RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
00766 break;
00767 case CHR('f'):
00768 RETV(PLAIN, CHR('\f'));
00769 break;
00770 case CHR('m'):
00771 RET('<');
00772 break;
00773 case CHR('M'):
00774 RET('>');
00775 break;
00776 case CHR('n'):
00777 RETV(PLAIN, CHR('\n'));
00778 break;
00779 case CHR('r'):
00780 RETV(PLAIN, CHR('\r'));
00781 break;
00782 case CHR('s'):
00783 NOTE(REG_ULOCALE);
00784 RETV(CCLASS, 's');
00785 break;
00786 case CHR('S'):
00787 NOTE(REG_ULOCALE);
00788 RETV(CCLASS, 'S');
00789 break;
00790 case CHR('t'):
00791 RETV(PLAIN, CHR('\t'));
00792 break;
00793 case CHR('u'):
00794 c = lexdigits(v, 16, 4, 4);
00795 if (ISERR())
00796 FAILW(REG_EESCAPE);
00797 RETV(PLAIN, c);
00798 break;
00799 case CHR('U'):
00800 c = lexdigits(v, 16, 8, 8);
00801 if (ISERR())
00802 FAILW(REG_EESCAPE);
00803 RETV(PLAIN, c);
00804 break;
00805 case CHR('v'):
00806 RETV(PLAIN, CHR('\v'));
00807 break;
00808 case CHR('w'):
00809 NOTE(REG_ULOCALE);
00810 RETV(CCLASS, 'w');
00811 break;
00812 case CHR('W'):
00813 NOTE(REG_ULOCALE);
00814 RETV(CCLASS, 'W');
00815 break;
00816 case CHR('x'):
00817 NOTE(REG_UUNPORT);
00818 c = lexdigits(v, 16, 1, 255);
00819 if (ISERR())
00820 FAILW(REG_EESCAPE);
00821 RETV(PLAIN, c);
00822 break;
00823 case CHR('y'):
00824 NOTE(REG_ULOCALE);
00825 RETV(WBDRY, 0);
00826 break;
00827 case CHR('Y'):
00828 NOTE(REG_ULOCALE);
00829 RETV(NWBDRY, 0);
00830 break;
00831 case CHR('Z'):
00832 RETV(SEND, 0);
00833 break;
00834 case CHR('1'):
00835 case CHR('2'):
00836 case CHR('3'):
00837 case CHR('4'):
00838 case CHR('5'):
00839 case CHR('6'):
00840 case CHR('7'):
00841 case CHR('8'):
00842 case CHR('9'):
00843 save = v->now;
00844 v->now--;
00845 c = lexdigits(v, 10, 1, 255);
00846 if (ISERR())
00847 FAILW(REG_EESCAPE);
00848
00849 if (v->now == save || ((int) c > 0 && (int) c <= v->nsubexp))
00850 {
00851 NOTE(REG_UBACKREF);
00852 RETV(BACKREF, (chr) c);
00853 }
00854
00855 v->now = save;
00856
00857 case CHR('0'):
00858 NOTE(REG_UUNPORT);
00859 v->now--;
00860 c = lexdigits(v, 8, 1, 3);
00861 if (ISERR())
00862 FAILW(REG_EESCAPE);
00863 RETV(PLAIN, c);
00864 break;
00865 default:
00866 assert(iscalpha(c));
00867 FAILW(REG_EESCAPE);
00868 break;
00869 }
00870 assert(NOTREACHED);
00871 }
00872
00873
00874
00875
00876 static chr
00877 lexdigits(struct vars * v,
00878 int base,
00879 int minlen,
00880 int maxlen)
00881 {
00882 uchr n;
00883 int len;
00884 chr c;
00885 int d;
00886 const uchr ub = (uchr) base;
00887
00888 n = 0;
00889 for (len = 0; len < maxlen && !ATEOS(); len++)
00890 {
00891 c = *v->now++;
00892 switch (c)
00893 {
00894 case CHR('0'):
00895 case CHR('1'):
00896 case CHR('2'):
00897 case CHR('3'):
00898 case CHR('4'):
00899 case CHR('5'):
00900 case CHR('6'):
00901 case CHR('7'):
00902 case CHR('8'):
00903 case CHR('9'):
00904 d = DIGITVAL(c);
00905 break;
00906 case CHR('a'):
00907 case CHR('A'):
00908 d = 10;
00909 break;
00910 case CHR('b'):
00911 case CHR('B'):
00912 d = 11;
00913 break;
00914 case CHR('c'):
00915 case CHR('C'):
00916 d = 12;
00917 break;
00918 case CHR('d'):
00919 case CHR('D'):
00920 d = 13;
00921 break;
00922 case CHR('e'):
00923 case CHR('E'):
00924 d = 14;
00925 break;
00926 case CHR('f'):
00927 case CHR('F'):
00928 d = 15;
00929 break;
00930 default:
00931 v->now--;
00932 d = -1;
00933 break;
00934 }
00935
00936 if (d >= base)
00937 {
00938 v->now--;
00939 d = -1;
00940 }
00941 if (d < 0)
00942 break;
00943 n = n * ub + (uchr) d;
00944 }
00945 if (len < minlen)
00946 ERR(REG_EESCAPE);
00947
00948 return (chr) n;
00949 }
00950
00951
00952
00953
00954
00955
00956
00957 static int
00958 brenext(struct vars * v,
00959 chr pc)
00960 {
00961 chr c = (chr) pc;
00962
00963 switch (c)
00964 {
00965 case CHR('*'):
00966 if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
00967 RETV(PLAIN, c);
00968 RET('*');
00969 break;
00970 case CHR('['):
00971 if (HAVE(6) && *(v->now + 0) == CHR('[') &&
00972 *(v->now + 1) == CHR(':') &&
00973 (*(v->now + 2) == CHR('<') ||
00974 *(v->now + 2) == CHR('>')) &&
00975 *(v->now + 3) == CHR(':') &&
00976 *(v->now + 4) == CHR(']') &&
00977 *(v->now + 5) == CHR(']'))
00978 {
00979 c = *(v->now + 2);
00980 v->now += 6;
00981 NOTE(REG_UNONPOSIX);
00982 RET((c == CHR('<')) ? '<' : '>');
00983 }
00984 INTOCON(L_BRACK);
00985 if (NEXT1('^'))
00986 {
00987 v->now++;
00988 RETV('[', 0);
00989 }
00990 RETV('[', 1);
00991 break;
00992 case CHR('.'):
00993 RET('.');
00994 break;
00995 case CHR('^'):
00996 if (LASTTYPE(EMPTY))
00997 RET('^');
00998 if (LASTTYPE('('))
00999 {
01000 NOTE(REG_UUNSPEC);
01001 RET('^');
01002 }
01003 RETV(PLAIN, c);
01004 break;
01005 case CHR('$'):
01006 if (v->cflags & REG_EXPANDED)
01007 skip(v);
01008 if (ATEOS())
01009 RET('$');
01010 if (NEXT2('\\', ')'))
01011 {
01012 NOTE(REG_UUNSPEC);
01013 RET('$');
01014 }
01015 RETV(PLAIN, c);
01016 break;
01017 case CHR('\\'):
01018 break;
01019 default:
01020 RETV(PLAIN, c);
01021 break;
01022 }
01023
01024 assert(c == CHR('\\'));
01025
01026 if (ATEOS())
01027 FAILW(REG_EESCAPE);
01028
01029 c = *v->now++;
01030 switch (c)
01031 {
01032 case CHR('{'):
01033 INTOCON(L_BBND);
01034 NOTE(REG_UBOUNDS);
01035 RET('{');
01036 break;
01037 case CHR('('):
01038 RETV('(', 1);
01039 break;
01040 case CHR(')'):
01041 RETV(')', c);
01042 break;
01043 case CHR('<'):
01044 NOTE(REG_UNONPOSIX);
01045 RET('<');
01046 break;
01047 case CHR('>'):
01048 NOTE(REG_UNONPOSIX);
01049 RET('>');
01050 break;
01051 case CHR('1'):
01052 case CHR('2'):
01053 case CHR('3'):
01054 case CHR('4'):
01055 case CHR('5'):
01056 case CHR('6'):
01057 case CHR('7'):
01058 case CHR('8'):
01059 case CHR('9'):
01060 NOTE(REG_UBACKREF);
01061 RETV(BACKREF, (chr) DIGITVAL(c));
01062 break;
01063 default:
01064 if (iscalnum(c))
01065 {
01066 NOTE(REG_UBSALNUM);
01067 NOTE(REG_UUNSPEC);
01068 }
01069 RETV(PLAIN, c);
01070 break;
01071 }
01072
01073 assert(NOTREACHED);
01074 return 0;
01075 }
01076
01077
01078
01079
01080 static void
01081 skip(struct vars * v)
01082 {
01083 const chr *start = v->now;
01084
01085 assert(v->cflags & REG_EXPANDED);
01086
01087 for (;;)
01088 {
01089 while (!ATEOS() && iscspace(*v->now))
01090 v->now++;
01091 if (ATEOS() || *v->now != CHR('#'))
01092 break;
01093 assert(NEXT1('#'));
01094 while (!ATEOS() && *v->now != CHR('\n'))
01095 v->now++;
01096
01097 }
01098
01099 if (v->now != start)
01100 NOTE(REG_UNONPOSIX);
01101 }
01102
01103
01104
01105
01106
01107
01108 static chr
01109 newline(void)
01110 {
01111 return CHR('\n');
01112 }
01113
01114
01115
01116
01117
01118
01119
01120 static chr
01121 chrnamed(struct vars * v,
01122 const chr *startp,
01123 const chr *endp,
01124 chr lastresort)
01125 {
01126 celt c;
01127 int errsave;
01128 int e;
01129 struct cvec *cv;
01130
01131 errsave = v->err;
01132 v->err = 0;
01133 c = element(v, startp, endp);
01134 e = v->err;
01135 v->err = errsave;
01136
01137 if (e != 0)
01138 return (chr) lastresort;
01139
01140 cv = range(v, c, c, 0);
01141 if (cv->nchrs == 0)
01142 return (chr) lastresort;
01143 return cv->chrs[0];
01144 }