00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include "regex/regguts.h"
00036
00037
00038
00039
00040
00041 static void moresubs(struct vars *, int);
00042 static int freev(struct vars *, int);
00043 static void makesearch(struct vars *, struct nfa *);
00044 static struct subre *parse(struct vars *, int, int, struct state *, struct state *);
00045 static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int);
00046 static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);
00047 static void nonword(struct vars *, int, struct state *, struct state *);
00048 static void word(struct vars *, int, struct state *, struct state *);
00049 static int scannum(struct vars *);
00050 static void repeat(struct vars *, struct state *, struct state *, int, int);
00051 static void bracket(struct vars *, struct state *, struct state *);
00052 static void cbracket(struct vars *, struct state *, struct state *);
00053 static void brackpart(struct vars *, struct state *, struct state *);
00054 static const chr *scanplain(struct vars *);
00055 static void onechr(struct vars *, chr, struct state *, struct state *);
00056 static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
00057 static void wordchrs(struct vars *);
00058 static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
00059 static void freesubre(struct vars *, struct subre *);
00060 static void freesrnode(struct vars *, struct subre *);
00061 static void optst(struct vars *, struct subre *);
00062 static int numst(struct subre *, int);
00063 static void markst(struct subre *);
00064 static void cleanst(struct vars *);
00065 static long nfatree(struct vars *, struct subre *, FILE *);
00066 static long nfanode(struct vars *, struct subre *, FILE *);
00067 static int newlacon(struct vars *, struct state *, struct state *, int);
00068 static void freelacons(struct subre *, int);
00069 static void rfree(regex_t *);
00070
00071 #ifdef REG_DEBUG
00072 static void dump(regex_t *, FILE *);
00073 static void dumpst(struct subre *, FILE *, int);
00074 static void stdump(struct subre *, FILE *, int);
00075 static const char *stid(struct subre *, char *, size_t);
00076 #endif
00077
00078 static void lexstart(struct vars *);
00079 static void prefixes(struct vars *);
00080 static void lexnest(struct vars *, const chr *, const chr *);
00081 static void lexword(struct vars *);
00082 static int next(struct vars *);
00083 static int lexescape(struct vars *);
00084 static chr lexdigits(struct vars *, int, int, int);
00085 static int brenext(struct vars *, chr);
00086 static void skip(struct vars *);
00087 static chr newline(void);
00088 static chr chrnamed(struct vars *, const chr *, const chr *, chr);
00089
00090
00091 static void initcm(struct vars *, struct colormap *);
00092 static void freecm(struct colormap *);
00093 static void cmtreefree(struct colormap *, union tree *, int);
00094 static color setcolor(struct colormap *, chr, pcolor);
00095 static color maxcolor(struct colormap *);
00096 static color newcolor(struct colormap *);
00097 static void freecolor(struct colormap *, pcolor);
00098 static color pseudocolor(struct colormap *);
00099 static color subcolor(struct colormap *, chr c);
00100 static color newsub(struct colormap *, pcolor);
00101 static void subrange(struct vars *, chr, chr, struct state *, struct state *);
00102 static void subblock(struct vars *, chr, struct state *, struct state *);
00103 static void okcolors(struct nfa *, struct colormap *);
00104 static void colorchain(struct colormap *, struct arc *);
00105 static void uncolorchain(struct colormap *, struct arc *);
00106 static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);
00107 static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);
00108
00109 #ifdef REG_DEBUG
00110 static void dumpcolors(struct colormap *, FILE *);
00111 static void fillcheck(struct colormap *, union tree *, int, FILE *);
00112 static void dumpchr(chr, FILE *);
00113 #endif
00114
00115 static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *);
00116 static void freenfa(struct nfa *);
00117 static struct state *newstate(struct nfa *);
00118 static struct state *newfstate(struct nfa *, int flag);
00119 static void dropstate(struct nfa *, struct state *);
00120 static void freestate(struct nfa *, struct state *);
00121 static void destroystate(struct nfa *, struct state *);
00122 static void newarc(struct nfa *, int, pcolor, struct state *, struct state *);
00123 static struct arc *allocarc(struct nfa *, struct state *);
00124 static void freearc(struct nfa *, struct arc *);
00125 static int hasnonemptyout(struct state *);
00126 static int nonemptyouts(struct state *);
00127 static int nonemptyins(struct state *);
00128 static struct arc *findarc(struct state *, int, pcolor);
00129 static void cparc(struct nfa *, struct arc *, struct state *, struct state *);
00130 static void moveins(struct nfa *, struct state *, struct state *);
00131 static void copyins(struct nfa *, struct state *, struct state *, int);
00132 static void moveouts(struct nfa *, struct state *, struct state *);
00133 static void copyouts(struct nfa *, struct state *, struct state *, int);
00134 static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);
00135 static void delsub(struct nfa *, struct state *, struct state *);
00136 static void deltraverse(struct nfa *, struct state *, struct state *);
00137 static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *);
00138 static void duptraverse(struct nfa *, struct state *, struct state *);
00139 static void cleartraverse(struct nfa *, struct state *);
00140 static void specialcolors(struct nfa *);
00141 static long optimize(struct nfa *, FILE *);
00142 static void pullback(struct nfa *, FILE *);
00143 static int pull(struct nfa *, struct arc *);
00144 static void pushfwd(struct nfa *, FILE *);
00145 static int push(struct nfa *, struct arc *);
00146
00147 #define INCOMPATIBLE 1
00148 #define SATISFIED 2
00149 #define COMPATIBLE 3
00150 static int combine(struct arc *, struct arc *);
00151 static void fixempties(struct nfa *, FILE *);
00152 static struct state *emptyreachable(struct state *, struct state *);
00153 static void replaceempty(struct nfa *, struct state *, struct state *);
00154 static void cleanup(struct nfa *);
00155 static void markreachable(struct nfa *, struct state *, struct state *, struct state *);
00156 static void markcanreach(struct nfa *, struct state *, struct state *, struct state *);
00157 static long analyze(struct nfa *);
00158 static void compact(struct nfa *, struct cnfa *);
00159 static void carcsort(struct carc *, struct carc *);
00160 static void freecnfa(struct cnfa *);
00161 static void dumpnfa(struct nfa *, FILE *);
00162
00163 #ifdef REG_DEBUG
00164 static void dumpstate(struct state *, FILE *);
00165 static void dumparcs(struct state *, FILE *);
00166 static int dumprarcs(struct arc *, struct state *, FILE *, int);
00167 static void dumparc(struct arc *, struct state *, FILE *);
00168 static void dumpcnfa(struct cnfa *, FILE *);
00169 static void dumpcstate(int, struct cnfa *, FILE *);
00170 #endif
00171
00172 static struct cvec *newcvec(int, int);
00173 static struct cvec *clearcvec(struct cvec *);
00174 static void addchr(struct cvec *, chr);
00175 static void addrange(struct cvec *, chr, chr);
00176 static struct cvec *getcvec(struct vars *, int, int);
00177 static void freecvec(struct cvec *);
00178
00179
00180 static int pg_wc_isdigit(pg_wchar c);
00181 static int pg_wc_isalpha(pg_wchar c);
00182 static int pg_wc_isalnum(pg_wchar c);
00183 static int pg_wc_isupper(pg_wchar c);
00184 static int pg_wc_islower(pg_wchar c);
00185 static int pg_wc_isgraph(pg_wchar c);
00186 static int pg_wc_isprint(pg_wchar c);
00187 static int pg_wc_ispunct(pg_wchar c);
00188 static int pg_wc_isspace(pg_wchar c);
00189 static pg_wchar pg_wc_toupper(pg_wchar c);
00190 static pg_wchar pg_wc_tolower(pg_wchar c);
00191
00192
00193 static celt element(struct vars *, const chr *, const chr *);
00194 static struct cvec *range(struct vars *, celt, celt, int);
00195 static int before(celt, celt);
00196 static struct cvec *eclass(struct vars *, celt, int);
00197 static struct cvec *cclass(struct vars *, const chr *, const chr *, int);
00198 static struct cvec *allcases(struct vars *, chr);
00199 static int cmp(const chr *, const chr *, size_t);
00200 static int casecmp(const chr *, const chr *, size_t);
00201
00202
00203
00204 struct vars
00205 {
00206 regex_t *re;
00207 const chr *now;
00208 const chr *stop;
00209 const chr *savenow;
00210 const chr *savestop;
00211 int err;
00212 int cflags;
00213 int lasttype;
00214 int nexttype;
00215 chr nextvalue;
00216 int lexcon;
00217 int nsubexp;
00218 struct subre **subs;
00219 size_t nsubs;
00220 struct subre *sub10[10];
00221 struct nfa *nfa;
00222 struct colormap *cm;
00223 color nlcolor;
00224 struct state *wordchrs;
00225 struct subre *tree;
00226 struct subre *treechain;
00227 struct subre *treefree;
00228 int ntree;
00229 struct cvec *cv;
00230 struct cvec *cv2;
00231 struct subre *lacons;
00232 int nlacons;
00233 };
00234
00235
00236 #define NEXT() (next(v))
00237 #define SEE(t) (v->nexttype == (t))
00238 #define EAT(t) (SEE(t) && next(v))
00239 #define VISERR(vv) ((vv)->err != 0)
00240 #define ISERR() VISERR(v)
00241 #define VERR(vv,e) ((vv)->nexttype = EOS, \
00242 (vv)->err = ((vv)->err ? (vv)->err : (e)))
00243 #define ERR(e) VERR(v, e)
00244 #define NOERR() {if (ISERR()) return;}
00245 #define NOERRN() {if (ISERR()) return NULL;}
00246 #define NOERRZ() {if (ISERR()) return 0;}
00247 #define INSIST(c, e) do { if (!(c)) ERR(e); } while (0)
00248 #define NOTE(b) (v->re->re_info |= (b))
00249 #define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y)
00250
00251
00252 #define EMPTY 'n'
00253 #define EOS 'e'
00254 #define PLAIN 'p'
00255 #define DIGIT 'd'
00256 #define BACKREF 'b'
00257 #define COLLEL 'I'
00258 #define ECLASS 'E'
00259 #define CCLASS 'C'
00260 #define END 'X'
00261 #define RANGE 'R'
00262 #define LACON 'L'
00263 #define AHEAD 'a'
00264 #define BEHIND 'r'
00265 #define WBDRY 'w'
00266 #define NWBDRY 'W'
00267 #define SBEGIN 'A'
00268 #define SEND 'Z'
00269 #define PREFER 'P'
00270
00271
00272 #define COLORED(a) \
00273 ((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND)
00274
00275
00276
00277 static struct fns functions = {
00278 rfree,
00279 };
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289 int
00290 pg_regcomp(regex_t *re,
00291 const chr *string,
00292 size_t len,
00293 int flags,
00294 Oid collation)
00295 {
00296 struct vars var;
00297 struct vars *v = &var;
00298 struct guts *g;
00299 int i;
00300 size_t j;
00301
00302 #ifdef REG_DEBUG
00303 FILE *debug = (flags & REG_PROGRESS) ? stdout : (FILE *) NULL;
00304 #else
00305 FILE *debug = (FILE *) NULL;
00306 #endif
00307
00308 #define CNOERR() { if (ISERR()) return freev(v, v->err); }
00309
00310
00311
00312 if (re == NULL || string == NULL)
00313 return REG_INVARG;
00314 if ((flags & REG_QUOTE) &&
00315 (flags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE)))
00316 return REG_INVARG;
00317 if (!(flags & REG_EXTENDED) && (flags & REG_ADVF))
00318 return REG_INVARG;
00319
00320
00321 pg_set_regex_collation(collation);
00322
00323
00324 v->re = re;
00325 v->now = string;
00326 v->stop = v->now + len;
00327 v->savenow = v->savestop = NULL;
00328 v->err = 0;
00329 v->cflags = flags;
00330 v->nsubexp = 0;
00331 v->subs = v->sub10;
00332 v->nsubs = 10;
00333 for (j = 0; j < v->nsubs; j++)
00334 v->subs[j] = NULL;
00335 v->nfa = NULL;
00336 v->cm = NULL;
00337 v->nlcolor = COLORLESS;
00338 v->wordchrs = NULL;
00339 v->tree = NULL;
00340 v->treechain = NULL;
00341 v->treefree = NULL;
00342 v->cv = NULL;
00343 v->cv2 = NULL;
00344 v->lacons = NULL;
00345 v->nlacons = 0;
00346 re->re_magic = REMAGIC;
00347 re->re_info = 0;
00348 re->re_csize = sizeof(chr);
00349 re->re_collation = collation;
00350 re->re_guts = NULL;
00351 re->re_fns = VS(&functions);
00352
00353
00354 re->re_guts = VS(MALLOC(sizeof(struct guts)));
00355 if (re->re_guts == NULL)
00356 return freev(v, REG_ESPACE);
00357 g = (struct guts *) re->re_guts;
00358 g->tree = NULL;
00359 initcm(v, &g->cmap);
00360 v->cm = &g->cmap;
00361 g->lacons = NULL;
00362 g->nlacons = 0;
00363 ZAPCNFA(g->search);
00364 v->nfa = newnfa(v, v->cm, (struct nfa *) NULL);
00365 CNOERR();
00366
00367 v->cv = newcvec(100, 20);
00368 if (v->cv == NULL)
00369 return freev(v, REG_ESPACE);
00370
00371
00372 lexstart(v);
00373 if ((v->cflags & REG_NLSTOP) || (v->cflags & REG_NLANCH))
00374 {
00375
00376 v->nlcolor = subcolor(v->cm, newline());
00377 okcolors(v->nfa, v->cm);
00378 }
00379 CNOERR();
00380 v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final);
00381 assert(SEE(EOS));
00382 CNOERR();
00383 assert(v->tree != NULL);
00384
00385
00386 specialcolors(v->nfa);
00387 CNOERR();
00388 #ifdef REG_DEBUG
00389 if (debug != NULL)
00390 {
00391 fprintf(debug, "\n\n\n========= RAW ==========\n");
00392 dumpnfa(v->nfa, debug);
00393 dumpst(v->tree, debug, 1);
00394 }
00395 #endif
00396 optst(v, v->tree);
00397 v->ntree = numst(v->tree, 1);
00398 markst(v->tree);
00399 cleanst(v);
00400 #ifdef REG_DEBUG
00401 if (debug != NULL)
00402 {
00403 fprintf(debug, "\n\n\n========= TREE FIXED ==========\n");
00404 dumpst(v->tree, debug, 1);
00405 }
00406 #endif
00407
00408
00409 re->re_info |= nfatree(v, v->tree, debug);
00410 CNOERR();
00411 assert(v->nlacons == 0 || v->lacons != NULL);
00412 for (i = 1; i < v->nlacons; i++)
00413 {
00414 #ifdef REG_DEBUG
00415 if (debug != NULL)
00416 fprintf(debug, "\n\n\n========= LA%d ==========\n", i);
00417 #endif
00418 nfanode(v, &v->lacons[i], debug);
00419 }
00420 CNOERR();
00421 if (v->tree->flags & SHORTER)
00422 NOTE(REG_USHORTEST);
00423
00424
00425 #ifdef REG_DEBUG
00426 if (debug != NULL)
00427 fprintf(debug, "\n\n\n========= SEARCH ==========\n");
00428 #endif
00429
00430 (DISCARD) optimize(v->nfa, debug);
00431 CNOERR();
00432 makesearch(v, v->nfa);
00433 CNOERR();
00434 compact(v->nfa, &g->search);
00435 CNOERR();
00436
00437
00438 re->re_nsub = v->nsubexp;
00439 v->re = NULL;
00440 g->magic = GUTSMAGIC;
00441 g->cflags = v->cflags;
00442 g->info = re->re_info;
00443 g->nsub = re->re_nsub;
00444 g->tree = v->tree;
00445 v->tree = NULL;
00446 g->ntree = v->ntree;
00447 g->compare = (v->cflags & REG_ICASE) ? casecmp : cmp;
00448 g->lacons = v->lacons;
00449 v->lacons = NULL;
00450 g->nlacons = v->nlacons;
00451
00452 #ifdef REG_DEBUG
00453 if (flags & REG_DUMP)
00454 dump(re, stdout);
00455 #endif
00456
00457 assert(v->err == 0);
00458 return freev(v, 0);
00459 }
00460
00461
00462
00463
00464 static void
00465 moresubs(struct vars * v,
00466 int wanted)
00467 {
00468 struct subre **p;
00469 size_t n;
00470
00471 assert(wanted > 0 && (size_t) wanted >= v->nsubs);
00472 n = (size_t) wanted *3 / 2 + 1;
00473
00474 if (v->subs == v->sub10)
00475 {
00476 p = (struct subre **) MALLOC(n * sizeof(struct subre *));
00477 if (p != NULL)
00478 memcpy(VS(p), VS(v->subs),
00479 v->nsubs * sizeof(struct subre *));
00480 }
00481 else
00482 p = (struct subre **) REALLOC(v->subs, n * sizeof(struct subre *));
00483 if (p == NULL)
00484 {
00485 ERR(REG_ESPACE);
00486 return;
00487 }
00488 v->subs = p;
00489 for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++)
00490 *p = NULL;
00491 assert(v->nsubs == n);
00492 assert((size_t) wanted < v->nsubs);
00493 }
00494
00495
00496
00497
00498
00499
00500
00501 static int
00502 freev(struct vars * v,
00503 int err)
00504 {
00505 if (v->re != NULL)
00506 rfree(v->re);
00507 if (v->subs != v->sub10)
00508 FREE(v->subs);
00509 if (v->nfa != NULL)
00510 freenfa(v->nfa);
00511 if (v->tree != NULL)
00512 freesubre(v, v->tree);
00513 if (v->treechain != NULL)
00514 cleanst(v);
00515 if (v->cv != NULL)
00516 freecvec(v->cv);
00517 if (v->cv2 != NULL)
00518 freecvec(v->cv2);
00519 if (v->lacons != NULL)
00520 freelacons(v->lacons, v->nlacons);
00521 ERR(err);
00522
00523 return v->err;
00524 }
00525
00526
00527
00528
00529
00530 static void
00531 makesearch(struct vars * v,
00532 struct nfa * nfa)
00533 {
00534 struct arc *a;
00535 struct arc *b;
00536 struct state *pre = nfa->pre;
00537 struct state *s;
00538 struct state *s2;
00539 struct state *slist;
00540
00541
00542 for (a = pre->outs; a != NULL; a = a->outchain)
00543 {
00544 assert(a->type == PLAIN);
00545 if (a->co != nfa->bos[0] && a->co != nfa->bos[1])
00546 break;
00547 }
00548 if (a != NULL)
00549 {
00550
00551 rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre);
00552
00553
00554 newarc(nfa, PLAIN, nfa->bos[0], pre, pre);
00555 newarc(nfa, PLAIN, nfa->bos[1], pre, pre);
00556 }
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568 slist = NULL;
00569 for (a = pre->outs; a != NULL; a = a->outchain)
00570 {
00571 s = a->to;
00572 for (b = s->ins; b != NULL; b = b->inchain)
00573 if (b->from != pre)
00574 break;
00575 if (b != NULL && s->tmp == NULL)
00576 {
00577
00578
00579
00580
00581 s->tmp = slist;
00582 slist = s;
00583 }
00584 }
00585
00586
00587 for (s = slist; s != NULL; s = s2)
00588 {
00589 s2 = newstate(nfa);
00590 copyouts(nfa, s, s2, 1);
00591 for (a = s->ins; a != NULL; a = b)
00592 {
00593 b = a->inchain;
00594 if (a->from != pre)
00595 {
00596 cparc(nfa, a, a->from, s2);
00597 freearc(nfa, a);
00598 }
00599 }
00600 s2 = s->tmp;
00601 s->tmp = NULL;
00602 }
00603 }
00604
00605
00606
00607
00608
00609
00610
00611
00612 static struct subre *
00613 parse(struct vars * v,
00614 int stopper,
00615 int type,
00616 struct state * init,
00617 struct state * final)
00618 {
00619 struct state *left;
00620 struct state *right;
00621 struct subre *branches;
00622 struct subre *branch;
00623 struct subre *t;
00624 int firstbranch;
00625
00626 assert(stopper == ')' || stopper == EOS);
00627
00628 branches = subre(v, '|', LONGER, init, final);
00629 NOERRN();
00630 branch = branches;
00631 firstbranch = 1;
00632 do
00633 {
00634 if (!firstbranch)
00635 {
00636
00637 branch->right = subre(v, '|', LONGER, init, final);
00638 NOERRN();
00639 branch = branch->right;
00640 }
00641 firstbranch = 0;
00642 left = newstate(v->nfa);
00643 right = newstate(v->nfa);
00644 NOERRN();
00645 EMPTYARC(init, left);
00646 EMPTYARC(right, final);
00647 NOERRN();
00648 branch->left = parsebranch(v, stopper, type, left, right, 0);
00649 NOERRN();
00650 branch->flags |= UP(branch->flags | branch->left->flags);
00651 if ((branch->flags & ~branches->flags) != 0)
00652 for (t = branches; t != branch; t = t->right)
00653 t->flags |= branch->flags;
00654 } while (EAT('|'));
00655 assert(SEE(stopper) || SEE(EOS));
00656
00657 if (!SEE(stopper))
00658 {
00659 assert(stopper == ')' && SEE(EOS));
00660 ERR(REG_EPAREN);
00661 }
00662
00663
00664 if (branch == branches)
00665 {
00666 assert(branch->right == NULL);
00667 t = branch->left;
00668 branch->left = NULL;
00669 freesubre(v, branches);
00670 branches = t;
00671 }
00672 else if (!MESSY(branches->flags))
00673 {
00674 freesubre(v, branches->left);
00675 branches->left = NULL;
00676 freesubre(v, branches->right);
00677 branches->right = NULL;
00678 branches->op = '=';
00679 }
00680
00681 return branches;
00682 }
00683
00684
00685
00686
00687
00688
00689
00690
00691 static struct subre *
00692 parsebranch(struct vars * v,
00693 int stopper,
00694 int type,
00695 struct state * left,
00696 struct state * right,
00697 int partial)
00698 {
00699 struct state *lp;
00700 int seencontent;
00701 struct subre *t;
00702
00703 lp = left;
00704 seencontent = 0;
00705 t = subre(v, '=', 0, left, right);
00706 NOERRN();
00707 while (!SEE('|') && !SEE(stopper) && !SEE(EOS))
00708 {
00709 if (seencontent)
00710 {
00711 lp = newstate(v->nfa);
00712 NOERRN();
00713 moveins(v->nfa, right, lp);
00714 }
00715 seencontent = 1;
00716
00717
00718 parseqatom(v, stopper, type, lp, right, t);
00719 NOERRN();
00720 }
00721
00722 if (!seencontent)
00723 {
00724 if (!partial)
00725 NOTE(REG_UUNSPEC);
00726 assert(lp == left);
00727 EMPTYARC(left, right);
00728 }
00729
00730 return t;
00731 }
00732
00733
00734
00735
00736
00737
00738
00739
00740 static void
00741 parseqatom(struct vars * v,
00742 int stopper,
00743 int type,
00744 struct state * lp,
00745 struct state * rp,
00746 struct subre * top)
00747 {
00748 struct state *s;
00749 struct state *s2;
00750
00751 #define ARCV(t, val) newarc(v->nfa, t, val, lp, rp)
00752 int m,
00753 n;
00754 struct subre *atom;
00755 struct subre *t;
00756 int cap;
00757 int pos;
00758 int subno;
00759 int atomtype;
00760 int qprefer;
00761 int f;
00762 struct subre **atomp;
00763
00764
00765 atom = NULL;
00766 assert(lp->nouts == 0);
00767 assert(rp->nins == 0);
00768 subno = 0;
00769
00770
00771 atomtype = v->nexttype;
00772 switch (atomtype)
00773 {
00774
00775 case '^':
00776 ARCV('^', 1);
00777 if (v->cflags & REG_NLANCH)
00778 ARCV(BEHIND, v->nlcolor);
00779 NEXT();
00780 return;
00781 break;
00782 case '$':
00783 ARCV('$', 1);
00784 if (v->cflags & REG_NLANCH)
00785 ARCV(AHEAD, v->nlcolor);
00786 NEXT();
00787 return;
00788 break;
00789 case SBEGIN:
00790 ARCV('^', 1);
00791 ARCV('^', 0);
00792 NEXT();
00793 return;
00794 break;
00795 case SEND:
00796 ARCV('$', 1);
00797 ARCV('$', 0);
00798 NEXT();
00799 return;
00800 break;
00801 case '<':
00802 wordchrs(v);
00803 s = newstate(v->nfa);
00804 NOERR();
00805 nonword(v, BEHIND, lp, s);
00806 word(v, AHEAD, s, rp);
00807 return;
00808 break;
00809 case '>':
00810 wordchrs(v);
00811 s = newstate(v->nfa);
00812 NOERR();
00813 word(v, BEHIND, lp, s);
00814 nonword(v, AHEAD, s, rp);
00815 return;
00816 break;
00817 case WBDRY:
00818 wordchrs(v);
00819 s = newstate(v->nfa);
00820 NOERR();
00821 nonword(v, BEHIND, lp, s);
00822 word(v, AHEAD, s, rp);
00823 s = newstate(v->nfa);
00824 NOERR();
00825 word(v, BEHIND, lp, s);
00826 nonword(v, AHEAD, s, rp);
00827 return;
00828 break;
00829 case NWBDRY:
00830 wordchrs(v);
00831 s = newstate(v->nfa);
00832 NOERR();
00833 word(v, BEHIND, lp, s);
00834 word(v, AHEAD, s, rp);
00835 s = newstate(v->nfa);
00836 NOERR();
00837 nonword(v, BEHIND, lp, s);
00838 nonword(v, AHEAD, s, rp);
00839 return;
00840 break;
00841 case LACON:
00842 pos = v->nextvalue;
00843 NEXT();
00844 s = newstate(v->nfa);
00845 s2 = newstate(v->nfa);
00846 NOERR();
00847 t = parse(v, ')', LACON, s, s2);
00848 freesubre(v, t);
00849 assert(SEE(')') || ISERR());
00850 NEXT();
00851 n = newlacon(v, s, s2, pos);
00852 NOERR();
00853 ARCV(LACON, n);
00854 return;
00855 break;
00856
00857 case '*':
00858 case '+':
00859 case '?':
00860 case '{':
00861 ERR(REG_BADRPT);
00862 return;
00863 break;
00864 default:
00865 ERR(REG_ASSERT);
00866 return;
00867 break;
00868
00869 case ')':
00870 if ((v->cflags & REG_ADVANCED) != REG_EXTENDED)
00871 {
00872 ERR(REG_EPAREN);
00873 return;
00874 }
00875
00876 NOTE(REG_UPBOTCH);
00877
00878 case PLAIN:
00879 onechr(v, v->nextvalue, lp, rp);
00880 okcolors(v->nfa, v->cm);
00881 NOERR();
00882 NEXT();
00883 break;
00884 case '[':
00885 if (v->nextvalue == 1)
00886 bracket(v, lp, rp);
00887 else
00888 cbracket(v, lp, rp);
00889 assert(SEE(']') || ISERR());
00890 NEXT();
00891 break;
00892 case '.':
00893 rainbow(v->nfa, v->cm, PLAIN,
00894 (v->cflags & REG_NLSTOP) ? v->nlcolor : COLORLESS,
00895 lp, rp);
00896 NEXT();
00897 break;
00898
00899 case '(':
00900 cap = (type == LACON) ? 0 : v->nextvalue;
00901 if (cap)
00902 {
00903 v->nsubexp++;
00904 subno = v->nsubexp;
00905 if ((size_t) subno >= v->nsubs)
00906 moresubs(v, subno);
00907 assert((size_t) subno < v->nsubs);
00908 }
00909 else
00910 atomtype = PLAIN;
00911 NEXT();
00912
00913 s = newstate(v->nfa);
00914 s2 = newstate(v->nfa);
00915 NOERR();
00916 EMPTYARC(lp, s);
00917 EMPTYARC(s2, rp);
00918 NOERR();
00919 atom = parse(v, ')', PLAIN, s, s2);
00920 assert(SEE(')') || ISERR());
00921 NEXT();
00922 NOERR();
00923 if (cap)
00924 {
00925 v->subs[subno] = atom;
00926 t = subre(v, '(', atom->flags | CAP, lp, rp);
00927 NOERR();
00928 t->subno = subno;
00929 t->left = atom;
00930 atom = t;
00931 }
00932
00933 break;
00934 case BACKREF:
00935 INSIST(type != LACON, REG_ESUBREG);
00936 INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
00937 INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
00938 NOERR();
00939 assert(v->nextvalue > 0);
00940 atom = subre(v, 'b', BACKR, lp, rp);
00941 subno = v->nextvalue;
00942 atom->subno = subno;
00943 EMPTYARC(lp, rp);
00944 NEXT();
00945 break;
00946 }
00947
00948
00949 switch (v->nexttype)
00950 {
00951 case '*':
00952 m = 0;
00953 n = INFINITY;
00954 qprefer = (v->nextvalue) ? LONGER : SHORTER;
00955 NEXT();
00956 break;
00957 case '+':
00958 m = 1;
00959 n = INFINITY;
00960 qprefer = (v->nextvalue) ? LONGER : SHORTER;
00961 NEXT();
00962 break;
00963 case '?':
00964 m = 0;
00965 n = 1;
00966 qprefer = (v->nextvalue) ? LONGER : SHORTER;
00967 NEXT();
00968 break;
00969 case '{':
00970 NEXT();
00971 m = scannum(v);
00972 if (EAT(','))
00973 {
00974 if (SEE(DIGIT))
00975 n = scannum(v);
00976 else
00977 n = INFINITY;
00978 if (m > n)
00979 {
00980 ERR(REG_BADBR);
00981 return;
00982 }
00983
00984 qprefer = (v->nextvalue) ? LONGER : SHORTER;
00985 }
00986 else
00987 {
00988 n = m;
00989
00990 qprefer = 0;
00991 }
00992 if (!SEE('}'))
00993 {
00994 ERR(REG_BADBR);
00995 return;
00996 }
00997 NEXT();
00998 break;
00999 default:
01000 m = n = 1;
01001 qprefer = 0;
01002 break;
01003 }
01004
01005
01006 if (m == 0 && n == 0)
01007 {
01008 if (atom != NULL)
01009 freesubre(v, atom);
01010 if (atomtype == '(')
01011 v->subs[subno] = NULL;
01012 delsub(v->nfa, lp, rp);
01013 EMPTYARC(lp, rp);
01014 return;
01015 }
01016
01017
01018 assert(!MESSY(top->flags));
01019 f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0);
01020 if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f)))
01021 {
01022 if (!(m == 1 && n == 1))
01023 repeat(v, lp, rp, m, n);
01024 if (atom != NULL)
01025 freesubre(v, atom);
01026 top->flags = f;
01027 return;
01028 }
01029
01030
01031
01032
01033
01034
01035
01036
01037
01038 if (atom == NULL)
01039 {
01040 atom = subre(v, '=', 0, lp, rp);
01041 NOERR();
01042 }
01043
01044
01045
01046
01047
01048
01049
01050
01051
01052
01053
01054
01055
01056
01057
01058
01059
01060 s = newstate(v->nfa);
01061 s2 = newstate(v->nfa);
01062 NOERR();
01063 moveouts(v->nfa, lp, s);
01064 moveins(v->nfa, rp, s2);
01065 NOERR();
01066 atom->begin = s;
01067 atom->end = s2;
01068 s = newstate(v->nfa);
01069 NOERR();
01070 EMPTYARC(lp, s);
01071 NOERR();
01072
01073
01074 t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
01075 t->left = atom;
01076 atomp = &t->left;
01077
01078
01079
01080
01081 assert(top->op == '=' && top->left == NULL && top->right == NULL);
01082 top->left = subre(v, '=', top->flags, top->begin, lp);
01083 top->op = '.';
01084 top->right = t;
01085
01086
01087 if (atomtype == BACKREF)
01088 {
01089 assert(atom->begin->nouts == 1);
01090 delsub(v->nfa, atom->begin, atom->end);
01091 assert(v->subs[subno] != NULL);
01092
01093
01094
01095
01096
01097
01098 dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
01099 atom->begin, atom->end);
01100 NOERR();
01101 }
01102
01103
01104
01105
01106
01107 if (atomtype == BACKREF)
01108 {
01109
01110 EMPTYARC(s, atom->begin);
01111
01112 repeat(v, atom->begin, atom->end, m, n);
01113 atom->min = (short) m;
01114 atom->max = (short) n;
01115 atom->flags |= COMBINE(qprefer, atom->flags);
01116
01117 s2 = atom->end;
01118 }
01119 else if (m == 1 && n == 1)
01120 {
01121
01122 EMPTYARC(s, atom->begin);
01123
01124 s2 = atom->end;
01125 }
01126 else if (m > 0 && !(atom->flags & BACKR))
01127 {
01128
01129
01130
01131
01132
01133
01134
01135
01136 dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
01137 assert(m >= 1 && m != INFINITY && n >= 1);
01138 repeat(v, s, atom->begin, m - 1, (n == INFINITY) ? n : n - 1);
01139 f = COMBINE(qprefer, atom->flags);
01140 t = subre(v, '.', f, s, atom->end);
01141 NOERR();
01142 t->left = subre(v, '=', PREF(f), s, atom->begin);
01143 NOERR();
01144 t->right = atom;
01145 *atomp = t;
01146
01147 s2 = atom->end;
01148 }
01149 else
01150 {
01151
01152 s2 = newstate(v->nfa);
01153 NOERR();
01154 moveouts(v->nfa, atom->end, s2);
01155 NOERR();
01156 dupnfa(v->nfa, atom->begin, atom->end, s, s2);
01157 repeat(v, s, s2, m, n);
01158 f = COMBINE(qprefer, atom->flags);
01159 t = subre(v, '*', f, s, s2);
01160 NOERR();
01161 t->min = (short) m;
01162 t->max = (short) n;
01163 t->left = atom;
01164 *atomp = t;
01165
01166 }
01167
01168
01169 t = top->right;
01170 if (!(SEE('|') || SEE(stopper) || SEE(EOS)))
01171 t->right = parsebranch(v, stopper, type, s2, rp, 1);
01172 else
01173 {
01174 EMPTYARC(s2, rp);
01175 t->right = subre(v, '=', 0, s2, rp);
01176 }
01177 NOERR();
01178 assert(SEE('|') || SEE(stopper) || SEE(EOS));
01179 t->flags |= COMBINE(t->flags, t->right->flags);
01180 top->flags |= COMBINE(top->flags, t->flags);
01181 }
01182
01183
01184
01185
01186 static void
01187 nonword(struct vars * v,
01188 int dir,
01189 struct state * lp,
01190 struct state * rp)
01191 {
01192 int anchor = (dir == AHEAD) ? '$' : '^';
01193
01194 assert(dir == AHEAD || dir == BEHIND);
01195 newarc(v->nfa, anchor, 1, lp, rp);
01196 newarc(v->nfa, anchor, 0, lp, rp);
01197 colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp);
01198
01199 }
01200
01201
01202
01203
01204 static void
01205 word(struct vars * v,
01206 int dir,
01207 struct state * lp,
01208 struct state * rp)
01209 {
01210 assert(dir == AHEAD || dir == BEHIND);
01211 cloneouts(v->nfa, v->wordchrs, lp, rp, dir);
01212
01213 }
01214
01215
01216
01217
01218 static int
01219 scannum(struct vars * v)
01220 {
01221 int n = 0;
01222
01223 while (SEE(DIGIT) && n < DUPMAX)
01224 {
01225 n = n * 10 + v->nextvalue;
01226 NEXT();
01227 }
01228 if (SEE(DIGIT) || n > DUPMAX)
01229 {
01230 ERR(REG_BADBR);
01231 return 0;
01232 }
01233 return n;
01234 }
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247
01248
01249 static void
01250 repeat(struct vars * v,
01251 struct state * lp,
01252 struct state * rp,
01253 int m,
01254 int n)
01255 {
01256 #define SOME 2
01257 #define INF 3
01258 #define PAIR(x, y) ((x)*4 + (y))
01259 #define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )
01260 const int rm = REDUCE(m);
01261 const int rn = REDUCE(n);
01262 struct state *s;
01263 struct state *s2;
01264
01265 switch (PAIR(rm, rn))
01266 {
01267 case PAIR(0, 0):
01268 delsub(v->nfa, lp, rp);
01269 EMPTYARC(lp, rp);
01270 break;
01271 case PAIR(0, 1):
01272 EMPTYARC(lp, rp);
01273 break;
01274 case PAIR(0, SOME):
01275 repeat(v, lp, rp, 1, n);
01276 NOERR();
01277 EMPTYARC(lp, rp);
01278 break;
01279 case PAIR(0, INF):
01280 s = newstate(v->nfa);
01281 NOERR();
01282 moveouts(v->nfa, lp, s);
01283 moveins(v->nfa, rp, s);
01284 EMPTYARC(lp, s);
01285 EMPTYARC(s, rp);
01286 break;
01287 case PAIR(1, 1):
01288 break;
01289 case PAIR(1, SOME):
01290 s = newstate(v->nfa);
01291 NOERR();
01292 moveouts(v->nfa, lp, s);
01293 dupnfa(v->nfa, s, rp, lp, s);
01294 NOERR();
01295 repeat(v, lp, s, 1, n - 1);
01296 NOERR();
01297 EMPTYARC(lp, s);
01298 break;
01299 case PAIR(1, INF):
01300 s = newstate(v->nfa);
01301 s2 = newstate(v->nfa);
01302 NOERR();
01303 moveouts(v->nfa, lp, s);
01304 moveins(v->nfa, rp, s2);
01305 EMPTYARC(lp, s);
01306 EMPTYARC(s2, rp);
01307 EMPTYARC(s2, s);
01308 break;
01309 case PAIR(SOME, SOME):
01310 s = newstate(v->nfa);
01311 NOERR();
01312 moveouts(v->nfa, lp, s);
01313 dupnfa(v->nfa, s, rp, lp, s);
01314 NOERR();
01315 repeat(v, lp, s, m - 1, n - 1);
01316 break;
01317 case PAIR(SOME, INF):
01318 s = newstate(v->nfa);
01319 NOERR();
01320 moveouts(v->nfa, lp, s);
01321 dupnfa(v->nfa, s, rp, lp, s);
01322 NOERR();
01323 repeat(v, lp, s, m - 1, n);
01324 break;
01325 default:
01326 ERR(REG_ASSERT);
01327 break;
01328 }
01329 }
01330
01331
01332
01333
01334
01335 static void
01336 bracket(struct vars * v,
01337 struct state * lp,
01338 struct state * rp)
01339 {
01340 assert(SEE('['));
01341 NEXT();
01342 while (!SEE(']') && !SEE(EOS))
01343 brackpart(v, lp, rp);
01344 assert(SEE(']') || ISERR());
01345 okcolors(v->nfa, v->cm);
01346 }
01347
01348
01349
01350
01351
01352
01353
01354 static void
01355 cbracket(struct vars * v,
01356 struct state * lp,
01357 struct state * rp)
01358 {
01359 struct state *left = newstate(v->nfa);
01360 struct state *right = newstate(v->nfa);
01361
01362 NOERR();
01363 bracket(v, left, right);
01364 if (v->cflags & REG_NLSTOP)
01365 newarc(v->nfa, PLAIN, v->nlcolor, left, right);
01366 NOERR();
01367
01368 assert(lp->nouts == 0);
01369
01370
01371
01372
01373
01374 colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
01375 NOERR();
01376 dropstate(v->nfa, left);
01377 assert(right->nins == 0);
01378 freestate(v->nfa, right);
01379 }
01380
01381
01382
01383
01384 static void
01385 brackpart(struct vars * v,
01386 struct state * lp,
01387 struct state * rp)
01388 {
01389 celt startc;
01390 celt endc;
01391 struct cvec *cv;
01392 const chr *startp;
01393 const chr *endp;
01394 chr c[1];
01395
01396
01397 switch (v->nexttype)
01398 {
01399 case RANGE:
01400 ERR(REG_ERANGE);
01401 return;
01402 break;
01403 case PLAIN:
01404 c[0] = v->nextvalue;
01405 NEXT();
01406
01407 if (!SEE(RANGE))
01408 {
01409 onechr(v, c[0], lp, rp);
01410 return;
01411 }
01412 startc = element(v, c, c + 1);
01413 NOERR();
01414 break;
01415 case COLLEL:
01416 startp = v->now;
01417 endp = scanplain(v);
01418 INSIST(startp < endp, REG_ECOLLATE);
01419 NOERR();
01420 startc = element(v, startp, endp);
01421 NOERR();
01422 break;
01423 case ECLASS:
01424 startp = v->now;
01425 endp = scanplain(v);
01426 INSIST(startp < endp, REG_ECOLLATE);
01427 NOERR();
01428 startc = element(v, startp, endp);
01429 NOERR();
01430 cv = eclass(v, startc, (v->cflags & REG_ICASE));
01431 NOERR();
01432 dovec(v, cv, lp, rp);
01433 return;
01434 break;
01435 case CCLASS:
01436 startp = v->now;
01437 endp = scanplain(v);
01438 INSIST(startp < endp, REG_ECTYPE);
01439 NOERR();
01440 cv = cclass(v, startp, endp, (v->cflags & REG_ICASE));
01441 NOERR();
01442 dovec(v, cv, lp, rp);
01443 return;
01444 break;
01445 default:
01446 ERR(REG_ASSERT);
01447 return;
01448 break;
01449 }
01450
01451 if (SEE(RANGE))
01452 {
01453 NEXT();
01454 switch (v->nexttype)
01455 {
01456 case PLAIN:
01457 case RANGE:
01458 c[0] = v->nextvalue;
01459 NEXT();
01460 endc = element(v, c, c + 1);
01461 NOERR();
01462 break;
01463 case COLLEL:
01464 startp = v->now;
01465 endp = scanplain(v);
01466 INSIST(startp < endp, REG_ECOLLATE);
01467 NOERR();
01468 endc = element(v, startp, endp);
01469 NOERR();
01470 break;
01471 default:
01472 ERR(REG_ERANGE);
01473 return;
01474 break;
01475 }
01476 }
01477 else
01478 endc = startc;
01479
01480
01481
01482
01483
01484 if (startc != endc)
01485 NOTE(REG_UUNPORT);
01486 cv = range(v, startc, endc, (v->cflags & REG_ICASE));
01487 NOERR();
01488 dovec(v, cv, lp, rp);
01489 }
01490
01491
01492
01493
01494
01495
01496
01497 static const chr *
01498 scanplain(struct vars * v)
01499 {
01500 const chr *endp;
01501
01502 assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));
01503 NEXT();
01504
01505 endp = v->now;
01506 while (SEE(PLAIN))
01507 {
01508 endp = v->now;
01509 NEXT();
01510 }
01511
01512 assert(SEE(END) || ISERR());
01513 NEXT();
01514
01515 return endp;
01516 }
01517
01518
01519
01520
01521
01522 static void
01523 onechr(struct vars * v,
01524 chr c,
01525 struct state * lp,
01526 struct state * rp)
01527 {
01528 if (!(v->cflags & REG_ICASE))
01529 {
01530 newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);
01531 return;
01532 }
01533
01534
01535 dovec(v, allcases(v, c), lp, rp);
01536 }
01537
01538
01539
01540
01541 static void
01542 dovec(struct vars * v,
01543 struct cvec * cv,
01544 struct state * lp,
01545 struct state * rp)
01546 {
01547 chr ch,
01548 from,
01549 to;
01550 const chr *p;
01551 int i;
01552
01553
01554 for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
01555 {
01556 ch = *p;
01557 newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
01558 }
01559
01560
01561 for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
01562 {
01563 from = *p;
01564 to = *(p + 1);
01565 if (from <= to)
01566 subrange(v, from, to, lp, rp);
01567 }
01568 }
01569
01570
01571
01572
01573
01574
01575
01576
01577
01578
01579 static void
01580 wordchrs(struct vars * v)
01581 {
01582 struct state *left;
01583 struct state *right;
01584
01585 if (v->wordchrs != NULL)
01586 {
01587 NEXT();
01588 return;
01589 }
01590
01591 left = newstate(v->nfa);
01592 right = newstate(v->nfa);
01593 NOERR();
01594
01595 lexword(v);
01596 NEXT();
01597 assert(v->savenow != NULL && SEE('['));
01598 bracket(v, left, right);
01599 assert((v->savenow != NULL && SEE(']')) || ISERR());
01600 NEXT();
01601 NOERR();
01602 v->wordchrs = left;
01603 }
01604
01605
01606
01607
01608 static struct subre *
01609 subre(struct vars * v,
01610 int op,
01611 int flags,
01612 struct state * begin,
01613 struct state * end)
01614 {
01615 struct subre *ret = v->treefree;
01616
01617 if (ret != NULL)
01618 v->treefree = ret->left;
01619 else
01620 {
01621 ret = (struct subre *) MALLOC(sizeof(struct subre));
01622 if (ret == NULL)
01623 {
01624 ERR(REG_ESPACE);
01625 return NULL;
01626 }
01627 ret->chain = v->treechain;
01628 v->treechain = ret;
01629 }
01630
01631 assert(strchr("=b|.*(", op) != NULL);
01632
01633 ret->op = op;
01634 ret->flags = flags;
01635 ret->id = 0;
01636 ret->subno = 0;
01637 ret->min = ret->max = 1;
01638 ret->left = NULL;
01639 ret->right = NULL;
01640 ret->begin = begin;
01641 ret->end = end;
01642 ZAPCNFA(ret->cnfa);
01643
01644 return ret;
01645 }
01646
01647
01648
01649
01650 static void
01651 freesubre(struct vars * v,
01652 struct subre * sr)
01653 {
01654 if (sr == NULL)
01655 return;
01656
01657 if (sr->left != NULL)
01658 freesubre(v, sr->left);
01659 if (sr->right != NULL)
01660 freesubre(v, sr->right);
01661
01662 freesrnode(v, sr);
01663 }
01664
01665
01666
01667
01668 static void
01669 freesrnode(struct vars * v,
01670 struct subre * sr)
01671 {
01672 if (sr == NULL)
01673 return;
01674
01675 if (!NULLCNFA(sr->cnfa))
01676 freecnfa(&sr->cnfa);
01677 sr->flags = 0;
01678
01679 if (v != NULL)
01680 {
01681 sr->left = v->treefree;
01682 v->treefree = sr;
01683 }
01684 else
01685 FREE(sr);
01686 }
01687
01688
01689
01690
01691 static void
01692 optst(struct vars * v,
01693 struct subre * t)
01694 {
01695
01696
01697
01698
01699
01700
01701 return;
01702 }
01703
01704
01705
01706
01707 static int
01708 numst(struct subre * t,
01709 int start)
01710 {
01711 int i;
01712
01713 assert(t != NULL);
01714
01715 i = start;
01716 t->id = (short) i++;
01717 if (t->left != NULL)
01718 i = numst(t->left, i);
01719 if (t->right != NULL)
01720 i = numst(t->right, i);
01721 return i;
01722 }
01723
01724
01725
01726
01727 static void
01728 markst(struct subre * t)
01729 {
01730 assert(t != NULL);
01731
01732 t->flags |= INUSE;
01733 if (t->left != NULL)
01734 markst(t->left);
01735 if (t->right != NULL)
01736 markst(t->right);
01737 }
01738
01739
01740
01741
01742 static void
01743 cleanst(struct vars * v)
01744 {
01745 struct subre *t;
01746 struct subre *next;
01747
01748 for (t = v->treechain; t != NULL; t = next)
01749 {
01750 next = t->chain;
01751 if (!(t->flags & INUSE))
01752 FREE(t);
01753 }
01754 v->treechain = NULL;
01755 v->treefree = NULL;
01756 }
01757
01758
01759
01760
01761 static long
01762 nfatree(struct vars * v,
01763 struct subre * t,
01764 FILE *f)
01765 {
01766 assert(t != NULL && t->begin != NULL);
01767
01768 if (t->left != NULL)
01769 (DISCARD) nfatree(v, t->left, f);
01770 if (t->right != NULL)
01771 (DISCARD) nfatree(v, t->right, f);
01772
01773 return nfanode(v, t, f);
01774 }
01775
01776
01777
01778
01779 static long
01780 nfanode(struct vars * v,
01781 struct subre * t,
01782 FILE *f)
01783 {
01784 struct nfa *nfa;
01785 long ret = 0;
01786
01787 assert(t->begin != NULL);
01788
01789 #ifdef REG_DEBUG
01790 if (f != NULL)
01791 {
01792 char idbuf[50];
01793
01794 fprintf(f, "\n\n\n========= TREE NODE %s ==========\n",
01795 stid(t, idbuf, sizeof(idbuf)));
01796 }
01797 #endif
01798 nfa = newnfa(v, v->cm, v->nfa);
01799 NOERRZ();
01800 dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);
01801 if (!ISERR())
01802 {
01803 specialcolors(nfa);
01804 ret = optimize(nfa, f);
01805 }
01806 if (!ISERR())
01807 compact(nfa, &t->cnfa);
01808
01809 freenfa(nfa);
01810 return ret;
01811 }
01812
01813
01814
01815
01816 static int
01817 newlacon(struct vars * v,
01818 struct state * begin,
01819 struct state * end,
01820 int pos)
01821 {
01822 int n;
01823 struct subre *sub;
01824
01825 if (v->nlacons == 0)
01826 {
01827 v->lacons = (struct subre *) MALLOC(2 * sizeof(struct subre));
01828 n = 1;
01829 v->nlacons = 2;
01830 }
01831 else
01832 {
01833 v->lacons = (struct subre *) REALLOC(v->lacons,
01834 (v->nlacons + 1) * sizeof(struct subre));
01835 n = v->nlacons++;
01836 }
01837 if (v->lacons == NULL)
01838 {
01839 ERR(REG_ESPACE);
01840 return 0;
01841 }
01842 sub = &v->lacons[n];
01843 sub->begin = begin;
01844 sub->end = end;
01845 sub->subno = pos;
01846 ZAPCNFA(sub->cnfa);
01847 return n;
01848 }
01849
01850
01851
01852
01853 static void
01854 freelacons(struct subre * subs,
01855 int n)
01856 {
01857 struct subre *sub;
01858 int i;
01859
01860 assert(n > 0);
01861 for (sub = subs + 1, i = n - 1; i > 0; sub++, i--)
01862 if (!NULLCNFA(sub->cnfa))
01863 freecnfa(&sub->cnfa);
01864 FREE(subs);
01865 }
01866
01867
01868
01869
01870 static void
01871 rfree(regex_t *re)
01872 {
01873 struct guts *g;
01874
01875 if (re == NULL || re->re_magic != REMAGIC)
01876 return;
01877
01878 re->re_magic = 0;
01879 g = (struct guts *) re->re_guts;
01880 re->re_guts = NULL;
01881 re->re_fns = NULL;
01882 if (g != NULL)
01883 {
01884 g->magic = 0;
01885 freecm(&g->cmap);
01886 if (g->tree != NULL)
01887 freesubre((struct vars *) NULL, g->tree);
01888 if (g->lacons != NULL)
01889 freelacons(g->lacons, g->nlacons);
01890 if (!NULLCNFA(g->search))
01891 freecnfa(&g->search);
01892 FREE(g);
01893 }
01894 }
01895
01896 #ifdef REG_DEBUG
01897
01898
01899
01900
01901 static void
01902 dump(regex_t *re,
01903 FILE *f)
01904 {
01905 struct guts *g;
01906 int i;
01907
01908 if (re->re_magic != REMAGIC)
01909 fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic,
01910 REMAGIC);
01911 if (re->re_guts == NULL)
01912 {
01913 fprintf(f, "NULL guts!!!\n");
01914 return;
01915 }
01916 g = (struct guts *) re->re_guts;
01917 if (g->magic != GUTSMAGIC)
01918 fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic,
01919 GUTSMAGIC);
01920
01921 fprintf(f, "\n\n\n========= DUMP ==========\n");
01922 fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n",
01923 (int) re->re_nsub, re->re_info, re->re_csize, g->ntree);
01924
01925 dumpcolors(&g->cmap, f);
01926 if (!NULLCNFA(g->search))
01927 {
01928 printf("\nsearch:\n");
01929 dumpcnfa(&g->search, f);
01930 }
01931 for (i = 1; i < g->nlacons; i++)
01932 {
01933 fprintf(f, "\nla%d (%s):\n", i,
01934 (g->lacons[i].subno) ? "positive" : "negative");
01935 dumpcnfa(&g->lacons[i].cnfa, f);
01936 }
01937 fprintf(f, "\n");
01938 dumpst(g->tree, f, 0);
01939 }
01940
01941
01942
01943
01944 static void
01945 dumpst(struct subre * t,
01946 FILE *f,
01947 int nfapresent)
01948 {
01949 if (t == NULL)
01950 fprintf(f, "null tree\n");
01951 else
01952 stdump(t, f, nfapresent);
01953 fflush(f);
01954 }
01955
01956
01957
01958
01959 static void
01960 stdump(struct subre * t,
01961 FILE *f,
01962 int nfapresent)
01963 {
01964 char idbuf[50];
01965
01966 fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op);
01967 if (t->flags & LONGER)
01968 fprintf(f, " longest");
01969 if (t->flags & SHORTER)
01970 fprintf(f, " shortest");
01971 if (t->flags & MIXED)
01972 fprintf(f, " hasmixed");
01973 if (t->flags & CAP)
01974 fprintf(f, " hascapture");
01975 if (t->flags & BACKR)
01976 fprintf(f, " hasbackref");
01977 if (!(t->flags & INUSE))
01978 fprintf(f, " UNUSED");
01979 if (t->subno != 0)
01980 fprintf(f, " (#%d)", t->subno);
01981 if (t->min != 1 || t->max != 1)
01982 {
01983 fprintf(f, " {%d,", t->min);
01984 if (t->max != INFINITY)
01985 fprintf(f, "%d", t->max);
01986 fprintf(f, "}");
01987 }
01988 if (nfapresent)
01989 fprintf(f, " %ld-%ld", (long) t->begin->no, (long) t->end->no);
01990 if (t->left != NULL)
01991 fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf)));
01992 if (t->right != NULL)
01993 fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf)));
01994 if (!NULLCNFA(t->cnfa))
01995 {
01996 fprintf(f, "\n");
01997 dumpcnfa(&t->cnfa, f);
01998 }
01999 fprintf(f, "\n");
02000 if (t->left != NULL)
02001 stdump(t->left, f, nfapresent);
02002 if (t->right != NULL)
02003 stdump(t->right, f, nfapresent);
02004 }
02005
02006
02007
02008
02009 static const char *
02010 stid(struct subre * t,
02011 char *buf,
02012 size_t bufsize)
02013 {
02014
02015 if (bufsize < sizeof(void *) * 2 + 3 || bufsize < sizeof(t->id) * 3 + 1)
02016 return "unable";
02017 if (t->id != 0)
02018 sprintf(buf, "%d", t->id);
02019 else
02020 sprintf(buf, "%p", t);
02021 return buf;
02022 }
02023 #endif
02024
02025
02026 #include "regc_lex.c"
02027 #include "regc_color.c"
02028 #include "regc_nfa.c"
02029 #include "regc_cvec.c"
02030 #include "regc_pg_locale.c"
02031 #include "regc_locale.c"