00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include "regex/regguts.h"
00035
00036
00037
00038
00039 struct arcp
00040 {
00041 struct sset *ss;
00042 color co;
00043 };
00044
00045 struct sset
00046 {
00047 unsigned *states;
00048 unsigned hash;
00049 #define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
00050 #define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
00051 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
00052 int flags;
00053 #define STARTER 01
00054 #define POSTSTATE 02
00055 #define LOCKED 04
00056 #define NOPROGRESS 010
00057 struct arcp ins;
00058 chr *lastseen;
00059 struct sset **outs;
00060 struct arcp *inchain;
00061 };
00062
00063 struct dfa
00064 {
00065 int nssets;
00066 int nssused;
00067 int nstates;
00068 int ncolors;
00069 int wordsper;
00070 struct sset *ssets;
00071 unsigned *statesarea;
00072 unsigned *work;
00073 struct sset **outsarea;
00074 struct arcp *incarea;
00075 struct cnfa *cnfa;
00076 struct colormap *cm;
00077 chr *lastpost;
00078 chr *lastnopr;
00079 struct sset *search;
00080 int cptsmalloced;
00081 char *mallocarea;
00082 };
00083
00084 #define WORK 1
00085
00086
00087 #define FEWSTATES 20
00088 #define FEWCOLORS 15
00089 struct smalldfa
00090 {
00091 struct dfa dfa;
00092 struct sset ssets[FEWSTATES * 2];
00093 unsigned statesarea[FEWSTATES * 2 + WORK];
00094 struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
00095 struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
00096 };
00097
00098 #define DOMALLOC ((struct smalldfa *)NULL)
00099
00100
00101
00102
00103 struct vars
00104 {
00105 regex_t *re;
00106 struct guts *g;
00107 int eflags;
00108 size_t nmatch;
00109 regmatch_t *pmatch;
00110 rm_detail_t *details;
00111 chr *start;
00112 chr *search_start;
00113 chr *stop;
00114 int err;
00115 struct dfa **subdfas;
00116 struct smalldfa dfa1;
00117 struct smalldfa dfa2;
00118 };
00119
00120 #define VISERR(vv) ((vv)->err != 0)
00121 #define ISERR() VISERR(v)
00122 #define VERR(vv,e) ((vv)->err = ((vv)->err ? (vv)->err : (e)))
00123 #define ERR(e) VERR(v, e)
00124 #define NOERR() {if (ISERR()) return v->err;}
00125 #define OFF(p) ((p) - v->start)
00126 #define LOFF(p) ((long)OFF(p))
00127
00128
00129
00130
00131
00132
00133
00134 static struct dfa *getsubdfa(struct vars *, struct subre *);
00135 static int find(struct vars *, struct cnfa *, struct colormap *);
00136 static int cfind(struct vars *, struct cnfa *, struct colormap *);
00137 static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
00138 static void zapallsubs(regmatch_t *, size_t);
00139 static void zaptreesubs(struct vars *, struct subre *);
00140 static void subset(struct vars *, struct subre *, chr *, chr *);
00141 static int cdissect(struct vars *, struct subre *, chr *, chr *);
00142 static int ccondissect(struct vars *, struct subre *, chr *, chr *);
00143 static int crevcondissect(struct vars *, struct subre *, chr *, chr *);
00144 static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
00145 static int caltdissect(struct vars *, struct subre *, chr *, chr *);
00146 static int citerdissect(struct vars *, struct subre *, chr *, chr *);
00147 static int creviterdissect(struct vars *, struct subre *, chr *, chr *);
00148
00149
00150 static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
00151 static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
00152 static chr *lastcold(struct vars *, struct dfa *);
00153 static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
00154 static void freedfa(struct dfa *);
00155 static unsigned hash(unsigned *, int);
00156 static struct sset *initialize(struct vars *, struct dfa *, chr *);
00157 static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *);
00158 static int lacon(struct vars *, struct cnfa *, chr *, pcolor);
00159 static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
00160 static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
00161
00162
00163
00164
00165
00166 int
00167 pg_regexec(regex_t *re,
00168 const chr *string,
00169 size_t len,
00170 size_t search_start,
00171 rm_detail_t *details,
00172 size_t nmatch,
00173 regmatch_t pmatch[],
00174 int flags)
00175 {
00176 struct vars var;
00177 register struct vars *v = &var;
00178 int st;
00179 size_t n;
00180 size_t i;
00181 int backref;
00182
00183 #define LOCALMAT 20
00184 regmatch_t mat[LOCALMAT];
00185
00186 #define LOCALDFAS 40
00187 struct dfa *subdfas[LOCALDFAS];
00188
00189
00190 if (re == NULL || string == NULL || re->re_magic != REMAGIC)
00191 return REG_INVARG;
00192 if (re->re_csize != sizeof(chr))
00193 return REG_MIXED;
00194
00195
00196 pg_set_regex_collation(re->re_collation);
00197
00198
00199 v->re = re;
00200 v->g = (struct guts *) re->re_guts;
00201 if ((v->g->cflags & REG_EXPECT) && details == NULL)
00202 return REG_INVARG;
00203 if (v->g->info & REG_UIMPOSSIBLE)
00204 return REG_NOMATCH;
00205 backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
00206 v->eflags = flags;
00207 if (v->g->cflags & REG_NOSUB)
00208 nmatch = 0;
00209 v->nmatch = nmatch;
00210 if (backref)
00211 {
00212
00213 if (v->g->nsub + 1 <= LOCALMAT)
00214 v->pmatch = mat;
00215 else
00216 v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
00217 sizeof(regmatch_t));
00218 if (v->pmatch == NULL)
00219 return REG_ESPACE;
00220 v->nmatch = v->g->nsub + 1;
00221 }
00222 else
00223 v->pmatch = pmatch;
00224 v->details = details;
00225 v->start = (chr *) string;
00226 v->search_start = (chr *) string + search_start;
00227 v->stop = (chr *) string + len;
00228 v->err = 0;
00229 assert(v->g->ntree >= 0);
00230 n = (size_t) v->g->ntree;
00231 if (n <= LOCALDFAS)
00232 v->subdfas = subdfas;
00233 else
00234 v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
00235 if (v->subdfas == NULL)
00236 {
00237 if (v->pmatch != pmatch && v->pmatch != mat)
00238 FREE(v->pmatch);
00239 return REG_ESPACE;
00240 }
00241 for (i = 0; i < n; i++)
00242 v->subdfas[i] = NULL;
00243
00244
00245 assert(v->g->tree != NULL);
00246 if (backref)
00247 st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
00248 else
00249 st = find(v, &v->g->tree->cnfa, &v->g->cmap);
00250
00251
00252 if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
00253 {
00254 zapallsubs(pmatch, nmatch);
00255 n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
00256 memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
00257 }
00258
00259
00260 if (v->pmatch != pmatch && v->pmatch != mat)
00261 FREE(v->pmatch);
00262 for (i = 0; i < n; i++)
00263 {
00264 if (v->subdfas[i] != NULL)
00265 freedfa(v->subdfas[i]);
00266 }
00267 if (v->subdfas != subdfas)
00268 FREE(v->subdfas);
00269
00270 return st;
00271 }
00272
00273
00274
00275
00276
00277
00278
00279 static struct dfa *
00280 getsubdfa(struct vars * v,
00281 struct subre * t)
00282 {
00283 if (v->subdfas[t->id] == NULL)
00284 {
00285 v->subdfas[t->id] = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
00286 if (ISERR())
00287 return NULL;
00288 }
00289 return v->subdfas[t->id];
00290 }
00291
00292
00293
00294
00295 static int
00296 find(struct vars * v,
00297 struct cnfa * cnfa,
00298 struct colormap * cm)
00299 {
00300 struct dfa *s;
00301 struct dfa *d;
00302 chr *begin;
00303 chr *end = NULL;
00304 chr *cold;
00305 chr *open;
00306 chr *close;
00307 int hitend;
00308 int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
00309
00310
00311 s = newdfa(v, &v->g->search, cm, &v->dfa1);
00312 assert(!(ISERR() && s != NULL));
00313 NOERR();
00314 MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
00315 cold = NULL;
00316 close = shortest(v, s, v->search_start, v->search_start, v->stop,
00317 &cold, (int *) NULL);
00318 freedfa(s);
00319 NOERR();
00320 if (v->g->cflags & REG_EXPECT)
00321 {
00322 assert(v->details != NULL);
00323 if (cold != NULL)
00324 v->details->rm_extend.rm_so = OFF(cold);
00325 else
00326 v->details->rm_extend.rm_so = OFF(v->stop);
00327 v->details->rm_extend.rm_eo = OFF(v->stop);
00328 }
00329 if (close == NULL)
00330 return REG_NOMATCH;
00331 if (v->nmatch == 0)
00332 return REG_OKAY;
00333
00334
00335 assert(cold != NULL);
00336 open = cold;
00337 cold = NULL;
00338 MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
00339 d = newdfa(v, cnfa, cm, &v->dfa1);
00340 assert(!(ISERR() && d != NULL));
00341 NOERR();
00342 for (begin = open; begin <= close; begin++)
00343 {
00344 MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
00345 if (shorter)
00346 end = shortest(v, d, begin, begin, v->stop,
00347 (chr **) NULL, &hitend);
00348 else
00349 end = longest(v, d, begin, v->stop, &hitend);
00350 NOERR();
00351 if (hitend && cold == NULL)
00352 cold = begin;
00353 if (end != NULL)
00354 break;
00355 }
00356 assert(end != NULL);
00357 freedfa(d);
00358
00359
00360 assert(v->nmatch > 0);
00361 v->pmatch[0].rm_so = OFF(begin);
00362 v->pmatch[0].rm_eo = OFF(end);
00363 if (v->g->cflags & REG_EXPECT)
00364 {
00365 if (cold != NULL)
00366 v->details->rm_extend.rm_so = OFF(cold);
00367 else
00368 v->details->rm_extend.rm_so = OFF(v->stop);
00369 v->details->rm_extend.rm_eo = OFF(v->stop);
00370 }
00371 if (v->nmatch == 1)
00372 return REG_OKAY;
00373
00374
00375 zapallsubs(v->pmatch, v->nmatch);
00376 return cdissect(v, v->g->tree, begin, end);
00377 }
00378
00379
00380
00381
00382 static int
00383 cfind(struct vars * v,
00384 struct cnfa * cnfa,
00385 struct colormap * cm)
00386 {
00387 struct dfa *s;
00388 struct dfa *d;
00389 chr *cold;
00390 int ret;
00391
00392 s = newdfa(v, &v->g->search, cm, &v->dfa1);
00393 NOERR();
00394 d = newdfa(v, cnfa, cm, &v->dfa2);
00395 if (ISERR())
00396 {
00397 assert(d == NULL);
00398 freedfa(s);
00399 return v->err;
00400 }
00401
00402 ret = cfindloop(v, cnfa, cm, d, s, &cold);
00403
00404 freedfa(d);
00405 freedfa(s);
00406 NOERR();
00407 if (v->g->cflags & REG_EXPECT)
00408 {
00409 assert(v->details != NULL);
00410 if (cold != NULL)
00411 v->details->rm_extend.rm_so = OFF(cold);
00412 else
00413 v->details->rm_extend.rm_so = OFF(v->stop);
00414 v->details->rm_extend.rm_eo = OFF(v->stop);
00415 }
00416 return ret;
00417 }
00418
00419
00420
00421
00422 static int
00423 cfindloop(struct vars * v,
00424 struct cnfa * cnfa,
00425 struct colormap * cm,
00426 struct dfa * d,
00427 struct dfa * s,
00428 chr **coldp)
00429 {
00430 chr *begin;
00431 chr *end;
00432 chr *cold;
00433 chr *open;
00434 chr *close;
00435 chr *estart;
00436 chr *estop;
00437 int er;
00438 int shorter = v->g->tree->flags & SHORTER;
00439 int hitend;
00440
00441 assert(d != NULL && s != NULL);
00442 cold = NULL;
00443 close = v->search_start;
00444 do
00445 {
00446 MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
00447 close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
00448 if (close == NULL)
00449 break;
00450 assert(cold != NULL);
00451 open = cold;
00452 cold = NULL;
00453 MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
00454 for (begin = open; begin <= close; begin++)
00455 {
00456 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
00457 estart = begin;
00458 estop = v->stop;
00459 for (;;)
00460 {
00461 if (shorter)
00462 end = shortest(v, d, begin, estart,
00463 estop, (chr **) NULL, &hitend);
00464 else
00465 end = longest(v, d, begin, estop,
00466 &hitend);
00467 if (hitend && cold == NULL)
00468 cold = begin;
00469 if (end == NULL)
00470 break;
00471 MDEBUG(("tentative end %ld\n", LOFF(end)));
00472 zapallsubs(v->pmatch, v->nmatch);
00473 er = cdissect(v, v->g->tree, begin, end);
00474 if (er == REG_OKAY)
00475 {
00476 if (v->nmatch > 0)
00477 {
00478 v->pmatch[0].rm_so = OFF(begin);
00479 v->pmatch[0].rm_eo = OFF(end);
00480 }
00481 *coldp = cold;
00482 return REG_OKAY;
00483 }
00484 if (er != REG_NOMATCH)
00485 {
00486 ERR(er);
00487 *coldp = cold;
00488 return er;
00489 }
00490 if ((shorter) ? end == estop : end == begin)
00491 {
00492
00493 *coldp = cold;
00494 return REG_NOMATCH;
00495 }
00496
00497 if (shorter)
00498 estart = end + 1;
00499 else
00500 estop = end - 1;
00501 }
00502 }
00503 } while (close < v->stop);
00504
00505 *coldp = cold;
00506 return REG_NOMATCH;
00507 }
00508
00509
00510
00511
00512 static void
00513 zapallsubs(regmatch_t *p,
00514 size_t n)
00515 {
00516 size_t i;
00517
00518 for (i = n - 1; i > 0; i--)
00519 {
00520 p[i].rm_so = -1;
00521 p[i].rm_eo = -1;
00522 }
00523 }
00524
00525
00526
00527
00528 static void
00529 zaptreesubs(struct vars * v,
00530 struct subre * t)
00531 {
00532 if (t->op == '(')
00533 {
00534 int n = t->subno;
00535
00536 assert(n > 0);
00537 if ((size_t) n < v->nmatch)
00538 {
00539 v->pmatch[n].rm_so = -1;
00540 v->pmatch[n].rm_eo = -1;
00541 }
00542 }
00543
00544 if (t->left != NULL)
00545 zaptreesubs(v, t->left);
00546 if (t->right != NULL)
00547 zaptreesubs(v, t->right);
00548 }
00549
00550
00551
00552
00553 static void
00554 subset(struct vars * v,
00555 struct subre * sub,
00556 chr *begin,
00557 chr *end)
00558 {
00559 int n = sub->subno;
00560
00561 assert(n > 0);
00562 if ((size_t) n >= v->nmatch)
00563 return;
00564
00565 MDEBUG(("setting %d\n", n));
00566 v->pmatch[n].rm_so = OFF(begin);
00567 v->pmatch[n].rm_eo = OFF(end);
00568 }
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585 static int
00586 cdissect(struct vars * v,
00587 struct subre * t,
00588 chr *begin,
00589 chr *end)
00590 {
00591 int er;
00592
00593 assert(t != NULL);
00594 MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
00595
00596 switch (t->op)
00597 {
00598 case '=':
00599 assert(t->left == NULL && t->right == NULL);
00600 er = REG_OKAY;
00601 break;
00602 case 'b':
00603 assert(t->left == NULL && t->right == NULL);
00604 er = cbrdissect(v, t, begin, end);
00605 break;
00606 case '.':
00607 assert(t->left != NULL && t->right != NULL);
00608 if (t->left->flags & SHORTER)
00609 er = crevcondissect(v, t, begin, end);
00610 else
00611 er = ccondissect(v, t, begin, end);
00612 break;
00613 case '|':
00614 assert(t->left != NULL);
00615 er = caltdissect(v, t, begin, end);
00616 break;
00617 case '*':
00618 assert(t->left != NULL);
00619 if (t->left->flags & SHORTER)
00620 er = creviterdissect(v, t, begin, end);
00621 else
00622 er = citerdissect(v, t, begin, end);
00623 break;
00624 case '(':
00625 assert(t->left != NULL && t->right == NULL);
00626 assert(t->subno > 0);
00627 er = cdissect(v, t->left, begin, end);
00628 if (er == REG_OKAY)
00629 subset(v, t, begin, end);
00630 break;
00631 default:
00632 er = REG_ASSERT;
00633 break;
00634 }
00635
00636
00637
00638
00639
00640
00641 assert(er != REG_NOMATCH || (t->flags & BACKR));
00642
00643 return er;
00644 }
00645
00646
00647
00648
00649 static int
00650 ccondissect(struct vars * v,
00651 struct subre * t,
00652 chr *begin,
00653 chr *end)
00654 {
00655 struct dfa *d;
00656 struct dfa *d2;
00657 chr *mid;
00658 int er;
00659
00660 assert(t->op == '.');
00661 assert(t->left != NULL && t->left->cnfa.nstates > 0);
00662 assert(t->right != NULL && t->right->cnfa.nstates > 0);
00663 assert(!(t->left->flags & SHORTER));
00664
00665 d = getsubdfa(v, t->left);
00666 NOERR();
00667 d2 = getsubdfa(v, t->right);
00668 NOERR();
00669 MDEBUG(("cconcat %d\n", t->id));
00670
00671
00672 mid = longest(v, d, begin, end, (int *) NULL);
00673 if (mid == NULL)
00674 return REG_NOMATCH;
00675 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
00676
00677
00678 for (;;)
00679 {
00680
00681 if (longest(v, d2, mid, end, (int *) NULL) == end)
00682 {
00683 er = cdissect(v, t->left, begin, mid);
00684 if (er == REG_OKAY)
00685 {
00686 er = cdissect(v, t->right, mid, end);
00687 if (er == REG_OKAY)
00688 {
00689
00690 MDEBUG(("successful\n"));
00691 return REG_OKAY;
00692 }
00693 }
00694 if (er != REG_NOMATCH)
00695 return er;
00696 }
00697
00698
00699 if (mid == begin)
00700 {
00701
00702 MDEBUG(("%d no midpoint\n", t->id));
00703 return REG_NOMATCH;
00704 }
00705 mid = longest(v, d, begin, mid - 1, (int *) NULL);
00706 if (mid == NULL)
00707 {
00708
00709 MDEBUG(("%d failed midpoint\n", t->id));
00710 return REG_NOMATCH;
00711 }
00712 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
00713 zaptreesubs(v, t->left);
00714 zaptreesubs(v, t->right);
00715 }
00716
00717
00718 return REG_ASSERT;
00719 }
00720
00721
00722
00723
00724 static int
00725 crevcondissect(struct vars * v,
00726 struct subre * t,
00727 chr *begin,
00728 chr *end)
00729 {
00730 struct dfa *d;
00731 struct dfa *d2;
00732 chr *mid;
00733 int er;
00734
00735 assert(t->op == '.');
00736 assert(t->left != NULL && t->left->cnfa.nstates > 0);
00737 assert(t->right != NULL && t->right->cnfa.nstates > 0);
00738 assert(t->left->flags & SHORTER);
00739
00740 d = getsubdfa(v, t->left);
00741 NOERR();
00742 d2 = getsubdfa(v, t->right);
00743 NOERR();
00744 MDEBUG(("crevcon %d\n", t->id));
00745
00746
00747 mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
00748 if (mid == NULL)
00749 return REG_NOMATCH;
00750 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
00751
00752
00753 for (;;)
00754 {
00755
00756 if (longest(v, d2, mid, end, (int *) NULL) == end)
00757 {
00758 er = cdissect(v, t->left, begin, mid);
00759 if (er == REG_OKAY)
00760 {
00761 er = cdissect(v, t->right, mid, end);
00762 if (er == REG_OKAY)
00763 {
00764
00765 MDEBUG(("successful\n"));
00766 return REG_OKAY;
00767 }
00768 }
00769 if (er != REG_NOMATCH)
00770 return er;
00771 }
00772
00773
00774 if (mid == end)
00775 {
00776
00777 MDEBUG(("%d no midpoint\n", t->id));
00778 return REG_NOMATCH;
00779 }
00780 mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
00781 if (mid == NULL)
00782 {
00783
00784 MDEBUG(("%d failed midpoint\n", t->id));
00785 return REG_NOMATCH;
00786 }
00787 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
00788 zaptreesubs(v, t->left);
00789 zaptreesubs(v, t->right);
00790 }
00791
00792
00793 return REG_ASSERT;
00794 }
00795
00796
00797
00798
00799 static int
00800 cbrdissect(struct vars * v,
00801 struct subre * t,
00802 chr *begin,
00803 chr *end)
00804 {
00805 int n = t->subno;
00806 size_t numreps;
00807 size_t tlen;
00808 size_t brlen;
00809 chr *brstring;
00810 chr *p;
00811 int min = t->min;
00812 int max = t->max;
00813
00814 assert(t != NULL);
00815 assert(t->op == 'b');
00816 assert(n >= 0);
00817 assert((size_t) n < v->nmatch);
00818
00819 MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max));
00820
00821
00822 if (v->pmatch[n].rm_so == -1)
00823 return REG_NOMATCH;
00824 brstring = v->start + v->pmatch[n].rm_so;
00825 brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
00826
00827
00828 if (brlen == 0)
00829 {
00830
00831
00832
00833
00834 if (begin == end && min <= max)
00835 {
00836 MDEBUG(("cbackref matched trivially\n"));
00837 return REG_OKAY;
00838 }
00839 return REG_NOMATCH;
00840 }
00841 if (begin == end)
00842 {
00843
00844 if (min == 0)
00845 {
00846 MDEBUG(("cbackref matched trivially\n"));
00847 return REG_OKAY;
00848 }
00849 return REG_NOMATCH;
00850 }
00851
00852
00853
00854
00855
00856 assert(end > begin);
00857 tlen = end - begin;
00858 if (tlen % brlen != 0)
00859 return REG_NOMATCH;
00860 numreps = tlen / brlen;
00861 if (numreps < min || (numreps > max && max != INFINITY))
00862 return REG_NOMATCH;
00863
00864
00865 p = begin;
00866 while (numreps-- > 0)
00867 {
00868 if ((*v->g->compare) (brstring, p, brlen) != 0)
00869 return REG_NOMATCH;
00870 p += brlen;
00871 }
00872
00873 MDEBUG(("cbackref matched\n"));
00874 return REG_OKAY;
00875 }
00876
00877
00878
00879
00880 static int
00881 caltdissect(struct vars * v,
00882 struct subre * t,
00883 chr *begin,
00884 chr *end)
00885 {
00886 struct dfa *d;
00887 int er;
00888
00889
00890 while (t != NULL)
00891 {
00892 assert(t->op == '|');
00893 assert(t->left != NULL && t->left->cnfa.nstates > 0);
00894
00895 MDEBUG(("calt n%d\n", t->id));
00896
00897 d = getsubdfa(v, t->left);
00898 NOERR();
00899 if (longest(v, d, begin, end, (int *) NULL) == end)
00900 {
00901 MDEBUG(("calt matched\n"));
00902 er = cdissect(v, t->left, begin, end);
00903 if (er != REG_NOMATCH)
00904 return er;
00905 }
00906
00907 t = t->right;
00908 }
00909
00910 return REG_NOMATCH;
00911 }
00912
00913
00914
00915
00916 static int
00917 citerdissect(struct vars * v,
00918 struct subre * t,
00919 chr *begin,
00920 chr *end)
00921 {
00922 struct dfa *d;
00923 chr **endpts;
00924 chr *limit;
00925 int min_matches;
00926 size_t max_matches;
00927 int nverified;
00928 int k;
00929 int i;
00930 int er;
00931
00932 assert(t->op == '*');
00933 assert(t->left != NULL && t->left->cnfa.nstates > 0);
00934 assert(!(t->left->flags & SHORTER));
00935 assert(begin <= end);
00936
00937
00938
00939
00940
00941
00942 min_matches = t->min;
00943 if (min_matches <= 0)
00944 {
00945 if (begin == end)
00946 return REG_OKAY;
00947 min_matches = 1;
00948 }
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959 max_matches = end - begin;
00960 if (max_matches > t->max && t->max != INFINITY)
00961 max_matches = t->max;
00962 if (max_matches < min_matches)
00963 max_matches = min_matches;
00964 endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
00965 if (endpts == NULL)
00966 return REG_ESPACE;
00967 endpts[0] = begin;
00968
00969 d = getsubdfa(v, t->left);
00970 if (ISERR())
00971 {
00972 FREE(endpts);
00973 return v->err;
00974 }
00975 MDEBUG(("citer %d\n", t->id));
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988 nverified = 0;
00989 k = 1;
00990 limit = end;
00991
00992
00993 while (k > 0)
00994 {
00995
00996 endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
00997 if (endpts[k] == NULL)
00998 {
00999
01000 k--;
01001 goto backtrack;
01002 }
01003 MDEBUG(("%d: working endpoint %d: %ld\n",
01004 t->id, k, LOFF(endpts[k])));
01005
01006
01007 if (nverified >= k)
01008 nverified = k - 1;
01009
01010 if (endpts[k] != end)
01011 {
01012
01013 if (k >= max_matches)
01014 {
01015
01016 k--;
01017 goto backtrack;
01018 }
01019
01020
01021 if (endpts[k] == endpts[k - 1] &&
01022 (k >= min_matches || min_matches - k < end - endpts[k]))
01023 goto backtrack;
01024
01025 k++;
01026 limit = end;
01027 continue;
01028 }
01029
01030
01031
01032
01033
01034
01035
01036 if (k < min_matches)
01037 goto backtrack;
01038
01039 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
01040
01041 for (i = nverified + 1; i <= k; i++)
01042 {
01043 zaptreesubs(v, t->left);
01044 er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
01045 if (er == REG_OKAY)
01046 {
01047 nverified = i;
01048 continue;
01049 }
01050 if (er == REG_NOMATCH)
01051 break;
01052
01053 FREE(endpts);
01054 return er;
01055 }
01056
01057 if (i > k)
01058 {
01059
01060 MDEBUG(("%d successful\n", t->id));
01061 FREE(endpts);
01062 return REG_OKAY;
01063 }
01064
01065
01066
01067 backtrack:
01068
01069
01070
01071
01072
01073 while (k > 0)
01074 {
01075 chr *prev_end = endpts[k - 1];
01076
01077 if (endpts[k] > prev_end)
01078 {
01079 limit = endpts[k] - 1;
01080 if (limit > prev_end ||
01081 (k < min_matches && min_matches - k >= end - prev_end))
01082 {
01083
01084 break;
01085 }
01086 }
01087
01088 k--;
01089 }
01090 }
01091
01092
01093 MDEBUG(("%d failed\n", t->id));
01094 FREE(endpts);
01095 return REG_NOMATCH;
01096 }
01097
01098
01099
01100
01101 static int
01102 creviterdissect(struct vars * v,
01103 struct subre * t,
01104 chr *begin,
01105 chr *end)
01106 {
01107 struct dfa *d;
01108 chr **endpts;
01109 chr *limit;
01110 int min_matches;
01111 size_t max_matches;
01112 int nverified;
01113 int k;
01114 int i;
01115 int er;
01116
01117 assert(t->op == '*');
01118 assert(t->left != NULL && t->left->cnfa.nstates > 0);
01119 assert(t->left->flags & SHORTER);
01120 assert(begin <= end);
01121
01122
01123
01124
01125
01126
01127 min_matches = t->min;
01128 if (min_matches <= 0)
01129 {
01130 if (begin == end)
01131 return REG_OKAY;
01132 min_matches = 1;
01133 }
01134
01135
01136
01137
01138
01139
01140
01141
01142
01143
01144 max_matches = end - begin;
01145 if (max_matches > t->max && t->max != INFINITY)
01146 max_matches = t->max;
01147 if (max_matches < min_matches)
01148 max_matches = min_matches;
01149 endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
01150 if (endpts == NULL)
01151 return REG_ESPACE;
01152 endpts[0] = begin;
01153
01154 d = getsubdfa(v, t->left);
01155 if (ISERR())
01156 {
01157 FREE(endpts);
01158 return v->err;
01159 }
01160 MDEBUG(("creviter %d\n", t->id));
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173 nverified = 0;
01174 k = 1;
01175 limit = begin;
01176
01177
01178 while (k > 0)
01179 {
01180
01181 if (limit == endpts[k - 1] &&
01182 limit != end &&
01183 (k >= min_matches || min_matches - k < end - limit))
01184 limit++;
01185
01186
01187 endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
01188 (chr **) NULL, (int *) NULL);
01189 if (endpts[k] == NULL)
01190 {
01191
01192 k--;
01193 goto backtrack;
01194 }
01195 MDEBUG(("%d: working endpoint %d: %ld\n",
01196 t->id, k, LOFF(endpts[k])));
01197
01198
01199 if (nverified >= k)
01200 nverified = k - 1;
01201
01202 if (endpts[k] != end)
01203 {
01204
01205 if (k >= max_matches)
01206 {
01207
01208 k--;
01209 goto backtrack;
01210 }
01211
01212 k++;
01213 limit = endpts[k - 1];
01214 continue;
01215 }
01216
01217
01218
01219
01220
01221
01222
01223 if (k < min_matches)
01224 goto backtrack;
01225
01226 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
01227
01228 for (i = nverified + 1; i <= k; i++)
01229 {
01230 zaptreesubs(v, t->left);
01231 er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
01232 if (er == REG_OKAY)
01233 {
01234 nverified = i;
01235 continue;
01236 }
01237 if (er == REG_NOMATCH)
01238 break;
01239
01240 FREE(endpts);
01241 return er;
01242 }
01243
01244 if (i > k)
01245 {
01246
01247 MDEBUG(("%d successful\n", t->id));
01248 FREE(endpts);
01249 return REG_OKAY;
01250 }
01251
01252
01253
01254 backtrack:
01255
01256
01257
01258
01259 while (k > 0)
01260 {
01261 if (endpts[k] < end)
01262 {
01263 limit = endpts[k] + 1;
01264
01265 break;
01266 }
01267
01268 k--;
01269 }
01270 }
01271
01272
01273 MDEBUG(("%d failed\n", t->id));
01274 FREE(endpts);
01275 return REG_NOMATCH;
01276 }
01277
01278
01279
01280 #include "rege_dfa.c"