00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 static chr *
00039 longest(struct vars * v,
00040 struct dfa * d,
00041 chr *start,
00042 chr *stop,
00043 int *hitstopp)
00044 {
00045 chr *cp;
00046 chr *realstop = (stop == v->stop) ? stop : stop + 1;
00047 color co;
00048 struct sset *css;
00049 struct sset *ss;
00050 chr *post;
00051 int i;
00052 struct colormap *cm = d->cm;
00053
00054
00055 css = initialize(v, d, start);
00056 cp = start;
00057 if (hitstopp != NULL)
00058 *hitstopp = 0;
00059
00060
00061 FDEBUG(("+++ startup +++\n"));
00062 if (cp == v->start)
00063 {
00064 co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
00065 FDEBUG(("color %ld\n", (long) co));
00066 }
00067 else
00068 {
00069 co = GETCOLOR(cm, *(cp - 1));
00070 FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
00071 }
00072 css = miss(v, d, css, co, cp, start);
00073 if (css == NULL)
00074 return NULL;
00075 css->lastseen = cp;
00076
00077
00078 if (v->eflags & REG_FTRACE)
00079 while (cp < realstop)
00080 {
00081 FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets)));
00082 co = GETCOLOR(cm, *cp);
00083 FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
00084 ss = css->outs[co];
00085 if (ss == NULL)
00086 {
00087 ss = miss(v, d, css, co, cp + 1, start);
00088 if (ss == NULL)
00089 break;
00090 }
00091 cp++;
00092 ss->lastseen = cp;
00093 css = ss;
00094 }
00095 else
00096 while (cp < realstop)
00097 {
00098 co = GETCOLOR(cm, *cp);
00099 ss = css->outs[co];
00100 if (ss == NULL)
00101 {
00102 ss = miss(v, d, css, co, cp + 1, start);
00103 if (ss == NULL)
00104 break;
00105 }
00106 cp++;
00107 ss->lastseen = cp;
00108 css = ss;
00109 }
00110
00111
00112 FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets)));
00113 if (cp == v->stop && stop == v->stop)
00114 {
00115 if (hitstopp != NULL)
00116 *hitstopp = 1;
00117 co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
00118 FDEBUG(("color %ld\n", (long) co));
00119 ss = miss(v, d, css, co, cp, start);
00120
00121 if (ss != NULL && (ss->flags & POSTSTATE))
00122 return cp;
00123 else if (ss != NULL)
00124 ss->lastseen = cp;
00125 }
00126
00127
00128 post = d->lastpost;
00129 for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
00130 if ((ss->flags & POSTSTATE) && post != ss->lastseen &&
00131 (post == NULL || post < ss->lastseen))
00132 post = ss->lastseen;
00133 if (post != NULL)
00134 return post - 1;
00135
00136 return NULL;
00137 }
00138
00139
00140
00141
00142 static chr *
00143 shortest(struct vars * v,
00144 struct dfa * d,
00145 chr *start,
00146 chr *min,
00147 chr *max,
00148 chr **coldp,
00149 int *hitstopp)
00150 {
00151 chr *cp;
00152 chr *realmin = (min == v->stop) ? min : min + 1;
00153 chr *realmax = (max == v->stop) ? max : max + 1;
00154 color co;
00155 struct sset *css;
00156 struct sset *ss;
00157 struct colormap *cm = d->cm;
00158
00159
00160 css = initialize(v, d, start);
00161 cp = start;
00162 if (hitstopp != NULL)
00163 *hitstopp = 0;
00164
00165
00166 FDEBUG(("--- startup ---\n"));
00167 if (cp == v->start)
00168 {
00169 co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
00170 FDEBUG(("color %ld\n", (long) co));
00171 }
00172 else
00173 {
00174 co = GETCOLOR(cm, *(cp - 1));
00175 FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
00176 }
00177 css = miss(v, d, css, co, cp, start);
00178 if (css == NULL)
00179 return NULL;
00180 css->lastseen = cp;
00181 ss = css;
00182
00183
00184 if (v->eflags & REG_FTRACE)
00185 while (cp < realmax)
00186 {
00187 FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets)));
00188 co = GETCOLOR(cm, *cp);
00189 FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
00190 ss = css->outs[co];
00191 if (ss == NULL)
00192 {
00193 ss = miss(v, d, css, co, cp + 1, start);
00194 if (ss == NULL)
00195 break;
00196 }
00197 cp++;
00198 ss->lastseen = cp;
00199 css = ss;
00200 if ((ss->flags & POSTSTATE) && cp >= realmin)
00201 break;
00202 }
00203 else
00204 while (cp < realmax)
00205 {
00206 co = GETCOLOR(cm, *cp);
00207 ss = css->outs[co];
00208 if (ss == NULL)
00209 {
00210 ss = miss(v, d, css, co, cp + 1, start);
00211 if (ss == NULL)
00212 break;
00213 }
00214 cp++;
00215 ss->lastseen = cp;
00216 css = ss;
00217 if ((ss->flags & POSTSTATE) && cp >= realmin)
00218 break;
00219 }
00220
00221 if (ss == NULL)
00222 return NULL;
00223
00224 if (coldp != NULL)
00225 *coldp = lastcold(v, d);
00226
00227 if ((ss->flags & POSTSTATE) && cp > min)
00228 {
00229 assert(cp >= realmin);
00230 cp--;
00231 }
00232 else if (cp == v->stop && max == v->stop)
00233 {
00234 co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
00235 FDEBUG(("color %ld\n", (long) co));
00236 ss = miss(v, d, css, co, cp, start);
00237
00238 if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL)
00239 *hitstopp = 1;
00240 }
00241
00242 if (ss == NULL || !(ss->flags & POSTSTATE))
00243 return NULL;
00244
00245 return cp;
00246 }
00247
00248
00249
00250
00251 static chr *
00252 lastcold(struct vars * v,
00253 struct dfa * d)
00254 {
00255 struct sset *ss;
00256 chr *nopr;
00257 int i;
00258
00259 nopr = d->lastnopr;
00260 if (nopr == NULL)
00261 nopr = v->start;
00262 for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
00263 if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen)
00264 nopr = ss->lastseen;
00265 return nopr;
00266 }
00267
00268
00269
00270
00271 static struct dfa *
00272 newdfa(struct vars * v,
00273 struct cnfa * cnfa,
00274 struct colormap * cm,
00275 struct smalldfa * sml)
00276 {
00277 struct dfa *d;
00278 size_t nss = cnfa->nstates * 2;
00279 int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
00280 struct smalldfa *smallwas = sml;
00281
00282 assert(cnfa != NULL && cnfa->nstates != 0);
00283
00284 if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS)
00285 {
00286 assert(wordsper == 1);
00287 if (sml == NULL)
00288 {
00289 sml = (struct smalldfa *) MALLOC(sizeof(struct smalldfa));
00290 if (sml == NULL)
00291 {
00292 ERR(REG_ESPACE);
00293 return NULL;
00294 }
00295 }
00296 d = &sml->dfa;
00297 d->ssets = sml->ssets;
00298 d->statesarea = sml->statesarea;
00299 d->work = &d->statesarea[nss];
00300 d->outsarea = sml->outsarea;
00301 d->incarea = sml->incarea;
00302 d->cptsmalloced = 0;
00303 d->mallocarea = (smallwas == NULL) ? (char *) sml : NULL;
00304 }
00305 else
00306 {
00307 d = (struct dfa *) MALLOC(sizeof(struct dfa));
00308 if (d == NULL)
00309 {
00310 ERR(REG_ESPACE);
00311 return NULL;
00312 }
00313 d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset));
00314 d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper *
00315 sizeof(unsigned));
00316 d->work = &d->statesarea[nss * wordsper];
00317 d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors *
00318 sizeof(struct sset *));
00319 d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors *
00320 sizeof(struct arcp));
00321 d->cptsmalloced = 1;
00322 d->mallocarea = (char *) d;
00323 if (d->ssets == NULL || d->statesarea == NULL ||
00324 d->outsarea == NULL || d->incarea == NULL)
00325 {
00326 freedfa(d);
00327 ERR(REG_ESPACE);
00328 return NULL;
00329 }
00330 }
00331
00332 d->nssets = (v->eflags & REG_SMALL) ? 7 : nss;
00333 d->nssused = 0;
00334 d->nstates = cnfa->nstates;
00335 d->ncolors = cnfa->ncolors;
00336 d->wordsper = wordsper;
00337 d->cnfa = cnfa;
00338 d->cm = cm;
00339 d->lastpost = NULL;
00340 d->lastnopr = NULL;
00341 d->search = d->ssets;
00342
00343
00344
00345 return d;
00346 }
00347
00348
00349
00350
00351 static void
00352 freedfa(struct dfa * d)
00353 {
00354 if (d->cptsmalloced)
00355 {
00356 if (d->ssets != NULL)
00357 FREE(d->ssets);
00358 if (d->statesarea != NULL)
00359 FREE(d->statesarea);
00360 if (d->outsarea != NULL)
00361 FREE(d->outsarea);
00362 if (d->incarea != NULL)
00363 FREE(d->incarea);
00364 }
00365
00366 if (d->mallocarea != NULL)
00367 FREE(d->mallocarea);
00368 }
00369
00370
00371
00372
00373
00374
00375 static unsigned
00376 hash(unsigned *uv,
00377 int n)
00378 {
00379 int i;
00380 unsigned h;
00381
00382 h = 0;
00383 for (i = 0; i < n; i++)
00384 h ^= uv[i];
00385 return h;
00386 }
00387
00388
00389
00390
00391 static struct sset *
00392 initialize(struct vars * v,
00393 struct dfa * d,
00394 chr *start)
00395 {
00396 struct sset *ss;
00397 int i;
00398
00399
00400 if (d->nssused > 0 && (d->ssets[0].flags & STARTER))
00401 ss = &d->ssets[0];
00402 else
00403 {
00404 ss = getvacant(v, d, start, start);
00405 for (i = 0; i < d->wordsper; i++)
00406 ss->states[i] = 0;
00407 BSET(ss->states, d->cnfa->pre);
00408 ss->hash = HASH(ss->states, d->wordsper);
00409 assert(d->cnfa->pre != d->cnfa->post);
00410 ss->flags = STARTER | LOCKED | NOPROGRESS;
00411
00412 }
00413
00414 for (i = 0; i < d->nssused; i++)
00415 d->ssets[i].lastseen = NULL;
00416 ss->lastseen = start;
00417 d->lastpost = NULL;
00418 d->lastnopr = NULL;
00419 return ss;
00420 }
00421
00422
00423
00424
00425 static struct sset *
00426 miss(struct vars * v,
00427 struct dfa * d,
00428 struct sset * css,
00429 pcolor co,
00430 chr *cp,
00431 chr *start)
00432 {
00433 struct cnfa *cnfa = d->cnfa;
00434 int i;
00435 unsigned h;
00436 struct carc *ca;
00437 struct sset *p;
00438 int ispost;
00439 int noprogress;
00440 int gotstate;
00441 int dolacons;
00442 int sawlacons;
00443
00444
00445 if (css->outs[co] != NULL)
00446 {
00447 FDEBUG(("hit\n"));
00448 return css->outs[co];
00449 }
00450 FDEBUG(("miss\n"));
00451
00452
00453 for (i = 0; i < d->wordsper; i++)
00454 d->work[i] = 0;
00455 ispost = 0;
00456 noprogress = 1;
00457 gotstate = 0;
00458 for (i = 0; i < d->nstates; i++)
00459 if (ISBSET(css->states, i))
00460 for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
00461 if (ca->co == co)
00462 {
00463 BSET(d->work, ca->to);
00464 gotstate = 1;
00465 if (ca->to == cnfa->post)
00466 ispost = 1;
00467 if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
00468 noprogress = 0;
00469 FDEBUG(("%d -> %d\n", i, ca->to));
00470 }
00471 dolacons = (gotstate) ? (cnfa->flags & HASLACONS) : 0;
00472 sawlacons = 0;
00473 while (dolacons)
00474 {
00475 dolacons = 0;
00476 for (i = 0; i < d->nstates; i++)
00477 if (ISBSET(d->work, i))
00478 for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
00479 {
00480 if (ca->co < cnfa->ncolors)
00481 continue;
00482 sawlacons = 1;
00483 if (ISBSET(d->work, ca->to))
00484 continue;
00485 if (!lacon(v, cnfa, cp, ca->co))
00486 continue;
00487 BSET(d->work, ca->to);
00488 dolacons = 1;
00489 if (ca->to == cnfa->post)
00490 ispost = 1;
00491 if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
00492 noprogress = 0;
00493 FDEBUG(("%d :> %d\n", i, ca->to));
00494 }
00495 }
00496 if (!gotstate)
00497 return NULL;
00498 h = HASH(d->work, d->wordsper);
00499
00500
00501 for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
00502 if (HIT(h, d->work, p, d->wordsper))
00503 {
00504 FDEBUG(("cached c%d\n", (int) (p - d->ssets)));
00505 break;
00506 }
00507 if (i == 0)
00508 {
00509 p = getvacant(v, d, cp, start);
00510 assert(p != css);
00511 for (i = 0; i < d->wordsper; i++)
00512 p->states[i] = d->work[i];
00513 p->hash = h;
00514 p->flags = (ispost) ? POSTSTATE : 0;
00515 if (noprogress)
00516 p->flags |= NOPROGRESS;
00517
00518 }
00519
00520 if (!sawlacons)
00521 {
00522 FDEBUG(("c%d[%d]->c%d\n",
00523 (int) (css - d->ssets), co, (int) (p - d->ssets)));
00524 css->outs[co] = p;
00525 css->inchain[co] = p->ins;
00526 p->ins.ss = css;
00527 p->ins.co = (color) co;
00528 }
00529 return p;
00530 }
00531
00532
00533
00534
00535 static int
00536 lacon(struct vars * v,
00537 struct cnfa * pcnfa,
00538 chr *cp,
00539 pcolor co)
00540 {
00541 int n;
00542 struct subre *sub;
00543 struct dfa *d;
00544 struct smalldfa sd;
00545 chr *end;
00546
00547 n = co - pcnfa->ncolors;
00548 assert(n < v->g->nlacons && v->g->lacons != NULL);
00549 FDEBUG(("=== testing lacon %d\n", n));
00550 sub = &v->g->lacons[n];
00551 d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd);
00552 if (d == NULL)
00553 {
00554 ERR(REG_ESPACE);
00555 return 0;
00556 }
00557 end = longest(v, d, cp, v->stop, (int *) NULL);
00558 freedfa(d);
00559 FDEBUG(("=== lacon %d match %d\n", n, (end != NULL)));
00560 return (sub->subno) ? (end != NULL) : (end == NULL);
00561 }
00562
00563
00564
00565
00566
00567
00568 static struct sset *
00569 getvacant(struct vars * v,
00570 struct dfa * d,
00571 chr *cp,
00572 chr *start)
00573 {
00574 int i;
00575 struct sset *ss;
00576 struct sset *p;
00577 struct arcp ap;
00578 color co;
00579
00580 ss = pickss(v, d, cp, start);
00581 assert(!(ss->flags & LOCKED));
00582
00583
00584 ap = ss->ins;
00585 while ((p = ap.ss) != NULL)
00586 {
00587 co = ap.co;
00588 FDEBUG(("zapping c%d's %ld outarc\n", (int) (p - d->ssets), (long) co));
00589 p->outs[co] = NULL;
00590 ap = p->inchain[co];
00591 p->inchain[co].ss = NULL;
00592 }
00593 ss->ins.ss = NULL;
00594
00595
00596 for (i = 0; i < d->ncolors; i++)
00597 {
00598 p = ss->outs[i];
00599 assert(p != ss);
00600 if (p == NULL)
00601 continue;
00602 FDEBUG(("del outarc %d from c%d's in chn\n", i, (int) (p - d->ssets)));
00603 if (p->ins.ss == ss && p->ins.co == i)
00604 p->ins = ss->inchain[i];
00605 else
00606 {
00607 struct arcp lastap = {NULL, 0};
00608
00609 assert(p->ins.ss != NULL);
00610 for (ap = p->ins; ap.ss != NULL &&
00611 !(ap.ss == ss && ap.co == i);
00612 ap = ap.ss->inchain[ap.co])
00613 lastap = ap;
00614 assert(ap.ss != NULL);
00615 lastap.ss->inchain[lastap.co] = ss->inchain[i];
00616 }
00617 ss->outs[i] = NULL;
00618 ss->inchain[i].ss = NULL;
00619 }
00620
00621
00622 if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost &&
00623 (d->lastpost == NULL || d->lastpost < ss->lastseen))
00624 d->lastpost = ss->lastseen;
00625
00626
00627 if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr &&
00628 (d->lastnopr == NULL || d->lastnopr < ss->lastseen))
00629 d->lastnopr = ss->lastseen;
00630
00631 return ss;
00632 }
00633
00634
00635
00636
00637 static struct sset *
00638 pickss(struct vars * v,
00639 struct dfa * d,
00640 chr *cp,
00641 chr *start)
00642 {
00643 int i;
00644 struct sset *ss;
00645 struct sset *end;
00646 chr *ancient;
00647
00648
00649 if (d->nssused < d->nssets)
00650 {
00651 i = d->nssused;
00652 d->nssused++;
00653 ss = &d->ssets[i];
00654 FDEBUG(("new c%d\n", i));
00655
00656 ss->states = &d->statesarea[i * d->wordsper];
00657 ss->flags = 0;
00658 ss->ins.ss = NULL;
00659 ss->ins.co = WHITE;
00660 ss->outs = &d->outsarea[i * d->ncolors];
00661 ss->inchain = &d->incarea[i * d->ncolors];
00662 for (i = 0; i < d->ncolors; i++)
00663 {
00664 ss->outs[i] = NULL;
00665 ss->inchain[i].ss = NULL;
00666 }
00667 return ss;
00668 }
00669
00670
00671 if (cp - start > d->nssets * 2 / 3)
00672 ancient = cp - d->nssets * 2 / 3;
00673 else
00674 ancient = start;
00675 for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
00676 if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
00677 !(ss->flags & LOCKED))
00678 {
00679 d->search = ss + 1;
00680 FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
00681 return ss;
00682 }
00683 for (ss = d->ssets, end = d->search; ss < end; ss++)
00684 if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
00685 !(ss->flags & LOCKED))
00686 {
00687 d->search = ss + 1;
00688 FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
00689 return ss;
00690 }
00691
00692
00693 FDEBUG(("cannot find victim to replace!\n"));
00694 assert(NOTREACHED);
00695 ERR(REG_ASSERT);
00696 return d->ssets;
00697 }