PostgreSQL Source Code: src/backend/regex/regc

Go to the documentation of this file.
00001 /*
00002  * NFA utilities.
00003  * This file is #included by regcomp.c.
00004  *
00005  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
00006  *
00007  * Development of this software was funded, in part, by Cray Research Inc.,
00008  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
00009  * Corporation, none of whom are responsible for the results.  The author
00010  * thanks all of them.
00011  *
00012  * Redistribution and use in source and binary forms -- with or without
00013  * modification -- are permitted for any purpose, provided that
00014  * redistributions in source form retain this entire copyright notice and
00015  * indicate the origin and nature of any modifications.
00016  *
00017  * I'd appreciate being given credit for this package in the documentation
00018  * of software which uses it, but that is not a requirement.
00019  *
00020  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
00021  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
00022  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
00023  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00024  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00025  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
00026  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
00027  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
00028  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
00029  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * src/backend/regex/regc_nfa.c
00032  *
00033  *
00034  * One or two things that technically ought to be in here
00035  * are actually in color.c, thanks to some incestuous relationships in
00036  * the color chains.
00037  */
00038 
00039 #define NISERR()    VISERR(nfa->v)
00040 #define NERR(e)     VERR(nfa->v, (e))
00041 
00042 
00043 /*
00044  * newnfa - set up an NFA
00045  */
00046 static struct nfa *             /* the NFA, or NULL */
00047 newnfa(struct vars * v,
00048        struct colormap * cm,
00049        struct nfa * parent)     /* NULL if primary NFA */
00050 {
00051     struct nfa *nfa;
00052 
00053     nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
00054     if (nfa == NULL)
00055         return NULL;
00056 
00057     nfa->states = NULL;
00058     nfa->slast = NULL;
00059     nfa->free = NULL;
00060     nfa->nstates = 0;
00061     nfa->cm = cm;
00062     nfa->v = v;
00063     nfa->size = 0;
00064     nfa->bos[0] = nfa->bos[1] = COLORLESS;
00065     nfa->eos[0] = nfa->eos[1] = COLORLESS;
00066     nfa->parent = parent;       /* Precedes newfstate so parent is valid. */
00067     nfa->post = newfstate(nfa, '@');    /* number 0 */
00068     nfa->pre = newfstate(nfa, '>');     /* number 1 */
00069 
00070     nfa->init = newstate(nfa);  /* may become invalid later */
00071     nfa->final = newstate(nfa);
00072     if (ISERR())
00073     {
00074         freenfa(nfa);
00075         return NULL;
00076     }
00077     rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
00078     newarc(nfa, '^', 1, nfa->pre, nfa->init);
00079     newarc(nfa, '^', 0, nfa->pre, nfa->init);
00080     rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
00081     newarc(nfa, '$', 1, nfa->final, nfa->post);
00082     newarc(nfa, '$', 0, nfa->final, nfa->post);
00083 
00084     if (ISERR())
00085     {
00086         freenfa(nfa);
00087         return NULL;
00088     }
00089     return nfa;
00090 }
00091 
00092 /*
00093  * TooManyStates - checks if the max states exceeds the compile-time value
00094  */
00095 static int
00096 TooManyStates(struct nfa * nfa)
00097 {
00098     struct nfa *parent = nfa->parent;
00099     size_t      sz = nfa->size;
00100 
00101     while (parent != NULL)
00102     {
00103         sz = parent->size;
00104         parent = parent->parent;
00105     }
00106     if (sz > REG_MAX_STATES)
00107         return 1;
00108     return 0;
00109 }
00110 
00111 /*
00112  * IncrementSize - increases the tracked size of the NFA and its parents.
00113  */
00114 static void
00115 IncrementSize(struct nfa * nfa)
00116 {
00117     struct nfa *parent = nfa->parent;
00118 
00119     nfa->size++;
00120     while (parent != NULL)
00121     {
00122         parent->size++;
00123         parent = parent->parent;
00124     }
00125 }
00126 
00127 /*
00128  * DecrementSize - decreases the tracked size of the NFA and its parents.
00129  */
00130 static void
00131 DecrementSize(struct nfa * nfa)
00132 {
00133     struct nfa *parent = nfa->parent;
00134 
00135     nfa->size--;
00136     while (parent != NULL)
00137     {
00138         parent->size--;
00139         parent = parent->parent;
00140     }
00141 }
00142 
00143 /*
00144  * freenfa - free an entire NFA
00145  */
00146 static void
00147 freenfa(struct nfa * nfa)
00148 {
00149     struct state *s;
00150 
00151     while ((s = nfa->states) != NULL)
00152     {
00153         s->nins = s->nouts = 0; /* don't worry about arcs */
00154         freestate(nfa, s);
00155     }
00156     while ((s = nfa->free) != NULL)
00157     {
00158         nfa->free = s->next;
00159         destroystate(nfa, s);
00160     }
00161 
00162     nfa->slast = NULL;
00163     nfa->nstates = -1;
00164     nfa->pre = NULL;
00165     nfa->post = NULL;
00166     FREE(nfa);
00167 }
00168 
00169 /*
00170  * newstate - allocate an NFA state, with zero flag value
00171  */
00172 static struct state *           /* NULL on error */
00173 newstate(struct nfa * nfa)
00174 {
00175     struct state *s;
00176 
00177     if (TooManyStates(nfa))
00178     {
00179         NERR(REG_ETOOBIG);
00180         return NULL;
00181     }
00182     if (nfa->free != NULL)
00183     {
00184         s = nfa->free;
00185         nfa->free = s->next;
00186     }
00187     else
00188     {
00189         s = (struct state *) MALLOC(sizeof(struct state));
00190         if (s == NULL)
00191         {
00192             NERR(REG_ESPACE);
00193             return NULL;
00194         }
00195         s->oas.next = NULL;
00196         s->free = NULL;
00197         s->noas = 0;
00198     }
00199 
00200     assert(nfa->nstates >= 0);
00201     s->no = nfa->nstates++;
00202     s->flag = 0;
00203     if (nfa->states == NULL)
00204         nfa->states = s;
00205     s->nins = 0;
00206     s->ins = NULL;
00207     s->nouts = 0;
00208     s->outs = NULL;
00209     s->tmp = NULL;
00210     s->next = NULL;
00211     if (nfa->slast != NULL)
00212     {
00213         assert(nfa->slast->next == NULL);
00214         nfa->slast->next = s;
00215     }
00216     s->prev = nfa->slast;
00217     nfa->slast = s;
00218     /* track the current size and the parent size */
00219     IncrementSize(nfa);
00220     return s;
00221 }
00222 
00223 /*
00224  * newfstate - allocate an NFA state with a specified flag value
00225  */
00226 static struct state *           /* NULL on error */
00227 newfstate(struct nfa * nfa, int flag)
00228 {
00229     struct state *s;
00230 
00231     s = newstate(nfa);
00232     if (s != NULL)
00233         s->flag = (char) flag;
00234     return s;
00235 }
00236 
00237 /*
00238  * dropstate - delete a state's inarcs and outarcs and free it
00239  */
00240 static void
00241 dropstate(struct nfa * nfa,
00242           struct state * s)
00243 {
00244     struct arc *a;
00245 
00246     while ((a = s->ins) != NULL)
00247         freearc(nfa, a);
00248     while ((a = s->outs) != NULL)
00249         freearc(nfa, a);
00250     freestate(nfa, s);
00251 }
00252 
00253 /*
00254  * freestate - free a state, which has no in-arcs or out-arcs
00255  */
00256 static void
00257 freestate(struct nfa * nfa,
00258           struct state * s)
00259 {
00260     assert(s != NULL);
00261     assert(s->nins == 0 && s->nouts == 0);
00262 
00263     s->no = FREESTATE;
00264     s->flag = 0;
00265     if (s->next != NULL)
00266         s->next->prev = s->prev;
00267     else
00268     {
00269         assert(s == nfa->slast);
00270         nfa->slast = s->prev;
00271     }
00272     if (s->prev != NULL)
00273         s->prev->next = s->next;
00274     else
00275     {
00276         assert(s == nfa->states);
00277         nfa->states = s->next;
00278     }
00279     s->prev = NULL;
00280     s->next = nfa->free;        /* don't delete it, put it on the free list */
00281     nfa->free = s;
00282     DecrementSize(nfa);
00283 }
00284 
00285 /*
00286  * destroystate - really get rid of an already-freed state
00287  */
00288 static void
00289 destroystate(struct nfa * nfa,
00290              struct state * s)
00291 {
00292     struct arcbatch *ab;
00293     struct arcbatch *abnext;
00294 
00295     assert(s->no == FREESTATE);
00296     for (ab = s->oas.next; ab != NULL; ab = abnext)
00297     {
00298         abnext = ab->next;
00299         FREE(ab);
00300     }
00301     s->ins = NULL;
00302     s->outs = NULL;
00303     s->next = NULL;
00304     FREE(s);
00305 }
00306 
00307 /*
00308  * newarc - set up a new arc within an NFA
00309  */
00310 static void
00311 newarc(struct nfa * nfa,
00312        int t,
00313        pcolor co,
00314        struct state * from,
00315        struct state * to)
00316 {
00317     struct arc *a;
00318 
00319     assert(from != NULL && to != NULL);
00320 
00321     /* check for duplicates */
00322     for (a = from->outs; a != NULL; a = a->outchain)
00323         if (a->to == to && a->co == co && a->type == t)
00324             return;
00325 
00326     a = allocarc(nfa, from);
00327     if (NISERR())
00328         return;
00329     assert(a != NULL);
00330 
00331     a->type = t;
00332     a->co = (color) co;
00333     a->to = to;
00334     a->from = from;
00335 
00336     /*
00337      * Put the new arc on the beginning, not the end, of the chains. Not only
00338      * is this easier, it has the very useful side effect that deleting the
00339      * most-recently-added arc is the cheapest case rather than the most
00340      * expensive one.
00341      */
00342     a->inchain = to->ins;
00343     to->ins = a;
00344     a->outchain = from->outs;
00345     from->outs = a;
00346 
00347     from->nouts++;
00348     to->nins++;
00349 
00350     if (COLORED(a) && nfa->parent == NULL)
00351         colorchain(nfa->cm, a);
00352 }
00353 
00354 /*
00355  * allocarc - allocate a new out-arc within a state
00356  */
00357 static struct arc *             /* NULL for failure */
00358 allocarc(struct nfa * nfa,
00359          struct state * s)
00360 {
00361     struct arc *a;
00362 
00363     /* shortcut */
00364     if (s->free == NULL && s->noas < ABSIZE)
00365     {
00366         a = &s->oas.a[s->noas];
00367         s->noas++;
00368         return a;
00369     }
00370 
00371     /* if none at hand, get more */
00372     if (s->free == NULL)
00373     {
00374         struct arcbatch *newAb;
00375         int         i;
00376 
00377         newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
00378         if (newAb == NULL)
00379         {
00380             NERR(REG_ESPACE);
00381             return NULL;
00382         }
00383         newAb->next = s->oas.next;
00384         s->oas.next = newAb;
00385 
00386         for (i = 0; i < ABSIZE; i++)
00387         {
00388             newAb->a[i].type = 0;
00389             newAb->a[i].freechain = &newAb->a[i + 1];
00390         }
00391         newAb->a[ABSIZE - 1].freechain = NULL;
00392         s->free = &newAb->a[0];
00393     }
00394     assert(s->free != NULL);
00395 
00396     a = s->free;
00397     s->free = a->freechain;
00398     return a;
00399 }
00400 
00401 /*
00402  * freearc - free an arc
00403  */
00404 static void
00405 freearc(struct nfa * nfa,
00406         struct arc * victim)
00407 {
00408     struct state *from = victim->from;
00409     struct state *to = victim->to;
00410     struct arc *a;
00411 
00412     assert(victim->type != 0);
00413 
00414     /* take it off color chain if necessary */
00415     if (COLORED(victim) && nfa->parent == NULL)
00416         uncolorchain(nfa->cm, victim);
00417 
00418     /* take it off source's out-chain */
00419     assert(from != NULL);
00420     assert(from->outs != NULL);
00421     a = from->outs;
00422     if (a == victim)            /* simple case:  first in chain */
00423         from->outs = victim->outchain;
00424     else
00425     {
00426         for (; a != NULL && a->outchain != victim; a = a->outchain)
00427             continue;
00428         assert(a != NULL);
00429         a->outchain = victim->outchain;
00430     }
00431     from->nouts--;
00432 
00433     /* take it off target's in-chain */
00434     assert(to != NULL);
00435     assert(to->ins != NULL);
00436     a = to->ins;
00437     if (a == victim)            /* simple case:  first in chain */
00438         to->ins = victim->inchain;
00439     else
00440     {
00441         for (; a != NULL && a->inchain != victim; a = a->inchain)
00442             continue;
00443         assert(a != NULL);
00444         a->inchain = victim->inchain;
00445     }
00446     to->nins--;
00447 
00448     /* clean up and place on free list */
00449     victim->type = 0;
00450     victim->from = NULL;        /* precautions... */
00451     victim->to = NULL;
00452     victim->inchain = NULL;
00453     victim->outchain = NULL;
00454     victim->freechain = from->free;
00455     from->free = victim;
00456 }
00457 
00458 /*
00459  * hasnonemptyout - Does state have a non-EMPTY out arc?
00460  */
00461 static int
00462 hasnonemptyout(struct state * s)
00463 {
00464     struct arc *a;
00465 
00466     for (a = s->outs; a != NULL; a = a->outchain)
00467     {
00468         if (a->type != EMPTY)
00469             return 1;
00470     }
00471     return 0;
00472 }
00473 
00474 /*
00475  * nonemptyouts - count non-EMPTY out arcs of a state
00476  */
00477 static int
00478 nonemptyouts(struct state * s)
00479 {
00480     int         n = 0;
00481     struct arc *a;
00482 
00483     for (a = s->outs; a != NULL; a = a->outchain)
00484     {
00485         if (a->type != EMPTY)
00486             n++;
00487     }
00488     return n;
00489 }
00490 
00491 /*
00492  * nonemptyins - count non-EMPTY in arcs of a state
00493  */
00494 static int
00495 nonemptyins(struct state * s)
00496 {
00497     int         n = 0;
00498     struct arc *a;
00499 
00500     for (a = s->ins; a != NULL; a = a->inchain)
00501     {
00502         if (a->type != EMPTY)
00503             n++;
00504     }
00505     return n;
00506 }
00507 
00508 /*
00509  * findarc - find arc, if any, from given source with given type and color
00510  * If there is more than one such arc, the result is random.
00511  */
00512 static struct arc *
00513 findarc(struct state * s,
00514         int type,
00515         pcolor co)
00516 {
00517     struct arc *a;
00518 
00519     for (a = s->outs; a != NULL; a = a->outchain)
00520         if (a->type == type && a->co == co)
00521             return a;
00522     return NULL;
00523 }
00524 
00525 /*
00526  * cparc - allocate a new arc within an NFA, copying details from old one
00527  */
00528 static void
00529 cparc(struct nfa * nfa,
00530       struct arc * oa,
00531       struct state * from,
00532       struct state * to)
00533 {
00534     newarc(nfa, oa->type, oa->co, from, to);
00535 }
00536 
00537 /*
00538  * moveins - move all in arcs of a state to another state
00539  *
00540  * You might think this could be done better by just updating the
00541  * existing arcs, and you would be right if it weren't for the desire
00542  * for duplicate suppression, which makes it easier to just make new
00543  * ones to exploit the suppression built into newarc.
00544  */
00545 static void
00546 moveins(struct nfa * nfa,
00547         struct state * oldState,
00548         struct state * newState)
00549 {
00550     struct arc *a;
00551 
00552     assert(oldState != newState);
00553 
00554     while ((a = oldState->ins) != NULL)
00555     {
00556         cparc(nfa, a, a->from, newState);
00557         freearc(nfa, a);
00558     }
00559     assert(oldState->nins == 0);
00560     assert(oldState->ins == NULL);
00561 }
00562 
00563 /*
00564  * copyins - copy in arcs of a state to another state
00565  *
00566  * Either all arcs, or only non-empty ones as determined by all value.
00567  */
00568 static void
00569 copyins(struct nfa * nfa,
00570         struct state * oldState,
00571         struct state * newState,
00572         int all)
00573 {
00574     struct arc *a;
00575 
00576     assert(oldState != newState);
00577 
00578     for (a = oldState->ins; a != NULL; a = a->inchain)
00579     {
00580         if (all || a->type != EMPTY)
00581             cparc(nfa, a, a->from, newState);
00582     }
00583 }
00584 
00585 /*
00586  * moveouts - move all out arcs of a state to another state
00587  */
00588 static void
00589 moveouts(struct nfa * nfa,
00590          struct state * oldState,
00591          struct state * newState)
00592 {
00593     struct arc *a;
00594 
00595     assert(oldState != newState);
00596 
00597     while ((a = oldState->outs) != NULL)
00598     {
00599         cparc(nfa, a, newState, a->to);
00600         freearc(nfa, a);
00601     }
00602 }
00603 
00604 /*
00605  * copyouts - copy out arcs of a state to another state
00606  *
00607  * Either all arcs, or only non-empty ones as determined by all value.
00608  */
00609 static void
00610 copyouts(struct nfa * nfa,
00611          struct state * oldState,
00612          struct state * newState,
00613          int all)
00614 {
00615     struct arc *a;
00616 
00617     assert(oldState != newState);
00618 
00619     for (a = oldState->outs; a != NULL; a = a->outchain)
00620     {
00621         if (all || a->type != EMPTY)
00622             cparc(nfa, a, newState, a->to);
00623     }
00624 }
00625 
00626 /*
00627  * cloneouts - copy out arcs of a state to another state pair, modifying type
00628  */
00629 static void
00630 cloneouts(struct nfa * nfa,
00631           struct state * old,
00632           struct state * from,
00633           struct state * to,
00634           int type)
00635 {
00636     struct arc *a;
00637 
00638     assert(old != from);
00639 
00640     for (a = old->outs; a != NULL; a = a->outchain)
00641         newarc(nfa, type, a->co, from, to);
00642 }
00643 
00644 /*
00645  * delsub - delete a sub-NFA, updating subre pointers if necessary
00646  *
00647  * This uses a recursive traversal of the sub-NFA, marking already-seen
00648  * states using their tmp pointer.
00649  */
00650 static void
00651 delsub(struct nfa * nfa,
00652        struct state * lp,       /* the sub-NFA goes from here... */
00653        struct state * rp)       /* ...to here, *not* inclusive */
00654 {
00655     assert(lp != rp);
00656 
00657     rp->tmp = rp;               /* mark end */
00658 
00659     deltraverse(nfa, lp, lp);
00660     assert(lp->nouts == 0 && rp->nins == 0);    /* did the job */
00661     assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
00662 
00663     rp->tmp = NULL;             /* unmark end */
00664     lp->tmp = NULL;             /* and begin, marked by deltraverse */
00665 }
00666 
00667 /*
00668  * deltraverse - the recursive heart of delsub
00669  * This routine's basic job is to destroy all out-arcs of the state.
00670  */
00671 static void
00672 deltraverse(struct nfa * nfa,
00673             struct state * leftend,
00674             struct state * s)
00675 {
00676     struct arc *a;
00677     struct state *to;
00678 
00679     if (s->nouts == 0)
00680         return;                 /* nothing to do */
00681     if (s->tmp != NULL)
00682         return;                 /* already in progress */
00683 
00684     s->tmp = s;                 /* mark as in progress */
00685 
00686     while ((a = s->outs) != NULL)
00687     {
00688         to = a->to;
00689         deltraverse(nfa, leftend, to);
00690         assert(to->nouts == 0 || to->tmp != NULL);
00691         freearc(nfa, a);
00692         if (to->nins == 0 && to->tmp == NULL)
00693         {
00694             assert(to->nouts == 0);
00695             freestate(nfa, to);
00696         }
00697     }
00698 
00699     assert(s->no != FREESTATE); /* we're still here */
00700     assert(s == leftend || s->nins != 0);       /* and still reachable */
00701     assert(s->nouts == 0);      /* but have no outarcs */
00702 
00703     s->tmp = NULL;              /* we're done here */
00704 }
00705 
00706 /*
00707  * dupnfa - duplicate sub-NFA
00708  *
00709  * Another recursive traversal, this time using tmp to point to duplicates
00710  * as well as mark already-seen states.  (You knew there was a reason why
00711  * it's a state pointer, didn't you? :-))
00712  */
00713 static void
00714 dupnfa(struct nfa * nfa,
00715        struct state * start,    /* duplicate of subNFA starting here */
00716        struct state * stop,     /* and stopping here */
00717        struct state * from,     /* stringing duplicate from here */
00718        struct state * to)       /* to here */
00719 {
00720     if (start == stop)
00721     {
00722         newarc(nfa, EMPTY, 0, from, to);
00723         return;
00724     }
00725 
00726     stop->tmp = to;
00727     duptraverse(nfa, start, from);
00728     /* done, except for clearing out the tmp pointers */
00729 
00730     stop->tmp = NULL;
00731     cleartraverse(nfa, start);
00732 }
00733 
00734 /*
00735  * duptraverse - recursive heart of dupnfa
00736  */
00737 static void
00738 duptraverse(struct nfa * nfa,
00739             struct state * s,
00740             struct state * stmp)    /* s's duplicate, or NULL */
00741 {
00742     struct arc *a;
00743 
00744     if (s->tmp != NULL)
00745         return;                 /* already done */
00746 
00747     s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
00748     if (s->tmp == NULL)
00749     {
00750         assert(NISERR());
00751         return;
00752     }
00753 
00754     for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
00755     {
00756         duptraverse(nfa, a->to, (struct state *) NULL);
00757         if (NISERR())
00758             break;
00759         assert(a->to->tmp != NULL);
00760         cparc(nfa, a, s->tmp, a->to->tmp);
00761     }
00762 }
00763 
00764 /*
00765  * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
00766  */
00767 static void
00768 cleartraverse(struct nfa * nfa,
00769               struct state * s)
00770 {
00771     struct arc *a;
00772 
00773     if (s->tmp == NULL)
00774         return;
00775     s->tmp = NULL;
00776 
00777     for (a = s->outs; a != NULL; a = a->outchain)
00778         cleartraverse(nfa, a->to);
00779 }
00780 
00781 /*
00782  * specialcolors - fill in special colors for an NFA
00783  */
00784 static void
00785 specialcolors(struct nfa * nfa)
00786 {
00787     /* false colors for BOS, BOL, EOS, EOL */
00788     if (nfa->parent == NULL)
00789     {
00790         nfa->bos[0] = pseudocolor(nfa->cm);
00791         nfa->bos[1] = pseudocolor(nfa->cm);
00792         nfa->eos[0] = pseudocolor(nfa->cm);
00793         nfa->eos[1] = pseudocolor(nfa->cm);
00794     }
00795     else
00796     {
00797         assert(nfa->parent->bos[0] != COLORLESS);
00798         nfa->bos[0] = nfa->parent->bos[0];
00799         assert(nfa->parent->bos[1] != COLORLESS);
00800         nfa->bos[1] = nfa->parent->bos[1];
00801         assert(nfa->parent->eos[0] != COLORLESS);
00802         nfa->eos[0] = nfa->parent->eos[0];
00803         assert(nfa->parent->eos[1] != COLORLESS);
00804         nfa->eos[1] = nfa->parent->eos[1];
00805     }
00806 }
00807 
00808 /*
00809  * optimize - optimize an NFA
00810  */
00811 static long                     /* re_info bits */
00812 optimize(struct nfa * nfa,
00813          FILE *f)               /* for debug output; NULL none */
00814 {
00815 #ifdef REG_DEBUG
00816     int         verbose = (f != NULL) ? 1 : 0;
00817 
00818     if (verbose)
00819         fprintf(f, "\ninitial cleanup:\n");
00820 #endif
00821     cleanup(nfa);               /* may simplify situation */
00822 #ifdef REG_DEBUG
00823     if (verbose)
00824         dumpnfa(nfa, f);
00825     if (verbose)
00826         fprintf(f, "\nempties:\n");
00827 #endif
00828     fixempties(nfa, f);         /* get rid of EMPTY arcs */
00829 #ifdef REG_DEBUG
00830     if (verbose)
00831         fprintf(f, "\nconstraints:\n");
00832 #endif
00833     pullback(nfa, f);           /* pull back constraints backward */
00834     pushfwd(nfa, f);            /* push fwd constraints forward */
00835 #ifdef REG_DEBUG
00836     if (verbose)
00837         fprintf(f, "\nfinal cleanup:\n");
00838 #endif
00839     cleanup(nfa);               /* final tidying */
00840     return analyze(nfa);        /* and analysis */
00841 }
00842 
00843 /*
00844  * pullback - pull back constraints backward to (with luck) eliminate them
00845  */
00846 static void
00847 pullback(struct nfa * nfa,
00848          FILE *f)               /* for debug output; NULL none */
00849 {
00850     struct state *s;
00851     struct state *nexts;
00852     struct arc *a;
00853     struct arc *nexta;
00854     int         progress;
00855 
00856     /* find and pull until there are no more */
00857     do
00858     {
00859         progress = 0;
00860         for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
00861         {
00862             nexts = s->next;
00863             for (a = s->outs; a != NULL && !NISERR(); a = nexta)
00864             {
00865                 nexta = a->outchain;
00866                 if (a->type == '^' || a->type == BEHIND)
00867                     if (pull(nfa, a))
00868                         progress = 1;
00869                 assert(nexta == NULL || s->no != FREESTATE);
00870             }
00871         }
00872         if (progress && f != NULL)
00873             dumpnfa(nfa, f);
00874     } while (progress && !NISERR());
00875     if (NISERR())
00876         return;
00877 
00878     for (a = nfa->pre->outs; a != NULL; a = nexta)
00879     {
00880         nexta = a->outchain;
00881         if (a->type == '^')
00882         {
00883             assert(a->co == 0 || a->co == 1);
00884             newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
00885             freearc(nfa, a);
00886         }
00887     }
00888 }
00889 
00890 /*
00891  * pull - pull a back constraint backward past its source state
00892  * A significant property of this function is that it deletes at most
00893  * one state -- the constraint's from state -- and only if the constraint
00894  * was that state's last outarc.
00895  */
00896 static int                      /* 0 couldn't, 1 could */
00897 pull(struct nfa * nfa,
00898      struct arc * con)
00899 {
00900     struct state *from = con->from;
00901     struct state *to = con->to;
00902     struct arc *a;
00903     struct arc *nexta;
00904     struct state *s;
00905 
00906     if (from == to)
00907     {                           /* circular constraint is pointless */
00908         freearc(nfa, con);
00909         return 1;
00910     }
00911     if (from->flag)             /* can't pull back beyond start */
00912         return 0;
00913     if (from->nins == 0)
00914     {                           /* unreachable */
00915         freearc(nfa, con);
00916         return 1;
00917     }
00918 
00919     /*
00920      * DGP 2007-11-15: Cloning a state with a circular constraint on its list
00921      * of outs can lead to trouble [Tcl Bug 1810038], so get rid of them
00922      * first.
00923      */
00924     for (a = from->outs; a != NULL; a = nexta)
00925     {
00926         nexta = a->outchain;
00927         switch (a->type)
00928         {
00929             case '^':
00930             case '$':
00931             case BEHIND:
00932             case AHEAD:
00933                 if (from == a->to)
00934                     freearc(nfa, a);
00935                 break;
00936         }
00937     }
00938 
00939     /* first, clone from state if necessary to avoid other outarcs */
00940     if (from->nouts > 1)
00941     {
00942         s = newstate(nfa);
00943         if (NISERR())
00944             return 0;
00945         assert(to != from);     /* con is not an inarc */
00946         copyins(nfa, from, s, 1);       /* duplicate inarcs */
00947         cparc(nfa, con, s, to); /* move constraint arc */
00948         freearc(nfa, con);
00949         from = s;
00950         con = from->outs;
00951     }
00952     assert(from->nouts == 1);
00953 
00954     /* propagate the constraint into the from state's inarcs */
00955     for (a = from->ins; a != NULL; a = nexta)
00956     {
00957         nexta = a->inchain;
00958         switch (combine(con, a))
00959         {
00960             case INCOMPATIBLE:  /* destroy the arc */
00961                 freearc(nfa, a);
00962                 break;
00963             case SATISFIED:     /* no action needed */
00964                 break;
00965             case COMPATIBLE:    /* swap the two arcs, more or less */
00966                 s = newstate(nfa);
00967                 if (NISERR())
00968                     return 0;
00969                 cparc(nfa, a, s, to);   /* anticipate move */
00970                 cparc(nfa, con, a->from, s);
00971                 if (NISERR())
00972                     return 0;
00973                 freearc(nfa, a);
00974                 break;
00975             default:
00976                 assert(NOTREACHED);
00977                 break;
00978         }
00979     }
00980 
00981     /* remaining inarcs, if any, incorporate the constraint */
00982     moveins(nfa, from, to);
00983     dropstate(nfa, from);       /* will free the constraint */
00984     return 1;
00985 }
00986 
00987 /*
00988  * pushfwd - push forward constraints forward to (with luck) eliminate them
00989  */
00990 static void
00991 pushfwd(struct nfa * nfa,
00992         FILE *f)                /* for debug output; NULL none */
00993 {
00994     struct state *s;
00995     struct state *nexts;
00996     struct arc *a;
00997     struct arc *nexta;
00998     int         progress;
00999 
01000     /* find and push until there are no more */
01001     do
01002     {
01003         progress = 0;
01004         for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
01005         {
01006             nexts = s->next;
01007             for (a = s->ins; a != NULL && !NISERR(); a = nexta)
01008             {
01009                 nexta = a->inchain;
01010                 if (a->type == '$' || a->type == AHEAD)
01011                     if (push(nfa, a))
01012                         progress = 1;
01013                 assert(nexta == NULL || s->no != FREESTATE);
01014             }
01015         }
01016         if (progress && f != NULL)
01017             dumpnfa(nfa, f);
01018     } while (progress && !NISERR());
01019     if (NISERR())
01020         return;
01021 
01022     for (a = nfa->post->ins; a != NULL; a = nexta)
01023     {
01024         nexta = a->inchain;
01025         if (a->type == '$')
01026         {
01027             assert(a->co == 0 || a->co == 1);
01028             newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
01029             freearc(nfa, a);
01030         }
01031     }
01032 }
01033 
01034 /*
01035  * push - push a forward constraint forward past its destination state
01036  * A significant property of this function is that it deletes at most
01037  * one state -- the constraint's to state -- and only if the constraint
01038  * was that state's last inarc.
01039  */
01040 static int                      /* 0 couldn't, 1 could */
01041 push(struct nfa * nfa,
01042      struct arc * con)
01043 {
01044     struct state *from = con->from;
01045     struct state *to = con->to;
01046     struct arc *a;
01047     struct arc *nexta;
01048     struct state *s;
01049 
01050     if (to == from)
01051     {                           /* circular constraint is pointless */
01052         freearc(nfa, con);
01053         return 1;
01054     }
01055     if (to->flag)               /* can't push forward beyond end */
01056         return 0;
01057     if (to->nouts == 0)
01058     {                           /* dead end */
01059         freearc(nfa, con);
01060         return 1;
01061     }
01062 
01063     /*
01064      * DGP 2007-11-15: Here we duplicate the same protections as appear in
01065      * pull() above to avoid troubles with cloning a state with a circular
01066      * constraint on its list of ins.  It is not clear whether this is
01067      * necessary, or is protecting against a "can't happen". Any test case
01068      * that actually leads to a freearc() call here would be a welcome
01069      * addition to the test suite.
01070      */
01071     for (a = to->ins; a != NULL; a = nexta)
01072     {
01073         nexta = a->inchain;
01074         switch (a->type)
01075         {
01076             case '^':
01077             case '$':
01078             case BEHIND:
01079             case AHEAD:
01080                 if (a->from == to)
01081                     freearc(nfa, a);
01082                 break;
01083         }
01084     }
01085 
01086     /* first, clone to state if necessary to avoid other inarcs */
01087     if (to->nins > 1)
01088     {
01089         s = newstate(nfa);
01090         if (NISERR())
01091             return 0;
01092         copyouts(nfa, to, s, 1);    /* duplicate outarcs */
01093         cparc(nfa, con, from, s);       /* move constraint */
01094         freearc(nfa, con);
01095         to = s;
01096         con = to->ins;
01097     }
01098     assert(to->nins == 1);
01099 
01100     /* propagate the constraint into the to state's outarcs */
01101     for (a = to->outs; a != NULL; a = nexta)
01102     {
01103         nexta = a->outchain;
01104         switch (combine(con, a))
01105         {
01106             case INCOMPATIBLE:  /* destroy the arc */
01107                 freearc(nfa, a);
01108                 break;
01109             case SATISFIED:     /* no action needed */
01110                 break;
01111             case COMPATIBLE:    /* swap the two arcs, more or less */
01112                 s = newstate(nfa);
01113                 if (NISERR())
01114                     return 0;
01115                 cparc(nfa, con, s, a->to);      /* anticipate move */
01116                 cparc(nfa, a, from, s);
01117                 if (NISERR())
01118                     return 0;
01119                 freearc(nfa, a);
01120                 break;
01121             default:
01122                 assert(NOTREACHED);
01123                 break;
01124         }
01125     }
01126 
01127     /* remaining outarcs, if any, incorporate the constraint */
01128     moveouts(nfa, to, from);
01129     dropstate(nfa, to);         /* will free the constraint */
01130     return 1;
01131 }
01132 
01133 /*
01134  * combine - constraint lands on an arc, what happens?
01135  *
01136  * #def INCOMPATIBLE    1   // destroys arc
01137  * #def SATISFIED       2   // constraint satisfied
01138  * #def COMPATIBLE      3   // compatible but not satisfied yet
01139  */
01140 static int
01141 combine(struct arc * con,
01142         struct arc * a)
01143 {
01144 #define  CA(ct,at)   (((ct)<<CHAR_BIT) | (at))
01145 
01146     switch (CA(con->type, a->type))
01147     {
01148         case CA('^', PLAIN):    /* newlines are handled separately */
01149         case CA('$', PLAIN):
01150             return INCOMPATIBLE;
01151             break;
01152         case CA(AHEAD, PLAIN):  /* color constraints meet colors */
01153         case CA(BEHIND, PLAIN):
01154             if (con->co == a->co)
01155                 return SATISFIED;
01156             return INCOMPATIBLE;
01157             break;
01158         case CA('^', '^'):      /* collision, similar constraints */
01159         case CA('$', '$'):
01160         case CA(AHEAD, AHEAD):
01161         case CA(BEHIND, BEHIND):
01162             if (con->co == a->co)       /* true duplication */
01163                 return SATISFIED;
01164             return INCOMPATIBLE;
01165             break;
01166         case CA('^', BEHIND):   /* collision, dissimilar constraints */
01167         case CA(BEHIND, '^'):
01168         case CA('$', AHEAD):
01169         case CA(AHEAD, '$'):
01170             return INCOMPATIBLE;
01171             break;
01172         case CA('^', '$'):      /* constraints passing each other */
01173         case CA('^', AHEAD):
01174         case CA(BEHIND, '$'):
01175         case CA(BEHIND, AHEAD):
01176         case CA('$', '^'):
01177         case CA('$', BEHIND):
01178         case CA(AHEAD, '^'):
01179         case CA(AHEAD, BEHIND):
01180         case CA('^', LACON):
01181         case CA(BEHIND, LACON):
01182         case CA('$', LACON):
01183         case CA(AHEAD, LACON):
01184             return COMPATIBLE;
01185             break;
01186     }
01187     assert(NOTREACHED);
01188     return INCOMPATIBLE;        /* for benefit of blind compilers */
01189 }
01190 
01191 /*
01192  * fixempties - get rid of EMPTY arcs
01193  */
01194 static void
01195 fixempties(struct nfa * nfa,
01196            FILE *f)             /* for debug output; NULL none */
01197 {
01198     struct state *s;
01199     struct state *s2;
01200     struct state *nexts;
01201     struct arc *a;
01202     struct arc *nexta;
01203 
01204     /*
01205      * First, get rid of any states whose sole out-arc is an EMPTY, since
01206      * they're basically just aliases for their successor.  The parsing
01207      * algorithm creates enough of these that it's worth special-casing this.
01208      */
01209     for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
01210     {
01211         nexts = s->next;
01212         if (s->flag || s->nouts != 1)
01213             continue;
01214         a = s->outs;
01215         assert(a != NULL && a->outchain == NULL);
01216         if (a->type != EMPTY)
01217             continue;
01218         if (s != a->to)
01219             moveins(nfa, s, a->to);
01220         dropstate(nfa, s);
01221     }
01222 
01223     /*
01224      * Similarly, get rid of any state with a single EMPTY in-arc, by folding
01225      * it into its predecessor.
01226      */
01227     for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
01228     {
01229         nexts = s->next;
01230         /* while we're at it, ensure tmp fields are clear for next step */
01231         assert(s->tmp == NULL);
01232         if (s->flag || s->nins != 1)
01233             continue;
01234         a = s->ins;
01235         assert(a != NULL && a->inchain == NULL);
01236         if (a->type != EMPTY)
01237             continue;
01238         if (s != a->from)
01239             moveouts(nfa, s, a->from);
01240         dropstate(nfa, s);
01241     }
01242 
01243     /*
01244      * For each remaining NFA state, find all other states that are reachable
01245      * from it by a chain of one or more EMPTY arcs.  Then generate new arcs
01246      * that eliminate the need for each such chain.
01247      *
01248      * If we just do this straightforwardly, the algorithm gets slow in
01249      * complex graphs, because the same arcs get copied to all intermediate
01250      * states of an EMPTY chain, and then uselessly pushed repeatedly to the
01251      * chain's final state; we waste a lot of time in newarc's duplicate
01252      * checking.  To improve matters, we decree that any state with only EMPTY
01253      * out-arcs is "doomed" and will not be part of the final NFA. That can be
01254      * ensured by not adding any new out-arcs to such a state. Having ensured
01255      * that, we need not update the state's in-arcs list either; all arcs that
01256      * might have gotten pushed forward to it will just get pushed directly to
01257      * successor states.  This eliminates most of the useless duplicate arcs.
01258      */
01259     for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
01260     {
01261         for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts)
01262         {
01263             /*
01264              * If s2 is doomed, we decide that (1) we will always push arcs
01265              * forward to it, not pull them back to s; and (2) we can optimize
01266              * away the push-forward, per comment above.  So do nothing.
01267              */
01268             if (s2->flag || hasnonemptyout(s2))
01269                 replaceempty(nfa, s, s2);
01270 
01271             /* Reset the tmp fields as we walk back */
01272             nexts = s2->tmp;
01273             s2->tmp = NULL;
01274         }
01275         s->tmp = NULL;
01276     }
01277 
01278     if (NISERR())
01279         return;
01280 
01281     /*
01282      * Now remove all the EMPTY arcs, since we don't need them anymore.
01283      */
01284     for (s = nfa->states; s != NULL; s = s->next)
01285     {
01286         for (a = s->outs; a != NULL; a = nexta)
01287         {
01288             nexta = a->outchain;
01289             if (a->type == EMPTY)
01290                 freearc(nfa, a);
01291         }
01292     }
01293 
01294     /*
01295      * And remove any states that have become useless.  (This cleanup is not
01296      * very thorough, and would be even less so if we tried to combine it with
01297      * the previous step; but cleanup() will take care of anything we miss.)
01298      */
01299     for (s = nfa->states; s != NULL; s = nexts)
01300     {
01301         nexts = s->next;
01302         if ((s->nins == 0 || s->nouts == 0) && !s->flag)
01303             dropstate(nfa, s);
01304     }
01305 
01306     if (f != NULL)
01307         dumpnfa(nfa, f);
01308 }
01309 
01310 /*
01311  * emptyreachable - recursively find all states reachable from s by EMPTY arcs
01312  *
01313  * The return value is the last such state found.  Its tmp field links back
01314  * to the next-to-last such state, and so on back to s, so that all these
01315  * states can be located without searching the whole NFA.
01316  *
01317  * The maximum recursion depth here is equal to the length of the longest
01318  * loop-free chain of EMPTY arcs, which is surely no more than the size of
01319  * the NFA, and in practice will be a lot less than that.
01320  */
01321 static struct state *
01322 emptyreachable(struct state * s, struct state * lastfound)
01323 {
01324     struct arc *a;
01325 
01326     s->tmp = lastfound;
01327     lastfound = s;
01328     for (a = s->outs; a != NULL; a = a->outchain)
01329     {
01330         if (a->type == EMPTY && a->to->tmp == NULL)
01331             lastfound = emptyreachable(a->to, lastfound);
01332     }
01333     return lastfound;
01334 }
01335 
01336 /*
01337  * replaceempty - replace an EMPTY arc chain with some non-empty arcs
01338  *
01339  * The EMPTY arc(s) should be deleted later, but we can't do it here because
01340  * they may still be needed to identify other arc chains during fixempties().
01341  */
01342 static void
01343 replaceempty(struct nfa * nfa,
01344              struct state * from,
01345              struct state * to)
01346 {
01347     int         fromouts;
01348     int         toins;
01349 
01350     assert(from != to);
01351 
01352     /*
01353      * Create replacement arcs that bypass the need for the EMPTY chain.  We
01354      * can do this either by pushing arcs forward (linking directly from
01355      * "from"'s predecessors to "to") or by pulling them back (linking
01356      * directly from "from" to "to"'s successors).  In general, we choose
01357      * whichever way creates greater fan-out or fan-in, so as to improve the
01358      * odds of reducing the other state to zero in-arcs or out-arcs and
01359      * thereby being able to delete it.  However, if "from" is doomed (has no
01360      * non-EMPTY out-arcs), we must keep it so, so always push forward in that
01361      * case.
01362      *
01363      * The fan-out/fan-in comparison should count only non-EMPTY arcs.  If
01364      * "from" is doomed, we can skip counting "to"'s arcs, since we want to
01365      * force taking the copyins path in that case.
01366      */
01367     fromouts = nonemptyouts(from);
01368     toins = (fromouts == 0) ? 1 : nonemptyins(to);
01369 
01370     if (fromouts > toins)
01371     {
01372         copyouts(nfa, to, from, 0);
01373         return;
01374     }
01375     if (fromouts < toins)
01376     {
01377         copyins(nfa, from, to, 0);
01378         return;
01379     }
01380 
01381     /*
01382      * fromouts == toins.  Decide on secondary issue: copy fewest arcs.
01383      *
01384      * Doesn't seem to be worth the trouble to exclude empties from these
01385      * comparisons; that takes extra time and doesn't seem to improve the
01386      * resulting graph much.
01387      */
01388     if (from->nins > to->nouts)
01389     {
01390         copyouts(nfa, to, from, 0);
01391         return;
01392     }
01393     else
01394     {
01395         copyins(nfa, from, to, 0);
01396         return;
01397     }
01398 }
01399 
01400 /*
01401  * cleanup - clean up NFA after optimizations
01402  */
01403 static void
01404 cleanup(struct nfa * nfa)
01405 {
01406     struct state *s;
01407     struct state *nexts;
01408     int         n;
01409 
01410     /* clear out unreachable or dead-end states */
01411     /* use pre to mark reachable, then post to mark can-reach-post */
01412     markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
01413     markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
01414     for (s = nfa->states; s != NULL; s = nexts)
01415     {
01416         nexts = s->next;
01417         if (s->tmp != nfa->post && !s->flag)
01418             dropstate(nfa, s);
01419     }
01420     assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
01421     cleartraverse(nfa, nfa->pre);
01422     assert(nfa->post->nins == 0 || nfa->post->tmp == NULL);
01423     /* the nins==0 (final unreachable) case will be caught later */
01424 
01425     /* renumber surviving states */
01426     n = 0;
01427     for (s = nfa->states; s != NULL; s = s->next)
01428         s->no = n++;
01429     nfa->nstates = n;
01430 }
01431 
01432 /*
01433  * markreachable - recursive marking of reachable states
01434  */
01435 static void
01436 markreachable(struct nfa * nfa,
01437               struct state * s,
01438               struct state * okay,      /* consider only states with this mark */
01439               struct state * mark)      /* the value to mark with */
01440 {
01441     struct arc *a;
01442 
01443     if (s->tmp != okay)
01444         return;
01445     s->tmp = mark;
01446 
01447     for (a = s->outs; a != NULL; a = a->outchain)
01448         markreachable(nfa, a->to, okay, mark);
01449 }
01450 
01451 /*
01452  * markcanreach - recursive marking of states which can reach here
01453  */
01454 static void
01455 markcanreach(struct nfa * nfa,
01456              struct state * s,
01457              struct state * okay,       /* consider only states with this mark */
01458              struct state * mark)       /* the value to mark with */
01459 {
01460     struct arc *a;
01461 
01462     if (s->tmp != okay)
01463         return;
01464     s->tmp = mark;
01465 
01466     for (a = s->ins; a != NULL; a = a->inchain)
01467         markcanreach(nfa, a->from, okay, mark);
01468 }
01469 
01470 /*
01471  * analyze - ascertain potentially-useful facts about an optimized NFA
01472  */
01473 static long                     /* re_info bits to be ORed in */
01474 analyze(struct nfa * nfa)
01475 {
01476     struct arc *a;
01477     struct arc *aa;
01478 
01479     if (nfa->pre->outs == NULL)
01480         return REG_UIMPOSSIBLE;
01481     for (a = nfa->pre->outs; a != NULL; a = a->outchain)
01482         for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
01483             if (aa->to == nfa->post)
01484                 return REG_UEMPTYMATCH;
01485     return 0;
01486 }
01487 
01488 /*
01489  * compact - compact an NFA
01490  */
01491 static void
01492 compact(struct nfa * nfa,
01493         struct cnfa * cnfa)
01494 {
01495     struct state *s;
01496     struct arc *a;
01497     size_t      nstates;
01498     size_t      narcs;
01499     struct carc *ca;
01500     struct carc *first;
01501 
01502     assert(!NISERR());
01503 
01504     nstates = 0;
01505     narcs = 0;
01506     for (s = nfa->states; s != NULL; s = s->next)
01507     {
01508         nstates++;
01509         narcs += s->nouts + 1;      /* need one extra for endmarker */
01510     }
01511 
01512     cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
01513     cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
01514     cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
01515     if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
01516     {
01517         if (cnfa->stflags != NULL)
01518             FREE(cnfa->stflags);
01519         if (cnfa->states != NULL)
01520             FREE(cnfa->states);
01521         if (cnfa->arcs != NULL)
01522             FREE(cnfa->arcs);
01523         NERR(REG_ESPACE);
01524         return;
01525     }
01526     cnfa->nstates = nstates;
01527     cnfa->pre = nfa->pre->no;
01528     cnfa->post = nfa->post->no;
01529     cnfa->bos[0] = nfa->bos[0];
01530     cnfa->bos[1] = nfa->bos[1];
01531     cnfa->eos[0] = nfa->eos[0];
01532     cnfa->eos[1] = nfa->eos[1];
01533     cnfa->ncolors = maxcolor(nfa->cm) + 1;
01534     cnfa->flags = 0;
01535 
01536     ca = cnfa->arcs;
01537     for (s = nfa->states; s != NULL; s = s->next)
01538     {
01539         assert((size_t) s->no < nstates);
01540         cnfa->stflags[s->no] = 0;
01541         cnfa->states[s->no] = ca;
01542         first = ca;
01543         for (a = s->outs; a != NULL; a = a->outchain)
01544             switch (a->type)
01545             {
01546                 case PLAIN:
01547                     ca->co = a->co;
01548                     ca->to = a->to->no;
01549                     ca++;
01550                     break;
01551                 case LACON:
01552                     assert(s->no != cnfa->pre);
01553                     ca->co = (color) (cnfa->ncolors + a->co);
01554                     ca->to = a->to->no;
01555                     ca++;
01556                     cnfa->flags |= HASLACONS;
01557                     break;
01558                 default:
01559                     assert(NOTREACHED);
01560                     break;
01561             }
01562         carcsort(first, ca - 1);
01563         ca->co = COLORLESS;
01564         ca->to = 0;
01565         ca++;
01566     }
01567     assert(ca == &cnfa->arcs[narcs]);
01568     assert(cnfa->nstates != 0);
01569 
01570     /* mark no-progress states */
01571     for (a = nfa->pre->outs; a != NULL; a = a->outchain)
01572         cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
01573     cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
01574 }
01575 
01576 /*
01577  * carcsort - sort compacted-NFA arcs by color
01578  *
01579  * Really dumb algorithm, but if the list is long enough for that to matter,
01580  * you're in real trouble anyway.
01581  */
01582 static void
01583 carcsort(struct carc * first,
01584          struct carc * last)
01585 {
01586     struct carc *p;
01587     struct carc *q;
01588     struct carc tmp;
01589 
01590     if (last - first <= 1)
01591         return;
01592 
01593     for (p = first; p <= last; p++)
01594         for (q = p; q <= last; q++)
01595             if (p->co > q->co ||
01596                 (p->co == q->co && p->to > q->to))
01597             {
01598                 assert(p != q);
01599                 tmp = *p;
01600                 *p = *q;
01601                 *q = tmp;
01602             }
01603 }
01604 
01605 /*
01606  * freecnfa - free a compacted NFA
01607  */
01608 static void
01609 freecnfa(struct cnfa * cnfa)
01610 {
01611     assert(cnfa->nstates != 0); /* not empty already */
01612     cnfa->nstates = 0;
01613     FREE(cnfa->stflags);
01614     FREE(cnfa->states);
01615     FREE(cnfa->arcs);
01616 }
01617 
01618 /*
01619  * dumpnfa - dump an NFA in human-readable form
01620  */
01621 static void
01622 dumpnfa(struct nfa * nfa,
01623         FILE *f)
01624 {
01625 #ifdef REG_DEBUG
01626     struct state *s;
01627 
01628     fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
01629     if (nfa->bos[0] != COLORLESS)
01630         fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
01631     if (nfa->bos[1] != COLORLESS)
01632         fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
01633     if (nfa->eos[0] != COLORLESS)
01634         fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
01635     if (nfa->eos[1] != COLORLESS)
01636         fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
01637     fprintf(f, "\n");
01638     for (s = nfa->states; s != NULL; s = s->next)
01639         dumpstate(s, f);
01640     if (nfa->parent == NULL)
01641         dumpcolors(nfa->cm, f);
01642     fflush(f);
01643 #endif
01644 }
01645 
01646 #ifdef REG_DEBUG                /* subordinates of dumpnfa */
01647 
01648 /*
01649  * dumpstate - dump an NFA state in human-readable form
01650  */
01651 static void
01652 dumpstate(struct state * s,
01653           FILE *f)
01654 {
01655     struct arc *a;
01656 
01657     fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
01658             (s->flag) ? s->flag : '.');
01659     if (s->prev != NULL && s->prev->next != s)
01660         fprintf(f, "\tstate chain bad\n");
01661     if (s->nouts == 0)
01662         fprintf(f, "\tno out arcs\n");
01663     else
01664         dumparcs(s, f);
01665     fflush(f);
01666     for (a = s->ins; a != NULL; a = a->inchain)
01667     {
01668         if (a->to != s)
01669             fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
01670                     a->from->no, a->to->no, s->no);
01671     }
01672 }
01673 
01674 /*
01675  * dumparcs - dump out-arcs in human-readable form
01676  */
01677 static void
01678 dumparcs(struct state * s,
01679          FILE *f)
01680 {
01681     int         pos;
01682 
01683     assert(s->nouts > 0);
01684     /* printing arcs in reverse order is usually clearer */
01685     pos = dumprarcs(s->outs, s, f, 1);
01686     if (pos != 1)
01687         fprintf(f, "\n");
01688 }
01689 
01690 /*
01691  * dumprarcs - dump remaining outarcs, recursively, in reverse order
01692  */
01693 static int                      /* resulting print position */
01694 dumprarcs(struct arc * a,
01695           struct state * s,
01696           FILE *f,
01697           int pos)              /* initial print position */
01698 {
01699     if (a->outchain != NULL)
01700         pos = dumprarcs(a->outchain, s, f, pos);
01701     dumparc(a, s, f);
01702     if (pos == 5)
01703     {
01704         fprintf(f, "\n");
01705         pos = 1;
01706     }
01707     else
01708         pos++;
01709     return pos;
01710 }
01711 
01712 /*
01713  * dumparc - dump one outarc in readable form, including prefixing tab
01714  */
01715 static void
01716 dumparc(struct arc * a,
01717         struct state * s,
01718         FILE *f)
01719 {
01720     struct arc *aa;
01721     struct arcbatch *ab;
01722 
01723     fprintf(f, "\t");
01724     switch (a->type)
01725     {
01726         case PLAIN:
01727             fprintf(f, "[%ld]", (long) a->co);
01728             break;
01729         case AHEAD:
01730             fprintf(f, ">%ld>", (long) a->co);
01731             break;
01732         case BEHIND:
01733             fprintf(f, "<%ld<", (long) a->co);
01734             break;
01735         case LACON:
01736             fprintf(f, ":%ld:", (long) a->co);
01737             break;
01738         case '^':
01739         case '$':
01740             fprintf(f, "%c%d", a->type, (int) a->co);
01741             break;
01742         case EMPTY:
01743             break;
01744         default:
01745             fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
01746             break;
01747     }
01748     if (a->from != s)
01749         fprintf(f, "?%d?", a->from->no);
01750     for (ab = &a->from->oas; ab != NULL; ab = ab->next)
01751     {
01752         for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
01753             if (aa == a)
01754                 break;          /* NOTE BREAK OUT */
01755         if (aa < &ab->a[ABSIZE])    /* propagate break */
01756             break;              /* NOTE BREAK OUT */
01757     }
01758     if (ab == NULL)
01759         fprintf(f, "?!?");      /* not in allocated space */
01760     fprintf(f, "->");
01761     if (a->to == NULL)
01762     {
01763         fprintf(f, "NULL");
01764         return;
01765     }
01766     fprintf(f, "%d", a->to->no);
01767     for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
01768         if (aa == a)
01769             break;              /* NOTE BREAK OUT */
01770     if (aa == NULL)
01771         fprintf(f, "?!?");      /* missing from in-chain */
01772 }
01773 #endif   /* REG_DEBUG */
01774 
01775 /*
01776  * dumpcnfa - dump a compacted NFA in human-readable form
01777  */
01778 #ifdef REG_DEBUG
01779 static void
01780 dumpcnfa(struct cnfa * cnfa,
01781          FILE *f)
01782 {
01783     int         st;
01784 
01785     fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
01786     if (cnfa->bos[0] != COLORLESS)
01787         fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
01788     if (cnfa->bos[1] != COLORLESS)
01789         fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
01790     if (cnfa->eos[0] != COLORLESS)
01791         fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
01792     if (cnfa->eos[1] != COLORLESS)
01793         fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
01794     if (cnfa->flags & HASLACONS)
01795         fprintf(f, ", haslacons");
01796     fprintf(f, "\n");
01797     for (st = 0; st < cnfa->nstates; st++)
01798         dumpcstate(st, cnfa, f);
01799     fflush(f);
01800 }
01801 #endif
01802 
01803 #ifdef REG_DEBUG                /* subordinates of dumpcnfa */
01804 
01805 /*
01806  * dumpcstate - dump a compacted-NFA state in human-readable form
01807  */
01808 static void
01809 dumpcstate(int st,
01810            struct cnfa * cnfa,
01811            FILE *f)
01812 {
01813     struct carc * ca;
01814     int         pos;
01815 
01816     fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
01817     pos = 1;
01818     for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
01819     {
01820         if (ca->co < cnfa->ncolors)
01821             fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
01822         else
01823             fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
01824         if (pos == 5)
01825         {
01826             fprintf(f, "\n");
01827             pos = 1;
01828         }
01829         else
01830             pos++;
01831     }
01832     if (ca == cnfa->states[st] || pos != 1)
01833         fprintf(f, "\n");
01834     fflush(f);
01835 }
01836 
01837 #endif   /* REG_DEBUG */
Header And Logo

regc_nfa.c