00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 #define NISERR() VISERR(nfa->v)
00040 #define NERR(e) VERR(nfa->v, (e))
00041
00042
00043
00044
00045
00046 static struct nfa *
00047 newnfa(struct vars * v,
00048 struct colormap * cm,
00049 struct nfa * parent)
00050 {
00051 struct nfa *nfa;
00052
00053 nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
00054 if (nfa == NULL)
00055 return NULL;
00056
00057 nfa->states = NULL;
00058 nfa->slast = NULL;
00059 nfa->free = NULL;
00060 nfa->nstates = 0;
00061 nfa->cm = cm;
00062 nfa->v = v;
00063 nfa->size = 0;
00064 nfa->bos[0] = nfa->bos[1] = COLORLESS;
00065 nfa->eos[0] = nfa->eos[1] = COLORLESS;
00066 nfa->parent = parent;
00067 nfa->post = newfstate(nfa, '@');
00068 nfa->pre = newfstate(nfa, '>');
00069
00070 nfa->init = newstate(nfa);
00071 nfa->final = newstate(nfa);
00072 if (ISERR())
00073 {
00074 freenfa(nfa);
00075 return NULL;
00076 }
00077 rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
00078 newarc(nfa, '^', 1, nfa->pre, nfa->init);
00079 newarc(nfa, '^', 0, nfa->pre, nfa->init);
00080 rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
00081 newarc(nfa, '$', 1, nfa->final, nfa->post);
00082 newarc(nfa, '$', 0, nfa->final, nfa->post);
00083
00084 if (ISERR())
00085 {
00086 freenfa(nfa);
00087 return NULL;
00088 }
00089 return nfa;
00090 }
00091
00092
00093
00094
00095 static int
00096 TooManyStates(struct nfa * nfa)
00097 {
00098 struct nfa *parent = nfa->parent;
00099 size_t sz = nfa->size;
00100
00101 while (parent != NULL)
00102 {
00103 sz = parent->size;
00104 parent = parent->parent;
00105 }
00106 if (sz > REG_MAX_STATES)
00107 return 1;
00108 return 0;
00109 }
00110
00111
00112
00113
00114 static void
00115 IncrementSize(struct nfa * nfa)
00116 {
00117 struct nfa *parent = nfa->parent;
00118
00119 nfa->size++;
00120 while (parent != NULL)
00121 {
00122 parent->size++;
00123 parent = parent->parent;
00124 }
00125 }
00126
00127
00128
00129
00130 static void
00131 DecrementSize(struct nfa * nfa)
00132 {
00133 struct nfa *parent = nfa->parent;
00134
00135 nfa->size--;
00136 while (parent != NULL)
00137 {
00138 parent->size--;
00139 parent = parent->parent;
00140 }
00141 }
00142
00143
00144
00145
00146 static void
00147 freenfa(struct nfa * nfa)
00148 {
00149 struct state *s;
00150
00151 while ((s = nfa->states) != NULL)
00152 {
00153 s->nins = s->nouts = 0;
00154 freestate(nfa, s);
00155 }
00156 while ((s = nfa->free) != NULL)
00157 {
00158 nfa->free = s->next;
00159 destroystate(nfa, s);
00160 }
00161
00162 nfa->slast = NULL;
00163 nfa->nstates = -1;
00164 nfa->pre = NULL;
00165 nfa->post = NULL;
00166 FREE(nfa);
00167 }
00168
00169
00170
00171
00172 static struct state *
00173 newstate(struct nfa * nfa)
00174 {
00175 struct state *s;
00176
00177 if (TooManyStates(nfa))
00178 {
00179 NERR(REG_ETOOBIG);
00180 return NULL;
00181 }
00182 if (nfa->free != NULL)
00183 {
00184 s = nfa->free;
00185 nfa->free = s->next;
00186 }
00187 else
00188 {
00189 s = (struct state *) MALLOC(sizeof(struct state));
00190 if (s == NULL)
00191 {
00192 NERR(REG_ESPACE);
00193 return NULL;
00194 }
00195 s->oas.next = NULL;
00196 s->free = NULL;
00197 s->noas = 0;
00198 }
00199
00200 assert(nfa->nstates >= 0);
00201 s->no = nfa->nstates++;
00202 s->flag = 0;
00203 if (nfa->states == NULL)
00204 nfa->states = s;
00205 s->nins = 0;
00206 s->ins = NULL;
00207 s->nouts = 0;
00208 s->outs = NULL;
00209 s->tmp = NULL;
00210 s->next = NULL;
00211 if (nfa->slast != NULL)
00212 {
00213 assert(nfa->slast->next == NULL);
00214 nfa->slast->next = s;
00215 }
00216 s->prev = nfa->slast;
00217 nfa->slast = s;
00218
00219 IncrementSize(nfa);
00220 return s;
00221 }
00222
00223
00224
00225
00226 static struct state *
00227 newfstate(struct nfa * nfa, int flag)
00228 {
00229 struct state *s;
00230
00231 s = newstate(nfa);
00232 if (s != NULL)
00233 s->flag = (char) flag;
00234 return s;
00235 }
00236
00237
00238
00239
00240 static void
00241 dropstate(struct nfa * nfa,
00242 struct state * s)
00243 {
00244 struct arc *a;
00245
00246 while ((a = s->ins) != NULL)
00247 freearc(nfa, a);
00248 while ((a = s->outs) != NULL)
00249 freearc(nfa, a);
00250 freestate(nfa, s);
00251 }
00252
00253
00254
00255
00256 static void
00257 freestate(struct nfa * nfa,
00258 struct state * s)
00259 {
00260 assert(s != NULL);
00261 assert(s->nins == 0 && s->nouts == 0);
00262
00263 s->no = FREESTATE;
00264 s->flag = 0;
00265 if (s->next != NULL)
00266 s->next->prev = s->prev;
00267 else
00268 {
00269 assert(s == nfa->slast);
00270 nfa->slast = s->prev;
00271 }
00272 if (s->prev != NULL)
00273 s->prev->next = s->next;
00274 else
00275 {
00276 assert(s == nfa->states);
00277 nfa->states = s->next;
00278 }
00279 s->prev = NULL;
00280 s->next = nfa->free;
00281 nfa->free = s;
00282 DecrementSize(nfa);
00283 }
00284
00285
00286
00287
00288 static void
00289 destroystate(struct nfa * nfa,
00290 struct state * s)
00291 {
00292 struct arcbatch *ab;
00293 struct arcbatch *abnext;
00294
00295 assert(s->no == FREESTATE);
00296 for (ab = s->oas.next; ab != NULL; ab = abnext)
00297 {
00298 abnext = ab->next;
00299 FREE(ab);
00300 }
00301 s->ins = NULL;
00302 s->outs = NULL;
00303 s->next = NULL;
00304 FREE(s);
00305 }
00306
00307
00308
00309
00310 static void
00311 newarc(struct nfa * nfa,
00312 int t,
00313 pcolor co,
00314 struct state * from,
00315 struct state * to)
00316 {
00317 struct arc *a;
00318
00319 assert(from != NULL && to != NULL);
00320
00321
00322 for (a = from->outs; a != NULL; a = a->outchain)
00323 if (a->to == to && a->co == co && a->type == t)
00324 return;
00325
00326 a = allocarc(nfa, from);
00327 if (NISERR())
00328 return;
00329 assert(a != NULL);
00330
00331 a->type = t;
00332 a->co = (color) co;
00333 a->to = to;
00334 a->from = from;
00335
00336
00337
00338
00339
00340
00341
00342 a->inchain = to->ins;
00343 to->ins = a;
00344 a->outchain = from->outs;
00345 from->outs = a;
00346
00347 from->nouts++;
00348 to->nins++;
00349
00350 if (COLORED(a) && nfa->parent == NULL)
00351 colorchain(nfa->cm, a);
00352 }
00353
00354
00355
00356
00357 static struct arc *
00358 allocarc(struct nfa * nfa,
00359 struct state * s)
00360 {
00361 struct arc *a;
00362
00363
00364 if (s->free == NULL && s->noas < ABSIZE)
00365 {
00366 a = &s->oas.a[s->noas];
00367 s->noas++;
00368 return a;
00369 }
00370
00371
00372 if (s->free == NULL)
00373 {
00374 struct arcbatch *newAb;
00375 int i;
00376
00377 newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
00378 if (newAb == NULL)
00379 {
00380 NERR(REG_ESPACE);
00381 return NULL;
00382 }
00383 newAb->next = s->oas.next;
00384 s->oas.next = newAb;
00385
00386 for (i = 0; i < ABSIZE; i++)
00387 {
00388 newAb->a[i].type = 0;
00389 newAb->a[i].freechain = &newAb->a[i + 1];
00390 }
00391 newAb->a[ABSIZE - 1].freechain = NULL;
00392 s->free = &newAb->a[0];
00393 }
00394 assert(s->free != NULL);
00395
00396 a = s->free;
00397 s->free = a->freechain;
00398 return a;
00399 }
00400
00401
00402
00403
00404 static void
00405 freearc(struct nfa * nfa,
00406 struct arc * victim)
00407 {
00408 struct state *from = victim->from;
00409 struct state *to = victim->to;
00410 struct arc *a;
00411
00412 assert(victim->type != 0);
00413
00414
00415 if (COLORED(victim) && nfa->parent == NULL)
00416 uncolorchain(nfa->cm, victim);
00417
00418
00419 assert(from != NULL);
00420 assert(from->outs != NULL);
00421 a = from->outs;
00422 if (a == victim)
00423 from->outs = victim->outchain;
00424 else
00425 {
00426 for (; a != NULL && a->outchain != victim; a = a->outchain)
00427 continue;
00428 assert(a != NULL);
00429 a->outchain = victim->outchain;
00430 }
00431 from->nouts--;
00432
00433
00434 assert(to != NULL);
00435 assert(to->ins != NULL);
00436 a = to->ins;
00437 if (a == victim)
00438 to->ins = victim->inchain;
00439 else
00440 {
00441 for (; a != NULL && a->inchain != victim; a = a->inchain)
00442 continue;
00443 assert(a != NULL);
00444 a->inchain = victim->inchain;
00445 }
00446 to->nins--;
00447
00448
00449 victim->type = 0;
00450 victim->from = NULL;
00451 victim->to = NULL;
00452 victim->inchain = NULL;
00453 victim->outchain = NULL;
00454 victim->freechain = from->free;
00455 from->free = victim;
00456 }
00457
00458
00459
00460
00461 static int
00462 hasnonemptyout(struct state * s)
00463 {
00464 struct arc *a;
00465
00466 for (a = s->outs; a != NULL; a = a->outchain)
00467 {
00468 if (a->type != EMPTY)
00469 return 1;
00470 }
00471 return 0;
00472 }
00473
00474
00475
00476
00477 static int
00478 nonemptyouts(struct state * s)
00479 {
00480 int n = 0;
00481 struct arc *a;
00482
00483 for (a = s->outs; a != NULL; a = a->outchain)
00484 {
00485 if (a->type != EMPTY)
00486 n++;
00487 }
00488 return n;
00489 }
00490
00491
00492
00493
00494 static int
00495 nonemptyins(struct state * s)
00496 {
00497 int n = 0;
00498 struct arc *a;
00499
00500 for (a = s->ins; a != NULL; a = a->inchain)
00501 {
00502 if (a->type != EMPTY)
00503 n++;
00504 }
00505 return n;
00506 }
00507
00508
00509
00510
00511
00512 static struct arc *
00513 findarc(struct state * s,
00514 int type,
00515 pcolor co)
00516 {
00517 struct arc *a;
00518
00519 for (a = s->outs; a != NULL; a = a->outchain)
00520 if (a->type == type && a->co == co)
00521 return a;
00522 return NULL;
00523 }
00524
00525
00526
00527
00528 static void
00529 cparc(struct nfa * nfa,
00530 struct arc * oa,
00531 struct state * from,
00532 struct state * to)
00533 {
00534 newarc(nfa, oa->type, oa->co, from, to);
00535 }
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545 static void
00546 moveins(struct nfa * nfa,
00547 struct state * oldState,
00548 struct state * newState)
00549 {
00550 struct arc *a;
00551
00552 assert(oldState != newState);
00553
00554 while ((a = oldState->ins) != NULL)
00555 {
00556 cparc(nfa, a, a->from, newState);
00557 freearc(nfa, a);
00558 }
00559 assert(oldState->nins == 0);
00560 assert(oldState->ins == NULL);
00561 }
00562
00563
00564
00565
00566
00567
00568 static void
00569 copyins(struct nfa * nfa,
00570 struct state * oldState,
00571 struct state * newState,
00572 int all)
00573 {
00574 struct arc *a;
00575
00576 assert(oldState != newState);
00577
00578 for (a = oldState->ins; a != NULL; a = a->inchain)
00579 {
00580 if (all || a->type != EMPTY)
00581 cparc(nfa, a, a->from, newState);
00582 }
00583 }
00584
00585
00586
00587
00588 static void
00589 moveouts(struct nfa * nfa,
00590 struct state * oldState,
00591 struct state * newState)
00592 {
00593 struct arc *a;
00594
00595 assert(oldState != newState);
00596
00597 while ((a = oldState->outs) != NULL)
00598 {
00599 cparc(nfa, a, newState, a->to);
00600 freearc(nfa, a);
00601 }
00602 }
00603
00604
00605
00606
00607
00608
00609 static void
00610 copyouts(struct nfa * nfa,
00611 struct state * oldState,
00612 struct state * newState,
00613 int all)
00614 {
00615 struct arc *a;
00616
00617 assert(oldState != newState);
00618
00619 for (a = oldState->outs; a != NULL; a = a->outchain)
00620 {
00621 if (all || a->type != EMPTY)
00622 cparc(nfa, a, newState, a->to);
00623 }
00624 }
00625
00626
00627
00628
00629 static void
00630 cloneouts(struct nfa * nfa,
00631 struct state * old,
00632 struct state * from,
00633 struct state * to,
00634 int type)
00635 {
00636 struct arc *a;
00637
00638 assert(old != from);
00639
00640 for (a = old->outs; a != NULL; a = a->outchain)
00641 newarc(nfa, type, a->co, from, to);
00642 }
00643
00644
00645
00646
00647
00648
00649
00650 static void
00651 delsub(struct nfa * nfa,
00652 struct state * lp,
00653 struct state * rp)
00654 {
00655 assert(lp != rp);
00656
00657 rp->tmp = rp;
00658
00659 deltraverse(nfa, lp, lp);
00660 assert(lp->nouts == 0 && rp->nins == 0);
00661 assert(lp->no != FREESTATE && rp->no != FREESTATE);
00662
00663 rp->tmp = NULL;
00664 lp->tmp = NULL;
00665 }
00666
00667
00668
00669
00670
00671 static void
00672 deltraverse(struct nfa * nfa,
00673 struct state * leftend,
00674 struct state * s)
00675 {
00676 struct arc *a;
00677 struct state *to;
00678
00679 if (s->nouts == 0)
00680 return;
00681 if (s->tmp != NULL)
00682 return;
00683
00684 s->tmp = s;
00685
00686 while ((a = s->outs) != NULL)
00687 {
00688 to = a->to;
00689 deltraverse(nfa, leftend, to);
00690 assert(to->nouts == 0 || to->tmp != NULL);
00691 freearc(nfa, a);
00692 if (to->nins == 0 && to->tmp == NULL)
00693 {
00694 assert(to->nouts == 0);
00695 freestate(nfa, to);
00696 }
00697 }
00698
00699 assert(s->no != FREESTATE);
00700 assert(s == leftend || s->nins != 0);
00701 assert(s->nouts == 0);
00702
00703 s->tmp = NULL;
00704 }
00705
00706
00707
00708
00709
00710
00711
00712
00713 static void
00714 dupnfa(struct nfa * nfa,
00715 struct state * start,
00716 struct state * stop,
00717 struct state * from,
00718 struct state * to)
00719 {
00720 if (start == stop)
00721 {
00722 newarc(nfa, EMPTY, 0, from, to);
00723 return;
00724 }
00725
00726 stop->tmp = to;
00727 duptraverse(nfa, start, from);
00728
00729
00730 stop->tmp = NULL;
00731 cleartraverse(nfa, start);
00732 }
00733
00734
00735
00736
00737 static void
00738 duptraverse(struct nfa * nfa,
00739 struct state * s,
00740 struct state * stmp)
00741 {
00742 struct arc *a;
00743
00744 if (s->tmp != NULL)
00745 return;
00746
00747 s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
00748 if (s->tmp == NULL)
00749 {
00750 assert(NISERR());
00751 return;
00752 }
00753
00754 for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
00755 {
00756 duptraverse(nfa, a->to, (struct state *) NULL);
00757 if (NISERR())
00758 break;
00759 assert(a->to->tmp != NULL);
00760 cparc(nfa, a, s->tmp, a->to->tmp);
00761 }
00762 }
00763
00764
00765
00766
00767 static void
00768 cleartraverse(struct nfa * nfa,
00769 struct state * s)
00770 {
00771 struct arc *a;
00772
00773 if (s->tmp == NULL)
00774 return;
00775 s->tmp = NULL;
00776
00777 for (a = s->outs; a != NULL; a = a->outchain)
00778 cleartraverse(nfa, a->to);
00779 }
00780
00781
00782
00783
00784 static void
00785 specialcolors(struct nfa * nfa)
00786 {
00787
00788 if (nfa->parent == NULL)
00789 {
00790 nfa->bos[0] = pseudocolor(nfa->cm);
00791 nfa->bos[1] = pseudocolor(nfa->cm);
00792 nfa->eos[0] = pseudocolor(nfa->cm);
00793 nfa->eos[1] = pseudocolor(nfa->cm);
00794 }
00795 else
00796 {
00797 assert(nfa->parent->bos[0] != COLORLESS);
00798 nfa->bos[0] = nfa->parent->bos[0];
00799 assert(nfa->parent->bos[1] != COLORLESS);
00800 nfa->bos[1] = nfa->parent->bos[1];
00801 assert(nfa->parent->eos[0] != COLORLESS);
00802 nfa->eos[0] = nfa->parent->eos[0];
00803 assert(nfa->parent->eos[1] != COLORLESS);
00804 nfa->eos[1] = nfa->parent->eos[1];
00805 }
00806 }
00807
00808
00809
00810
00811 static long
00812 optimize(struct nfa * nfa,
00813 FILE *f)
00814 {
00815 #ifdef REG_DEBUG
00816 int verbose = (f != NULL) ? 1 : 0;
00817
00818 if (verbose)
00819 fprintf(f, "\ninitial cleanup:\n");
00820 #endif
00821 cleanup(nfa);
00822 #ifdef REG_DEBUG
00823 if (verbose)
00824 dumpnfa(nfa, f);
00825 if (verbose)
00826 fprintf(f, "\nempties:\n");
00827 #endif
00828 fixempties(nfa, f);
00829 #ifdef REG_DEBUG
00830 if (verbose)
00831 fprintf(f, "\nconstraints:\n");
00832 #endif
00833 pullback(nfa, f);
00834 pushfwd(nfa, f);
00835 #ifdef REG_DEBUG
00836 if (verbose)
00837 fprintf(f, "\nfinal cleanup:\n");
00838 #endif
00839 cleanup(nfa);
00840 return analyze(nfa);
00841 }
00842
00843
00844
00845
00846 static void
00847 pullback(struct nfa * nfa,
00848 FILE *f)
00849 {
00850 struct state *s;
00851 struct state *nexts;
00852 struct arc *a;
00853 struct arc *nexta;
00854 int progress;
00855
00856
00857 do
00858 {
00859 progress = 0;
00860 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
00861 {
00862 nexts = s->next;
00863 for (a = s->outs; a != NULL && !NISERR(); a = nexta)
00864 {
00865 nexta = a->outchain;
00866 if (a->type == '^' || a->type == BEHIND)
00867 if (pull(nfa, a))
00868 progress = 1;
00869 assert(nexta == NULL || s->no != FREESTATE);
00870 }
00871 }
00872 if (progress && f != NULL)
00873 dumpnfa(nfa, f);
00874 } while (progress && !NISERR());
00875 if (NISERR())
00876 return;
00877
00878 for (a = nfa->pre->outs; a != NULL; a = nexta)
00879 {
00880 nexta = a->outchain;
00881 if (a->type == '^')
00882 {
00883 assert(a->co == 0 || a->co == 1);
00884 newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
00885 freearc(nfa, a);
00886 }
00887 }
00888 }
00889
00890
00891
00892
00893
00894
00895
00896 static int
00897 pull(struct nfa * nfa,
00898 struct arc * con)
00899 {
00900 struct state *from = con->from;
00901 struct state *to = con->to;
00902 struct arc *a;
00903 struct arc *nexta;
00904 struct state *s;
00905
00906 if (from == to)
00907 {
00908 freearc(nfa, con);
00909 return 1;
00910 }
00911 if (from->flag)
00912 return 0;
00913 if (from->nins == 0)
00914 {
00915 freearc(nfa, con);
00916 return 1;
00917 }
00918
00919
00920
00921
00922
00923
00924 for (a = from->outs; a != NULL; a = nexta)
00925 {
00926 nexta = a->outchain;
00927 switch (a->type)
00928 {
00929 case '^':
00930 case '$':
00931 case BEHIND:
00932 case AHEAD:
00933 if (from == a->to)
00934 freearc(nfa, a);
00935 break;
00936 }
00937 }
00938
00939
00940 if (from->nouts > 1)
00941 {
00942 s = newstate(nfa);
00943 if (NISERR())
00944 return 0;
00945 assert(to != from);
00946 copyins(nfa, from, s, 1);
00947 cparc(nfa, con, s, to);
00948 freearc(nfa, con);
00949 from = s;
00950 con = from->outs;
00951 }
00952 assert(from->nouts == 1);
00953
00954
00955 for (a = from->ins; a != NULL; a = nexta)
00956 {
00957 nexta = a->inchain;
00958 switch (combine(con, a))
00959 {
00960 case INCOMPATIBLE:
00961 freearc(nfa, a);
00962 break;
00963 case SATISFIED:
00964 break;
00965 case COMPATIBLE:
00966 s = newstate(nfa);
00967 if (NISERR())
00968 return 0;
00969 cparc(nfa, a, s, to);
00970 cparc(nfa, con, a->from, s);
00971 if (NISERR())
00972 return 0;
00973 freearc(nfa, a);
00974 break;
00975 default:
00976 assert(NOTREACHED);
00977 break;
00978 }
00979 }
00980
00981
00982 moveins(nfa, from, to);
00983 dropstate(nfa, from);
00984 return 1;
00985 }
00986
00987
00988
00989
00990 static void
00991 pushfwd(struct nfa * nfa,
00992 FILE *f)
00993 {
00994 struct state *s;
00995 struct state *nexts;
00996 struct arc *a;
00997 struct arc *nexta;
00998 int progress;
00999
01000
01001 do
01002 {
01003 progress = 0;
01004 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
01005 {
01006 nexts = s->next;
01007 for (a = s->ins; a != NULL && !NISERR(); a = nexta)
01008 {
01009 nexta = a->inchain;
01010 if (a->type == '$' || a->type == AHEAD)
01011 if (push(nfa, a))
01012 progress = 1;
01013 assert(nexta == NULL || s->no != FREESTATE);
01014 }
01015 }
01016 if (progress && f != NULL)
01017 dumpnfa(nfa, f);
01018 } while (progress && !NISERR());
01019 if (NISERR())
01020 return;
01021
01022 for (a = nfa->post->ins; a != NULL; a = nexta)
01023 {
01024 nexta = a->inchain;
01025 if (a->type == '$')
01026 {
01027 assert(a->co == 0 || a->co == 1);
01028 newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
01029 freearc(nfa, a);
01030 }
01031 }
01032 }
01033
01034
01035
01036
01037
01038
01039
01040 static int
01041 push(struct nfa * nfa,
01042 struct arc * con)
01043 {
01044 struct state *from = con->from;
01045 struct state *to = con->to;
01046 struct arc *a;
01047 struct arc *nexta;
01048 struct state *s;
01049
01050 if (to == from)
01051 {
01052 freearc(nfa, con);
01053 return 1;
01054 }
01055 if (to->flag)
01056 return 0;
01057 if (to->nouts == 0)
01058 {
01059 freearc(nfa, con);
01060 return 1;
01061 }
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071 for (a = to->ins; a != NULL; a = nexta)
01072 {
01073 nexta = a->inchain;
01074 switch (a->type)
01075 {
01076 case '^':
01077 case '$':
01078 case BEHIND:
01079 case AHEAD:
01080 if (a->from == to)
01081 freearc(nfa, a);
01082 break;
01083 }
01084 }
01085
01086
01087 if (to->nins > 1)
01088 {
01089 s = newstate(nfa);
01090 if (NISERR())
01091 return 0;
01092 copyouts(nfa, to, s, 1);
01093 cparc(nfa, con, from, s);
01094 freearc(nfa, con);
01095 to = s;
01096 con = to->ins;
01097 }
01098 assert(to->nins == 1);
01099
01100
01101 for (a = to->outs; a != NULL; a = nexta)
01102 {
01103 nexta = a->outchain;
01104 switch (combine(con, a))
01105 {
01106 case INCOMPATIBLE:
01107 freearc(nfa, a);
01108 break;
01109 case SATISFIED:
01110 break;
01111 case COMPATIBLE:
01112 s = newstate(nfa);
01113 if (NISERR())
01114 return 0;
01115 cparc(nfa, con, s, a->to);
01116 cparc(nfa, a, from, s);
01117 if (NISERR())
01118 return 0;
01119 freearc(nfa, a);
01120 break;
01121 default:
01122 assert(NOTREACHED);
01123 break;
01124 }
01125 }
01126
01127
01128 moveouts(nfa, to, from);
01129 dropstate(nfa, to);
01130 return 1;
01131 }
01132
01133
01134
01135
01136
01137
01138
01139
01140 static int
01141 combine(struct arc * con,
01142 struct arc * a)
01143 {
01144 #define CA(ct,at) (((ct)<<CHAR_BIT) | (at))
01145
01146 switch (CA(con->type, a->type))
01147 {
01148 case CA('^', PLAIN):
01149 case CA('$', PLAIN):
01150 return INCOMPATIBLE;
01151 break;
01152 case CA(AHEAD, PLAIN):
01153 case CA(BEHIND, PLAIN):
01154 if (con->co == a->co)
01155 return SATISFIED;
01156 return INCOMPATIBLE;
01157 break;
01158 case CA('^', '^'):
01159 case CA('$', '$'):
01160 case CA(AHEAD, AHEAD):
01161 case CA(BEHIND, BEHIND):
01162 if (con->co == a->co)
01163 return SATISFIED;
01164 return INCOMPATIBLE;
01165 break;
01166 case CA('^', BEHIND):
01167 case CA(BEHIND, '^'):
01168 case CA('$', AHEAD):
01169 case CA(AHEAD, '$'):
01170 return INCOMPATIBLE;
01171 break;
01172 case CA('^', '$'):
01173 case CA('^', AHEAD):
01174 case CA(BEHIND, '$'):
01175 case CA(BEHIND, AHEAD):
01176 case CA('$', '^'):
01177 case CA('$', BEHIND):
01178 case CA(AHEAD, '^'):
01179 case CA(AHEAD, BEHIND):
01180 case CA('^', LACON):
01181 case CA(BEHIND, LACON):
01182 case CA('$', LACON):
01183 case CA(AHEAD, LACON):
01184 return COMPATIBLE;
01185 break;
01186 }
01187 assert(NOTREACHED);
01188 return INCOMPATIBLE;
01189 }
01190
01191
01192
01193
01194 static void
01195 fixempties(struct nfa * nfa,
01196 FILE *f)
01197 {
01198 struct state *s;
01199 struct state *s2;
01200 struct state *nexts;
01201 struct arc *a;
01202 struct arc *nexta;
01203
01204
01205
01206
01207
01208
01209 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
01210 {
01211 nexts = s->next;
01212 if (s->flag || s->nouts != 1)
01213 continue;
01214 a = s->outs;
01215 assert(a != NULL && a->outchain == NULL);
01216 if (a->type != EMPTY)
01217 continue;
01218 if (s != a->to)
01219 moveins(nfa, s, a->to);
01220 dropstate(nfa, s);
01221 }
01222
01223
01224
01225
01226
01227 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
01228 {
01229 nexts = s->next;
01230
01231 assert(s->tmp == NULL);
01232 if (s->flag || s->nins != 1)
01233 continue;
01234 a = s->ins;
01235 assert(a != NULL && a->inchain == NULL);
01236 if (a->type != EMPTY)
01237 continue;
01238 if (s != a->from)
01239 moveouts(nfa, s, a->from);
01240 dropstate(nfa, s);
01241 }
01242
01243
01244
01245
01246
01247
01248
01249
01250
01251
01252
01253
01254
01255
01256
01257
01258
01259 for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
01260 {
01261 for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts)
01262 {
01263
01264
01265
01266
01267
01268 if (s2->flag || hasnonemptyout(s2))
01269 replaceempty(nfa, s, s2);
01270
01271
01272 nexts = s2->tmp;
01273 s2->tmp = NULL;
01274 }
01275 s->tmp = NULL;
01276 }
01277
01278 if (NISERR())
01279 return;
01280
01281
01282
01283
01284 for (s = nfa->states; s != NULL; s = s->next)
01285 {
01286 for (a = s->outs; a != NULL; a = nexta)
01287 {
01288 nexta = a->outchain;
01289 if (a->type == EMPTY)
01290 freearc(nfa, a);
01291 }
01292 }
01293
01294
01295
01296
01297
01298
01299 for (s = nfa->states; s != NULL; s = nexts)
01300 {
01301 nexts = s->next;
01302 if ((s->nins == 0 || s->nouts == 0) && !s->flag)
01303 dropstate(nfa, s);
01304 }
01305
01306 if (f != NULL)
01307 dumpnfa(nfa, f);
01308 }
01309
01310
01311
01312
01313
01314
01315
01316
01317
01318
01319
01320
01321 static struct state *
01322 emptyreachable(struct state * s, struct state * lastfound)
01323 {
01324 struct arc *a;
01325
01326 s->tmp = lastfound;
01327 lastfound = s;
01328 for (a = s->outs; a != NULL; a = a->outchain)
01329 {
01330 if (a->type == EMPTY && a->to->tmp == NULL)
01331 lastfound = emptyreachable(a->to, lastfound);
01332 }
01333 return lastfound;
01334 }
01335
01336
01337
01338
01339
01340
01341
01342 static void
01343 replaceempty(struct nfa * nfa,
01344 struct state * from,
01345 struct state * to)
01346 {
01347 int fromouts;
01348 int toins;
01349
01350 assert(from != to);
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360
01361
01362
01363
01364
01365
01366
01367 fromouts = nonemptyouts(from);
01368 toins = (fromouts == 0) ? 1 : nonemptyins(to);
01369
01370 if (fromouts > toins)
01371 {
01372 copyouts(nfa, to, from, 0);
01373 return;
01374 }
01375 if (fromouts < toins)
01376 {
01377 copyins(nfa, from, to, 0);
01378 return;
01379 }
01380
01381
01382
01383
01384
01385
01386
01387
01388 if (from->nins > to->nouts)
01389 {
01390 copyouts(nfa, to, from, 0);
01391 return;
01392 }
01393 else
01394 {
01395 copyins(nfa, from, to, 0);
01396 return;
01397 }
01398 }
01399
01400
01401
01402
01403 static void
01404 cleanup(struct nfa * nfa)
01405 {
01406 struct state *s;
01407 struct state *nexts;
01408 int n;
01409
01410
01411
01412 markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
01413 markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
01414 for (s = nfa->states; s != NULL; s = nexts)
01415 {
01416 nexts = s->next;
01417 if (s->tmp != nfa->post && !s->flag)
01418 dropstate(nfa, s);
01419 }
01420 assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
01421 cleartraverse(nfa, nfa->pre);
01422 assert(nfa->post->nins == 0 || nfa->post->tmp == NULL);
01423
01424
01425
01426 n = 0;
01427 for (s = nfa->states; s != NULL; s = s->next)
01428 s->no = n++;
01429 nfa->nstates = n;
01430 }
01431
01432
01433
01434
01435 static void
01436 markreachable(struct nfa * nfa,
01437 struct state * s,
01438 struct state * okay,
01439 struct state * mark)
01440 {
01441 struct arc *a;
01442
01443 if (s->tmp != okay)
01444 return;
01445 s->tmp = mark;
01446
01447 for (a = s->outs; a != NULL; a = a->outchain)
01448 markreachable(nfa, a->to, okay, mark);
01449 }
01450
01451
01452
01453
01454 static void
01455 markcanreach(struct nfa * nfa,
01456 struct state * s,
01457 struct state * okay,
01458 struct state * mark)
01459 {
01460 struct arc *a;
01461
01462 if (s->tmp != okay)
01463 return;
01464 s->tmp = mark;
01465
01466 for (a = s->ins; a != NULL; a = a->inchain)
01467 markcanreach(nfa, a->from, okay, mark);
01468 }
01469
01470
01471
01472
01473 static long
01474 analyze(struct nfa * nfa)
01475 {
01476 struct arc *a;
01477 struct arc *aa;
01478
01479 if (nfa->pre->outs == NULL)
01480 return REG_UIMPOSSIBLE;
01481 for (a = nfa->pre->outs; a != NULL; a = a->outchain)
01482 for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
01483 if (aa->to == nfa->post)
01484 return REG_UEMPTYMATCH;
01485 return 0;
01486 }
01487
01488
01489
01490
01491 static void
01492 compact(struct nfa * nfa,
01493 struct cnfa * cnfa)
01494 {
01495 struct state *s;
01496 struct arc *a;
01497 size_t nstates;
01498 size_t narcs;
01499 struct carc *ca;
01500 struct carc *first;
01501
01502 assert(!NISERR());
01503
01504 nstates = 0;
01505 narcs = 0;
01506 for (s = nfa->states; s != NULL; s = s->next)
01507 {
01508 nstates++;
01509 narcs += s->nouts + 1;
01510 }
01511
01512 cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
01513 cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
01514 cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
01515 if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
01516 {
01517 if (cnfa->stflags != NULL)
01518 FREE(cnfa->stflags);
01519 if (cnfa->states != NULL)
01520 FREE(cnfa->states);
01521 if (cnfa->arcs != NULL)
01522 FREE(cnfa->arcs);
01523 NERR(REG_ESPACE);
01524 return;
01525 }
01526 cnfa->nstates = nstates;
01527 cnfa->pre = nfa->pre->no;
01528 cnfa->post = nfa->post->no;
01529 cnfa->bos[0] = nfa->bos[0];
01530 cnfa->bos[1] = nfa->bos[1];
01531 cnfa->eos[0] = nfa->eos[0];
01532 cnfa->eos[1] = nfa->eos[1];
01533 cnfa->ncolors = maxcolor(nfa->cm) + 1;
01534 cnfa->flags = 0;
01535
01536 ca = cnfa->arcs;
01537 for (s = nfa->states; s != NULL; s = s->next)
01538 {
01539 assert((size_t) s->no < nstates);
01540 cnfa->stflags[s->no] = 0;
01541 cnfa->states[s->no] = ca;
01542 first = ca;
01543 for (a = s->outs; a != NULL; a = a->outchain)
01544 switch (a->type)
01545 {
01546 case PLAIN:
01547 ca->co = a->co;
01548 ca->to = a->to->no;
01549 ca++;
01550 break;
01551 case LACON:
01552 assert(s->no != cnfa->pre);
01553 ca->co = (color) (cnfa->ncolors + a->co);
01554 ca->to = a->to->no;
01555 ca++;
01556 cnfa->flags |= HASLACONS;
01557 break;
01558 default:
01559 assert(NOTREACHED);
01560 break;
01561 }
01562 carcsort(first, ca - 1);
01563 ca->co = COLORLESS;
01564 ca->to = 0;
01565 ca++;
01566 }
01567 assert(ca == &cnfa->arcs[narcs]);
01568 assert(cnfa->nstates != 0);
01569
01570
01571 for (a = nfa->pre->outs; a != NULL; a = a->outchain)
01572 cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
01573 cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
01574 }
01575
01576
01577
01578
01579
01580
01581
01582 static void
01583 carcsort(struct carc * first,
01584 struct carc * last)
01585 {
01586 struct carc *p;
01587 struct carc *q;
01588 struct carc tmp;
01589
01590 if (last - first <= 1)
01591 return;
01592
01593 for (p = first; p <= last; p++)
01594 for (q = p; q <= last; q++)
01595 if (p->co > q->co ||
01596 (p->co == q->co && p->to > q->to))
01597 {
01598 assert(p != q);
01599 tmp = *p;
01600 *p = *q;
01601 *q = tmp;
01602 }
01603 }
01604
01605
01606
01607
01608 static void
01609 freecnfa(struct cnfa * cnfa)
01610 {
01611 assert(cnfa->nstates != 0);
01612 cnfa->nstates = 0;
01613 FREE(cnfa->stflags);
01614 FREE(cnfa->states);
01615 FREE(cnfa->arcs);
01616 }
01617
01618
01619
01620
01621 static void
01622 dumpnfa(struct nfa * nfa,
01623 FILE *f)
01624 {
01625 #ifdef REG_DEBUG
01626 struct state *s;
01627
01628 fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
01629 if (nfa->bos[0] != COLORLESS)
01630 fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
01631 if (nfa->bos[1] != COLORLESS)
01632 fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
01633 if (nfa->eos[0] != COLORLESS)
01634 fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
01635 if (nfa->eos[1] != COLORLESS)
01636 fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
01637 fprintf(f, "\n");
01638 for (s = nfa->states; s != NULL; s = s->next)
01639 dumpstate(s, f);
01640 if (nfa->parent == NULL)
01641 dumpcolors(nfa->cm, f);
01642 fflush(f);
01643 #endif
01644 }
01645
01646 #ifdef REG_DEBUG
01647
01648
01649
01650
01651 static void
01652 dumpstate(struct state * s,
01653 FILE *f)
01654 {
01655 struct arc *a;
01656
01657 fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
01658 (s->flag) ? s->flag : '.');
01659 if (s->prev != NULL && s->prev->next != s)
01660 fprintf(f, "\tstate chain bad\n");
01661 if (s->nouts == 0)
01662 fprintf(f, "\tno out arcs\n");
01663 else
01664 dumparcs(s, f);
01665 fflush(f);
01666 for (a = s->ins; a != NULL; a = a->inchain)
01667 {
01668 if (a->to != s)
01669 fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
01670 a->from->no, a->to->no, s->no);
01671 }
01672 }
01673
01674
01675
01676
01677 static void
01678 dumparcs(struct state * s,
01679 FILE *f)
01680 {
01681 int pos;
01682
01683 assert(s->nouts > 0);
01684
01685 pos = dumprarcs(s->outs, s, f, 1);
01686 if (pos != 1)
01687 fprintf(f, "\n");
01688 }
01689
01690
01691
01692
01693 static int
01694 dumprarcs(struct arc * a,
01695 struct state * s,
01696 FILE *f,
01697 int pos)
01698 {
01699 if (a->outchain != NULL)
01700 pos = dumprarcs(a->outchain, s, f, pos);
01701 dumparc(a, s, f);
01702 if (pos == 5)
01703 {
01704 fprintf(f, "\n");
01705 pos = 1;
01706 }
01707 else
01708 pos++;
01709 return pos;
01710 }
01711
01712
01713
01714
01715 static void
01716 dumparc(struct arc * a,
01717 struct state * s,
01718 FILE *f)
01719 {
01720 struct arc *aa;
01721 struct arcbatch *ab;
01722
01723 fprintf(f, "\t");
01724 switch (a->type)
01725 {
01726 case PLAIN:
01727 fprintf(f, "[%ld]", (long) a->co);
01728 break;
01729 case AHEAD:
01730 fprintf(f, ">%ld>", (long) a->co);
01731 break;
01732 case BEHIND:
01733 fprintf(f, "<%ld<", (long) a->co);
01734 break;
01735 case LACON:
01736 fprintf(f, ":%ld:", (long) a->co);
01737 break;
01738 case '^':
01739 case '$':
01740 fprintf(f, "%c%d", a->type, (int) a->co);
01741 break;
01742 case EMPTY:
01743 break;
01744 default:
01745 fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
01746 break;
01747 }
01748 if (a->from != s)
01749 fprintf(f, "?%d?", a->from->no);
01750 for (ab = &a->from->oas; ab != NULL; ab = ab->next)
01751 {
01752 for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
01753 if (aa == a)
01754 break;
01755 if (aa < &ab->a[ABSIZE])
01756 break;
01757 }
01758 if (ab == NULL)
01759 fprintf(f, "?!?");
01760 fprintf(f, "->");
01761 if (a->to == NULL)
01762 {
01763 fprintf(f, "NULL");
01764 return;
01765 }
01766 fprintf(f, "%d", a->to->no);
01767 for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
01768 if (aa == a)
01769 break;
01770 if (aa == NULL)
01771 fprintf(f, "?!?");
01772 }
01773 #endif
01774
01775
01776
01777
01778 #ifdef REG_DEBUG
01779 static void
01780 dumpcnfa(struct cnfa * cnfa,
01781 FILE *f)
01782 {
01783 int st;
01784
01785 fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
01786 if (cnfa->bos[0] != COLORLESS)
01787 fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
01788 if (cnfa->bos[1] != COLORLESS)
01789 fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
01790 if (cnfa->eos[0] != COLORLESS)
01791 fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
01792 if (cnfa->eos[1] != COLORLESS)
01793 fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
01794 if (cnfa->flags & HASLACONS)
01795 fprintf(f, ", haslacons");
01796 fprintf(f, "\n");
01797 for (st = 0; st < cnfa->nstates; st++)
01798 dumpcstate(st, cnfa, f);
01799 fflush(f);
01800 }
01801 #endif
01802
01803 #ifdef REG_DEBUG
01804
01805
01806
01807
01808 static void
01809 dumpcstate(int st,
01810 struct cnfa * cnfa,
01811 FILE *f)
01812 {
01813 struct carc * ca;
01814 int pos;
01815
01816 fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
01817 pos = 1;
01818 for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
01819 {
01820 if (ca->co < cnfa->ncolors)
01821 fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
01822 else
01823 fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
01824 if (pos == 5)
01825 {
01826 fprintf(f, "\n");
01827 pos = 1;
01828 }
01829 else
01830 pos++;
01831 }
01832 if (ca == cnfa->states[st] || pos != 1)
01833 fprintf(f, "\n");
01834 fflush(f);
01835 }
01836
01837 #endif