#include "regex/regguts.h"#include "rege_dfa.c"
Go to the source code of this file.
Data Structures | |
| struct | arcp |
| struct | sset |
| struct | dfa |
| struct | smalldfa |
| struct | vars |
Defines | |
| #define | HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) |
| #define | HIT(h, bv, ss, nw) |
| #define | STARTER 01 |
| #define | POSTSTATE 02 |
| #define | LOCKED 04 |
| #define | NOPROGRESS 010 |
| #define | WORK 1 |
| #define | FEWSTATES 20 |
| #define | FEWCOLORS 15 |
| #define | DOMALLOC ((struct smalldfa *)NULL) |
| #define | VISERR(vv) ((vv)->err != 0) |
| #define | ISERR() VISERR(v) |
| #define | VERR(vv, e) ((vv)->err = ((vv)->err ? (vv)->err : (e))) |
| #define | ERR(e) VERR(v, e) |
| #define | NOERR() {if (ISERR()) return v->err;} |
| #define | OFF(p) ((p) - v->start) |
| #define | LOFF(p) ((long)OFF(p)) |
| #define | LOCALMAT 20 |
| #define | LOCALDFAS 40 |
Functions | |
| static struct dfa * | getsubdfa (struct vars *, struct subre *) |
| static int | find (struct vars *, struct cnfa *, struct colormap *) |
| static int | cfind (struct vars *, struct cnfa *, struct colormap *) |
| static int | cfindloop (struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **) |
| static void | zapallsubs (regmatch_t *, size_t) |
| static void | zaptreesubs (struct vars *, struct subre *) |
| static void | subset (struct vars *, struct subre *, chr *, chr *) |
| static int | cdissect (struct vars *, struct subre *, chr *, chr *) |
| static int | ccondissect (struct vars *, struct subre *, chr *, chr *) |
| static int | crevcondissect (struct vars *, struct subre *, chr *, chr *) |
| static int | cbrdissect (struct vars *, struct subre *, chr *, chr *) |
| static int | caltdissect (struct vars *, struct subre *, chr *, chr *) |
| static int | citerdissect (struct vars *, struct subre *, chr *, chr *) |
| static int | creviterdissect (struct vars *, struct subre *, chr *, chr *) |
| static chr * | longest (struct vars *, struct dfa *, chr *, chr *, int *) |
| static chr * | shortest (struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *) |
| static chr * | lastcold (struct vars *, struct dfa *) |
| static struct dfa * | newdfa (struct vars *, struct cnfa *, struct colormap *, struct smalldfa *) |
| static void | freedfa (struct dfa *) |
| static unsigned | hash (unsigned *, int) |
| static struct sset * | initialize (struct vars *, struct dfa *, chr *) |
| static struct sset * | miss (struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *) |
| static int | lacon (struct vars *, struct cnfa *, chr *, pcolor) |
| static struct sset * | getvacant (struct vars *, struct dfa *, chr *, chr *) |
| static struct sset * | pickss (struct vars *, struct dfa *, chr *, chr *) |
| int | pg_regexec (regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags) |
| #define DOMALLOC ((struct smalldfa *)NULL) |
Definition at line 98 of file regexec.c.
Referenced by getsubdfa().
Definition at line 123 of file regexec.c.
Referenced by cfindloop().
| #define HASH | ( | bv, | ||
| nw | ||||
| ) | (((nw) == 1) ? *(bv) : hash(bv, nw)) |
| #define HIT | ( | h, | ||
| bv, | ||||
| ss, | ||||
| nw | ||||
| ) |
| #define ISERR | ( | ) | VISERR(v) |
Definition at line 121 of file regexec.c.
Referenced by cfind(), citerdissect(), creviterdissect(), find(), and getsubdfa().
| #define LOCALDFAS 40 |
Referenced by pg_regexec().
| #define LOCALMAT 20 |
Referenced by pg_regexec().
| #define LOCKED 04 |
Definition at line 55 of file regexec.c.
Referenced by getvacant(), initialize(), and pickss().
| #define LOFF | ( | p | ) | ((long)OFF(p)) |
Definition at line 126 of file regexec.c.
Referenced by ccondissect(), cdissect(), cfindloop(), citerdissect(), crevcondissect(), creviterdissect(), and find().
| #define NOERR | ( | ) | {if (ISERR()) return v->err;} |
Definition at line 124 of file regexec.c.
Referenced by caltdissect(), ccondissect(), cfind(), crevcondissect(), and find().
| #define NOPROGRESS 010 |
Definition at line 56 of file regexec.c.
Referenced by getvacant(), and lastcold().
| #define OFF | ( | p | ) | ((p) - v->start) |
| #define POSTSTATE 02 |
Definition at line 54 of file regexec.c.
Referenced by getvacant(), longest(), miss(), and shortest().
| #define STARTER 01 |
Definition at line 53 of file regexec.c.
Referenced by initialize().
Definition at line 881 of file regexec.c.
References assert, cdissect(), subre::cnfa, getsubdfa(), subre::id, subre::left, longest(), MDEBUG, NOERR, cnfa::nstates, NULL, subre::op, REG_NOMATCH, and subre::right.
Referenced by cdissect().
{
struct dfa *d;
int er;
/* We loop, rather than tail-recurse, to handle a chain of alternatives */
while (t != NULL)
{
assert(t->op == '|');
assert(t->left != NULL && t->left->cnfa.nstates > 0);
MDEBUG(("calt n%d\n", t->id));
d = getsubdfa(v, t->left);
NOERR();
if (longest(v, d, begin, end, (int *) NULL) == end)
{
MDEBUG(("calt matched\n"));
er = cdissect(v, t->left, begin, end);
if (er != REG_NOMATCH)
return er;
}
t = t->right;
}
return REG_NOMATCH;
}
Definition at line 800 of file regexec.c.
References assert, vars::g, subre::id, INFINITY, subre::max, MDEBUG, subre::min, NULL, subre::op, vars::pmatch, regmatch_t::rm_eo, regmatch_t::rm_so, vars::start, and subre::subno.
Referenced by cdissect().
{
int n = t->subno;
size_t numreps;
size_t tlen;
size_t brlen;
chr *brstring;
chr *p;
int min = t->min;
int max = t->max;
assert(t != NULL);
assert(t->op == 'b');
assert(n >= 0);
assert((size_t) n < v->nmatch);
MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max));
/* get the backreferenced string */
if (v->pmatch[n].rm_so == -1)
return REG_NOMATCH;
brstring = v->start + v->pmatch[n].rm_so;
brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
/* special cases for zero-length strings */
if (brlen == 0)
{
/*
* matches only if target is zero length, but any number of
* repetitions can be considered to be present
*/
if (begin == end && min <= max)
{
MDEBUG(("cbackref matched trivially\n"));
return REG_OKAY;
}
return REG_NOMATCH;
}
if (begin == end)
{
/* matches only if zero repetitions are okay */
if (min == 0)
{
MDEBUG(("cbackref matched trivially\n"));
return REG_OKAY;
}
return REG_NOMATCH;
}
/*
* check target length to see if it could possibly be an allowed number of
* repetitions of brstring
*/
assert(end > begin);
tlen = end - begin;
if (tlen % brlen != 0)
return REG_NOMATCH;
numreps = tlen / brlen;
if (numreps < min || (numreps > max && max != INFINITY))
return REG_NOMATCH;
/* okay, compare the actual string contents */
p = begin;
while (numreps-- > 0)
{
if ((*v->g->compare) (brstring, p, brlen) != 0)
return REG_NOMATCH;
p += brlen;
}
MDEBUG(("cbackref matched\n"));
return REG_OKAY;
}
Definition at line 650 of file regexec.c.
References assert, cdissect(), subre::cnfa, subre::flags, getsubdfa(), subre::id, subre::left, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, NULL, subre::op, REG_NOMATCH, REG_OKAY, subre::right, SHORTER, and zaptreesubs().
Referenced by cdissect().
{
struct dfa *d;
struct dfa *d2;
chr *mid;
int er;
assert(t->op == '.');
assert(t->left != NULL && t->left->cnfa.nstates > 0);
assert(t->right != NULL && t->right->cnfa.nstates > 0);
assert(!(t->left->flags & SHORTER));
d = getsubdfa(v, t->left);
NOERR();
d2 = getsubdfa(v, t->right);
NOERR();
MDEBUG(("cconcat %d\n", t->id));
/* pick a tentative midpoint */
mid = longest(v, d, begin, end, (int *) NULL);
if (mid == NULL)
return REG_NOMATCH;
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
/* iterate until satisfaction or failure */
for (;;)
{
/* try this midpoint on for size */
if (longest(v, d2, mid, end, (int *) NULL) == end)
{
er = cdissect(v, t->left, begin, mid);
if (er == REG_OKAY)
{
er = cdissect(v, t->right, mid, end);
if (er == REG_OKAY)
{
/* satisfaction */
MDEBUG(("successful\n"));
return REG_OKAY;
}
}
if (er != REG_NOMATCH)
return er;
}
/* that midpoint didn't work, find a new one */
if (mid == begin)
{
/* all possibilities exhausted */
MDEBUG(("%d no midpoint\n", t->id));
return REG_NOMATCH;
}
mid = longest(v, d, begin, mid - 1, (int *) NULL);
if (mid == NULL)
{
/* failed to find a new one */
MDEBUG(("%d failed midpoint\n", t->id));
return REG_NOMATCH;
}
MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
zaptreesubs(v, t->left);
zaptreesubs(v, t->right);
}
/* can't get here */
return REG_ASSERT;
}
Definition at line 586 of file regexec.c.
References assert, BACKR, caltdissect(), cbrdissect(), ccondissect(), citerdissect(), crevcondissect(), creviterdissect(), subre::flags, subre::left, LOFF, MDEBUG, NULL, subre::op, REG_NOMATCH, REG_OKAY, subre::right, SHORTER, subre::subno, and subset().
Referenced by caltdissect(), ccondissect(), cfindloop(), citerdissect(), crevcondissect(), creviterdissect(), and find().
{
int er;
assert(t != NULL);
MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
switch (t->op)
{
case '=': /* terminal node */
assert(t->left == NULL && t->right == NULL);
er = REG_OKAY; /* no action, parent did the work */
break;
case 'b': /* back reference */
assert(t->left == NULL && t->right == NULL);
er = cbrdissect(v, t, begin, end);
break;
case '.': /* concatenation */
assert(t->left != NULL && t->right != NULL);
if (t->left->flags & SHORTER) /* reverse scan */
er = crevcondissect(v, t, begin, end);
else
er = ccondissect(v, t, begin, end);
break;
case '|': /* alternation */
assert(t->left != NULL);
er = caltdissect(v, t, begin, end);
break;
case '*': /* iteration */
assert(t->left != NULL);
if (t->left->flags & SHORTER) /* reverse scan */
er = creviterdissect(v, t, begin, end);
else
er = citerdissect(v, t, begin, end);
break;
case '(': /* capturing */
assert(t->left != NULL && t->right == NULL);
assert(t->subno > 0);
er = cdissect(v, t->left, begin, end);
if (er == REG_OKAY)
subset(v, t, begin, end);
break;
default:
er = REG_ASSERT;
break;
}
/*
* We should never have a match failure unless backrefs lurk below;
* otherwise, either caller failed to check the DFA, or there's some
* inconsistency between the DFA and the node's innards.
*/
assert(er != REG_NOMATCH || (t->flags & BACKR));
return er;
}
Definition at line 383 of file regexec.c.
References assert, cfindloop(), guts::cflags, vars::details, vars::dfa1, vars::dfa2, vars::err, freedfa(), vars::g, ISERR, newdfa(), NOERR, NULL, OFF, REG_EXPECT, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, and vars::stop.
Referenced by pg_regexec().
{
struct dfa *s;
struct dfa *d;
chr *cold;
int ret;
s = newdfa(v, &v->g->search, cm, &v->dfa1);
NOERR();
d = newdfa(v, cnfa, cm, &v->dfa2);
if (ISERR())
{
assert(d == NULL);
freedfa(s);
return v->err;
}
ret = cfindloop(v, cnfa, cm, d, s, &cold);
freedfa(d);
freedfa(s);
NOERR();
if (v->g->cflags & REG_EXPECT)
{
assert(v->details != NULL);
if (cold != NULL)
v->details->rm_extend.rm_so = OFF(cold);
else
v->details->rm_extend.rm_so = OFF(v->stop);
v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
}
return ret;
}
| static int cfindloop | ( | struct vars * | v, | |
| struct cnfa * | cnfa, | |||
| struct colormap * | cm, | |||
| struct dfa * | d, | |||
| struct dfa * | s, | |||
| chr ** | coldp | |||
| ) | [static] |
Definition at line 423 of file regexec.c.
References assert, cdissect(), ERR, subre::flags, vars::g, LOFF, longest(), MDEBUG, vars::nmatch, NULL, OFF, vars::pmatch, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, regmatch_t::rm_so, vars::search_start, shortest(), vars::stop, guts::tree, and zapallsubs().
Referenced by cfind().
{
chr *begin;
chr *end;
chr *cold;
chr *open; /* open and close of range of possible starts */
chr *close;
chr *estart;
chr *estop;
int er;
int shorter = v->g->tree->flags & SHORTER;
int hitend;
assert(d != NULL && s != NULL);
cold = NULL;
close = v->search_start;
do
{
MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
if (close == NULL)
break; /* NOTE BREAK */
assert(cold != NULL);
open = cold;
cold = NULL;
MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
for (begin = open; begin <= close; begin++)
{
MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
estart = begin;
estop = v->stop;
for (;;)
{
if (shorter)
end = shortest(v, d, begin, estart,
estop, (chr **) NULL, &hitend);
else
end = longest(v, d, begin, estop,
&hitend);
if (hitend && cold == NULL)
cold = begin;
if (end == NULL)
break; /* NOTE BREAK OUT */
MDEBUG(("tentative end %ld\n", LOFF(end)));
zapallsubs(v->pmatch, v->nmatch);
er = cdissect(v, v->g->tree, begin, end);
if (er == REG_OKAY)
{
if (v->nmatch > 0)
{
v->pmatch[0].rm_so = OFF(begin);
v->pmatch[0].rm_eo = OFF(end);
}
*coldp = cold;
return REG_OKAY;
}
if (er != REG_NOMATCH)
{
ERR(er);
*coldp = cold;
return er;
}
if ((shorter) ? end == estop : end == begin)
{
/* no point in trying again */
*coldp = cold;
return REG_NOMATCH;
}
/* go around and try again */
if (shorter)
estart = end + 1;
else
estop = end - 1;
}
}
} while (close < v->stop);
*coldp = cold;
return REG_NOMATCH;
}
Definition at line 917 of file regexec.c.
References assert, cdissect(), subre::cnfa, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, INFINITY, ISERR, subre::left, LOFF, longest(), MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, NULL, subre::op, REG_NOMATCH, REG_OKAY, SHORTER, and zaptreesubs().
Referenced by cdissect().
{
struct dfa *d;
chr **endpts;
chr *limit;
int min_matches;
size_t max_matches;
int nverified;
int k;
int i;
int er;
assert(t->op == '*');
assert(t->left != NULL && t->left->cnfa.nstates > 0);
assert(!(t->left->flags & SHORTER));
assert(begin <= end);
/*
* If zero matches are allowed, and target string is empty, just declare
* victory. OTOH, if target string isn't empty, zero matches can't work
* so we pretend the min is 1.
*/
min_matches = t->min;
if (min_matches <= 0)
{
if (begin == end)
return REG_OKAY;
min_matches = 1;
}
/*
* We need workspace to track the endpoints of each sub-match. Normally
* we consider only nonzero-length sub-matches, so there can be at most
* end-begin of them. However, if min is larger than that, we will also
* consider zero-length sub-matches in order to find enough matches.
*
* For convenience, endpts[0] contains the "begin" pointer and we store
* sub-match endpoints in endpts[1..max_matches].
*/
max_matches = end - begin;
if (max_matches > t->max && t->max != INFINITY)
max_matches = t->max;
if (max_matches < min_matches)
max_matches = min_matches;
endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
if (endpts == NULL)
return REG_ESPACE;
endpts[0] = begin;
d = getsubdfa(v, t->left);
if (ISERR())
{
FREE(endpts);
return v->err;
}
MDEBUG(("citer %d\n", t->id));
/*
* Our strategy is to first find a set of sub-match endpoints that are
* valid according to the child node's DFA, and then recursively dissect
* each sub-match to confirm validity. If any validity check fails,
* backtrack the last sub-match and try again. And, when we next try for
* a validity check, we need not recheck any successfully verified
* sub-matches that we didn't move the endpoints of. nverified remembers
* how many sub-matches are currently known okay.
*/
/* initialize to consider first sub-match */
nverified = 0;
k = 1;
limit = end;
/* iterate until satisfaction or failure */
while (k > 0)
{
/* try to find an endpoint for the k'th sub-match */
endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
if (endpts[k] == NULL)
{
/* no match possible, so see if we can shorten previous one */
k--;
goto backtrack;
}
MDEBUG(("%d: working endpoint %d: %ld\n",
t->id, k, LOFF(endpts[k])));
/* k'th sub-match can no longer be considered verified */
if (nverified >= k)
nverified = k - 1;
if (endpts[k] != end)
{
/* haven't reached end yet, try another iteration if allowed */
if (k >= max_matches)
{
/* must try to shorten some previous match */
k--;
goto backtrack;
}
/* reject zero-length match unless necessary to achieve min */
if (endpts[k] == endpts[k - 1] &&
(k >= min_matches || min_matches - k < end - endpts[k]))
goto backtrack;
k++;
limit = end;
continue;
}
/*
* We've identified a way to divide the string into k sub-matches that
* works so far as the child DFA can tell. If k is an allowed number
* of matches, start the slow part: recurse to verify each sub-match.
* We always have k <= max_matches, needn't check that.
*/
if (k < min_matches)
goto backtrack;
MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
for (i = nverified + 1; i <= k; i++)
{
zaptreesubs(v, t->left);
er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
if (er == REG_OKAY)
{
nverified = i;
continue;
}
if (er == REG_NOMATCH)
break;
/* oops, something failed */
FREE(endpts);
return er;
}
if (i > k)
{
/* satisfaction */
MDEBUG(("%d successful\n", t->id));
FREE(endpts);
return REG_OKAY;
}
/* match failed to verify, so backtrack */
backtrack:
/*
* Must consider shorter versions of the current sub-match. However,
* we'll only ask for a zero-length match if necessary.
*/
while (k > 0)
{
chr *prev_end = endpts[k - 1];
if (endpts[k] > prev_end)
{
limit = endpts[k] - 1;
if (limit > prev_end ||
(k < min_matches && min_matches - k >= end - prev_end))
{
/* break out of backtrack loop, continue the outer one */
break;
}
}
/* can't shorten k'th sub-match any more, consider previous one */
k--;
}
}
/* all possibilities exhausted */
MDEBUG(("%d failed\n", t->id));
FREE(endpts);
return REG_NOMATCH;
}
Definition at line 725 of file regexec.c.
References assert, cdissect(), subre::cnfa, subre::flags, getsubdfa(), subre::id, subre::left, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, NULL, subre::op, REG_NOMATCH, REG_OKAY, subre::right, SHORTER, shortest(), and zaptreesubs().
Referenced by cdissect().
{
struct dfa *d;
struct dfa *d2;
chr *mid;
int er;
assert(t->op == '.');
assert(t->left != NULL && t->left->cnfa.nstates > 0);
assert(t->right != NULL && t->right->cnfa.nstates > 0);
assert(t->left->flags & SHORTER);
d = getsubdfa(v, t->left);
NOERR();
d2 = getsubdfa(v, t->right);
NOERR();
MDEBUG(("crevcon %d\n", t->id));
/* pick a tentative midpoint */
mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
if (mid == NULL)
return REG_NOMATCH;
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
/* iterate until satisfaction or failure */
for (;;)
{
/* try this midpoint on for size */
if (longest(v, d2, mid, end, (int *) NULL) == end)
{
er = cdissect(v, t->left, begin, mid);
if (er == REG_OKAY)
{
er = cdissect(v, t->right, mid, end);
if (er == REG_OKAY)
{
/* satisfaction */
MDEBUG(("successful\n"));
return REG_OKAY;
}
}
if (er != REG_NOMATCH)
return er;
}
/* that midpoint didn't work, find a new one */
if (mid == end)
{
/* all possibilities exhausted */
MDEBUG(("%d no midpoint\n", t->id));
return REG_NOMATCH;
}
mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
if (mid == NULL)
{
/* failed to find a new one */
MDEBUG(("%d failed midpoint\n", t->id));
return REG_NOMATCH;
}
MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
zaptreesubs(v, t->left);
zaptreesubs(v, t->right);
}
/* can't get here */
return REG_ASSERT;
}
Definition at line 1102 of file regexec.c.
References assert, cdissect(), subre::cnfa, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, INFINITY, ISERR, subre::left, LOFF, MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, NULL, subre::op, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), and zaptreesubs().
Referenced by cdissect().
{
struct dfa *d;
chr **endpts;
chr *limit;
int min_matches;
size_t max_matches;
int nverified;
int k;
int i;
int er;
assert(t->op == '*');
assert(t->left != NULL && t->left->cnfa.nstates > 0);
assert(t->left->flags & SHORTER);
assert(begin <= end);
/*
* If zero matches are allowed, and target string is empty, just declare
* victory. OTOH, if target string isn't empty, zero matches can't work
* so we pretend the min is 1.
*/
min_matches = t->min;
if (min_matches <= 0)
{
if (begin == end)
return REG_OKAY;
min_matches = 1;
}
/*
* We need workspace to track the endpoints of each sub-match. Normally
* we consider only nonzero-length sub-matches, so there can be at most
* end-begin of them. However, if min is larger than that, we will also
* consider zero-length sub-matches in order to find enough matches.
*
* For convenience, endpts[0] contains the "begin" pointer and we store
* sub-match endpoints in endpts[1..max_matches].
*/
max_matches = end - begin;
if (max_matches > t->max && t->max != INFINITY)
max_matches = t->max;
if (max_matches < min_matches)
max_matches = min_matches;
endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
if (endpts == NULL)
return REG_ESPACE;
endpts[0] = begin;
d = getsubdfa(v, t->left);
if (ISERR())
{
FREE(endpts);
return v->err;
}
MDEBUG(("creviter %d\n", t->id));
/*
* Our strategy is to first find a set of sub-match endpoints that are
* valid according to the child node's DFA, and then recursively dissect
* each sub-match to confirm validity. If any validity check fails,
* backtrack the last sub-match and try again. And, when we next try for
* a validity check, we need not recheck any successfully verified
* sub-matches that we didn't move the endpoints of. nverified remembers
* how many sub-matches are currently known okay.
*/
/* initialize to consider first sub-match */
nverified = 0;
k = 1;
limit = begin;
/* iterate until satisfaction or failure */
while (k > 0)
{
/* disallow zero-length match unless necessary to achieve min */
if (limit == endpts[k - 1] &&
limit != end &&
(k >= min_matches || min_matches - k < end - limit))
limit++;
/* try to find an endpoint for the k'th sub-match */
endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
(chr **) NULL, (int *) NULL);
if (endpts[k] == NULL)
{
/* no match possible, so see if we can lengthen previous one */
k--;
goto backtrack;
}
MDEBUG(("%d: working endpoint %d: %ld\n",
t->id, k, LOFF(endpts[k])));
/* k'th sub-match can no longer be considered verified */
if (nverified >= k)
nverified = k - 1;
if (endpts[k] != end)
{
/* haven't reached end yet, try another iteration if allowed */
if (k >= max_matches)
{
/* must try to lengthen some previous match */
k--;
goto backtrack;
}
k++;
limit = endpts[k - 1];
continue;
}
/*
* We've identified a way to divide the string into k sub-matches that
* works so far as the child DFA can tell. If k is an allowed number
* of matches, start the slow part: recurse to verify each sub-match.
* We always have k <= max_matches, needn't check that.
*/
if (k < min_matches)
goto backtrack;
MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
for (i = nverified + 1; i <= k; i++)
{
zaptreesubs(v, t->left);
er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
if (er == REG_OKAY)
{
nverified = i;
continue;
}
if (er == REG_NOMATCH)
break;
/* oops, something failed */
FREE(endpts);
return er;
}
if (i > k)
{
/* satisfaction */
MDEBUG(("%d successful\n", t->id));
FREE(endpts);
return REG_OKAY;
}
/* match failed to verify, so backtrack */
backtrack:
/*
* Must consider longer versions of the current sub-match.
*/
while (k > 0)
{
if (endpts[k] < end)
{
limit = endpts[k] + 1;
/* break out of backtrack loop, continue the outer one */
break;
}
/* can't lengthen k'th sub-match any more, consider previous one */
k--;
}
}
/* all possibilities exhausted */
MDEBUG(("%d failed\n", t->id));
FREE(endpts);
return REG_NOMATCH;
}
Definition at line 296 of file regexec.c.
References assert, cdissect(), guts::cflags, vars::details, vars::dfa1, end, subre::flags, freedfa(), vars::g, ISERR, LOFF, longest(), MDEBUG, newdfa(), vars::nmatch, NOERR, NULL, OFF, vars::pmatch, REG_EXPECT, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, vars::search_start, shortest(), vars::start, vars::stop, guts::tree, and zapallsubs().
Referenced by NIImportAffixes(), NIImportOOAffixes(), and pg_regexec().
{
struct dfa *s;
struct dfa *d;
chr *begin;
chr *end = NULL;
chr *cold;
chr *open; /* open and close of range of possible starts */
chr *close;
int hitend;
int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
/* first, a shot with the search RE */
s = newdfa(v, &v->g->search, cm, &v->dfa1);
assert(!(ISERR() && s != NULL));
NOERR();
MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
cold = NULL;
close = shortest(v, s, v->search_start, v->search_start, v->stop,
&cold, (int *) NULL);
freedfa(s);
NOERR();
if (v->g->cflags & REG_EXPECT)
{
assert(v->details != NULL);
if (cold != NULL)
v->details->rm_extend.rm_so = OFF(cold);
else
v->details->rm_extend.rm_so = OFF(v->stop);
v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
}
if (close == NULL) /* not found */
return REG_NOMATCH;
if (v->nmatch == 0) /* found, don't need exact location */
return REG_OKAY;
/* find starting point and match */
assert(cold != NULL);
open = cold;
cold = NULL;
MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
d = newdfa(v, cnfa, cm, &v->dfa1);
assert(!(ISERR() && d != NULL));
NOERR();
for (begin = open; begin <= close; begin++)
{
MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
if (shorter)
end = shortest(v, d, begin, begin, v->stop,
(chr **) NULL, &hitend);
else
end = longest(v, d, begin, v->stop, &hitend);
NOERR();
if (hitend && cold == NULL)
cold = begin;
if (end != NULL)
break; /* NOTE BREAK OUT */
}
assert(end != NULL); /* search RE succeeded so loop should */
freedfa(d);
/* and pin down details */
assert(v->nmatch > 0);
v->pmatch[0].rm_so = OFF(begin);
v->pmatch[0].rm_eo = OFF(end);
if (v->g->cflags & REG_EXPECT)
{
if (cold != NULL)
v->details->rm_extend.rm_so = OFF(cold);
else
v->details->rm_extend.rm_so = OFF(v->stop);
v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
}
if (v->nmatch == 1) /* no need for submatches */
return REG_OKAY;
/* find submatches */
zapallsubs(v->pmatch, v->nmatch);
return cdissect(v, v->g->tree, begin, end);
}
| static void freedfa | ( | struct dfa * | ) | [static] |
Referenced by cfind(), find(), and pg_regexec().
Definition at line 280 of file regexec.c.
References guts::cmap, subre::cnfa, DOMALLOC, vars::g, subre::id, ISERR, newdfa(), NULL, and vars::subdfas.
Referenced by caltdissect(), ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().
| static unsigned hash | ( | unsigned * | , | |
| int | ||||
| ) | [static] |
Referenced by caltdissect(), ccondissect(), cfindloop(), citerdissect(), crevcondissect(), and find().
| static struct sset* miss | ( | struct vars * | , | |
| struct dfa * | , | |||
| struct sset * | , | |||
| pcolor | , | |||
| chr * | , | |||
| chr * | ||||
| ) | [static, read] |
| static struct dfa* newdfa | ( | struct vars * | , | |
| struct cnfa * | , | |||
| struct colormap * | , | |||
| struct smalldfa * | ||||
| ) | [static, read] |
Referenced by cfind(), find(), and getsubdfa().
| int pg_regexec | ( | regex_t * | re, | |
| const chr * | string, | |||
| size_t | len, | |||
| size_t | search_start, | |||
| rm_detail_t * | details, | |||
| size_t | nmatch, | |||
| regmatch_t | pmatch[], | |||
| int | flags | |||
| ) |
Definition at line 167 of file regexec.c.
References assert, cfind(), guts::cflags, guts::cmap, subre::cnfa, vars::details, vars::eflags, vars::err, find(), FREE, freedfa(), vars::g, i, guts::info, LOCALDFAS, LOCALMAT, MALLOC, vars::nmatch, guts::nsub, guts::ntree, NULL, pg_set_regex_collation(), vars::pmatch, vars::re, regex_t::re_collation, regex_t::re_csize, regex_t::re_guts, regex_t::re_magic, REG_EXPECT, REG_INVARG, REG_NOSUB, REG_OKAY, REG_UIMPOSSIBLE, REMAGIC, vars::search_start, vars::start, vars::stop, vars::subdfas, guts::tree, VS, and zapallsubs().
Referenced by check_ident_usermap(), CheckAffix(), RE_wchar_execute(), and replace_text_regexp().
{
struct vars var;
register struct vars *v = &var;
int st;
size_t n;
size_t i;
int backref;
#define LOCALMAT 20
regmatch_t mat[LOCALMAT];
#define LOCALDFAS 40
struct dfa *subdfas[LOCALDFAS];
/* sanity checks */
if (re == NULL || string == NULL || re->re_magic != REMAGIC)
return REG_INVARG;
if (re->re_csize != sizeof(chr))
return REG_MIXED;
/* Initialize locale-dependent support */
pg_set_regex_collation(re->re_collation);
/* setup */
v->re = re;
v->g = (struct guts *) re->re_guts;
if ((v->g->cflags & REG_EXPECT) && details == NULL)
return REG_INVARG;
if (v->g->info & REG_UIMPOSSIBLE)
return REG_NOMATCH;
backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
v->eflags = flags;
if (v->g->cflags & REG_NOSUB)
nmatch = 0; /* override client */
v->nmatch = nmatch;
if (backref)
{
/* need work area */
if (v->g->nsub + 1 <= LOCALMAT)
v->pmatch = mat;
else
v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
sizeof(regmatch_t));
if (v->pmatch == NULL)
return REG_ESPACE;
v->nmatch = v->g->nsub + 1;
}
else
v->pmatch = pmatch;
v->details = details;
v->start = (chr *) string;
v->search_start = (chr *) string + search_start;
v->stop = (chr *) string + len;
v->err = 0;
assert(v->g->ntree >= 0);
n = (size_t) v->g->ntree;
if (n <= LOCALDFAS)
v->subdfas = subdfas;
else
v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
if (v->subdfas == NULL)
{
if (v->pmatch != pmatch && v->pmatch != mat)
FREE(v->pmatch);
return REG_ESPACE;
}
for (i = 0; i < n; i++)
v->subdfas[i] = NULL;
/* do it */
assert(v->g->tree != NULL);
if (backref)
st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
else
st = find(v, &v->g->tree->cnfa, &v->g->cmap);
/* copy (portion of) match vector over if necessary */
if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
{
zapallsubs(pmatch, nmatch);
n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
}
/* clean up */
if (v->pmatch != pmatch && v->pmatch != mat)
FREE(v->pmatch);
for (i = 0; i < n; i++)
{
if (v->subdfas[i] != NULL)
freedfa(v->subdfas[i]);
}
if (v->subdfas != subdfas)
FREE(v->subdfas);
return st;
}
| static chr* shortest | ( | struct vars * | , | |
| struct dfa * | , | |||
| chr * | , | |||
| chr * | , | |||
| chr * | , | |||
| chr ** | , | |||
| int * | ||||
| ) | [static] |
Referenced by cfindloop(), crevcondissect(), creviterdissect(), and find().
Definition at line 554 of file regexec.c.
References assert, MDEBUG, vars::nmatch, OFF, vars::pmatch, regmatch_t::rm_eo, regmatch_t::rm_so, and subre::subno.
Referenced by cdissect().
| static void zapallsubs | ( | regmatch_t * | p, | |
| size_t | n | |||
| ) | [static] |
Definition at line 513 of file regexec.c.
References i, regmatch_t::rm_eo, and regmatch_t::rm_so.
Referenced by cfindloop(), find(), and pg_regexec().
Definition at line 529 of file regexec.c.
References assert, subre::left, NULL, subre::op, vars::pmatch, subre::right, regmatch_t::rm_eo, regmatch_t::rm_so, and subre::subno.
Referenced by ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().
1.7.1