Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Defines | Functions

regexec.c File Reference

#include "regex/regguts.h"
#include "rege_dfa.c"
Include dependency graph for regexec.c:

Go to the source code of this file.

Data Structures

struct  arcp
struct  sset
struct  dfa
struct  smalldfa
struct  vars

Defines

#define HASH(bv, nw)   (((nw) == 1) ? *(bv) : hash(bv, nw))
#define HIT(h, bv, ss, nw)
#define STARTER   01
#define POSTSTATE   02
#define LOCKED   04
#define NOPROGRESS   010
#define WORK   1
#define FEWSTATES   20
#define FEWCOLORS   15
#define DOMALLOC   ((struct smalldfa *)NULL)
#define VISERR(vv)   ((vv)->err != 0)
#define ISERR()   VISERR(v)
#define VERR(vv, e)   ((vv)->err = ((vv)->err ? (vv)->err : (e)))
#define ERR(e)   VERR(v, e)
#define NOERR()   {if (ISERR()) return v->err;}
#define OFF(p)   ((p) - v->start)
#define LOFF(p)   ((long)OFF(p))
#define LOCALMAT   20
#define LOCALDFAS   40

Functions

static struct dfagetsubdfa (struct vars *, struct subre *)
static int find (struct vars *, struct cnfa *, struct colormap *)
static int cfind (struct vars *, struct cnfa *, struct colormap *)
static int cfindloop (struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)
static void zapallsubs (regmatch_t *, size_t)
static void zaptreesubs (struct vars *, struct subre *)
static void subset (struct vars *, struct subre *, chr *, chr *)
static int cdissect (struct vars *, struct subre *, chr *, chr *)
static int ccondissect (struct vars *, struct subre *, chr *, chr *)
static int crevcondissect (struct vars *, struct subre *, chr *, chr *)
static int cbrdissect (struct vars *, struct subre *, chr *, chr *)
static int caltdissect (struct vars *, struct subre *, chr *, chr *)
static int citerdissect (struct vars *, struct subre *, chr *, chr *)
static int creviterdissect (struct vars *, struct subre *, chr *, chr *)
static chrlongest (struct vars *, struct dfa *, chr *, chr *, int *)
static chrshortest (struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)
static chrlastcold (struct vars *, struct dfa *)
static struct dfanewdfa (struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)
static void freedfa (struct dfa *)
static unsigned hash (unsigned *, int)
static struct ssetinitialize (struct vars *, struct dfa *, chr *)
static struct ssetmiss (struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *)
static int lacon (struct vars *, struct cnfa *, chr *, pcolor)
static struct ssetgetvacant (struct vars *, struct dfa *, chr *, chr *)
static struct ssetpickss (struct vars *, struct dfa *, chr *, chr *)
int pg_regexec (regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)

Define Documentation

#define DOMALLOC   ((struct smalldfa *)NULL)

Definition at line 98 of file regexec.c.

Referenced by getsubdfa().

#define ERR (   e  )     VERR(v, e)

Definition at line 123 of file regexec.c.

Referenced by cfindloop().

#define FEWCOLORS   15

Definition at line 88 of file regexec.c.

Referenced by newdfa().

#define FEWSTATES   20

Definition at line 87 of file regexec.c.

#define HASH (   bv,
  nw 
)    (((nw) == 1) ? *(bv) : hash(bv, nw))

Definition at line 49 of file regexec.c.

#define HIT (   h,
  bv,
  ss,
  nw 
)
Value:
((ss)->hash == (h) && ((nw) == 1 || \
        memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))

Definition at line 50 of file regexec.c.

Referenced by miss().

#define ISERR (  )     VISERR(v)

Definition at line 121 of file regexec.c.

Referenced by cfind(), citerdissect(), creviterdissect(), find(), and getsubdfa().

#define LOCALDFAS   40

Referenced by pg_regexec().

#define LOCALMAT   20

Referenced by pg_regexec().

#define LOCKED   04

Definition at line 55 of file regexec.c.

Referenced by getvacant(), initialize(), and pickss().

#define LOFF (   p  )     ((long)OFF(p))
#define NOERR (  )     {if (ISERR()) return v->err;}

Definition at line 124 of file regexec.c.

Referenced by caltdissect(), ccondissect(), cfind(), crevcondissect(), and find().

#define NOPROGRESS   010

Definition at line 56 of file regexec.c.

Referenced by getvacant(), and lastcold().

#define OFF (   p  )     ((p) - v->start)

Definition at line 125 of file regexec.c.

Referenced by cfind(), cfindloop(), find(), and subset().

#define POSTSTATE   02

Definition at line 54 of file regexec.c.

Referenced by getvacant(), longest(), miss(), and shortest().

#define STARTER   01

Definition at line 53 of file regexec.c.

Referenced by initialize().

#define VERR (   vv,
  e 
)    ((vv)->err = ((vv)->err ? (vv)->err : (e)))

Definition at line 122 of file regexec.c.

#define VISERR (   vv  )     ((vv)->err != 0)

Definition at line 120 of file regexec.c.

#define WORK   1

Definition at line 84 of file regexec.c.

Referenced by newdfa().


Function Documentation

static int caltdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
) [static]

Definition at line 881 of file regexec.c.

References assert, cdissect(), subre::cnfa, getsubdfa(), subre::id, subre::left, longest(), MDEBUG, NOERR, cnfa::nstates, NULL, subre::op, REG_NOMATCH, and subre::right.

Referenced by cdissect().

{
    struct dfa *d;
    int         er;

    /* We loop, rather than tail-recurse, to handle a chain of alternatives */
    while (t != NULL)
    {
        assert(t->op == '|');
        assert(t->left != NULL && t->left->cnfa.nstates > 0);

        MDEBUG(("calt n%d\n", t->id));

        d = getsubdfa(v, t->left);
        NOERR();
        if (longest(v, d, begin, end, (int *) NULL) == end)
        {
            MDEBUG(("calt matched\n"));
            er = cdissect(v, t->left, begin, end);
            if (er != REG_NOMATCH)
                return er;
        }

        t = t->right;
    }

    return REG_NOMATCH;
}

static int cbrdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
) [static]

Definition at line 800 of file regexec.c.

References assert, vars::g, subre::id, INFINITY, subre::max, MDEBUG, subre::min, NULL, subre::op, vars::pmatch, regmatch_t::rm_eo, regmatch_t::rm_so, vars::start, and subre::subno.

Referenced by cdissect().

{
    int         n = t->subno;
    size_t      numreps;
    size_t      tlen;
    size_t      brlen;
    chr        *brstring;
    chr        *p;
    int         min = t->min;
    int         max = t->max;

    assert(t != NULL);
    assert(t->op == 'b');
    assert(n >= 0);
    assert((size_t) n < v->nmatch);

    MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max));

    /* get the backreferenced string */
    if (v->pmatch[n].rm_so == -1)
        return REG_NOMATCH;
    brstring = v->start + v->pmatch[n].rm_so;
    brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;

    /* special cases for zero-length strings */
    if (brlen == 0)
    {
        /*
         * matches only if target is zero length, but any number of
         * repetitions can be considered to be present
         */
        if (begin == end && min <= max)
        {
            MDEBUG(("cbackref matched trivially\n"));
            return REG_OKAY;
        }
        return REG_NOMATCH;
    }
    if (begin == end)
    {
        /* matches only if zero repetitions are okay */
        if (min == 0)
        {
            MDEBUG(("cbackref matched trivially\n"));
            return REG_OKAY;
        }
        return REG_NOMATCH;
    }

    /*
     * check target length to see if it could possibly be an allowed number of
     * repetitions of brstring
     */
    assert(end > begin);
    tlen = end - begin;
    if (tlen % brlen != 0)
        return REG_NOMATCH;
    numreps = tlen / brlen;
    if (numreps < min || (numreps > max && max != INFINITY))
        return REG_NOMATCH;

    /* okay, compare the actual string contents */
    p = begin;
    while (numreps-- > 0)
    {
        if ((*v->g->compare) (brstring, p, brlen) != 0)
            return REG_NOMATCH;
        p += brlen;
    }

    MDEBUG(("cbackref matched\n"));
    return REG_OKAY;
}

static int ccondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
) [static]

Definition at line 650 of file regexec.c.

References assert, cdissect(), subre::cnfa, subre::flags, getsubdfa(), subre::id, subre::left, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, NULL, subre::op, REG_NOMATCH, REG_OKAY, subre::right, SHORTER, and zaptreesubs().

Referenced by cdissect().

{
    struct dfa *d;
    struct dfa *d2;
    chr        *mid;
    int         er;

    assert(t->op == '.');
    assert(t->left != NULL && t->left->cnfa.nstates > 0);
    assert(t->right != NULL && t->right->cnfa.nstates > 0);
    assert(!(t->left->flags & SHORTER));

    d = getsubdfa(v, t->left);
    NOERR();
    d2 = getsubdfa(v, t->right);
    NOERR();
    MDEBUG(("cconcat %d\n", t->id));

    /* pick a tentative midpoint */
    mid = longest(v, d, begin, end, (int *) NULL);
    if (mid == NULL)
        return REG_NOMATCH;
    MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));

    /* iterate until satisfaction or failure */
    for (;;)
    {
        /* try this midpoint on for size */
        if (longest(v, d2, mid, end, (int *) NULL) == end)
        {
            er = cdissect(v, t->left, begin, mid);
            if (er == REG_OKAY)
            {
                er = cdissect(v, t->right, mid, end);
                if (er == REG_OKAY)
                {
                    /* satisfaction */
                    MDEBUG(("successful\n"));
                    return REG_OKAY;
                }
            }
            if (er != REG_NOMATCH)
                return er;
        }

        /* that midpoint didn't work, find a new one */
        if (mid == begin)
        {
            /* all possibilities exhausted */
            MDEBUG(("%d no midpoint\n", t->id));
            return REG_NOMATCH;
        }
        mid = longest(v, d, begin, mid - 1, (int *) NULL);
        if (mid == NULL)
        {
            /* failed to find a new one */
            MDEBUG(("%d failed midpoint\n", t->id));
            return REG_NOMATCH;
        }
        MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
        zaptreesubs(v, t->left);
        zaptreesubs(v, t->right);
    }

    /* can't get here */
    return REG_ASSERT;
}

static int cdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
) [static]

Definition at line 586 of file regexec.c.

References assert, BACKR, caltdissect(), cbrdissect(), ccondissect(), citerdissect(), crevcondissect(), creviterdissect(), subre::flags, subre::left, LOFF, MDEBUG, NULL, subre::op, REG_NOMATCH, REG_OKAY, subre::right, SHORTER, subre::subno, and subset().

Referenced by caltdissect(), ccondissect(), cfindloop(), citerdissect(), crevcondissect(), creviterdissect(), and find().

{
    int         er;

    assert(t != NULL);
    MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));

    switch (t->op)
    {
        case '=':               /* terminal node */
            assert(t->left == NULL && t->right == NULL);
            er = REG_OKAY;      /* no action, parent did the work */
            break;
        case 'b':               /* back reference */
            assert(t->left == NULL && t->right == NULL);
            er = cbrdissect(v, t, begin, end);
            break;
        case '.':               /* concatenation */
            assert(t->left != NULL && t->right != NULL);
            if (t->left->flags & SHORTER)       /* reverse scan */
                er = crevcondissect(v, t, begin, end);
            else
                er = ccondissect(v, t, begin, end);
            break;
        case '|':               /* alternation */
            assert(t->left != NULL);
            er = caltdissect(v, t, begin, end);
            break;
        case '*':               /* iteration */
            assert(t->left != NULL);
            if (t->left->flags & SHORTER)       /* reverse scan */
                er = creviterdissect(v, t, begin, end);
            else
                er = citerdissect(v, t, begin, end);
            break;
        case '(':               /* capturing */
            assert(t->left != NULL && t->right == NULL);
            assert(t->subno > 0);
            er = cdissect(v, t->left, begin, end);
            if (er == REG_OKAY)
                subset(v, t, begin, end);
            break;
        default:
            er = REG_ASSERT;
            break;
    }

    /*
     * We should never have a match failure unless backrefs lurk below;
     * otherwise, either caller failed to check the DFA, or there's some
     * inconsistency between the DFA and the node's innards.
     */
    assert(er != REG_NOMATCH || (t->flags & BACKR));

    return er;
}

static int cfind ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
) [static]

Definition at line 383 of file regexec.c.

References assert, cfindloop(), guts::cflags, vars::details, vars::dfa1, vars::dfa2, vars::err, freedfa(), vars::g, ISERR, newdfa(), NOERR, NULL, OFF, REG_EXPECT, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, and vars::stop.

Referenced by pg_regexec().

{
    struct dfa *s;
    struct dfa *d;
    chr        *cold;
    int         ret;

    s = newdfa(v, &v->g->search, cm, &v->dfa1);
    NOERR();
    d = newdfa(v, cnfa, cm, &v->dfa2);
    if (ISERR())
    {
        assert(d == NULL);
        freedfa(s);
        return v->err;
    }

    ret = cfindloop(v, cnfa, cm, d, s, &cold);

    freedfa(d);
    freedfa(s);
    NOERR();
    if (v->g->cflags & REG_EXPECT)
    {
        assert(v->details != NULL);
        if (cold != NULL)
            v->details->rm_extend.rm_so = OFF(cold);
        else
            v->details->rm_extend.rm_so = OFF(v->stop);
        v->details->rm_extend.rm_eo = OFF(v->stop);     /* unknown */
    }
    return ret;
}

static int cfindloop ( struct vars v,
struct cnfa cnfa,
struct colormap cm,
struct dfa d,
struct dfa s,
chr **  coldp 
) [static]

Definition at line 423 of file regexec.c.

References assert, cdissect(), ERR, subre::flags, vars::g, LOFF, longest(), MDEBUG, vars::nmatch, NULL, OFF, vars::pmatch, REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, regmatch_t::rm_so, vars::search_start, shortest(), vars::stop, guts::tree, and zapallsubs().

Referenced by cfind().

{
    chr        *begin;
    chr        *end;
    chr        *cold;
    chr        *open;           /* open and close of range of possible starts */
    chr        *close;
    chr        *estart;
    chr        *estop;
    int         er;
    int         shorter = v->g->tree->flags & SHORTER;
    int         hitend;

    assert(d != NULL && s != NULL);
    cold = NULL;
    close = v->search_start;
    do
    {
        MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
        close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
        if (close == NULL)
            break;              /* NOTE BREAK */
        assert(cold != NULL);
        open = cold;
        cold = NULL;
        MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
        for (begin = open; begin <= close; begin++)
        {
            MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
            estart = begin;
            estop = v->stop;
            for (;;)
            {
                if (shorter)
                    end = shortest(v, d, begin, estart,
                                   estop, (chr **) NULL, &hitend);
                else
                    end = longest(v, d, begin, estop,
                                  &hitend);
                if (hitend && cold == NULL)
                    cold = begin;
                if (end == NULL)
                    break;      /* NOTE BREAK OUT */
                MDEBUG(("tentative end %ld\n", LOFF(end)));
                zapallsubs(v->pmatch, v->nmatch);
                er = cdissect(v, v->g->tree, begin, end);
                if (er == REG_OKAY)
                {
                    if (v->nmatch > 0)
                    {
                        v->pmatch[0].rm_so = OFF(begin);
                        v->pmatch[0].rm_eo = OFF(end);
                    }
                    *coldp = cold;
                    return REG_OKAY;
                }
                if (er != REG_NOMATCH)
                {
                    ERR(er);
                    *coldp = cold;
                    return er;
                }
                if ((shorter) ? end == estop : end == begin)
                {
                    /* no point in trying again */
                    *coldp = cold;
                    return REG_NOMATCH;
                }
                /* go around and try again */
                if (shorter)
                    estart = end + 1;
                else
                    estop = end - 1;
            }
        }
    } while (close < v->stop);

    *coldp = cold;
    return REG_NOMATCH;
}

static int citerdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
) [static]

Definition at line 917 of file regexec.c.

References assert, cdissect(), subre::cnfa, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, INFINITY, ISERR, subre::left, LOFF, longest(), MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, NULL, subre::op, REG_NOMATCH, REG_OKAY, SHORTER, and zaptreesubs().

Referenced by cdissect().

{
    struct dfa *d;
    chr       **endpts;
    chr        *limit;
    int         min_matches;
    size_t      max_matches;
    int         nverified;
    int         k;
    int         i;
    int         er;

    assert(t->op == '*');
    assert(t->left != NULL && t->left->cnfa.nstates > 0);
    assert(!(t->left->flags & SHORTER));
    assert(begin <= end);

    /*
     * If zero matches are allowed, and target string is empty, just declare
     * victory.  OTOH, if target string isn't empty, zero matches can't work
     * so we pretend the min is 1.
     */
    min_matches = t->min;
    if (min_matches <= 0)
    {
        if (begin == end)
            return REG_OKAY;
        min_matches = 1;
    }

    /*
     * We need workspace to track the endpoints of each sub-match.  Normally
     * we consider only nonzero-length sub-matches, so there can be at most
     * end-begin of them.  However, if min is larger than that, we will also
     * consider zero-length sub-matches in order to find enough matches.
     *
     * For convenience, endpts[0] contains the "begin" pointer and we store
     * sub-match endpoints in endpts[1..max_matches].
     */
    max_matches = end - begin;
    if (max_matches > t->max && t->max != INFINITY)
        max_matches = t->max;
    if (max_matches < min_matches)
        max_matches = min_matches;
    endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
    if (endpts == NULL)
        return REG_ESPACE;
    endpts[0] = begin;

    d = getsubdfa(v, t->left);
    if (ISERR())
    {
        FREE(endpts);
        return v->err;
    }
    MDEBUG(("citer %d\n", t->id));

    /*
     * Our strategy is to first find a set of sub-match endpoints that are
     * valid according to the child node's DFA, and then recursively dissect
     * each sub-match to confirm validity.  If any validity check fails,
     * backtrack the last sub-match and try again.  And, when we next try for
     * a validity check, we need not recheck any successfully verified
     * sub-matches that we didn't move the endpoints of.  nverified remembers
     * how many sub-matches are currently known okay.
     */

    /* initialize to consider first sub-match */
    nverified = 0;
    k = 1;
    limit = end;

    /* iterate until satisfaction or failure */
    while (k > 0)
    {
        /* try to find an endpoint for the k'th sub-match */
        endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
        if (endpts[k] == NULL)
        {
            /* no match possible, so see if we can shorten previous one */
            k--;
            goto backtrack;
        }
        MDEBUG(("%d: working endpoint %d: %ld\n",
                t->id, k, LOFF(endpts[k])));

        /* k'th sub-match can no longer be considered verified */
        if (nverified >= k)
            nverified = k - 1;

        if (endpts[k] != end)
        {
            /* haven't reached end yet, try another iteration if allowed */
            if (k >= max_matches)
            {
                /* must try to shorten some previous match */
                k--;
                goto backtrack;
            }

            /* reject zero-length match unless necessary to achieve min */
            if (endpts[k] == endpts[k - 1] &&
                (k >= min_matches || min_matches - k < end - endpts[k]))
                goto backtrack;

            k++;
            limit = end;
            continue;
        }

        /*
         * We've identified a way to divide the string into k sub-matches that
         * works so far as the child DFA can tell.  If k is an allowed number
         * of matches, start the slow part: recurse to verify each sub-match.
         * We always have k <= max_matches, needn't check that.
         */
        if (k < min_matches)
            goto backtrack;

        MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));

        for (i = nverified + 1; i <= k; i++)
        {
            zaptreesubs(v, t->left);
            er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
            if (er == REG_OKAY)
            {
                nverified = i;
                continue;
            }
            if (er == REG_NOMATCH)
                break;
            /* oops, something failed */
            FREE(endpts);
            return er;
        }

        if (i > k)
        {
            /* satisfaction */
            MDEBUG(("%d successful\n", t->id));
            FREE(endpts);
            return REG_OKAY;
        }

        /* match failed to verify, so backtrack */

backtrack:

        /*
         * Must consider shorter versions of the current sub-match.  However,
         * we'll only ask for a zero-length match if necessary.
         */
        while (k > 0)
        {
            chr        *prev_end = endpts[k - 1];

            if (endpts[k] > prev_end)
            {
                limit = endpts[k] - 1;
                if (limit > prev_end ||
                    (k < min_matches && min_matches - k >= end - prev_end))
                {
                    /* break out of backtrack loop, continue the outer one */
                    break;
                }
            }
            /* can't shorten k'th sub-match any more, consider previous one */
            k--;
        }
    }

    /* all possibilities exhausted */
    MDEBUG(("%d failed\n", t->id));
    FREE(endpts);
    return REG_NOMATCH;
}

static int crevcondissect ( struct vars v,
struct subre t,
chr begin,
chr end 
) [static]

Definition at line 725 of file regexec.c.

References assert, cdissect(), subre::cnfa, subre::flags, getsubdfa(), subre::id, subre::left, LOFF, longest(), MDEBUG, NOERR, cnfa::nstates, NULL, subre::op, REG_NOMATCH, REG_OKAY, subre::right, SHORTER, shortest(), and zaptreesubs().

Referenced by cdissect().

{
    struct dfa *d;
    struct dfa *d2;
    chr        *mid;
    int         er;

    assert(t->op == '.');
    assert(t->left != NULL && t->left->cnfa.nstates > 0);
    assert(t->right != NULL && t->right->cnfa.nstates > 0);
    assert(t->left->flags & SHORTER);

    d = getsubdfa(v, t->left);
    NOERR();
    d2 = getsubdfa(v, t->right);
    NOERR();
    MDEBUG(("crevcon %d\n", t->id));

    /* pick a tentative midpoint */
    mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
    if (mid == NULL)
        return REG_NOMATCH;
    MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));

    /* iterate until satisfaction or failure */
    for (;;)
    {
        /* try this midpoint on for size */
        if (longest(v, d2, mid, end, (int *) NULL) == end)
        {
            er = cdissect(v, t->left, begin, mid);
            if (er == REG_OKAY)
            {
                er = cdissect(v, t->right, mid, end);
                if (er == REG_OKAY)
                {
                    /* satisfaction */
                    MDEBUG(("successful\n"));
                    return REG_OKAY;
                }
            }
            if (er != REG_NOMATCH)
                return er;
        }

        /* that midpoint didn't work, find a new one */
        if (mid == end)
        {
            /* all possibilities exhausted */
            MDEBUG(("%d no midpoint\n", t->id));
            return REG_NOMATCH;
        }
        mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
        if (mid == NULL)
        {
            /* failed to find a new one */
            MDEBUG(("%d failed midpoint\n", t->id));
            return REG_NOMATCH;
        }
        MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
        zaptreesubs(v, t->left);
        zaptreesubs(v, t->right);
    }

    /* can't get here */
    return REG_ASSERT;
}

static int creviterdissect ( struct vars v,
struct subre t,
chr begin,
chr end 
) [static]

Definition at line 1102 of file regexec.c.

References assert, cdissect(), subre::cnfa, vars::err, subre::flags, FREE, getsubdfa(), i, subre::id, INFINITY, ISERR, subre::left, LOFF, MALLOC, subre::max, MDEBUG, subre::min, cnfa::nstates, NULL, subre::op, REG_NOMATCH, REG_OKAY, SHORTER, shortest(), and zaptreesubs().

Referenced by cdissect().

{
    struct dfa *d;
    chr       **endpts;
    chr        *limit;
    int         min_matches;
    size_t      max_matches;
    int         nverified;
    int         k;
    int         i;
    int         er;

    assert(t->op == '*');
    assert(t->left != NULL && t->left->cnfa.nstates > 0);
    assert(t->left->flags & SHORTER);
    assert(begin <= end);

    /*
     * If zero matches are allowed, and target string is empty, just declare
     * victory.  OTOH, if target string isn't empty, zero matches can't work
     * so we pretend the min is 1.
     */
    min_matches = t->min;
    if (min_matches <= 0)
    {
        if (begin == end)
            return REG_OKAY;
        min_matches = 1;
    }

    /*
     * We need workspace to track the endpoints of each sub-match.  Normally
     * we consider only nonzero-length sub-matches, so there can be at most
     * end-begin of them.  However, if min is larger than that, we will also
     * consider zero-length sub-matches in order to find enough matches.
     *
     * For convenience, endpts[0] contains the "begin" pointer and we store
     * sub-match endpoints in endpts[1..max_matches].
     */
    max_matches = end - begin;
    if (max_matches > t->max && t->max != INFINITY)
        max_matches = t->max;
    if (max_matches < min_matches)
        max_matches = min_matches;
    endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
    if (endpts == NULL)
        return REG_ESPACE;
    endpts[0] = begin;

    d = getsubdfa(v, t->left);
    if (ISERR())
    {
        FREE(endpts);
        return v->err;
    }
    MDEBUG(("creviter %d\n", t->id));

    /*
     * Our strategy is to first find a set of sub-match endpoints that are
     * valid according to the child node's DFA, and then recursively dissect
     * each sub-match to confirm validity.  If any validity check fails,
     * backtrack the last sub-match and try again.  And, when we next try for
     * a validity check, we need not recheck any successfully verified
     * sub-matches that we didn't move the endpoints of.  nverified remembers
     * how many sub-matches are currently known okay.
     */

    /* initialize to consider first sub-match */
    nverified = 0;
    k = 1;
    limit = begin;

    /* iterate until satisfaction or failure */
    while (k > 0)
    {
        /* disallow zero-length match unless necessary to achieve min */
        if (limit == endpts[k - 1] &&
            limit != end &&
            (k >= min_matches || min_matches - k < end - limit))
            limit++;

        /* try to find an endpoint for the k'th sub-match */
        endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
                             (chr **) NULL, (int *) NULL);
        if (endpts[k] == NULL)
        {
            /* no match possible, so see if we can lengthen previous one */
            k--;
            goto backtrack;
        }
        MDEBUG(("%d: working endpoint %d: %ld\n",
                t->id, k, LOFF(endpts[k])));

        /* k'th sub-match can no longer be considered verified */
        if (nverified >= k)
            nverified = k - 1;

        if (endpts[k] != end)
        {
            /* haven't reached end yet, try another iteration if allowed */
            if (k >= max_matches)
            {
                /* must try to lengthen some previous match */
                k--;
                goto backtrack;
            }

            k++;
            limit = endpts[k - 1];
            continue;
        }

        /*
         * We've identified a way to divide the string into k sub-matches that
         * works so far as the child DFA can tell.  If k is an allowed number
         * of matches, start the slow part: recurse to verify each sub-match.
         * We always have k <= max_matches, needn't check that.
         */
        if (k < min_matches)
            goto backtrack;

        MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));

        for (i = nverified + 1; i <= k; i++)
        {
            zaptreesubs(v, t->left);
            er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
            if (er == REG_OKAY)
            {
                nverified = i;
                continue;
            }
            if (er == REG_NOMATCH)
                break;
            /* oops, something failed */
            FREE(endpts);
            return er;
        }

        if (i > k)
        {
            /* satisfaction */
            MDEBUG(("%d successful\n", t->id));
            FREE(endpts);
            return REG_OKAY;
        }

        /* match failed to verify, so backtrack */

backtrack:

        /*
         * Must consider longer versions of the current sub-match.
         */
        while (k > 0)
        {
            if (endpts[k] < end)
            {
                limit = endpts[k] + 1;
                /* break out of backtrack loop, continue the outer one */
                break;
            }
            /* can't lengthen k'th sub-match any more, consider previous one */
            k--;
        }
    }

    /* all possibilities exhausted */
    MDEBUG(("%d failed\n", t->id));
    FREE(endpts);
    return REG_NOMATCH;
}

static int find ( struct vars v,
struct cnfa cnfa,
struct colormap cm 
) [static]

Definition at line 296 of file regexec.c.

References assert, cdissect(), guts::cflags, vars::details, vars::dfa1, end, subre::flags, freedfa(), vars::g, ISERR, LOFF, longest(), MDEBUG, newdfa(), vars::nmatch, NOERR, NULL, OFF, vars::pmatch, REG_EXPECT, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, guts::search, vars::search_start, shortest(), vars::start, vars::stop, guts::tree, and zapallsubs().

Referenced by NIImportAffixes(), NIImportOOAffixes(), and pg_regexec().

{
    struct dfa *s;
    struct dfa *d;
    chr        *begin;
    chr        *end = NULL;
    chr        *cold;
    chr        *open;           /* open and close of range of possible starts */
    chr        *close;
    int         hitend;
    int         shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;

    /* first, a shot with the search RE */
    s = newdfa(v, &v->g->search, cm, &v->dfa1);
    assert(!(ISERR() && s != NULL));
    NOERR();
    MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
    cold = NULL;
    close = shortest(v, s, v->search_start, v->search_start, v->stop,
                     &cold, (int *) NULL);
    freedfa(s);
    NOERR();
    if (v->g->cflags & REG_EXPECT)
    {
        assert(v->details != NULL);
        if (cold != NULL)
            v->details->rm_extend.rm_so = OFF(cold);
        else
            v->details->rm_extend.rm_so = OFF(v->stop);
        v->details->rm_extend.rm_eo = OFF(v->stop);     /* unknown */
    }
    if (close == NULL)          /* not found */
        return REG_NOMATCH;
    if (v->nmatch == 0)         /* found, don't need exact location */
        return REG_OKAY;

    /* find starting point and match */
    assert(cold != NULL);
    open = cold;
    cold = NULL;
    MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
    d = newdfa(v, cnfa, cm, &v->dfa1);
    assert(!(ISERR() && d != NULL));
    NOERR();
    for (begin = open; begin <= close; begin++)
    {
        MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
        if (shorter)
            end = shortest(v, d, begin, begin, v->stop,
                           (chr **) NULL, &hitend);
        else
            end = longest(v, d, begin, v->stop, &hitend);
        NOERR();
        if (hitend && cold == NULL)
            cold = begin;
        if (end != NULL)
            break;              /* NOTE BREAK OUT */
    }
    assert(end != NULL);        /* search RE succeeded so loop should */
    freedfa(d);

    /* and pin down details */
    assert(v->nmatch > 0);
    v->pmatch[0].rm_so = OFF(begin);
    v->pmatch[0].rm_eo = OFF(end);
    if (v->g->cflags & REG_EXPECT)
    {
        if (cold != NULL)
            v->details->rm_extend.rm_so = OFF(cold);
        else
            v->details->rm_extend.rm_so = OFF(v->stop);
        v->details->rm_extend.rm_eo = OFF(v->stop);     /* unknown */
    }
    if (v->nmatch == 1)         /* no need for submatches */
        return REG_OKAY;

    /* find submatches */
    zapallsubs(v->pmatch, v->nmatch);
    return cdissect(v, v->g->tree, begin, end);
}

static void freedfa ( struct dfa  )  [static]

Referenced by cfind(), find(), and pg_regexec().

static struct dfa * getsubdfa ( struct vars v,
struct subre t 
) [static, read]

Definition at line 280 of file regexec.c.

References guts::cmap, subre::cnfa, DOMALLOC, vars::g, subre::id, ISERR, newdfa(), NULL, and vars::subdfas.

Referenced by caltdissect(), ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

{
    if (v->subdfas[t->id] == NULL)
    {
        v->subdfas[t->id] = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
        if (ISERR())
            return NULL;
    }
    return v->subdfas[t->id];
}

static struct sset* getvacant ( struct vars ,
struct dfa ,
chr ,
chr  
) [static, read]
static unsigned hash ( unsigned *  ,
int   
) [static]
static struct sset* initialize ( struct vars ,
struct dfa ,
chr  
) [static, read]
static int lacon ( struct vars ,
struct cnfa ,
chr ,
pcolor   
) [static]
static chr* lastcold ( struct vars ,
struct dfa  
) [static]
static chr* longest ( struct vars ,
struct dfa ,
chr ,
chr ,
int *   
) [static]
static struct sset* miss ( struct vars ,
struct dfa ,
struct sset ,
pcolor  ,
chr ,
chr  
) [static, read]
static struct dfa* newdfa ( struct vars ,
struct cnfa ,
struct colormap ,
struct smalldfa  
) [static, read]

Referenced by cfind(), find(), and getsubdfa().

int pg_regexec ( regex_t re,
const chr string,
size_t  len,
size_t  search_start,
rm_detail_t details,
size_t  nmatch,
regmatch_t  pmatch[],
int  flags 
)

Definition at line 167 of file regexec.c.

References assert, cfind(), guts::cflags, guts::cmap, subre::cnfa, vars::details, vars::eflags, vars::err, find(), FREE, freedfa(), vars::g, i, guts::info, LOCALDFAS, LOCALMAT, MALLOC, vars::nmatch, guts::nsub, guts::ntree, NULL, pg_set_regex_collation(), vars::pmatch, vars::re, regex_t::re_collation, regex_t::re_csize, regex_t::re_guts, regex_t::re_magic, REG_EXPECT, REG_INVARG, REG_NOSUB, REG_OKAY, REG_UIMPOSSIBLE, REMAGIC, vars::search_start, vars::start, vars::stop, vars::subdfas, guts::tree, VS, and zapallsubs().

Referenced by check_ident_usermap(), CheckAffix(), RE_wchar_execute(), and replace_text_regexp().

{
    struct vars var;
    register struct vars *v = &var;
    int         st;
    size_t      n;
    size_t      i;
    int         backref;

#define  LOCALMAT    20
    regmatch_t  mat[LOCALMAT];

#define  LOCALDFAS   40
    struct dfa *subdfas[LOCALDFAS];

    /* sanity checks */
    if (re == NULL || string == NULL || re->re_magic != REMAGIC)
        return REG_INVARG;
    if (re->re_csize != sizeof(chr))
        return REG_MIXED;

    /* Initialize locale-dependent support */
    pg_set_regex_collation(re->re_collation);

    /* setup */
    v->re = re;
    v->g = (struct guts *) re->re_guts;
    if ((v->g->cflags & REG_EXPECT) && details == NULL)
        return REG_INVARG;
    if (v->g->info & REG_UIMPOSSIBLE)
        return REG_NOMATCH;
    backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
    v->eflags = flags;
    if (v->g->cflags & REG_NOSUB)
        nmatch = 0;             /* override client */
    v->nmatch = nmatch;
    if (backref)
    {
        /* need work area */
        if (v->g->nsub + 1 <= LOCALMAT)
            v->pmatch = mat;
        else
            v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
                                              sizeof(regmatch_t));
        if (v->pmatch == NULL)
            return REG_ESPACE;
        v->nmatch = v->g->nsub + 1;
    }
    else
        v->pmatch = pmatch;
    v->details = details;
    v->start = (chr *) string;
    v->search_start = (chr *) string + search_start;
    v->stop = (chr *) string + len;
    v->err = 0;
    assert(v->g->ntree >= 0);
    n = (size_t) v->g->ntree;
    if (n <= LOCALDFAS)
        v->subdfas = subdfas;
    else
        v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
    if (v->subdfas == NULL)
    {
        if (v->pmatch != pmatch && v->pmatch != mat)
            FREE(v->pmatch);
        return REG_ESPACE;
    }
    for (i = 0; i < n; i++)
        v->subdfas[i] = NULL;

    /* do it */
    assert(v->g->tree != NULL);
    if (backref)
        st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
    else
        st = find(v, &v->g->tree->cnfa, &v->g->cmap);

    /* copy (portion of) match vector over if necessary */
    if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
    {
        zapallsubs(pmatch, nmatch);
        n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
        memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
    }

    /* clean up */
    if (v->pmatch != pmatch && v->pmatch != mat)
        FREE(v->pmatch);
    for (i = 0; i < n; i++)
    {
        if (v->subdfas[i] != NULL)
            freedfa(v->subdfas[i]);
    }
    if (v->subdfas != subdfas)
        FREE(v->subdfas);

    return st;
}

static struct sset* pickss ( struct vars ,
struct dfa ,
chr ,
chr  
) [static, read]
static chr* shortest ( struct vars ,
struct dfa ,
chr ,
chr ,
chr ,
chr **  ,
int *   
) [static]
static void subset ( struct vars v,
struct subre sub,
chr begin,
chr end 
) [static]

Definition at line 554 of file regexec.c.

References assert, MDEBUG, vars::nmatch, OFF, vars::pmatch, regmatch_t::rm_eo, regmatch_t::rm_so, and subre::subno.

Referenced by cdissect().

{
    int         n = sub->subno;

    assert(n > 0);
    if ((size_t) n >= v->nmatch)
        return;

    MDEBUG(("setting %d\n", n));
    v->pmatch[n].rm_so = OFF(begin);
    v->pmatch[n].rm_eo = OFF(end);
}

static void zapallsubs ( regmatch_t p,
size_t  n 
) [static]

Definition at line 513 of file regexec.c.

References i, regmatch_t::rm_eo, and regmatch_t::rm_so.

Referenced by cfindloop(), find(), and pg_regexec().

{
    size_t      i;

    for (i = n - 1; i > 0; i--)
    {
        p[i].rm_so = -1;
        p[i].rm_eo = -1;
    }
}

static void zaptreesubs ( struct vars v,
struct subre t 
) [static]

Definition at line 529 of file regexec.c.

References assert, subre::left, NULL, subre::op, vars::pmatch, subre::right, regmatch_t::rm_eo, regmatch_t::rm_so, and subre::subno.

Referenced by ccondissect(), citerdissect(), crevcondissect(), and creviterdissect().

{
    if (t->op == '(')
    {
        int         n = t->subno;

        assert(n > 0);
        if ((size_t) n < v->nmatch)
        {
            v->pmatch[n].rm_so = -1;
            v->pmatch[n].rm_eo = -1;
        }
    }

    if (t->left != NULL)
        zaptreesubs(v, t->left);
    if (t->right != NULL)
        zaptreesubs(v, t->right);
}