Header And Logo

PostgreSQL
| The world's most advanced open source database.

Defines | Functions | Variables

regc_lex.c File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Defines

#define ATEOS()   (v->now >= v->stop)
#define HAVE(n)   (v->stop - v->now >= (n))
#define NEXT1(c)   (!ATEOS() && *v->now == CHR(c))
#define NEXT2(a, b)   (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
#define NEXT3(a, b, c)
#define SET(c)   (v->nexttype = (c))
#define SETV(c, n)   (v->nexttype = (c), v->nextvalue = (n))
#define RET(c)   return (SET(c), 1)
#define RETV(c, n)   return (SETV(c, n), 1)
#define FAILW(e)   return (ERR(e), 0)
#define LASTTYPE(t)   (v->lasttype == (t))
#define L_ERE   1
#define L_BRE   2
#define L_Q   3
#define L_EBND   4
#define L_BBND   5
#define L_BRACK   6
#define L_CEL   7
#define L_ECL   8
#define L_CCL   9
#define INTOCON(c)   (v->lexcon = (c))
#define INCON(con)   (v->lexcon == (con))
#define ENDOF(array)   ((array) + sizeof(array)/sizeof(chr))

Functions

static void lexstart (struct vars *v)
static void prefixes (struct vars *v)
static void lexnest (struct vars *v, const chr *beginp, const chr *endp)
static void lexword (struct vars *v)
static int next (struct vars *v)
static int lexescape (struct vars *v)
static chr lexdigits (struct vars *v, int base, int minlen, int maxlen)
static int brenext (struct vars *v, chr pc)
static void skip (struct vars *v)
static chr newline (void)
static chr chrnamed (struct vars *v, const chr *startp, const chr *endp, chr lastresort)

Variables

static const chr backd []
static const chr backD []
static const chr brbackd []
static const chr backs []
static const chr backS []
static const chr brbacks []
static const chr backw []
static const chr backW []
static const chr brbackw []

Define Documentation

#define ATEOS (  )     (v->now >= v->stop)

Definition at line 36 of file regc_lex.c.

Referenced by brenext(), lexdigits(), lexescape(), next(), prefixes(), and skip().

#define ENDOF (   array  )     ((array) + sizeof(array)/sizeof(chr))

Definition at line 64 of file regc_lex.c.

Referenced by lexescape(), lexword(), and next().

#define FAILW (   e  )     return (ERR(e), 0)

Definition at line 47 of file regc_lex.c.

Referenced by brenext(), lexescape(), and next().

#define HAVE (   n  )     (v->stop - v->now >= (n))

Definition at line 37 of file regc_lex.c.

Referenced by brenext(), next(), and prefixes().

#define INCON (   con  )     (v->lexcon == (con))

Definition at line 61 of file regc_lex.c.

Referenced by next().

#define INTOCON (   c  )     (v->lexcon = (c))

Definition at line 60 of file regc_lex.c.

Referenced by brenext(), lexstart(), and next().

#define L_BBND   5

Definition at line 55 of file regc_lex.c.

Referenced by brenext(), and next().

#define L_BRACK   6

Definition at line 56 of file regc_lex.c.

Referenced by brenext(), and next().

#define L_BRE   2

Definition at line 52 of file regc_lex.c.

Referenced by lexstart(), and next().

#define L_CCL   9

Definition at line 59 of file regc_lex.c.

Referenced by next().

#define L_CEL   7

Definition at line 57 of file regc_lex.c.

Referenced by next().

#define L_EBND   4

Definition at line 54 of file regc_lex.c.

Referenced by next().

#define L_ECL   8

Definition at line 58 of file regc_lex.c.

Referenced by next().

#define L_ERE   1

Definition at line 51 of file regc_lex.c.

Referenced by lexstart(), and next().

#define L_Q   3

Definition at line 53 of file regc_lex.c.

Referenced by lexstart(), and next().

#define LASTTYPE (   t  )     (v->lasttype == (t))

Definition at line 48 of file regc_lex.c.

Referenced by brenext(), and next().

#define NEXT1 (   c  )     (!ATEOS() && *v->now == CHR(c))

Definition at line 38 of file regc_lex.c.

Referenced by brenext(), next(), prefixes(), and skip().

#define NEXT2 (   a,
  b 
)    (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))

Definition at line 39 of file regc_lex.c.

Referenced by brenext(), and prefixes().

#define NEXT3 (   a,
  b,
  c 
)
Value:
(HAVE(3) && *v->now == CHR(a) && \
                        *(v->now+1) == CHR(b) && \
                        *(v->now+2) == CHR(c))

Definition at line 40 of file regc_lex.c.

Referenced by prefixes().

#define RET (   c  )     return (SET(c), 1)

Definition at line 45 of file regc_lex.c.

Referenced by brenext(), lexescape(), and next().

#define RETV (   c,
  n 
)    return (SETV(c, n), 1)

Definition at line 46 of file regc_lex.c.

Referenced by brenext(), lexescape(), and next().

#define SET (   c  )     (v->nexttype = (c))

Definition at line 43 of file regc_lex.c.

#define SETV (   c,
  n 
)    (v->nexttype = (c), v->nextvalue = (n))

Definition at line 44 of file regc_lex.c.


Function Documentation

static int brenext ( struct vars * v,
chr  pc 
) [static]

Definition at line 958 of file regc_lex.c.

References assert, ATEOS, BACKREF, vars::cflags, CHR, DIGITVAL, EMPTY, FAILW, HAVE, INTOCON, iscalnum, L_BBND, L_BRACK, LASTTYPE, NEXT1, NEXT2, NOTE, NOTREACHED, vars::now, PLAIN, REG_EESCAPE, REG_EXPANDED, REG_UBACKREF, REG_UBOUNDS, REG_UBSALNUM, REG_UNONPOSIX, REG_UUNSPEC, RET, RETV, and skip().

Referenced by next().

{
    /*
     * Lex one token while in basic-RE (L_BRE) context.  pc is the character
     * the caller has already fetched from v->now.
     *
     * Every path leaves through RET/RETV (set v->nexttype, and for RETV also
     * v->nextvalue, then return 1) or FAILW (ERR(), then return 0).  The
     * "break" statements following those macros are therefore never reached.
     */
    chr         c = (chr) pc;

    /* first pass: characters that are special without a leading backslash */
    switch (c)
    {
        case CHR('*'):
            /* '*' is an ordinary character at the start, after '(' or '^' */
            if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
                RETV(PLAIN, c);
            RET('*');
            break;
        case CHR('['):
            /*
             * "[[:<:]]" and "[[:>:]]": the leading '[' is already consumed,
             * so the remaining six characters are matched here and reported
             * as a '<' or '>' token.
             */
            if (HAVE(6) && *(v->now + 0) == CHR('[') &&
                *(v->now + 1) == CHR(':') &&
                (*(v->now + 2) == CHR('<') ||
                 *(v->now + 2) == CHR('>')) &&
                *(v->now + 3) == CHR(':') &&
                *(v->now + 4) == CHR(']') &&
                *(v->now + 5) == CHR(']'))
            {
                c = *(v->now + 2);
                v->now += 6;
                NOTE(REG_UNONPOSIX);
                RET((c == CHR('<')) ? '<' : '>');
            }
            /* otherwise a bracket expression begins; switch lexical context */
            INTOCON(L_BRACK);
            /* token value is 0 if a '^' follows (and is consumed), else 1 */
            if (NEXT1('^'))
            {
                v->now++;
                RETV('[', 0);
            }
            RETV('[', 1);
            break;
        case CHR('.'):
            RET('.');
            break;
        case CHR('^'):
            /* '^' is a '^' token only at the start or right after '(' */
            if (LASTTYPE(EMPTY))
                RET('^');
            if (LASTTYPE('('))
            {
                NOTE(REG_UUNSPEC);
                RET('^');
            }
            RETV(PLAIN, c);
            break;
        case CHR('$'):
            /* in expanded syntax, call skip() before looking at what follows */
            if (v->cflags & REG_EXPANDED)
                skip(v);
            /* '$' is a '$' token only at end of input or just before "\)" */
            if (ATEOS())
                RET('$');
            if (NEXT2('\\', ')'))
            {
                NOTE(REG_UUNSPEC);
                RET('$');
            }
            RETV(PLAIN, c);
            break;
        case CHR('\\'):
            break;              /* see below */
        default:
            RETV(PLAIN, c);
            break;
    }

    /* only a backslash gets past the first switch */
    assert(c == CHR('\\'));

    /* a backslash with nothing after it is an error */
    if (ATEOS())
        FAILW(REG_EESCAPE);

    /* second pass: dispatch on the character following the backslash */
    c = *v->now++;
    switch (c)
    {
        case CHR('{'):
            /* "\{" opens a bound; switch to BRE-bound context */
            INTOCON(L_BBND);
            NOTE(REG_UBOUNDS);
            RET('{');
            break;
        case CHR('('):
            RETV('(', 1);
            break;
        case CHR(')'):
            RETV(')', c);
            break;
        case CHR('<'):
            NOTE(REG_UNONPOSIX);
            RET('<');
            break;
        case CHR('>'):
            NOTE(REG_UNONPOSIX);
            RET('>');
            break;
        case CHR('1'):
        case CHR('2'):
        case CHR('3'):
        case CHR('4'):
        case CHR('5'):
        case CHR('6'):
        case CHR('7'):
        case CHR('8'):
        case CHR('9'):
            /* "\1" .. "\9": single-digit back reference */
            NOTE(REG_UBACKREF);
            RETV(BACKREF, (chr) DIGITVAL(c));
            break;
        default:
            /* any other escaped character is literal; flag alphanumerics */
            if (iscalnum(c))
            {
                NOTE(REG_UBSALNUM);
                NOTE(REG_UUNSPEC);
            }
            RETV(PLAIN, c);
            break;
    }

    /* both switches return on every path */
    assert(NOTREACHED);
    return 0;
}

static chr chrnamed ( struct vars * v,
const chr * startp,
const chr * endp,
chr  lastresort 
) [static]

Definition at line 1121 of file regc_lex.c.

References cvec::chrs, element(), vars::err, cvec::nchrs, and range().

Referenced by lexescape().

{
    /*
     * Resolve the name in [startp, endp) to a chr via element() and range().
     * lastresort is returned when element() reports an error or when the
     * resulting cvec holds no chrs.  The caller's v->err is left as found.
     */
    const int   saved_err = v->err;
    int         lookup_err;
    celt        elt;
    struct cvec *set;

    /* run the lookup with a clean error slot so its outcome can be isolated */
    v->err = 0;
    elt = element(v, startp, endp);
    lookup_err = v->err;
    v->err = saved_err;

    if (lookup_err == 0)
    {
        set = range(v, elt, elt, 0);
        if (set->nchrs != 0)
            return set->chrs[0];
    }

    /* lookup failed or produced nothing usable */
    return (chr) lastresort;
}

static chr lexdigits ( struct vars * v,
int  base,
int  minlen,
int  maxlen 
) [static]

Definition at line 877 of file regc_lex.c.

References ATEOS, CHR, DIGITVAL, ERR, vars::now, and REG_EESCAPE.

Referenced by lexescape().

{
    /*
     * Read up to maxlen digits in the given base from v->now and return the
     * accumulated value.  Scanning stops, without consuming the offending
     * character, at the first character that is not a digit valid in base.
     * If fewer than minlen digits were read, REG_EESCAPE is raised via ERR();
     * the accumulated value is returned regardless.
     */
    const uchr  radix = (uchr) base;
    uchr        value = 0;      /* unsigned, so wraparound is well defined */
    int         ndigits = 0;

    while (ndigits < maxlen && !ATEOS())
    {
        const chr   ch = *v->now;   /* peek; consumed only if acceptable */
        int         digit;

        switch (ch)
        {
            case CHR('0'):
            case CHR('1'):
            case CHR('2'):
            case CHR('3'):
            case CHR('4'):
            case CHR('5'):
            case CHR('6'):
            case CHR('7'):
            case CHR('8'):
            case CHR('9'):
                digit = DIGITVAL(ch);
                break;
            case CHR('a'):
            case CHR('A'):
                digit = 10;
                break;
            case CHR('b'):
            case CHR('B'):
                digit = 11;
                break;
            case CHR('c'):
            case CHR('C'):
                digit = 12;
                break;
            case CHR('d'):
            case CHR('D'):
                digit = 13;
                break;
            case CHR('e'):
            case CHR('E'):
                digit = 14;
                break;
            case CHR('f'):
            case CHR('F'):
                digit = 15;
                break;
            default:
                digit = -1;     /* not a digit in any supported base */
                break;
        }

        /* leave the character unread if it is not a digit of this base */
        if (digit < 0 || digit >= base)
            break;

        v->now++;
        value = value * radix + (uchr) digit;
        ndigits++;
    }

    if (ndigits < minlen)
        ERR(REG_EESCAPE);

    return (chr) value;
}

static int lexescape ( struct vars * v  )  [static]

Definition at line 716 of file regc_lex.c.

References assert, ATEOS, BACKREF, CCLASS, vars::cflags, CHR, chrnamed(), ENDOF, FAILW, iscalnum, iscalpha, ISERR, lexdigits(), NOTE, NOTREACHED, vars::now, NWBDRY, PLAIN, REG_ADVF, REG_EESCAPE, REG_UBACKREF, REG_ULOCALE, REG_UNONPOSIX, REG_UUNPORT, RET, RETV, SBEGIN, SEND, and WBDRY.

Referenced by next().

{
    /*
     * Lex the escape that follows an already-consumed backslash; only valid
     * when REG_ADVF is set and input remains (both asserted below).
     *
     * Returns 1 with v->nexttype/v->nextvalue set (via RET/RETV), or 0 after
     * raising REG_EESCAPE (via FAILW).  Every switch arm returns, so the
     * trailing assert/return is never reached.
     */
    chr         c;
    /* names handed to chrnamed(); read-only, hence const */
    static const chr alert[] = {
        CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
    };
    static const chr esc[] = {
        CHR('E'), CHR('S'), CHR('C')
    };
    const chr  *save;

    assert(v->cflags & REG_ADVF);

    assert(!ATEOS());
    c = *v->now++;
    /* a backslash before a non-alphanumeric just quotes that character */
    if (!iscalnum(c))
        RETV(PLAIN, c);

    NOTE(REG_UNONPOSIX);
    switch (c)
    {
        case CHR('a'):
            /* look up "alert" by name; fall back to \007 */
            RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
            break;
        case CHR('A'):
            RETV(SBEGIN, 0);
            break;
        case CHR('b'):
            RETV(PLAIN, CHR('\b'));
            break;
        case CHR('B'):
            /* \B yields a literal backslash */
            RETV(PLAIN, CHR('\\'));
            break;
        case CHR('c'):
            /* \cX: keep only the low five bits of the following character */
            NOTE(REG_UUNPORT);
            if (ATEOS())
                FAILW(REG_EESCAPE);
            RETV(PLAIN, (chr) (*v->now++ & 037));
            break;
        case CHR('d'):
            NOTE(REG_ULOCALE);
            RETV(CCLASS, 'd');
            break;
        case CHR('D'):
            NOTE(REG_ULOCALE);
            RETV(CCLASS, 'D');
            break;
        case CHR('e'):
            /* look up "ESC" by name; fall back to \033 */
            NOTE(REG_UUNPORT);
            RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
            break;
        case CHR('f'):
            RETV(PLAIN, CHR('\f'));
            break;
        case CHR('m'):
            RET('<');
            break;
        case CHR('M'):
            RET('>');
            break;
        case CHR('n'):
            RETV(PLAIN, CHR('\n'));
            break;
        case CHR('r'):
            RETV(PLAIN, CHR('\r'));
            break;
        case CHR('s'):
            NOTE(REG_ULOCALE);
            RETV(CCLASS, 's');
            break;
        case CHR('S'):
            NOTE(REG_ULOCALE);
            RETV(CCLASS, 'S');
            break;
        case CHR('t'):
            RETV(PLAIN, CHR('\t'));
            break;
        case CHR('u'):
            /* \uXXXX: exactly four hex digits */
            c = lexdigits(v, 16, 4, 4);
            if (ISERR())
                FAILW(REG_EESCAPE);
            RETV(PLAIN, c);
            break;
        case CHR('U'):
            /* \UXXXXXXXX: exactly eight hex digits */
            c = lexdigits(v, 16, 8, 8);
            if (ISERR())
                FAILW(REG_EESCAPE);
            RETV(PLAIN, c);
            break;
        case CHR('v'):
            RETV(PLAIN, CHR('\v'));
            break;
        case CHR('w'):
            NOTE(REG_ULOCALE);
            RETV(CCLASS, 'w');
            break;
        case CHR('W'):
            NOTE(REG_ULOCALE);
            RETV(CCLASS, 'W');
            break;
        case CHR('x'):
            /* \x followed by one or more hex digits */
            NOTE(REG_UUNPORT);
            c = lexdigits(v, 16, 1, 255);       /* REs >255 long outside spec */
            if (ISERR())
                FAILW(REG_EESCAPE);
            RETV(PLAIN, c);
            break;
        case CHR('y'):
            NOTE(REG_ULOCALE);
            RETV(WBDRY, 0);
            break;
        case CHR('Y'):
            NOTE(REG_ULOCALE);
            RETV(NWBDRY, 0);
            break;
        case CHR('Z'):
            RETV(SEND, 0);
            break;
        case CHR('1'):
        case CHR('2'):
        case CHR('3'):
        case CHR('4'):
        case CHR('5'):
        case CHR('6'):
        case CHR('7'):
        case CHR('8'):
        case CHR('9'):
            /*
             * Could be a back reference or an octal escape.  Read it as a
             * decimal number first; "save" marks the position just after the
             * first digit so the scan can be undone.
             */
            save = v->now;
            v->now--;           /* put first digit back */
            c = lexdigits(v, 10, 1, 255);       /* REs >255 long outside spec */
            if (ISERR())
                FAILW(REG_EESCAPE);
            /* ugly heuristic (first test is "exactly 1 digit?") */
            if (v->now == save || ((int) c > 0 && (int) c <= v->nsubexp))
            {
                NOTE(REG_UBACKREF);
                RETV(BACKREF, (chr) c);
            }
            /* oops, doesn't look like it's a backref after all... */
            v->now = save;
            /* FALLTHROUGH: re-read the digits as an octal number */
        case CHR('0'):
            NOTE(REG_UUNPORT);
            v->now--;           /* put first digit back */
            c = lexdigits(v, 8, 1, 3);
            if (ISERR())
                FAILW(REG_EESCAPE);
            RETV(PLAIN, c);
            break;
        default:
            assert(iscalpha(c));
            FAILW(REG_EESCAPE); /* unknown alphabetic escape */
            break;
    }

    /* every arm above returns; keep a defined result for non-assert builds */
    assert(NOTREACHED);
    return 0;
}

static void lexnest ( struct vars * v,
const chr * beginp,
const chr * endp 
) [static]

Definition at line 203 of file regc_lex.c.

References assert, vars::now, NULL, vars::savenow, vars::savestop, and vars::stop.

Referenced by lexword(), and next().

{
    /*
     * Redirect the lexer to read from [beginp, endp), remembering the current
     * input bounds in v->savenow / v->savestop.
     */

    /* only one level of nesting: no outer position may already be saved */
    assert(v->savenow == NULL);

    /* stash the outer end marker, then install the nested one */
    v->savestop = v->stop;
    v->stop = endp;

    /* likewise for the read position */
    v->savenow = v->now;
    v->now = beginp;
}

static void lexstart ( struct vars * v  )  [static]

Definition at line 70 of file regc_lex.c.

References assert, vars::cflags, INTOCON, L_BRE, L_ERE, L_Q, next(), vars::nexttype, NOERR, prefixes(), REG_ADVANCED, REG_ADVF, REG_EXPANDED, REG_EXTENDED, REG_NEWLINE, and REG_QUOTE.

{
    /*
     * Initialise lexing: process any prefixes, pick the starting lexical
     * context from v->cflags, and fetch the first token.
     */

    /* prefixes() may turn on new type bits etc.; bail out if it failed */
    prefixes(v);
    NOERR();

    /* choose the context: plain BRE unless QUOTE or EXTENDED says otherwise */
    if (!(v->cflags & (REG_QUOTE | REG_EXTENDED)))
    {
        assert(!(v->cflags & (REG_QUOTE | REG_ADVF)));
        INTOCON(L_BRE);
    }
    else if (!(v->cflags & REG_QUOTE))
    {
        /* REG_EXTENDED without REG_QUOTE */
        assert(!(v->cflags & REG_QUOTE));
        INTOCON(L_ERE);
    }
    else
    {
        /* REG_QUOTE takes precedence over everything else */
        assert(!(v->cflags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE)));
        INTOCON(L_Q);
    }

    /* mark "at the start" so the first next() call can see it as lasttype */
    v->nexttype = EMPTY;
    /* prime the token stream */
    next(v);
}

static void lexword ( struct vars * v  )  [static]

Definition at line 268 of file regc_lex.c.

References backw, ENDOF, and lexnest().

{
    /* Make the whole backw[] array the lexer's nested input. */
    const chr  *first = backw;
    const chr  *limit = ENDOF(backw);

    lexnest(v, first, limit);
}

static chr newline ( void   )  [static]

Definition at line 1109 of file regc_lex.c.

References CHR.

{
    /* The newline character, expressed through the CHR() macro. */
    const chr   nl = CHR('\n');

    return nl;
}

static int next ( struct vars * v  )  [static]

Definition at line 277 of file regc_lex.c.

References assert, ATEOS, backD, backd, backS, backs, backW, backw, brbackd, brbacks, brbackw, brenext(), CCLASS, vars::cflags, CHR, COLLEL, DIGIT, DIGITVAL, ECLASS, EMPTY, END, ENDOF, EOS, FAILW, HAVE, INCON, INTOCON, iscalnum, iscdigit, ISERR, L_BBND, L_BRACK, L_BRE, L_CCL, L_CEL, L_EBND, L_ECL, L_ERE, L_Q, LACON, LASTTYPE, vars::lasttype, vars::lexcon, lexescape(), lexnest(), NEXT1, vars::nexttype, vars::nextvalue, NOTE, NOTREACHED, vars::now, NULL, PLAIN, RANGE, REG_ADVF, REG_ASSERT, REG_BADBR, REG_BADRPT, REG_BOSONLY, REG_EBRACE, REG_EBRACK, REG_EESCAPE, REG_EXPANDED, REG_EXTENDED, REG_NOSUB, REG_UBBS, REG_UBOUNDS, REG_UBRACES, REG_UBSALNUM, REG_ULOCALE, REG_ULOOKAHEAD, REG_UNONPOSIX, REG_UUNSPEC, RET, RETV, vars::savenow, vars::savestop, SBEGIN, skip(), and vars::stop.

Referenced by AllocSetDelete(), AllocSetReset(), ECPGconnect(), find_struct_member(), get_source_line(), lexstart(), MergeAttributes(), pg_event_trigger_dropped_objects(), pgstat_db_requested(), pgstat_recv_inquiry(), pgstat_write_statsfiles(), prepare_common(), set_timetravel(), setval3_oid(), and setval_oid().

{
    /*
     * Fetch the next token into v->nexttype / v->nextvalue, remembering the
     * previous token type in v->lasttype.
     *
     * Returns 1 on success and 0 on failure.  Most exits go through
     * RET/RETV (set the token, return 1) or FAILW (ERR(), return 0), so the
     * "break" statements after those macros are never reached.
     *
     * Dispatch is by lexical context (v->lexcon): BRE tokens are delegated
     * to brenext(); quoted, bound and bracket contexts are handled in the
     * first switch; ERE/ARE tokens are handled by the code after it.
     */
    chr         c;

    /* errors yield an infinite sequence of failures */
    if (ISERR())
        return 0;               /* the error has set nexttype to EOS */

    /* remember flavor of last token */
    v->lasttype = v->nexttype;

    /* REG_BOSONLY */
    if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY))
    {
        /* at start of a REG_BOSONLY RE */
        RETV(SBEGIN, 0);        /* same as \A */
    }

    /* if we're nested and we've hit end, return to outer level */
    if (v->savenow != NULL && ATEOS())
    {
        v->now = v->savenow;
        v->stop = v->savestop;
        v->savenow = v->savestop = NULL;
    }

    /* skip white space etc. if appropriate (not in literal or []) */
    if (v->cflags & REG_EXPANDED)
        switch (v->lexcon)
        {
            case L_ERE:
            case L_BRE:
            case L_EBND:
            case L_BBND:
                skip(v);
                break;
        }

    /* handle EOS, depending on context */
    if (ATEOS())
    {
        switch (v->lexcon)
        {
            case L_ERE:
            case L_BRE:
            case L_Q:
                RET(EOS);
                break;
            case L_EBND:
            case L_BBND:
                /* input ended inside a bound */
                FAILW(REG_EBRACE);
                break;
            case L_BRACK:
            case L_CEL:
            case L_ECL:
            case L_CCL:
                /* input ended inside a bracket expression */
                FAILW(REG_EBRACK);
                break;
        }
        assert(NOTREACHED);
    }

    /* okay, time to actually get a character */
    c = *v->now++;

    /* deal with the easy contexts, punt EREs to code below */
    switch (v->lexcon)
    {
        case L_BRE:             /* punt BREs to separate function */
            return brenext(v, c);
            break;
        case L_ERE:             /* see below */
            break;
        case L_Q:               /* literal strings are easy */
            RETV(PLAIN, c);
            break;
        case L_BBND:            /* bounds are fairly simple */
        case L_EBND:
            switch (c)
            {
                case CHR('0'):
                case CHR('1'):
                case CHR('2'):
                case CHR('3'):
                case CHR('4'):
                case CHR('5'):
                case CHR('6'):
                case CHR('7'):
                case CHR('8'):
                case CHR('9'):
                    RETV(DIGIT, (chr) DIGITVAL(c));
                    break;
                case CHR(','):
                    RET(',');
                    break;
                case CHR('}'):  /* ERE bound ends with } */
                    if (INCON(L_EBND))
                    {
                        INTOCON(L_ERE);
                        /* in AREs a trailing '?' gives token value 0 */
                        if ((v->cflags & REG_ADVF) && NEXT1('?'))
                        {
                            v->now++;
                            NOTE(REG_UNONPOSIX);
                            RETV('}', 0);
                        }
                        RETV('}', 1);
                    }
                    else
                        FAILW(REG_BADBR);
                    break;
                case CHR('\\'): /* BRE bound ends with \} */
                    if (INCON(L_BBND) && NEXT1('}'))
                    {
                        v->now++;
                        INTOCON(L_BRE);
                        RET('}');
                    }
                    else
                        FAILW(REG_BADBR);
                    break;
                default:
                    FAILW(REG_BADBR);
                    break;
            }
            assert(NOTREACHED);
            break;
        case L_BRACK:           /* brackets are not too hard */
            switch (c)
            {
                case CHR(']'):
                    /* ']' right after the opening '[' is an ordinary char */
                    if (LASTTYPE('['))
                        RETV(PLAIN, c);
                    else
                    {
                        INTOCON((v->cflags & REG_EXTENDED) ?
                                L_ERE : L_BRE);
                        RET(']');
                    }
                    break;
                case CHR('\\'):
                    NOTE(REG_UBBS);
                    /* outside AREs a backslash in brackets is literal */
                    if (!(v->cflags & REG_ADVF))
                        RETV(PLAIN, c);
                    NOTE(REG_UNONPOSIX);
                    if (ATEOS())
                        FAILW(REG_EESCAPE);
                    (DISCARD) lexescape(v);
                    switch (v->nexttype)
                    {           /* not all escapes okay here */
                        case PLAIN:
                            return 1;
                            break;
                        case CCLASS:
                            /* feed the bracket-safe expansion as nested input */
                            switch (v->nextvalue)
                            {
                                case 'd':
                                    lexnest(v, brbackd, ENDOF(brbackd));
                                    break;
                                case 's':
                                    lexnest(v, brbacks, ENDOF(brbacks));
                                    break;
                                case 'w':
                                    lexnest(v, brbackw, ENDOF(brbackw));
                                    break;
                                default:
                                    FAILW(REG_EESCAPE);
                                    break;
                            }
                            /* lexnest done, back up and try again */
                            v->nexttype = v->lasttype;
                            return next(v);
                            break;
                    }
                    /* not one of the acceptable escapes */
                    FAILW(REG_EESCAPE);
                    break;
                case CHR('-'):
                    /* '-' is literal right after '[' or right before ']' */
                    if (LASTTYPE('[') || NEXT1(']'))
                        RETV(PLAIN, c);
                    else
                        RETV(RANGE, c);
                    break;
                case CHR('['):
                    if (ATEOS())
                        FAILW(REG_EBRACK);
                    switch (*v->now++)
                    {
                        case CHR('.'):
                            INTOCON(L_CEL);
                            /* might or might not be locale-specific */
                            RET(COLLEL);
                            break;
                        case CHR('='):
                            INTOCON(L_ECL);
                            NOTE(REG_ULOCALE);
                            RET(ECLASS);
                            break;
                        case CHR(':'):
                            INTOCON(L_CCL);
                            NOTE(REG_ULOCALE);
                            RET(CCLASS);
                            break;
                        default:        /* oops */
                            v->now--;
                            RETV(PLAIN, c);
                            break;
                    }
                    assert(NOTREACHED);
                    break;
                default:
                    RETV(PLAIN, c);
                    break;
            }
            assert(NOTREACHED);
            break;
        case L_CEL:             /* collating elements are easy */
            if (c == CHR('.') && NEXT1(']'))
            {
                v->now++;
                INTOCON(L_BRACK);
                RETV(END, '.');
            }
            else
                RETV(PLAIN, c);
            break;
        case L_ECL:             /* ditto equivalence classes */
            if (c == CHR('=') && NEXT1(']'))
            {
                v->now++;
                INTOCON(L_BRACK);
                RETV(END, '=');
            }
            else
                RETV(PLAIN, c);
            break;
        case L_CCL:             /* ditto character classes */
            if (c == CHR(':') && NEXT1(']'))
            {
                v->now++;
                INTOCON(L_BRACK);
                RETV(END, ':');
            }
            else
                RETV(PLAIN, c);
            break;
        default:
            assert(NOTREACHED);
            break;
    }

    /* that got rid of everything except EREs and AREs */
    assert(INCON(L_ERE));

    /* deal with EREs and AREs, except for backslashes */
    switch (c)
    {
        case CHR('|'):
            RET('|');
            break;
        case CHR('*'):
            /* for '*', '+' and '?': in AREs a trailing '?' gives value 0 */
            if ((v->cflags & REG_ADVF) && NEXT1('?'))
            {
                v->now++;
                NOTE(REG_UNONPOSIX);
                RETV('*', 0);
            }
            RETV('*', 1);
            break;
        case CHR('+'):
            if ((v->cflags & REG_ADVF) && NEXT1('?'))
            {
                v->now++;
                NOTE(REG_UNONPOSIX);
                RETV('+', 0);
            }
            RETV('+', 1);
            break;
        case CHR('?'):
            if ((v->cflags & REG_ADVF) && NEXT1('?'))
            {
                v->now++;
                NOTE(REG_UNONPOSIX);
                RETV('?', 0);
            }
            RETV('?', 1);
            break;
        case CHR('{'):          /* bounds start or plain character */
            if (v->cflags & REG_EXPANDED)
                skip(v);
            /* '{' only opens a bound when a digit follows */
            if (ATEOS() || !iscdigit(*v->now))
            {
                NOTE(REG_UBRACES);
                NOTE(REG_UUNSPEC);
                RETV(PLAIN, c);
            }
            else
            {
                NOTE(REG_UBOUNDS);
                INTOCON(L_EBND);
                RET('{');
            }
            assert(NOTREACHED);
            break;
        case CHR('('):          /* parenthesis, or advanced extension */
            if ((v->cflags & REG_ADVF) && NEXT1('?'))
            {
                NOTE(REG_UNONPOSIX);
                v->now++;
                /*
                 * "(?" may be the last thing in the input; without this
                 * check the switch below would read one element past
                 * v->stop.  Report it the same way as an unrecognized
                 * "(?x" sequence.
                 */
                if (ATEOS())
                    FAILW(REG_BADRPT);
                switch (*v->now++)
                {
                    case CHR(':'):      /* non-capturing paren */
                        RETV('(', 0);
                        break;
                    case CHR('#'):      /* comment */
                        /* discard through the closing ')' or end of input */
                        while (!ATEOS() && *v->now != CHR(')'))
                            v->now++;
                        if (!ATEOS())
                            v->now++;
                        assert(v->nexttype == v->lasttype);
                        return next(v);
                        break;
                    case CHR('='):      /* positive lookahead */
                        NOTE(REG_ULOOKAHEAD);
                        RETV(LACON, 1);
                        break;
                    case CHR('!'):      /* negative lookahead */
                        NOTE(REG_ULOOKAHEAD);
                        RETV(LACON, 0);
                        break;
                    default:
                        FAILW(REG_BADRPT);
                        break;
                }
                assert(NOTREACHED);
            }
            if (v->cflags & REG_NOSUB)
                RETV('(', 0);   /* all parens non-capturing */
            else
                RETV('(', 1);
            break;
        case CHR(')'):
            if (LASTTYPE('('))
                NOTE(REG_UUNSPEC);
            RETV(')', c);
            break;
        case CHR('['):          /* easy except for [[:<:]] and [[:>:]] */
            if (HAVE(6) && *(v->now + 0) == CHR('[') &&
                *(v->now + 1) == CHR(':') &&
                (*(v->now + 2) == CHR('<') ||
                 *(v->now + 2) == CHR('>')) &&
                *(v->now + 3) == CHR(':') &&
                *(v->now + 4) == CHR(']') &&
                *(v->now + 5) == CHR(']'))
            {
                c = *(v->now + 2);
                v->now += 6;
                NOTE(REG_UNONPOSIX);
                RET((c == CHR('<')) ? '<' : '>');
            }
            INTOCON(L_BRACK);
            /* token value is 0 if a '^' follows (and is consumed), else 1 */
            if (NEXT1('^'))
            {
                v->now++;
                RETV('[', 0);
            }
            RETV('[', 1);
            break;
        case CHR('.'):
            RET('.');
            break;
        case CHR('^'):
            RET('^');
            break;
        case CHR('$'):
            RET('$');
            break;
        case CHR('\\'): /* mostly punt backslashes to code below */
            if (ATEOS())
                FAILW(REG_EESCAPE);
            break;
        default:                /* ordinary character */
            RETV(PLAIN, c);
            break;
    }

    /* ERE/ARE backslash handling; backslash already eaten */
    assert(!ATEOS());
    if (!(v->cflags & REG_ADVF))
    {                           /* only AREs have non-trivial escapes */
        if (iscalnum(*v->now))
        {
            NOTE(REG_UBSALNUM);
            NOTE(REG_UUNSPEC);
        }
        RETV(PLAIN, *v->now++);
    }
    (DISCARD) lexescape(v);
    if (ISERR())
        FAILW(REG_EESCAPE);
    if (v->nexttype == CCLASS)
    {                           /* fudge at lexical level */
        /* replace the class escape with its expansion as nested input */
        switch (v->nextvalue)
        {
            case 'd':
                lexnest(v, backd, ENDOF(backd));
                break;
            case 'D':
                lexnest(v, backD, ENDOF(backD));
                break;
            case 's':
                lexnest(v, backs, ENDOF(backs));
                break;
            case 'S':
                lexnest(v, backS, ENDOF(backS));
                break;
            case 'w':
                lexnest(v, backw, ENDOF(backw));
                break;
            case 'W':
                lexnest(v, backW, ENDOF(backW));
                break;
            default:
                assert(NOTREACHED);
                FAILW(REG_ASSERT);
                break;
        }
        /* lexnest done, back up and try again */
        v->nexttype = v->lasttype;
        return next(v);
    }
    /* otherwise, lexescape has already done the work */
    return !ISERR();
}

static void prefixes ( struct vars * v )  [static]

Definition at line 99 of file regc_lex.c.

References ATEOS, vars::cflags, CHR, ERR, HAVE, iscalpha, NEXT1, NEXT2, NEXT3, NOTE, vars::now, REG_ADVANCED, REG_ADVF, REG_BADOPT, REG_BADPAT, REG_BADRPT, REG_EXPANDED, REG_QUOTE, and REG_UNONPOSIX.

Referenced by lexstart(), and NIImportAffixes().

{
    /*
     * Process any prefix at the current scan position (v->now): either a
     * "***" director or an ARE "(?letters)" embedded-option group.  The
     * compile flags in v->cflags are updated to match and v->now is moved
     * past whatever was consumed.  Errors are signalled with ERR().
     */

    /* a pattern already marked as a literal string has no prefixes */
    if (v->cflags & REG_QUOTE)
        return;

    /* a director: three stars followed by one selector character */
    if (HAVE(4) && NEXT3('*', '*', '*'))
    {
        switch (*(v->now + 3))
        {
            case CHR('?'):
                /* "***?" is always an error; the message shows the version */
                ERR(REG_BADPAT);
                return;
            case CHR('='):
                /* "***=": the rest of the pattern is a literal string */
                NOTE(REG_UNONPOSIX);
                v->cflags |= REG_QUOTE;
                v->cflags &= ~(REG_ADVANCED | REG_EXPANDED | REG_NEWLINE);
                v->now += 4;
                return;         /* nothing further can be a prefix */
            case CHR(':'):
                /* "***:": the rest of the pattern is an ARE */
                NOTE(REG_UNONPOSIX);
                v->cflags |= REG_ADVANCED;
                v->now += 4;
                break;
            default:
                /* any other use of a leading "***" is rejected */
                ERR(REG_BADRPT);
                return;
        }
    }

    /* embedded options exist only in AREs, not in BREs or EREs */
    if ((v->cflags & REG_ADVANCED) != REG_ADVANCED)
        return;

    /* no "(?" followed by a letter means no embedded options: done */
    if (!(HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))))
        return;

    NOTE(REG_UNONPOSIX);
    v->now += 2;                /* step over the "(?" */

    /* apply each option letter in turn; stop at the first non-letter */
    while (!ATEOS() && iscalpha(*v->now))
    {
        switch (*v->now)
        {
            case CHR('b'):      /* BREs (but why???) */
                v->cflags &= ~(REG_ADVANCED | REG_QUOTE);
                break;
            case CHR('c'):      /* case sensitive */
                v->cflags &= ~REG_ICASE;
                break;
            case CHR('e'):      /* plain EREs */
                v->cflags |= REG_EXTENDED;
                v->cflags &= ~(REG_ADVF | REG_QUOTE);
                break;
            case CHR('i'):      /* case insensitive */
                v->cflags |= REG_ICASE;
                break;
            case CHR('m'):      /* Perloid synonym for n */
            case CHR('n'):      /* \n affects ^ $ . [^ */
                v->cflags |= REG_NEWLINE;
                break;
            case CHR('p'):      /* ~Perl, \n affects . [^ */
                v->cflags |= REG_NLSTOP;
                v->cflags &= ~REG_NLANCH;
                break;
            case CHR('q'):      /* literal string */
                v->cflags |= REG_QUOTE;
                v->cflags &= ~REG_ADVANCED;
                break;
            case CHR('s'):      /* single line, \n ordinary */
                v->cflags &= ~REG_NEWLINE;
                break;
            case CHR('t'):      /* tight syntax */
                v->cflags &= ~REG_EXPANDED;
                break;
            case CHR('w'):      /* weird, \n affects ^ $ only */
                v->cflags &= ~REG_NLSTOP;
                v->cflags |= REG_NLANCH;
                break;
            case CHR('x'):      /* expanded syntax */
                v->cflags |= REG_EXPANDED;
                break;
            default:            /* a letter that is not a known option */
                ERR(REG_BADOPT);
                return;
        }
        v->now++;
    }

    /* the option letters must be closed by a right parenthesis */
    if (!NEXT1(')'))
    {
        ERR(REG_BADOPT);
        return;
    }
    v->now++;

    /* once quoting is selected, expanded and newline modes are turned off */
    if (v->cflags & REG_QUOTE)
        v->cflags &= ~(REG_EXPANDED | REG_NEWLINE);
}

static void skip ( struct vars * v )  [static]

Definition at line 1081 of file regc_lex.c.

References assert, ATEOS, vars::cflags, CHR, iscspace, NEXT1, NOTE, vars::now, REG_EXPANDED, and REG_UNONPOSIX.

Referenced by array_replace_internal(), brenext(), DefineVirtualRelation(), next(), s_udiv(), and system_reseed().

{
    /*
     * Advance v->now over white space and '#' comments.  Asserts that
     * REG_EXPANDED is set in v->cflags.  If anything at all was skipped,
     * REG_UNONPOSIX is noted.
     */
    const chr  *const origin = v->now;

    assert(v->cflags & REG_EXPANDED);

    for (;;)
    {
        /* swallow a run of white space */
        for (; !ATEOS() && iscspace(*v->now); v->now++)
            continue;

        /* stop at end of input or at anything that is not a comment */
        if (!NEXT1('#'))
            break;

        /*
         * Swallow the comment up to, but not including, the newline; the
         * white-space loop above consumes the newline on the next pass.
         */
        for (; !ATEOS() && *v->now != CHR('\n'); v->now++)
            continue;
    }

    if (v->now != origin)
        NOTE(REG_UNONPOSIX);
}


Variable Documentation

const chr backd[] [static]
Initial value:
 {  
    CHR('['), CHR('['), CHR(':'),
    CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
    CHR(':'), CHR(']'), CHR(']')
}

Definition at line 217 of file regc_lex.c.

Referenced by next().

const chr backD[] [static]
Initial value:
 {  
    CHR('['), CHR('^'), CHR('['), CHR(':'),
    CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
    CHR(':'), CHR(']'), CHR(']')
}

Definition at line 222 of file regc_lex.c.

Referenced by next().

const chr backs[] [static]
Initial value:
 {  
    CHR('['), CHR('['), CHR(':'),
    CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
    CHR(':'), CHR(']'), CHR(']')
}

Definition at line 232 of file regc_lex.c.

Referenced by next().

const chr backS[] [static]
Initial value:
 {  
    CHR('['), CHR('^'), CHR('['), CHR(':'),
    CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
    CHR(':'), CHR(']'), CHR(']')
}

Definition at line 237 of file regc_lex.c.

Referenced by next().

const chr backW[] [static]
Initial value:
 {  
    CHR('['), CHR('^'), CHR('['), CHR(':'),
    CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
    CHR(':'), CHR(']'), CHR('_'), CHR(']')
}

Definition at line 252 of file regc_lex.c.

Referenced by next().

const chr backw[] [static]
Initial value:
 {  
    CHR('['), CHR('['), CHR(':'),
    CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
    CHR(':'), CHR(']'), CHR('_'), CHR(']')
}

Definition at line 247 of file regc_lex.c.

Referenced by lexword(), and next().

const chr brbackd[] [static]
Initial value:
 {  
    CHR('['), CHR(':'),
    CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
    CHR(':'), CHR(']')
}

Definition at line 227 of file regc_lex.c.

Referenced by next().

const chr brbacks[] [static]
Initial value:
 {  
    CHR('['), CHR(':'),
    CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
    CHR(':'), CHR(']')
}

Definition at line 242 of file regc_lex.c.

Referenced by next().

const chr brbackw[] [static]
Initial value:
 {  
    CHR('['), CHR(':'),
    CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
    CHR(':'), CHR(']'), CHR('_')
}

Definition at line 257 of file regc_lex.c.

Referenced by next().