Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Defines | Typedefs | Functions | Variables

regexp.c File Reference

#include "postgres.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "regex/regex.h"
#include "utils/array.h"
#include "utils/builtins.h"
Include dependency graph for regexp.c:

Go to the source code of this file.

Data Structures

struct  pg_re_flags
struct  regexp_matches_ctx
struct  cached_re_str

Defines

#define PG_GETARG_TEXT_PP_IF_EXISTS(_n)   (PG_NARGS() > (_n) ? PG_GETARG_TEXT_PP(_n) : NULL)
#define MAX_CACHED_RES   32

Typedefs

typedef struct pg_re_flags pg_re_flags
typedef struct regexp_matches_ctx regexp_matches_ctx
typedef struct cached_re_str cached_re_str

Functions

static regexp_matches_ctx * setup_regexp_matches (text *orig_str, text *pattern, text *flags, Oid collation, bool force_glob, bool use_subpatterns, bool ignore_degenerate)
static void cleanup_regexp_matches (regexp_matches_ctx *matchctx)
static ArrayType * build_regexp_matches_result (regexp_matches_ctx *matchctx)
static Datum build_regexp_split_result (regexp_matches_ctx *splitctx)
static regex_t * RE_compile_and_cache (text *text_re, int cflags, Oid collation)
static bool RE_wchar_execute (regex_t *re, pg_wchar *data, int data_len, int start_search, int nmatch, regmatch_t *pmatch)
static bool RE_execute (regex_t *re, char *dat, int dat_len, int nmatch, regmatch_t *pmatch)
static bool RE_compile_and_execute (text *text_re, char *dat, int dat_len, int cflags, Oid collation, int nmatch, regmatch_t *pmatch)
static void parse_re_flags (pg_re_flags *flags, text *opts)
Datum nameregexeq (PG_FUNCTION_ARGS)
Datum nameregexne (PG_FUNCTION_ARGS)
Datum textregexeq (PG_FUNCTION_ARGS)
Datum textregexne (PG_FUNCTION_ARGS)
Datum nameicregexeq (PG_FUNCTION_ARGS)
Datum nameicregexne (PG_FUNCTION_ARGS)
Datum texticregexeq (PG_FUNCTION_ARGS)
Datum texticregexne (PG_FUNCTION_ARGS)
Datum textregexsubstr (PG_FUNCTION_ARGS)
Datum textregexreplace_noopt (PG_FUNCTION_ARGS)
Datum textregexreplace (PG_FUNCTION_ARGS)
Datum similar_escape (PG_FUNCTION_ARGS)
Datum regexp_matches (PG_FUNCTION_ARGS)
Datum regexp_matches_no_flags (PG_FUNCTION_ARGS)
Datum regexp_split_to_table (PG_FUNCTION_ARGS)
Datum regexp_split_to_table_no_flags (PG_FUNCTION_ARGS)
Datum regexp_split_to_array (PG_FUNCTION_ARGS)
Datum regexp_split_to_array_no_flags (PG_FUNCTION_ARGS)
char * regexp_fixed_prefix (text *text_re, bool case_insensitive, Oid collation, bool *exact)

Variables

static int num_res = 0
static cached_re_str re_array [MAX_CACHED_RES]

Define Documentation

#define MAX_CACHED_RES   32

Definition at line 90 of file regexp.c.

Referenced by RE_compile_and_cache().

#define PG_GETARG_TEXT_PP_IF_EXISTS (   _n  )     (PG_NARGS() > (_n) ? PG_GETARG_TEXT_PP(_n) : NULL)

Definition at line 38 of file regexp.c.

Referenced by regexp_matches(), regexp_split_to_array(), and regexp_split_to_table().


Typedef Documentation

typedef struct cached_re_str cached_re_str
typedef struct pg_re_flags pg_re_flags

Function Documentation

static ArrayType * build_regexp_matches_result ( regexp_matches_ctx * matchctx  )  [static]

Definition at line 999 of file regexp.c.

References construct_md_array(), DirectFunctionCall3, regexp_matches_ctx::elems, i, Int32GetDatum, regexp_matches_ctx::match_locs, regexp_matches_ctx::next_match, regexp_matches_ctx::npatterns, regexp_matches_ctx::nulls, regexp_matches_ctx::orig_str, PointerGetDatum, text_substr(), and TEXTOID.

Referenced by regexp_matches().

{
    Datum      *values = matchctx->elems;
    bool       *isnull = matchctx->nulls;
    int         ndims[1];
    int         lower_bounds[1];
    int         idx;
    int         pos;

    /*
     * Build the array of captured substrings for the match selected by
     * matchctx->next_match.  match_locs holds (start, end) offset pairs,
     * npatterns pairs per match; a negative offset in a pair yields a NULL
     * array element.  The caller-provided elems/nulls workspace is filled
     * in and handed to construct_md_array().
     */
    pos = matchctx->next_match * matchctx->npatterns * 2;
    for (idx = 0; idx < matchctx->npatterns; idx++, pos += 2)
    {
        int         start_off = matchctx->match_locs[pos];
        int         end_off = matchctx->match_locs[pos + 1];

        if (start_off >= 0 && end_off >= 0)
        {
            /* text_substr is given start_off + 1 and a length */
            values[idx] = DirectFunctionCall3(text_substr,
                                              PointerGetDatum(matchctx->orig_str),
                                              Int32GetDatum(start_off + 1),
                                              Int32GetDatum(end_off - start_off));
            isnull[idx] = false;
        }
        else
        {
            /* no usable location recorded for this pattern */
            values[idx] = (Datum) 0;
            isnull[idx] = true;
        }
    }

    /* One-dimensional result with lower bound 1 */
    lower_bounds[0] = 1;
    ndims[0] = matchctx->npatterns;

    /* XXX: the trailing arguments hardcode assumptions about the text type */
    return construct_md_array(values, isnull, 1, ndims, lower_bounds,
                              TEXTOID, -1, false, 'i');
}

static Datum build_regexp_split_result ( regexp_matches_ctx * splitctx  )  [static]

Definition at line 1142 of file regexp.c.

References DirectFunctionCall2, DirectFunctionCall3, elog, ERROR, Int32GetDatum, regexp_matches_ctx::match_locs, regexp_matches_ctx::next_match, regexp_matches_ctx::nmatches, regexp_matches_ctx::orig_str, PointerGetDatum, text_substr(), and text_substr_no_len().

Referenced by regexp_split_to_array(), and regexp_split_to_table().

{
    int         from;
    int         upto;

    /*
     * Return the piece of the original string lying between the end of the
     * previous match and the start of the match at next_match.  When
     * next_match has run past the last match, the remainder of the string
     * is returned instead.
     */

    /* The piece begins where the previous match ended, or at offset 0 */
    from = (splitctx->next_match > 0)
        ? splitctx->match_locs[splitctx->next_match * 2 - 1]
        : 0;
    if (from < 0)
        elog(ERROR, "invalid match ending position");

    /* No further match: hand back everything from 'from' onward */
    if (splitctx->next_match >= splitctx->nmatches)
        return DirectFunctionCall2(text_substr_no_len,
                                   PointerGetDatum(splitctx->orig_str),
                                   Int32GetDatum(from + 1));

    /* Otherwise the piece stops where the next match starts */
    upto = splitctx->match_locs[splitctx->next_match * 2];
    if (upto < from)
        elog(ERROR, "invalid match starting position");

    return DirectFunctionCall3(text_substr,
                               PointerGetDatum(splitctx->orig_str),
                               Int32GetDatum(from + 1),
                               Int32GetDatum(upto - from));
}

static void cleanup_regexp_matches ( regexp_matches_ctx * matchctx  )  [static]

Definition at line 984 of file regexp.c.

References regexp_matches_ctx::elems, regexp_matches_ctx::match_locs, regexp_matches_ctx::nulls, regexp_matches_ctx::orig_str, and pfree().

Referenced by regexp_matches(), and regexp_split_to_table().

{
    /*
     * Release everything hanging off the match context, then the context
     * itself.  orig_str and match_locs are freed unconditionally, so the
     * caller must own orig_str.
     */
    pfree(matchctx->orig_str);
    pfree(matchctx->match_locs);

    /* The elems/nulls workspace is freed only when it was set up */
    if (matchctx->elems != NULL)
    {
        pfree(matchctx->elems);
    }
    if (matchctx->nulls != NULL)
    {
        pfree(matchctx->nulls);
    }

    pfree(matchctx);
}

Datum nameicregexeq ( PG_FUNCTION_ARGS   ) 
Datum nameicregexne ( PG_FUNCTION_ARGS   ) 
Datum nameregexeq ( PG_FUNCTION_ARGS   ) 
Datum nameregexne ( PG_FUNCTION_ARGS   ) 
static void parse_re_flags ( pg_re_flags * flags,
text * opts 
) [static]

Definition at line 351 of file regexp.c.

References pg_re_flags::cflags, ereport, errcode(), errmsg(), ERROR, pg_re_flags::glob, i, REG_ADVANCED, REG_EXTENDED, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by setup_regexp_matches(), and textregexreplace().

{
    char       *cursor;
    char       *limit;

    /*
     * Translate a string of single-letter regex options into compile flags
     * (flags->cflags) plus the "global" indicator (flags->glob).  An
     * unrecognized letter raises ERRCODE_INVALID_PARAMETER_VALUE.
     */

    /* Defaults: advanced regex flavor, non-global matching */
    flags->cflags = REG_ADVANCED;
    flags->glob = false;

    /* A NULL options argument leaves the defaults untouched */
    if (opts == NULL)
        return;

    cursor = VARDATA_ANY(opts);
    limit = cursor + VARSIZE_ANY_EXHDR(opts);

    /* Each letter, taken left to right, adjusts the flags built so far */
    while (cursor < limit)
    {
        char        letter = *cursor++;

        switch (letter)
        {
            case 'g':           /* global: not a compile flag */
                flags->glob = true;
                break;
            case 'b':           /* basic REs: drop all flavor bits */
                flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED | REG_QUOTE);
                break;
            case 'c':           /* case-sensitive matching */
                flags->cflags &= ~REG_ICASE;
                break;
            case 'e':           /* extended REs only */
                flags->cflags |= REG_EXTENDED;
                flags->cflags &= ~(REG_ADVANCED | REG_QUOTE);
                break;
            case 'i':           /* case-insensitive matching */
                flags->cflags |= REG_ICASE;
                break;
            case 'm':           /* synonym for 'n' */
            case 'n':           /* newline-sensitive matching */
                flags->cflags |= REG_NEWLINE;
                break;
            case 'p':           /* partial newline sensitivity: stop only */
                flags->cflags |= REG_NLSTOP;
                flags->cflags &= ~REG_NLANCH;
                break;
            case 'q':           /* pattern is a literal string */
                flags->cflags |= REG_QUOTE;
                flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED);
                break;
            case 's':           /* newline is an ordinary character */
                flags->cflags &= ~REG_NEWLINE;
                break;
            case 't':           /* tight (non-expanded) syntax */
                flags->cflags &= ~REG_EXPANDED;
                break;
            case 'w':           /* inverse partial sensitivity: anchors only */
                flags->cflags &= ~REG_NLSTOP;
                flags->cflags |= REG_NLANCH;
                break;
            case 'x':           /* expanded syntax */
                flags->cflags |= REG_EXPANDED;
                break;
            default:
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                         errmsg("invalid regexp option: \"%c\"",
                                letter)));
                break;
        }
    }
}

static regex_t* RE_compile_and_cache ( text * text_re,
int  cflags,
Oid  collation 
) [static]

Definition at line 132 of file regexp.c.

References Assert, cached_re_str::cre_collation, cached_re_str::cre_flags, cached_re_str::cre_pat, cached_re_str::cre_pat_len, cached_re_str::cre_re, ereport, errcode(), errmsg(), ERROR, free, i, malloc, Max, MAX_CACHED_RES, memcmp(), memmove, NULL, num_res, palloc(), pfree(), pg_mb2wchar_with_len(), pg_regcomp(), pg_regerror(), pg_regfree(), REG_OKAY, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by RE_compile_and_execute(), regexp_fixed_prefix(), setup_regexp_matches(), textregexreplace(), textregexreplace_noopt(), and textregexsubstr().

{
    /*
     * Return a compiled regex for text_re under the given cflags and
     * collation, consulting the static re_array cache first.
     *
     * Cache hit:  the matching entry is moved to slot 0 (if not already
     * there) and a pointer to its cre_re is returned.
     * Cache miss: the pattern is converted to pg_wchar, compiled, and
     * inserted at slot 0; if the cache already holds MAX_CACHED_RES entries
     * the last one is freed first.
     *
     * Errors: an invalid pattern raises ERRCODE_INVALID_REGULAR_EXPRESSION;
     * failure to malloc the saved pattern text raises ERRCODE_OUT_OF_MEMORY.
     *
     * NOTE: the result points into re_array[0]; a later call may shift or
     * overwrite that slot.
     */
    int         text_re_len = VARSIZE_ANY_EXHDR(text_re);
    char       *text_re_val = VARDATA_ANY(text_re);
    pg_wchar   *pattern;
    int         pattern_len;
    int         i;
    int         regcomp_result;
    cached_re_str re_temp;
    char        errMsg[100];

    /*
     * Look for a match among previously compiled REs.  Since the data
     * structure is self-organizing with most-used entries at the front, our
     * search strategy can just be to scan from the front.
     */
    for (i = 0; i < num_res; i++)
    {
        /* an entry matches only if length, flags, collation and bytes agree */
        if (re_array[i].cre_pat_len == text_re_len &&
            re_array[i].cre_flags == cflags &&
            re_array[i].cre_collation == collation &&
            memcmp(re_array[i].cre_pat, text_re_val, text_re_len) == 0)
        {
            /*
             * Found a match; move it to front if not there already.
             */
            if (i > 0)
            {
                /* slide entries 0..i-1 down one slot, then drop ours in front */
                re_temp = re_array[i];
                memmove(&re_array[1], &re_array[0], i * sizeof(cached_re_str));
                re_array[0] = re_temp;
            }

            return &re_array[0].cre_re;
        }
    }

    /*
     * Couldn't find it, so try to compile the new RE.  To avoid leaking
     * resources on failure, we build into the re_temp local.
     */

    /* Convert pattern string to wide characters */
    pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
    pattern_len = pg_mb2wchar_with_len(text_re_val,
                                       pattern,
                                       text_re_len);

    regcomp_result = pg_regcomp(&re_temp.cre_re,
                                pattern,
                                pattern_len,
                                cflags,
                                collation);

    /* the wide-character copy is not needed once compilation has run */
    pfree(pattern);

    if (regcomp_result != REG_OKAY)
    {
        /* re didn't compile (no need for pg_regfree, if so) */
        pg_regerror(regcomp_result, &re_temp.cre_re, errMsg, sizeof(errMsg));
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                 errmsg("invalid regular expression: %s", errMsg)));
    }

    /*
     * We use malloc/free for the cre_pat field because the storage has to
     * persist across transactions, and because we want to get control back on
     * out-of-memory.  The Max() is because some malloc implementations return
     * NULL for malloc(0).
     */
    re_temp.cre_pat = malloc(Max(text_re_len, 1));
    if (re_temp.cre_pat == NULL)
    {
        /* release the compiled regex before reporting, so it is not leaked */
        pg_regfree(&re_temp.cre_re);
        ereport(ERROR,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory")));
    }
    memcpy(re_temp.cre_pat, text_re_val, text_re_len);
    re_temp.cre_pat_len = text_re_len;
    re_temp.cre_flags = cflags;
    re_temp.cre_collation = collation;

    /*
     * Okay, we have a valid new item in re_temp; insert it into the storage
     * array.  Discard last entry if needed.
     */
    if (num_res >= MAX_CACHED_RES)
    {
        /* cache is full: free the entry in the last slot */
        --num_res;
        Assert(num_res < MAX_CACHED_RES);
        pg_regfree(&re_array[num_res].cre_re);
        free(re_array[num_res].cre_pat);
    }

    /* shift the surviving entries down to open up slot 0 */
    if (num_res > 0)
        memmove(&re_array[1], &re_array[0], num_res * sizeof(cached_re_str));

    re_array[0] = re_temp;
    num_res++;

    return &re_array[0].cre_re;
}

static bool RE_compile_and_execute ( text * text_re,
char *  dat,
int  dat_len,
int  cflags,
Oid  collation,
int  nmatch,
regmatch_t * pmatch 
) [static]

Definition at line 328 of file regexp.c.

References RE_compile_and_cache(), and RE_execute().

Referenced by nameicregexeq(), nameicregexne(), nameregexeq(), nameregexne(), texticregexeq(), texticregexne(), textregexeq(), and textregexne().

{
    /*
     * Obtain the compiled form of text_re (from the cache, compiling if
     * necessary) and run it against the dat_len bytes at dat.  Returns
     * whatever RE_execute() reports.
     */
    return RE_execute(RE_compile_and_cache(text_re, cflags, collation),
                      dat, dat_len, nmatch, pmatch);
}

static bool RE_execute ( regex_t * re,
char *  dat,
int  dat_len,
int  nmatch,
regmatch_t * pmatch 
) [static]

Definition at line 294 of file regexp.c.

References palloc(), pfree(), pg_mb2wchar_with_len(), and RE_wchar_execute().

Referenced by RE_compile_and_execute(), and textregexsubstr().

{
    pg_wchar   *wide_buf;
    int         wide_len;
    bool        found;

    /*
     * Run an already-compiled regex against a string given in the
     * multibyte form (dat, dat_len).  The string is first widened to
     * pg_wchar in a temporary buffer, which is released before returning.
     */

    /* one pg_wchar per input byte, plus one extra slot */
    wide_buf = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
    wide_len = pg_mb2wchar_with_len(dat, wide_buf, dat_len);

    /* search from the start of the string */
    found = RE_wchar_execute(re, wide_buf, wide_len, 0, nmatch, pmatch);

    pfree(wide_buf);

    return found;
}

static bool RE_wchar_execute ( regex_t * re,
pg_wchar * data,
int  data_len,
int  start_search,
int  nmatch,
regmatch_t * pmatch 
) [static]

Definition at line 252 of file regexp.c.

References ereport, errcode(), errmsg(), ERROR, NULL, pg_regerror(), pg_regexec(), REG_NOMATCH, and REG_OKAY.

Referenced by RE_execute(), and setup_regexp_matches().

{
    int         rc;

    /*
     * Execute a compiled regex against wide-character data, starting the
     * search at start_search.  Returns true on a match, false on no match;
     * any other result from pg_regexec() is reported as an error.
     */
    rc = pg_regexec(re,
                    data,
                    data_len,
                    start_search,
                    NULL,       /* no details */
                    nmatch,
                    pmatch,
                    0);

    if (rc == REG_OKAY)
        return true;
    if (rc == REG_NOMATCH)
        return false;

    /* Neither match nor no-match: the regex engine itself failed */
    {
        char        errMsg[100];

        pg_regerror(rc, re, errMsg, sizeof(errMsg));
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                 errmsg("regular expression failed: %s", errMsg)));
    }

    /* rc is not REG_OKAY here, so the answer is still false */
    return false;
}

char* regexp_fixed_prefix ( text * text_re,
bool  case_insensitive,
Oid  collation,
bool * exact 
)

Definition at line 1180 of file regexp.c.

References Assert, ereport, errcode(), errmsg(), ERROR, free, palloc(), pg_database_encoding_max_length(), pg_regerror(), pg_regprefix(), pg_wchar2mb_with_len(), RE_compile_and_cache(), REG_EXACT, REG_NOMATCH, and REG_PREFIX.

Referenced by regex_fixed_prefix().

{
    regex_t    *re;
    pg_wchar   *wide_prefix;
    size_t      prefix_len;
    size_t      buf_size;
    char       *result;
    int         cflags;
    int         status;

    /*
     * Ask the regex engine whether every match of text_re must begin with
     * a fixed string.  Returns that string as a palloc'd buffer, or NULL
     * when pg_regprefix() reports REG_NOMATCH.  *exact is set to true only
     * for REG_EXACT and is false otherwise.
     */
    *exact = false;             /* unless REG_EXACT is reported below */

    /* Compile (or fetch from cache) as an advanced RE */
    cflags = case_insensitive ? (REG_ADVANCED | REG_ICASE) : REG_ADVANCED;
    re = RE_compile_and_cache(text_re, cflags, collation);

    /* Let the regex library look for a fixed prefix */
    status = pg_regprefix(re, &wide_prefix, &prefix_len);

    if (status == REG_NOMATCH)
        return NULL;

    if (status == REG_EXACT)
        *exact = true;
    else if (status != REG_PREFIX)
    {
        /* anything else is an engine failure */
        char        errMsg[100];

        pg_regerror(status, re, errMsg, sizeof(errMsg));
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                 errmsg("regular expression failed: %s", errMsg)));
    }

    /*
     * REG_PREFIX or REG_EXACT: convert the wide-character prefix back to
     * the database encoding.  The buffer allows the maximum encoded length
     * per character plus one extra byte.
     */
    buf_size = pg_database_encoding_max_length() * prefix_len + 1;
    result = (char *) palloc(buf_size);
    prefix_len = pg_wchar2mb_with_len(wide_prefix, result, prefix_len);
    Assert(prefix_len < buf_size);

    /* the wide prefix is released with free(), not pfree() */
    free(wide_prefix);

    return result;
}

Datum regexp_matches ( PG_FUNCTION_ARGS   ) 

Definition at line 783 of file regexp.c.

References build_regexp_matches_result(), cleanup_regexp_matches(), regexp_matches_ctx::elems, MemoryContextSwitchTo(), FuncCallContext::multi_call_memory_ctx, regexp_matches_ctx::next_match, regexp_matches_ctx::nmatches, regexp_matches_ctx::npatterns, regexp_matches_ctx::nulls, palloc(), PG_GET_COLLATION, PG_GETARG_TEXT_P_COPY, PG_GETARG_TEXT_PP, PG_GETARG_TEXT_PP_IF_EXISTS, PointerGetDatum, setup_regexp_matches(), SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, and FuncCallContext::user_fctx.

Referenced by regexp_matches_no_flags().

{
    /*
     * Set-returning function.  The first call runs the whole regex search
     * via setup_regexp_matches() and stashes the result in the multi-call
     * context; every call (including the first) then returns one array of
     * captured substrings for the next recorded match, until none remain.
     */
    FuncCallContext *funcctx;
    regexp_matches_ctx *matchctx;

    if (SRF_IS_FIRSTCALL())
    {
        /*
         * pattern and flags are fetched before the memory context switch
         * below; flags is NULL when the function was called with fewer than
         * three arguments.
         */
        text       *pattern = PG_GETARG_TEXT_PP(1);
        text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
        MemoryContext oldcontext;

        funcctx = SRF_FIRSTCALL_INIT();
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* be sure to copy the input string into the multi-call ctx */
        /* trailing args: force_glob=false, use_subpatterns=true, ignore_degenerate=false */
        matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
                                        flags,
                                        PG_GET_COLLATION(),
                                        false, true, false);

        /* Pre-create workspace that build_regexp_matches_result needs */
        matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
        matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);

        MemoryContextSwitchTo(oldcontext);
        funcctx->user_fctx = (void *) matchctx;
    }

    funcctx = SRF_PERCALL_SETUP();
    matchctx = (regexp_matches_ctx *) funcctx->user_fctx;

    /* still have unreturned matches: emit the next one and advance */
    if (matchctx->next_match < matchctx->nmatches)
    {
        ArrayType  *result_ary;

        result_ary = build_regexp_matches_result(matchctx);
        matchctx->next_match++;
        SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
    }

    /* release space in multi-call ctx to avoid intraquery memory leak */
    cleanup_regexp_matches(matchctx);

    SRF_RETURN_DONE(funcctx);
}

Datum regexp_matches_no_flags ( PG_FUNCTION_ARGS   ) 

Definition at line 831 of file regexp.c.

References regexp_matches().

{
    /*
     * Forward the call unchanged to regexp_matches().  That function reads
     * its flags argument with PG_GETARG_TEXT_PP_IF_EXISTS(2), which yields
     * NULL when fewer than three arguments were passed.
     */
    return regexp_matches(fcinfo);
}

Datum regexp_split_to_array ( PG_FUNCTION_ARGS   ) 

Definition at line 1098 of file regexp.c.

References accumArrayResult(), build_regexp_split_result(), CurrentMemoryContext, makeArrayResult(), regexp_matches_ctx::next_match, regexp_matches_ctx::nmatches, PG_GET_COLLATION, PG_GETARG_TEXT_PP, PG_GETARG_TEXT_PP_IF_EXISTS, PG_RETURN_ARRAYTYPE_P, setup_regexp_matches(), and TEXTOID.

Referenced by regexp_split_to_array_no_flags().

{
    regexp_matches_ctx *splitctx;
    ArrayBuildState *accum = NULL;

    /*
     * Split argument 0 at every match of the pattern in argument 1 and
     * return the pieces as one text array.  The optional third argument
     * carries regex option letters.
     */

    /* trailing args: force_glob, !use_subpatterns, ignore_degenerate */
    splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
                                    PG_GETARG_TEXT_PP(1),
                                    PG_GETARG_TEXT_PP_IF_EXISTS(2),
                                    PG_GET_COLLATION(),
                                    true, false, true);

    /*
     * There is one more piece than there are matches, hence "<=": the last
     * iteration picks up the text after the final match.
     */
    for (; splitctx->next_match <= splitctx->nmatches; splitctx->next_match++)
    {
        Datum       piece = build_regexp_split_result(splitctx);

        accum = accumArrayResult(accum,
                                 piece,
                                 false,
                                 TEXTOID,
                                 CurrentMemoryContext);
    }

    /*
     * cleanup_regexp_matches is deliberately not called: it would pfree the
     * input string, which was not copied here.  The space is not in a
     * long-lived memory context anyway.
     */

    PG_RETURN_ARRAYTYPE_P(makeArrayResult(accum, CurrentMemoryContext));
}

Datum regexp_split_to_array_no_flags ( PG_FUNCTION_ARGS   ) 

Definition at line 1130 of file regexp.c.

References regexp_split_to_array().

{
    /*
     * Forward the call unchanged to regexp_split_to_array().  That function
     * reads its flags argument with PG_GETARG_TEXT_PP_IF_EXISTS(2), which
     * yields NULL when fewer than three arguments were passed.
     */
    return regexp_split_to_array(fcinfo);
}

Datum regexp_split_to_table ( PG_FUNCTION_ARGS   ) 

Definition at line 1044 of file regexp.c.

References build_regexp_split_result(), cleanup_regexp_matches(), MemoryContextSwitchTo(), FuncCallContext::multi_call_memory_ctx, regexp_matches_ctx::next_match, regexp_matches_ctx::nmatches, PG_GET_COLLATION, PG_GETARG_TEXT_P_COPY, PG_GETARG_TEXT_PP, PG_GETARG_TEXT_PP_IF_EXISTS, setup_regexp_matches(), SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, and FuncCallContext::user_fctx.

Referenced by regexp_split_to_table_no_flags().

{
    /*
     * Set-returning function.  The first call finds all pattern matches via
     * setup_regexp_matches(); each call then returns the next piece of the
     * input string lying between matches, as produced by
     * build_regexp_split_result().
     */
    FuncCallContext *funcctx;
    regexp_matches_ctx *splitctx;

    if (SRF_IS_FIRSTCALL())
    {
        /*
         * pattern and flags are fetched before the memory context switch
         * below; flags is NULL when the function was called with fewer than
         * three arguments.
         */
        text       *pattern = PG_GETARG_TEXT_PP(1);
        text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
        MemoryContext oldcontext;

        funcctx = SRF_FIRSTCALL_INIT();
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* be sure to copy the input string into the multi-call ctx */
        /* trailing args: force_glob=true, use_subpatterns=false, ignore_degenerate=true */
        splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
                                        flags,
                                        PG_GET_COLLATION(),
                                        true, false, true);

        MemoryContextSwitchTo(oldcontext);
        funcctx->user_fctx = (void *) splitctx;
    }

    funcctx = SRF_PERCALL_SETUP();
    splitctx = (regexp_matches_ctx *) funcctx->user_fctx;

    /*
     * "<=" rather than "<": there is one more piece than there are matches,
     * the last being the text after the final match.
     */
    if (splitctx->next_match <= splitctx->nmatches)
    {
        Datum       result = build_regexp_split_result(splitctx);

        splitctx->next_match++;
        SRF_RETURN_NEXT(funcctx, result);
    }

    /* release space in multi-call ctx to avoid intraquery memory leak */
    cleanup_regexp_matches(splitctx);

    SRF_RETURN_DONE(funcctx);
}

Datum regexp_split_to_table_no_flags ( PG_FUNCTION_ARGS   ) 

Definition at line 1087 of file regexp.c.

References regexp_split_to_table().

{
    /*
     * Forward the call unchanged to regexp_split_to_table().  That function
     * reads its flags argument with PG_GETARG_TEXT_PP_IF_EXISTS(2), which
     * yields NULL when fewer than three arguments were passed.
     */
    return regexp_split_to_table(fcinfo);
}

static regexp_matches_ctx * setup_regexp_matches ( text * orig_str,
text * pattern,
text * flags,
Oid  collation,
bool  force_glob,
bool  use_subpatterns,
bool  ignore_degenerate 
) [static]

Definition at line 849 of file regexp.c.

References pg_re_flags::cflags, ereport, errcode(), errmsg(), ERROR, pg_re_flags::glob, i, regexp_matches_ctx::match_locs, regexp_matches_ctx::nmatches, regexp_matches_ctx::npatterns, regexp_matches_ctx::orig_str, palloc(), palloc0(), parse_re_flags(), pfree(), pg_mb2wchar_with_len(), RE_compile_and_cache(), regex_t::re_nsub, RE_wchar_execute(), repalloc(), regmatch_t::rm_eo, regmatch_t::rm_so, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by regexp_matches(), regexp_split_to_array(), and regexp_split_to_table().

{
    /*
     * Run the regex search for regexp_matches / regexp_split_* and record
     * every match location in a freshly allocated regexp_matches_ctx.
     *
     * orig_str          string to search; the pointer is stored in the
     *                   result, not copied
     * pattern, flags    regex source and option letters (flags may be NULL)
     * collation         passed through to RE_compile_and_cache()
     * force_glob        find all matches regardless of flags, and reject an
     *                   explicit 'g' option with an error
     * use_subpatterns   record locations of parenthesized subexpressions
     *                   instead of the whole match, when the regex has any
     * ignore_degenerate skip certain zero-length matches (see loop below)
     *
     * On return, match_locs holds npatterns (start, end) pairs per match
     * and nmatches counts the matches stored.  The offsets come from
     * matching against the pg_wchar form of the string.  The context is
     * obtained with palloc0, so fields not assigned here (e.g. next_match,
     * elems, nulls) start out zeroed.
     */
    regexp_matches_ctx *matchctx = palloc0(sizeof(regexp_matches_ctx));
    int         orig_len;
    pg_wchar   *wide_str;
    int         wide_len;
    pg_re_flags re_flags;
    regex_t    *cpattern;
    regmatch_t *pmatch;
    int         pmatch_len;
    int         array_len;
    int         array_idx;
    int         prev_match_end;
    int         start_search;

    /* save original string --- we'll extract result substrings from it */
    matchctx->orig_str = orig_str;

    /* convert string to pg_wchar form for matching */
    orig_len = VARSIZE_ANY_EXHDR(orig_str);
    wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
    wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);

    /* determine options */
    parse_re_flags(&re_flags, flags);
    if (force_glob)
    {
        /* user mustn't specify 'g' for regexp_split */
        if (re_flags.glob)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("regexp_split does not support the global option")));
        /* but we find all the matches anyway */
        re_flags.glob = true;
    }

    /* set up the compiled pattern */
    cpattern = RE_compile_and_cache(pattern, re_flags.cflags, collation);

    /* do we want to remember subpatterns? */
    if (use_subpatterns && cpattern->re_nsub > 0)
    {
        /* pmatch[0] is the whole match, so one extra slot is needed */
        matchctx->npatterns = cpattern->re_nsub;
        pmatch_len = cpattern->re_nsub + 1;
    }
    else
    {
        /* no subexpressions (or not wanted): track the whole match only */
        use_subpatterns = false;
        matchctx->npatterns = 1;
        pmatch_len = 1;
    }

    /* temporary output space for RE package */
    pmatch = palloc(sizeof(regmatch_t) * pmatch_len);

    /* the real output space (grown dynamically if needed) */
    array_len = re_flags.glob ? 256 : 32;
    matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
    array_idx = 0;

    /* search for the pattern, perhaps repeatedly */
    prev_match_end = 0;
    start_search = 0;
    while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
                            pmatch_len, pmatch))
    {
        /*
         * If requested, ignore degenerate matches, which are zero-length
         * matches occurring at the start or end of a string or just after a
         * previous match.
         */
        if (!ignore_degenerate ||
            (pmatch[0].rm_so < wide_len &&
             pmatch[0].rm_eo > prev_match_end))
        {
            /* enlarge output space if needed */
            while (array_idx + matchctx->npatterns * 2 > array_len)
            {
                array_len *= 2;
                matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
                                                    sizeof(int) * array_len);
            }

            /* save this match's locations */
            if (use_subpatterns)
            {
                int         i;

                /* start at 1: slot 0 is the overall match, not a subpattern */
                for (i = 1; i <= matchctx->npatterns; i++)
                {
                    matchctx->match_locs[array_idx++] = pmatch[i].rm_so;
                    matchctx->match_locs[array_idx++] = pmatch[i].rm_eo;
                }
            }
            else
            {
                matchctx->match_locs[array_idx++] = pmatch[0].rm_so;
                matchctx->match_locs[array_idx++] = pmatch[0].rm_eo;
            }
            matchctx->nmatches++;
        }
        /* updated even for skipped matches, so the degenerate test stays right */
        prev_match_end = pmatch[0].rm_eo;

        /* if not glob, stop after one match */
        if (!re_flags.glob)
            break;

        /*
         * Advance search position.  Normally we start just after the end of
         * the previous match, but always advance at least one character (the
         * special case can occur if the pattern matches zero characters just
         * after the prior match or at the end of the string).
         */
        if (start_search < pmatch[0].rm_eo)
            start_search = pmatch[0].rm_eo;
        else
            start_search++;
        if (start_search > wide_len)
            break;
    }

    /* Clean up temp storage */
    pfree(wide_str);
    pfree(pmatch);

    return matchctx;
}

Datum similar_escape ( PG_FUNCTION_ARGS   ) 

Definition at line 647 of file regexp.c.

References ereport, errcode(), errhint(), errmsg(), ERROR, palloc(), PG_ARGISNULL, PG_GETARG_TEXT_PP, PG_RETURN_NULL, PG_RETURN_TEXT_P, SET_VARSIZE, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

{
    /*
     * Convert a SQL99-style pattern (argument 0) plus optional escape
     * string (argument 1) into a regular expression wrapped as ^(?: ... )$.
     *
     * Returns SQL NULL when argument 0 is NULL.  A NULL escape argument
     * means backslash; an empty escape string means "no escape character";
     * any escape string whose length (as given by VARSIZE_ANY_EXHDR) is
     * neither 0 nor 1 raises ERRCODE_INVALID_ESCAPE_SEQUENCE.
     *
     * The pattern is processed one byte at a time by the loop below.
     */
    text       *pat_text;
    text       *esc_text;
    text       *result;
    char       *p,
               *e,
               *r;
    int         plen,
                elen;
    bool        afterescape = false;    /* previous byte was the escape char */
    bool        incharclass = false;    /* currently between '[' and ']' */
    int         nquotes = 0;            /* escaped double quotes seen so far */

    /* This function is not strict, so must test explicitly */
    if (PG_ARGISNULL(0))
        PG_RETURN_NULL();
    pat_text = PG_GETARG_TEXT_PP(0);
    p = VARDATA_ANY(pat_text);
    plen = VARSIZE_ANY_EXHDR(pat_text);
    if (PG_ARGISNULL(1))
    {
        /* No ESCAPE clause provided; default to backslash as escape */
        e = "\\";
        elen = 1;
    }
    else
    {
        esc_text = PG_GETARG_TEXT_PP(1);
        e = VARDATA_ANY(esc_text);
        elen = VARSIZE_ANY_EXHDR(esc_text);
        if (elen == 0)
            e = NULL;           /* no escape character */
        else if (elen != 1)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
                     errmsg("invalid escape string"),
                  errhint("Escape string must be empty or one character.")));
    }

    /*----------
     * We surround the transformed input string with
     *          ^(?: ... )$
     * which requires some explanation.  We need "^" and "$" to force
     * the pattern to match the entire input string as per SQL99 spec.
     * The "(?:" and ")" are a non-capturing set of parens; we have to have
     * parens in case the string contains "|", else the "^" and "$" will
     * be bound into the first and last alternatives which is not what we
     * want, and the parens must be non capturing because we don't want them
     * to count when selecting output for SUBSTRING.
     *----------
     */

    /*
     * We need room for the prefix/postfix plus as many as 3 output bytes per
     * input byte; since the input is at most 1GB this can't overflow
     */
    /* the 6 covers the four bytes of "^(?:" plus the two bytes of ")$" */
    result = (text *) palloc(VARHDRSZ + 6 + 3 * plen);
    r = VARDATA(result);

    *r++ = '^';
    *r++ = '(';
    *r++ = '?';
    *r++ = ':';

    while (plen > 0)
    {
        char        pchar = *p;

        if (afterescape)
        {
            /*
             * Escaped double quotes alternate between '(' and ')'; any
             * other escaped byte is emitted with a backslash in front.
             */
            if (pchar == '"' && !incharclass)   /* for SUBSTRING patterns */
                *r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
            else
            {
                *r++ = '\\';
                *r++ = pchar;
            }
            afterescape = false;
        }
        else if (e && pchar == *e)
        {
            /* SQL99 escape character; do not send to output */
            afterescape = true;
        }
        else if (incharclass)
        {
            /* inside [...]: copy bytes through, doubling any backslash */
            if (pchar == '\\')
                *r++ = '\\';
            *r++ = pchar;
            if (pchar == ']')
                incharclass = false;
        }
        else if (pchar == '[')
        {
            *r++ = pchar;
            incharclass = true;
        }
        else if (pchar == '%')
        {
            /* '%' is written out as ".*" */
            *r++ = '.';
            *r++ = '*';
        }
        else if (pchar == '_')
            *r++ = '.';         /* '_' is written out as "." */
        else if (pchar == '(')
        {
            /* convert to non-capturing parenthesis */
            *r++ = '(';
            *r++ = '?';
            *r++ = ':';
        }
        else if (pchar == '\\' || pchar == '.' ||
                 pchar == '^' || pchar == '$')
        {
            /* these four bytes are written out backslash-escaped */
            *r++ = '\\';
            *r++ = pchar;
        }
        else
            *r++ = pchar;
        p++, plen--;
    }

    *r++ = ')';
    *r++ = '$';

    /* total size is the header plus however many bytes were written */
    SET_VARSIZE(result, r - ((char *) result));

    PG_RETURN_TEXT_P(result);
}

Datum texticregexeq ( PG_FUNCTION_ARGS   ) 
Datum texticregexne ( PG_FUNCTION_ARGS   ) 
Datum textregexeq ( PG_FUNCTION_ARGS   ) 
Datum textregexne ( PG_FUNCTION_ARGS   ) 
Datum textregexreplace ( PG_FUNCTION_ARGS   ) 
Datum textregexreplace_noopt ( PG_FUNCTION_ARGS   ) 
Datum textregexsubstr ( PG_FUNCTION_ARGS   ) 

Definition at line 549 of file regexp.c.

References DirectFunctionCall3, Int32GetDatum, PG_GET_COLLATION, PG_GETARG_TEXT_PP, PG_RETURN_NULL, PointerGetDatum, RE_compile_and_cache(), RE_execute(), regex_t::re_nsub, REG_ADVANCED, regmatch_t::rm_eo, regmatch_t::rm_so, text_substr(), VARDATA_ANY, and VARSIZE_ANY_EXHDR.

{
    text       *subject = PG_GETARG_TEXT_PP(0);
    text       *pattern = PG_GETARG_TEXT_PP(1);
    regex_t    *re;
    regmatch_t  pmatch[2];
    int         which;
    int         so;
    int         eo;

    /*
     * Return the substring of argument 0 that matches the regex in
     * argument 1, or SQL NULL when there is no match.  If the regex has a
     * parenthesized subexpression, the text matched by the first one is
     * returned rather than the whole match.
     */
    re = RE_compile_and_cache(pattern, REG_ADVANCED, PG_GET_COLLATION());

    /*
     * Two regmatch_t slots are requested: [0] receives the overall match,
     * [1] the first parenthesized subexpression (if any).
     */
    if (!RE_execute(re,
                    VARDATA_ANY(subject), VARSIZE_ANY_EXHDR(subject),
                    2, pmatch))
        PG_RETURN_NULL();       /* definitely no match */

    /* choose the subexpression slot when the regex has one */
    which = (re->re_nsub > 0) ? 1 : 0;
    so = pmatch[which].rm_so;
    eo = pmatch[which].rm_eo;

    /*
     * The overall pattern can match while the subexpression does not take
     * part: 'foo(bar)?' matches 'foo' with no subexpression match.  So
     * this second check is needed in addition to the one above.
     */
    if (so < 0 || eo < 0)
        PG_RETURN_NULL();

    return DirectFunctionCall3(text_substr,
                               PointerGetDatum(subject),
                               Int32GetDatum(so + 1),
                               Int32GetDatum(eo - so));
}


Variable Documentation

int num_res = 0 [static]

Definition at line 103 of file regexp.c.

Referenced by RE_compile_and_cache().

cached_re_str re_array[MAX_CACHED_RES] [static]

Definition at line 104 of file regexp.c.