Header And Logo

PostgreSQL
| The world's most advanced open source database.

Data Structures | Defines | Typedefs | Functions

trgm_regexp.c File Reference

#include "postgres.h"
#include "trgm.h"
#include "regex/regexport.h"
#include "tsearch/ts_locale.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
Include dependency graph for trgm_regexp.c:

Go to the source code of this file.

Data Structures

struct  trgm_mb_char
struct  TrgmColorInfo
struct  TrgmPrefix
struct  ColorTrgm
struct  TrgmStateKey
struct  TrgmState
struct  TrgmArc
struct  TrgmArcInfo
struct  ColorTrgmInfo
struct  TrgmNFA
struct  TrgmPackedArc
struct  TrgmPackedState
struct  TrgmPackedGraph
struct  TrgmPackArcInfo

Defines

#define MAX_EXPANDED_STATES   128
#define MAX_EXPANDED_ARCS   1024
#define MAX_TRGM_COUNT   256
#define COLOR_COUNT_LIMIT   256
#define COLOR_UNKNOWN   (-1)
#define COLOR_BLANK   (-2)

Typedefs

typedef int TrgmColor
typedef struct TrgmState TrgmState

Functions

static TRGMcreateTrgmNFAInternal (regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
static void RE_compile (regex_t *regex, text *text_re, int cflags, Oid collation)
static void getColorInfo (regex_t *regex, TrgmNFA *trgmNFA)
static bool convertPgWchar (pg_wchar c, trgm_mb_char *result)
static void transformGraph (TrgmNFA *trgmNFA)
static void processState (TrgmNFA *trgmNFA, TrgmState *state)
static void addKey (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
static void addKeyToQueue (TrgmNFA *trgmNFA, TrgmStateKey *key)
static void addArcs (TrgmNFA *trgmNFA, TrgmState *state)
static void addArc (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
static bool validArcLabel (TrgmStateKey *key, TrgmColor co)
static TrgmStategetState (TrgmNFA *trgmNFA, TrgmStateKey *key)
static bool prefixContains (TrgmPrefix *prefix1, TrgmPrefix *prefix2)
static bool selectColorTrigrams (TrgmNFA *trgmNFA)
static TRGMexpandColorTrigrams (TrgmNFA *trgmNFA, MemoryContext rcontext)
static void fillTrgm (trgm *ptrgm, trgm_mb_char s[3])
static void mergeStates (TrgmState *state1, TrgmState *state2)
static int colorTrgmInfoCmp (const void *p1, const void *p2)
static int colorTrgmInfoCountCmp (const void *p1, const void *p2)
static TrgmPackedGraphpackGraph (TrgmNFA *trgmNFA, MemoryContext rcontext)
static int packArcInfoCmp (const void *a1, const void *a2)
TRGMcreateTrgmNFA (text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
bool trigramsMatchGraph (TrgmPackedGraph *graph, bool *check)

Define Documentation

#define COLOR_BLANK   (-2)
#define COLOR_COUNT_LIMIT   256

Definition at line 207 of file trgm_regexp.c.

Referenced by getColorInfo().

#define COLOR_UNKNOWN   (-1)

Definition at line 255 of file trgm_regexp.c.

Referenced by prefixContains(), and validArcLabel().

#define MAX_EXPANDED_ARCS   1024

Definition at line 205 of file trgm_regexp.c.

Referenced by transformGraph().

#define MAX_EXPANDED_STATES   128

Definition at line 204 of file trgm_regexp.c.

Referenced by transformGraph().

#define MAX_TRGM_COUNT   256

Definition at line 206 of file trgm_regexp.c.

Referenced by selectColorTrigrams().


Typedef Documentation

typedef int TrgmColor

Definition at line 252 of file trgm_regexp.c.

typedef struct TrgmState TrgmState

Function Documentation

static void addArc ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key,
TrgmColor  co,
TrgmStateKey destKey 
) [static]

Definition at line 1250 of file trgm_regexp.c.

References TrgmState::arcs, TrgmNFA::arcsCount, TrgmPrefix::colors, ColorTrgm::colors, TrgmArc::ctrgm, TrgmState::enterKeys, lappend(), lfirst, TrgmStateKey::nstate, palloc(), TrgmStateKey::prefix, prefixContains(), TrgmArc::target, and validArcLabel().

Referenced by addArcs().

{
    TrgmArc    *arc;
    ListCell   *cell;

    /* Do nothing if this wouldn't be a valid arc label trigram */
    if (!validArcLabel(key, co))
        return;

    /*
     * Check if we are going to reach key which is covered by a key which is
     * already listed in this state.  If so arc is useless: the NFA can bypass
     * it through a path that doesn't require any predictable trigram, so
     * whether the arc's trigram is present or not doesn't really matter.
     */
    foreach(cell, state->enterKeys)
    {
        TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);

        if (existingKey->nstate == destKey->nstate &&
            prefixContains(&existingKey->prefix, &destKey->prefix))
            return;
    }

    /* Checks were successful, add new arc */
    arc = (TrgmArc *) palloc(sizeof(TrgmArc));
    arc->target = getState(trgmNFA, destKey);
    arc->ctrgm.colors[0] = key->prefix.colors[0];
    arc->ctrgm.colors[1] = key->prefix.colors[1];
    arc->ctrgm.colors[2] = co;

    state->arcs = lappend(state->arcs, arc);
    trgmNFA->arcsCount++;
}

static void addArcs ( TrgmNFA trgmNFA,
TrgmState state 
) [static]

Definition at line 1158 of file trgm_regexp.c.

References addArc(), regex_arc_t::co, COLOR_BLANK, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmState::enterKeys, TrgmColorInfo::expandable, i, lfirst, MemSet, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, TrgmNFA::regex, regex_arc_t::to, and TrgmColorInfo::wordCharsCount.

Referenced by processState().

{
    TrgmStateKey destKey;
    ListCell   *cell;
    regex_arc_t *arcs;
    int         arcsCount,
                i;

    /*
     * Ensure any pad bytes in destKey are zero, since it may get used as a
     * hashtable key by getState.
     */
    MemSet(&destKey, 0, sizeof(destKey));

    /*
     * Iterate over enter keys associated with this expanded-graph state. This
     * includes both the state's own stateKey, and any enter keys we added to
     * it during addKey (which represent expanded-graph states that are not
     * distinguishable from this one by means of trigrams).  For each such
     * enter key, examine all the out-arcs of the key's underlying NFA state,
     * and try to make a trigram arc leading to where the out-arc leads.
     * (addArc will deal with whether the arc is valid or not.)
     */
    foreach(cell, state->enterKeys)
    {
        TrgmStateKey *key = (TrgmStateKey *) lfirst(cell);

        arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
        arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
        pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);

        for (i = 0; i < arcsCount; i++)
        {
            regex_arc_t *arc = &arcs[i];
            TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];

            /*
             * Ignore non-expandable colors; addKey already handled the case.
             *
             * We need no special check for begin/end pseudocolors here.  We
             * don't need to do any processing for them, and they will be
             * marked non-expandable since the regex engine will have reported
             * them that way.
             */
            if (!colorInfo->expandable)
                continue;

            if (colorInfo->containsNonWord)
            {
                /*
                 * Color includes non-word character(s).
                 *
                 * Generate an arc, treating this transition as occurring on
                 * BLANK.  This allows word-ending trigrams to be manufactured
                 * if possible.
                 */
                destKey.prefix.colors[0] = key->prefix.colors[1];
                destKey.prefix.colors[1] = COLOR_BLANK;
                destKey.nstate = arc->to;

                addArc(trgmNFA, state, key, COLOR_BLANK, &destKey);
            }

            if (colorInfo->wordCharsCount > 0)
            {
                /*
                 * Color includes word character(s).
                 *
                 * Generate an arc.  Color is pushed into prefix of target
                 * state.
                 */
                destKey.prefix.colors[0] = key->prefix.colors[1];
                destKey.prefix.colors[1] = arc->co;
                destKey.nstate = arc->to;

                addArc(trgmNFA, state, key, arc->co, &destKey);
            }
        }

        pfree(arcs);
    }
}

static void addKey ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key 
) [static]

Definition at line 975 of file trgm_regexp.c.

References addKeyToQueue(), regex_arc_t::co, COLOR_BLANK, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmState::enterKeys, TrgmColorInfo::expandable, TrgmState::fin, i, lappend(), lfirst, list_delete_cell(), list_head(), lnext, MemSet, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_colorisbegin(), pg_reg_colorisend(), pg_reg_getfinalstate(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, prefixContains(), TrgmNFA::regex, regex_arc_t::to, validArcLabel(), and TrgmColorInfo::wordCharsCount.

Referenced by processState().

{
    regex_arc_t *arcs;
    TrgmStateKey destKey;
    ListCell   *cell,
               *prev,
               *next;
    int         i,
                arcsCount;

    /*
     * Ensure any pad bytes in destKey are zero, since it may get used as a
     * hashtable key by getState.
     */
    MemSet(&destKey, 0, sizeof(destKey));

    /*
     * Compare key to each existing enter key of the state to check for
     * redundancy.  We can drop either old key(s) or the new key if we find
     * redundancy.
     */
    prev = NULL;
    cell = list_head(state->enterKeys);
    while (cell)
    {
        TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);

        next = lnext(cell);
        if (existingKey->nstate == key->nstate)
        {
            if (prefixContains(&existingKey->prefix, &key->prefix))
            {
                /* This old key already covers the new key. Nothing to do */
                return;
            }
            if (prefixContains(&key->prefix, &existingKey->prefix))
            {
                /*
                 * The new key covers this old key. Remove the old key, it's
                 * no longer needed once we add this key to the list.
                 */
                state->enterKeys = list_delete_cell(state->enterKeys,
                                                    cell, prev);
            }
            else
                prev = cell;
        }
        else
            prev = cell;
        cell = next;
    }

    /* No redundancy, so add this key to the state's list */
    state->enterKeys = lappend(state->enterKeys, key);

    /* If state is now known final, mark it and we're done */
    if (key->nstate == pg_reg_getfinalstate(trgmNFA->regex))
    {
        state->fin = true;
        return;
    }

    /*
     * Loop through all outgoing arcs of the corresponding state in the
     * original NFA.
     */
    arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
    arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
    pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);

    for (i = 0; i < arcsCount; i++)
    {
        regex_arc_t *arc = &arcs[i];

        if (pg_reg_colorisbegin(trgmNFA->regex, arc->co))
        {
            /*
             * Start of line/string (^).  Trigram extraction treats start of
             * line same as start of word: double space prefix is added.
             * Hence, make an enter key showing we can reach the arc
             * destination with all-blank prefix.
             */
            destKey.prefix.colors[0] = COLOR_BLANK;
            destKey.prefix.colors[1] = COLOR_BLANK;
            destKey.nstate = arc->to;

            /* Add enter key to this state */
            addKeyToQueue(trgmNFA, &destKey);
        }
        else if (pg_reg_colorisend(trgmNFA->regex, arc->co))
        {
            /*
             * End of line/string ($).  We must consider this arc as a
             * transition that doesn't read anything.  The reason for adding
             * this enter key to the state is that if the arc leads to the
             * NFA's final state, we must mark this expanded state as final.
             */
            destKey.prefix.colors[0] = COLOR_UNKNOWN;
            destKey.prefix.colors[1] = COLOR_UNKNOWN;
            destKey.nstate = arc->to;

            /* Add enter key to this state */
            addKeyToQueue(trgmNFA, &destKey);
        }
        else
        {
            /* Regular color */
            TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];

            if (colorInfo->expandable)
            {
                if (colorInfo->containsNonWord &&
                    !validArcLabel(key, COLOR_BLANK))
                {
                    /*
                     * We can reach the arc destination after reading a
                     * non-word character, but the prefix is not something
                     * that addArc will accept with COLOR_BLANK, so no trigram
                     * arc can get made for this transition.  We must make an
                     * enter key to show that the arc destination is
                     * reachable.  Set it up with an all-blank prefix, since
                     * that corresponds to what the trigram extraction code
                     * will do at a word starting boundary.
                     */
                    destKey.prefix.colors[0] = COLOR_BLANK;
                    destKey.prefix.colors[1] = COLOR_BLANK;
                    destKey.nstate = arc->to;
                    addKeyToQueue(trgmNFA, &destKey);
                }

                if (colorInfo->wordCharsCount > 0 &&
                    !validArcLabel(key, arc->co))
                {
                    /*
                     * We can reach the arc destination after reading a word
                     * character, but the prefix is not something that addArc
                     * will accept, so no trigram arc can get made for this
                     * transition.  We must make an enter key to show that the
                     * arc destination is reachable.  The prefix for the enter
                     * key should reflect the info we have for this arc.
                     */
                    destKey.prefix.colors[0] = key->prefix.colors[1];
                    destKey.prefix.colors[1] = arc->co;
                    destKey.nstate = arc->to;
                    addKeyToQueue(trgmNFA, &destKey);
                }
            }
            else
            {
                /*
                 * Unexpandable color.  Add enter key with ambiguous prefix,
                 * showing we can reach the destination from this state, but
                 * the preceding colors will be uncertain.  (We do not set the
                 * first prefix color to key->prefix.colors[1], because a
                 * prefix of known followed by unknown is invalid.)
                 */
                destKey.prefix.colors[0] = COLOR_UNKNOWN;
                destKey.prefix.colors[1] = COLOR_UNKNOWN;
                destKey.nstate = arc->to;
                addKeyToQueue(trgmNFA, &destKey);
            }
        }
    }

    pfree(arcs);
}

static void addKeyToQueue ( TrgmNFA trgmNFA,
TrgmStateKey key 
) [static]

Definition at line 1146 of file trgm_regexp.c.

References TrgmNFA::keysQueue, lappend(), and palloc().

Referenced by addKey().

{
    TrgmStateKey *keyCopy = (TrgmStateKey *) palloc(sizeof(TrgmStateKey));

    memcpy(keyCopy, key, sizeof(TrgmStateKey));
    trgmNFA->keysQueue = lappend(trgmNFA->keysQueue, keyCopy);
}

static int colorTrgmInfoCmp ( const void *  p1,
const void *  p2 
) [static]

Definition at line 1739 of file trgm_regexp.c.

References ColorTrgmInfo::ctrgm, and memcmp().

Referenced by selectColorTrigrams().

{
    const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
    const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;

    return memcmp(&c1->ctrgm, &c2->ctrgm, sizeof(ColorTrgm));
}

static int colorTrgmInfoCountCmp ( const void *  p1,
const void *  p2 
) [static]

Definition at line 1752 of file trgm_regexp.c.

References ColorTrgmInfo::count.

Referenced by selectColorTrigrams().

{
    const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
    const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;

    if (c1->count < c2->count)
        return 1;
    else if (c1->count == c2->count)
        return 0;
    else
        return -1;
}

static bool convertPgWchar ( pg_wchar  c,
trgm_mb_char result 
) [static]

Definition at line 804 of file trgm_regexp.c.

References trgm_mb_char::bytes, lowerstr(), MAX_MULTIBYTE_CHAR_LEN, pfree(), and pg_wchar2mb_with_len().

Referenced by getColorInfo().

{
    /* "s" has enough space for a multibyte character and a trailing NUL */
    char        s[MAX_MULTIBYTE_CHAR_LEN + 1];

    /*
     * We can ignore the NUL character, since it can never appear in a PG text
     * string.  This avoids the need for various special cases when
     * reconstructing trigrams.
     */
    if (c == 0)
        return false;

    /* Do the conversion, making sure the result is NUL-terminated */
    memset(s, 0, sizeof(s));
    pg_wchar2mb_with_len(&c, s, 1);

    /*
     * In IGNORECASE mode, we can ignore uppercase characters.  We assume that
     * the regex engine generated both uppercase and lowercase equivalents
     * within each color, since we used the REG_ICASE option; so there's no
     * need to process the uppercase version.
     *
     * XXX this code is dependent on the assumption that lowerstr() works the
     * same as the regex engine's internal case folding machinery.  Might be
     * wiser to expose pg_wc_tolower and test whether c == pg_wc_tolower(c).
     * On the other hand, the trigrams in the index were created using
     * lowerstr(), so we're probably screwed if there's any incompatibility
     * anyway.
     */
#ifdef IGNORECASE
    {
        char       *lowerCased = lowerstr(s);

        if (strcmp(lowerCased, s) != 0)
        {
            pfree(lowerCased);
            return false;
        }
        pfree(lowerCased);
    }
#endif

    /* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
    strncpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
    return true;
}

TRGM* createTrgmNFA ( text text_re,
Oid  collation,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)

Definition at line 484 of file trgm_regexp.c.

References ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, ALLOCSET_DEFAULT_MINSIZE, AllocSetContextCreate(), createTrgmNFAInternal(), CurrentMemoryContext, MemoryContextDelete(), MemoryContextSwitchTo(), PG_CATCH, PG_END_TRY, PG_RE_THROW, pg_regfree(), PG_TRY, RE_compile(), REG_ADVANCED, and REG_ICASE.

Referenced by gin_extract_query_trgm(), and gtrgm_consistent().

{
    TRGM       *trg;
    regex_t     regex;
    MemoryContext tmpcontext;
    MemoryContext oldcontext;

    /*
     * This processing generates a great deal of cruft, which we'd like to
     * clean up before returning (since this function may be called in a
     * query-lifespan memory context).  Make a temp context we can work in so
     * that cleanup is easy.
     */
    tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
                                       "createTrgmNFA temporary context",
                                       ALLOCSET_DEFAULT_MINSIZE,
                                       ALLOCSET_DEFAULT_INITSIZE,
                                       ALLOCSET_DEFAULT_MAXSIZE);
    oldcontext = MemoryContextSwitchTo(tmpcontext);

    /*
     * Stage 1: Compile the regexp into a NFA, using the regexp library.
     */
#ifdef IGNORECASE
    RE_compile(&regex, text_re, REG_ADVANCED | REG_ICASE, collation);
#else
    RE_compile(&regex, text_re, REG_ADVANCED, collation);
#endif

    /*
     * Since the regexp library allocates its internal data structures with
     * malloc, we need to use a PG_TRY block to ensure that pg_regfree() gets
     * done even if there's an error.
     */
    PG_TRY();
    {
        trg = createTrgmNFAInternal(&regex, graph, rcontext);
    }
    PG_CATCH();
    {
        pg_regfree(&regex);
        PG_RE_THROW();
    }
    PG_END_TRY();

    pg_regfree(&regex);

    /* Clean up all the cruft we created */
    MemoryContextSwitchTo(oldcontext);
    MemoryContextDelete(tmpcontext);

    return trg;
}

static TRGM * createTrgmNFAInternal ( regex_t regex,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
) [static]

Definition at line 543 of file trgm_regexp.c.

References TrgmNFA::colorInfo, expandColorTrigrams(), TrgmState::fin, getColorInfo(), TrgmNFA::initState, TrgmNFA::ncolors, packGraph(), TrgmNFA::regex, selectColorTrigrams(), and transformGraph().

Referenced by createTrgmNFA().

{
    TRGM       *trg;
    TrgmNFA     trgmNFA;

    trgmNFA.regex = regex;

    /* Collect color information from the regex */
    getColorInfo(regex, &trgmNFA);

#ifdef TRGM_REGEXP_DEBUG
    printSourceNFA(regex, trgmNFA.colorInfo, trgmNFA.ncolors);
#endif

    /*
     * Stage 2: Create an expanded graph from the source NFA.
     */
    transformGraph(&trgmNFA);

#ifdef TRGM_REGEXP_DEBUG
    printTrgmNFA(&trgmNFA);
#endif

    /*
     * Fail if we were unable to make a nontrivial graph, ie it is possible to
     * get from the initial state to the final state without reading any
     * predictable trigram.
     */
    if (trgmNFA.initState->fin)
        return NULL;

    /*
     * Stage 3: Select color trigrams to expand.  Fail if too many trigrams.
     */
    if (!selectColorTrigrams(&trgmNFA))
        return NULL;

    /*
     * Stage 4: Expand color trigrams and pack graph into final
     * representation.
     */
    trg = expandColorTrigrams(&trgmNFA, rcontext);

    *graph = packGraph(&trgmNFA, rcontext);

#ifdef TRGM_REGEXP_DEBUG
    printTrgmPackedGraph(*graph, trg);
#endif

    return trg;
}

static TRGM * expandColorTrigrams ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
) [static]

Definition at line 1610 of file trgm_regexp.c.

References ARRKEY, trgm_mb_char::bytes, CALCGTSIZE, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, fillTrgm(), TRGM::flag, GETARR, i, MemoryContextAllocZero(), SET_VARSIZE, TrgmNFA::totalTrgmCount, TRGMHDRSIZE, TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

{
    TRGM       *trg;
    trgm       *p;
    int         i;
    TrgmColorInfo blankColor;
    trgm_mb_char blankChar;

    /* Set up "blank" color structure containing a single zero character */
    memset(blankChar.bytes, 0, sizeof(blankChar.bytes));
    blankColor.wordCharsCount = 1;
    blankColor.wordChars = &blankChar;

    /* Construct the trgm array */
    trg = (TRGM *)
        MemoryContextAllocZero(rcontext,
                               TRGMHDRSIZE +
                               trgmNFA->totalTrgmCount * sizeof(trgm));
    trg->flag = ARRKEY;
    SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, trgmNFA->totalTrgmCount));
    p = GETARR(trg);
    for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
    {
        ColorTrgmInfo *colorTrgm = &trgmNFA->colorTrgms[i];
        TrgmColorInfo *c[3];
        trgm_mb_char s[3];
        int         j,
                    i1,
                    i2,
                    i3;

        /* Ignore any unexpanded trigrams ... */
        if (!colorTrgm->expanded)
            continue;

        /* Get colors, substituting the dummy struct for COLOR_BLANK */
        for (j = 0; j < 3; j++)
        {
            if (colorTrgm->ctrgm.colors[j] != COLOR_BLANK)
                c[j] = &trgmNFA->colorInfo[colorTrgm->ctrgm.colors[j]];
            else
                c[j] = &blankColor;
        }

        /* Iterate over all possible combinations of colors' characters */
        for (i1 = 0; i1 < c[0]->wordCharsCount; i1++)
        {
            s[0] = c[0]->wordChars[i1];
            for (i2 = 0; i2 < c[1]->wordCharsCount; i2++)
            {
                s[1] = c[1]->wordChars[i2];
                for (i3 = 0; i3 < c[2]->wordCharsCount; i3++)
                {
                    s[2] = c[2]->wordChars[i3];
                    fillTrgm(p, s);
                    p++;
                }
            }
        }
    }

    return trg;
}

static void fillTrgm ( trgm ptrgm,
trgm_mb_char  s[3] 
) [static]

Definition at line 1678 of file trgm_regexp.c.

References trgm_mb_char::bytes, compact_trigram(), i, and MAX_MULTIBYTE_CHAR_LEN.

Referenced by expandColorTrigrams().

{
    char        str[3 * MAX_MULTIBYTE_CHAR_LEN],
               *p;
    int         i,
                j;

    /* Write multibyte string into "str" (we don't need null termination) */
    p = str;

    for (i = 0; i < 3; i++)
    {
        if (s[i].bytes[0] != 0)
        {
            for (j = 0; j < MAX_MULTIBYTE_CHAR_LEN && s[i].bytes[j]; j++)
                *p++ = s[i].bytes[j];
        }
        else
        {
            /* Emit a space in place of COLOR_BLANK */
            *p++ = ' ';
        }
    }

    /* Convert "str" to a standard trigram (possibly hashing it) */
    compact_trigram(ptrgm, str, p - str);
}

static void getColorInfo ( regex_t regex,
TrgmNFA trgmNFA 
) [static]

Definition at line 741 of file trgm_regexp.c.

References trgm_mb_char::bytes, chars, COLOR_COUNT_LIMIT, TrgmNFA::colorInfo, TrgmColorInfo::containsNonWord, convertPgWchar(), TrgmColorInfo::expandable, i, ISWORDCHR, TrgmNFA::ncolors, palloc(), palloc0(), pfree(), pg_reg_getcharacters(), pg_reg_getnumcharacters(), pg_reg_getnumcolors(), TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

{
    int         colorsCount = pg_reg_getnumcolors(regex);
    int         i;

    trgmNFA->ncolors = colorsCount;
    trgmNFA->colorInfo = (TrgmColorInfo *)
        palloc0(colorsCount * sizeof(TrgmColorInfo));

    /*
     * Loop over colors, filling TrgmColorInfo about each.
     */
    for (i = 0; i < colorsCount; i++)
    {
        TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[i];
        int         charsCount = pg_reg_getnumcharacters(regex, i);
        pg_wchar   *chars;
        int         j;

        if (charsCount < 0 || charsCount > COLOR_COUNT_LIMIT)
        {
            /* Non expandable, or too large to work with */
            colorInfo->expandable = false;
            continue;
        }

        colorInfo->expandable = true;
        colorInfo->containsNonWord = false;
        colorInfo->wordChars = (trgm_mb_char *)
            palloc(sizeof(trgm_mb_char) * charsCount);
        colorInfo->wordCharsCount = 0;

        /* Extract all the chars in this color */
        chars = (pg_wchar *) palloc(sizeof(pg_wchar) * charsCount);
        pg_reg_getcharacters(regex, i, chars, charsCount);

        /*
         * Convert characters back to multibyte form, and save only those that
         * are word characters.  Set "containsNonWord" if any non-word
         * character.  (Note: it'd probably be nicer to keep the chars in
         * pg_wchar format for now, but ISWORDCHR wants to see multibyte.)
         */
        for (j = 0; j < charsCount; j++)
        {
            trgm_mb_char c;

            if (!convertPgWchar(chars[j], &c))
                continue;       /* ok to ignore it altogether */
            if (ISWORDCHR(c.bytes))
                colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
            else
                colorInfo->containsNonWord = true;
        }

        pfree(chars);
    }
}

static TrgmState* getState ( TrgmNFA trgmNFA,
TrgmStateKey key 
) [static]

Definition at line 1347 of file trgm_regexp.c.

References TrgmState::arcs, TrgmState::children, TrgmState::enterKeys, TrgmState::fin, HASH_ENTER, hash_search(), TrgmState::init, lappend(), TrgmState::number, TrgmState::parent, TrgmNFA::queue, and TrgmNFA::states.

{
    TrgmState  *state;
    bool        found;

    state = (TrgmState *) hash_search(trgmNFA->states, key, HASH_ENTER,
                                      &found);
    if (!found)
    {
        /* New state: initialize and queue it */
        state->arcs = NIL;
        state->enterKeys = NIL;
        state->init = false;
        state->fin = false;
        state->parent = NULL;
        state->children = NIL;
        state->number = -1;

        trgmNFA->queue = lappend(trgmNFA->queue, state);
    }
    return state;
}

static void mergeStates ( TrgmState state1,
TrgmState state2 
) [static]

Definition at line 1710 of file trgm_regexp.c.

References Assert, TrgmState::children, TrgmState::fin, TrgmState::init, lappend(), lfirst, list_concat(), and TrgmState::parent.

Referenced by selectColorTrigrams().

{
    ListCell   *cell;

    Assert(state1 != state2);
    Assert(!state1->parent);
    Assert(!state2->parent);

    /* state1 absorbs state2's init/fin flags */
    state1->init |= state2->init;
    state1->fin |= state2->fin;

    /* state2, and all its children, become children of state1 */
    foreach(cell, state2->children)
    {
        TrgmState  *state = (TrgmState *) lfirst(cell);

        state->parent = state1;
    }
    state2->parent = state1;
    state1->children = list_concat(state1->children, state2->children);
    state1->children = lappend(state1->children, state2);
    state2->children = NIL;
}

static int packArcInfoCmp ( const void *  a1,
const void *  a2 
) [static]

Definition at line 1932 of file trgm_regexp.c.

References TrgmPackArcInfo::colorTrgm, TrgmPackArcInfo::sourceState, and TrgmPackArcInfo::targetState.

Referenced by packGraph().

{
    const TrgmPackArcInfo *p1 = (const TrgmPackArcInfo *) a1;
    const TrgmPackArcInfo *p2 = (const TrgmPackArcInfo *) a2;

    if (p1->sourceState < p2->sourceState)
        return -1;
    if (p1->sourceState > p2->sourceState)
        return 1;
    if (p1->colorTrgm < p2->colorTrgm)
        return -1;
    if (p1->colorTrgm > p2->colorTrgm)
        return 1;
    if (p1->targetState < p2->targetState)
        return -1;
    if (p1->targetState > p2->targetState)
        return 1;
    return 0;
}

static TrgmPackedGraph * packGraph ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
) [static]

Definition at line 1777 of file trgm_regexp.c.

References TrgmPackedState::arcs, TrgmState::arcs, TrgmPackedState::arcsCount, TrgmNFA::arcsCount, Assert, TrgmPackedArc::colorTrgm, TrgmPackArcInfo::colorTrgm, TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, ColorTrgmInfo::count, TrgmArc::ctrgm, ColorTrgmInfo::expanded, TrgmState::fin, hash_seq_init(), hash_seq_search(), i, TrgmState::init, lfirst, MemoryContextAlloc(), NULL, ColorTrgmInfo::number, TrgmState::number, packArcInfoCmp(), palloc(), TrgmState::parent, qsort, TrgmPackArcInfo::sourceState, TrgmPackedGraph::states, TrgmNFA::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, TrgmArc::target, TrgmPackedArc::targetState, and TrgmPackArcInfo::targetState.

Referenced by createTrgmNFAInternal().

{
    int         number = 2,
                arcIndex,
                arcsCount;
    HASH_SEQ_STATUS scan_status;
    TrgmState  *state;
    TrgmPackArcInfo *arcs,
               *p1,
               *p2;
    TrgmPackedArc *packedArcs;
    TrgmPackedGraph *result;
    int         i,
                j;

    /* Enumerate surviving states, giving init and fin reserved numbers */
    hash_seq_init(&scan_status, trgmNFA->states);
    while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
    {
        while (state->parent)
            state = state->parent;

        if (state->number < 0)
        {
            if (state->init)
                state->number = 0;
            else if (state->fin)
                state->number = 1;
            else
            {
                state->number = number;
                number++;
            }
        }
    }

    /* Collect array of all arcs */
    arcs = (TrgmPackArcInfo *)
        palloc(sizeof(TrgmPackArcInfo) * trgmNFA->arcsCount);
    arcIndex = 0;
    hash_seq_init(&scan_status, trgmNFA->states);
    while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
    {
        TrgmState  *source = state;
        ListCell   *cell;

        while (source->parent)
            source = source->parent;

        foreach(cell, state->arcs)
        {
            TrgmArc    *arc = (TrgmArc *) lfirst(cell);
            TrgmState  *target = arc->target;

            while (target->parent)
                target = target->parent;

            if (source->number != target->number)
            {
                ColorTrgmInfo *ctrgm;

                ctrgm = (ColorTrgmInfo *) bsearch(&arc->ctrgm,
                                                  trgmNFA->colorTrgms,
                                                  trgmNFA->colorTrgmsCount,
                                                  sizeof(ColorTrgmInfo),
                                                  colorTrgmInfoCmp);
                Assert(ctrgm != NULL);
                Assert(ctrgm->expanded);

                arcs[arcIndex].sourceState = source->number;
                arcs[arcIndex].targetState = target->number;
                arcs[arcIndex].colorTrgm = ctrgm->number;
                arcIndex++;
            }
        }
    }

    /* Sort arcs to ease duplicate detection */
    qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);

    /* We could have duplicates because states were merged. Remove them. */
    /* p1 is probe point, p2 is last known non-duplicate. */
    p2 = arcs;
    for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
    {
        if (packArcInfoCmp(p1, p2) > 0)
        {
            p2++;
            *p2 = *p1;
        }
    }
    arcsCount = (p2 - arcs) + 1;

    /* Create packed representation */
    result = (TrgmPackedGraph *)
        MemoryContextAlloc(rcontext, sizeof(TrgmPackedGraph));

    /* Pack color trigrams information */
    result->colorTrigramsCount = 0;
    for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
    {
        if (trgmNFA->colorTrgms[i].expanded)
            result->colorTrigramsCount++;
    }
    result->colorTrigramGroups = (int *)
        MemoryContextAlloc(rcontext, sizeof(int) * result->colorTrigramsCount);
    j = 0;
    for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
    {
        if (trgmNFA->colorTrgms[i].expanded)
        {
            result->colorTrigramGroups[j] = trgmNFA->colorTrgms[i].count;
            j++;
        }
    }

    /* Pack states and arcs information */
    result->statesCount = number;
    result->states = (TrgmPackedState *)
        MemoryContextAlloc(rcontext, number * sizeof(TrgmPackedState));
    packedArcs = (TrgmPackedArc *)
        MemoryContextAlloc(rcontext, arcsCount * sizeof(TrgmPackedArc));
    j = 0;
    for (i = 0; i < number; i++)
    {
        int         cnt = 0;

        result->states[i].arcs = &packedArcs[j];
        while (j < arcsCount && arcs[j].sourceState == i)
        {
            packedArcs[j].targetState = arcs[j].targetState;
            packedArcs[j].colorTrgm = arcs[j].colorTrgm;
            cnt++;
            j++;
        }
        result->states[i].arcsCount = cnt;
    }

    /* Allocate working memory for trigramsMatchGraph() */
    result->colorTrigramsActive = (bool *)
        MemoryContextAlloc(rcontext, sizeof(bool) * result->colorTrigramsCount);
    result->statesActive = (bool *)
        MemoryContextAlloc(rcontext, sizeof(bool) * result->statesCount);
    result->statesQueue = (int *)
        MemoryContextAlloc(rcontext, sizeof(int) * result->statesCount);

    return result;
}

static bool prefixContains ( TrgmPrefix prefix1,
TrgmPrefix prefix2 
) [static]

Definition at line 1377 of file trgm_regexp.c.

References COLOR_UNKNOWN, and TrgmPrefix::colors.

Referenced by addArc(), and addKey().

{
    if (prefix1->colors[1] == COLOR_UNKNOWN)
    {
        /* Fully ambiguous prefix contains everything */
        return true;
    }
    else if (prefix1->colors[0] == COLOR_UNKNOWN)
    {
        /*
         * Prefix with only first unknown color contains every prefix with
         * same second color.
         */
        if (prefix1->colors[1] == prefix2->colors[1])
            return true;
        else
            return false;
    }
    else
    {
        /* Exact prefix contains only the exact same prefix */
        if (prefix1->colors[0] == prefix2->colors[0] &&
            prefix1->colors[1] == prefix2->colors[1])
            return true;
        else
            return false;
    }
}

static void processState ( TrgmNFA trgmNFA,
TrgmState state 
) [static]

Definition at line 932 of file trgm_regexp.c.

References addArcs(), addKey(), TrgmState::fin, TrgmNFA::keysQueue, linitial, list_delete_first(), NIL, and TrgmState::stateKey.

Referenced by transformGraph().

{
    /* keysQueue should be NIL already, but make sure */
    trgmNFA->keysQueue = NIL;

    /*
     * Add state's own key, and then process all keys added to keysQueue until
     * queue is empty.  But we can quit if the state gets marked final.
     */
    addKey(trgmNFA, state, &state->stateKey);
    while (trgmNFA->keysQueue != NIL && !state->fin)
    {
        TrgmStateKey *key = (TrgmStateKey *) linitial(trgmNFA->keysQueue);

        trgmNFA->keysQueue = list_delete_first(trgmNFA->keysQueue);
        addKey(trgmNFA, state, key);
    }

    /*
     * Add outgoing arcs only if state isn't final (we have no interest in
     * outgoing arcs if we already match)
     */
    if (!state->fin)
        addArcs(trgmNFA, state);
}

static void RE_compile ( regex_t regex,
text text_re,
int  cflags,
Oid  collation 
) [static]

Definition at line 697 of file trgm_regexp.c.

References ereport, errcode(), errmsg(), ERROR, palloc(), pfree(), pg_mb2wchar_with_len(), pg_regcomp(), pg_regerror(), REG_OKAY, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by createTrgmNFA().

{
    int         text_re_len = VARSIZE_ANY_EXHDR(text_re);
    char       *text_re_val = VARDATA_ANY(text_re);
    pg_wchar   *pattern;
    int         pattern_len;
    int         regcomp_result;
    char        errMsg[100];

    /* Convert pattern string to wide characters */
    pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
    pattern_len = pg_mb2wchar_with_len(text_re_val,
                                       pattern,
                                       text_re_len);

    /* Compile regex */
    regcomp_result = pg_regcomp(regex,
                                pattern,
                                pattern_len,
                                cflags,
                                collation);

    pfree(pattern);

    if (regcomp_result != REG_OKAY)
    {
        /* re didn't compile (no need for pg_regfree, if so) */
        pg_regerror(regcomp_result, regex, errMsg, sizeof(errMsg));
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                 errmsg("invalid regular expression: %s", errMsg)));
    }
}

static bool selectColorTrigrams ( TrgmNFA trgmNFA  )  [static]

Definition at line 1419 of file trgm_regexp.c.

References ColorTrgmInfo::arcs, TrgmState::arcs, TrgmNFA::arcsCount, Assert, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, colorTrgmInfoCmp(), colorTrgmInfoCountCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::count, TrgmArc::ctrgm, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, TrgmState::fin, hash_seq_init(), hash_seq_search(), i, TrgmState::init, lfirst, list_concat(), list_make1, MAX_TRGM_COUNT, mergeStates(), NULL, ColorTrgmInfo::number, palloc(), TrgmState::parent, qsort, TrgmArcInfo::source, TrgmNFA::states, TrgmArc::target, TrgmArcInfo::target, TrgmNFA::totalTrgmCount, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

{
    HASH_SEQ_STATUS scan_status;
    int         arcsCount = trgmNFA->arcsCount,
                i;
    TrgmState  *state;
    ColorTrgmInfo *colorTrgms;
    int64       totalTrgmCount;
    int         number;

    /* Collect color trigrams from all arcs */
    colorTrgms = (ColorTrgmInfo *) palloc(sizeof(ColorTrgmInfo) * arcsCount);
    trgmNFA->colorTrgms = colorTrgms;

    i = 0;
    hash_seq_init(&scan_status, trgmNFA->states);
    while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
    {
        ListCell   *cell;

        foreach(cell, state->arcs)
        {
            TrgmArc    *arc = (TrgmArc *) lfirst(cell);
            TrgmArcInfo *arcInfo = (TrgmArcInfo *) palloc(sizeof(TrgmArcInfo));

            arcInfo->source = state;
            arcInfo->target = arc->target;
            colorTrgms[i].arcs = list_make1(arcInfo);
            colorTrgms[i].expanded = true;
            colorTrgms[i].ctrgm = arc->ctrgm;
            i++;
        }
    }
    Assert(i == arcsCount);

    /* Remove duplicates, merging their arcs lists */
    if (arcsCount >= 2)
    {
        ColorTrgmInfo *p1,
                   *p2;

        /* Sort trigrams to ease duplicate detection */
        qsort(colorTrgms, arcsCount, sizeof(ColorTrgmInfo), colorTrgmInfoCmp);

        /* p1 is probe point, p2 is last known non-duplicate. */
        p2 = colorTrgms;
        for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
        {
            if (colorTrgmInfoCmp(p1, p2) > 0)
            {
                p2++;
                *p2 = *p1;
            }
            else
            {
                p2->arcs = list_concat(p2->arcs, p1->arcs);
            }
        }
        trgmNFA->colorTrgmsCount = (p2 - colorTrgms) + 1;
    }
    else
    {
        trgmNFA->colorTrgmsCount = arcsCount;
    }

    /*
     * Count number of simple trigrams generated by each color trigram.
     *
     * Note: per-color-trigram counts cannot overflow an int so long as
     * COLOR_COUNT_LIMIT is not more than the cube root of INT_MAX, ie about
     * 1290.  However, the grand total totalTrgmCount might conceivably
     * overflow an int, so we use int64 for that within this routine.
     */
    totalTrgmCount = 0;
    for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
    {
        ColorTrgmInfo *trgmInfo = &colorTrgms[i];
        int         j,
                    count = 1;

        for (j = 0; j < 3; j++)
        {
            TrgmColor   c = trgmInfo->ctrgm.colors[j];

            if (c != COLOR_BLANK)
                count *= trgmNFA->colorInfo[c].wordCharsCount;
        }
        trgmInfo->count = count;
        totalTrgmCount += count;
    }

    /* Sort color trigrams in descending order of simple trigram counts */
    qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
          colorTrgmInfoCountCmp);

    /*
     * Remove color trigrams from the graph so long as total number of simple
     * trigrams exceeds MAX_TRGM_COUNT.  We prefer to remove color trigrams
     * with the most associated simple trigrams, since those are the most
     * promising for reducing the total number of simple trigrams.  When
     * removing a color trigram we have to merge states connected by arcs
     * labeled with that trigram.  It's necessary to not merge initial and
     * final states, because our graph becomes useless if that happens; so we
     * cannot always remove the trigram we'd prefer to.
     */
    for (i = 0;
         (i < trgmNFA->colorTrgmsCount) && (totalTrgmCount > MAX_TRGM_COUNT);
         i++)
    {
        ColorTrgmInfo *trgmInfo = &colorTrgms[i];
        bool        canRemove = true;
        ListCell   *cell;

        /*
         * Does any arc of this color trigram connect initial and final
         * states?  If so we can't remove it.
         */
        foreach(cell, trgmInfo->arcs)
        {
            TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
            TrgmState  *source = arcInfo->source,
                       *target = arcInfo->target;

            /* examine parent states, if any merging has already happened */
            while (source->parent)
                source = source->parent;
            while (target->parent)
                target = target->parent;

            if ((source->init || target->init) &&
                (source->fin || target->fin))
            {
                canRemove = false;
                break;
            }
        }
        if (!canRemove)
            continue;

        /* OK, merge states linked by each arc labeled by the trigram */
        foreach(cell, trgmInfo->arcs)
        {
            TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
            TrgmState  *source = arcInfo->source,
                       *target = arcInfo->target;

            while (source->parent)
                source = source->parent;
            while (target->parent)
                target = target->parent;
            if (source != target)
                mergeStates(source, target);
        }

        /* Mark trigram unexpanded, and update totalTrgmCount */
        trgmInfo->expanded = false;
        totalTrgmCount -= trgmInfo->count;
    }

    /* Did we succeed in fitting into MAX_TRGM_COUNT? */
    if (totalTrgmCount > MAX_TRGM_COUNT)
        return false;

    trgmNFA->totalTrgmCount = (int) totalTrgmCount;

    /*
     * Sort color trigrams by colors (will be useful for bsearch in packGraph)
     * and enumerate the color trigrams that are expanded.
     */
    number = 0;
    qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
          colorTrgmInfoCmp);
    for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
    {
        if (colorTrgms[i].expanded)
        {
            colorTrgms[i].number = number;
            number++;
        }
    }

    return true;
}

static void transformGraph ( TrgmNFA trgmNFA  )  [static]

Definition at line 870 of file trgm_regexp.c.

References TrgmNFA::arcsCount, TrgmPrefix::colors, CurrentMemoryContext, HASHCTL::entrysize, TrgmState::fin, HASHCTL::hash, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_FUNCTION, hash_get_num_entries(), HASHCTL::hcxt, TrgmState::init, TrgmNFA::initState, HASHCTL::keysize, TrgmNFA::keysQueue, linitial, list_delete_first(), MAX_EXPANDED_ARCS, MAX_EXPANDED_STATES, MemSet, NIL, TrgmStateKey::nstate, TrgmNFA::overflowed, pg_reg_getinitialstate(), TrgmStateKey::prefix, processState(), TrgmNFA::queue, TrgmNFA::regex, and TrgmNFA::states.

Referenced by createTrgmNFAInternal().

{
    HASHCTL     hashCtl;
    TrgmStateKey initkey;
    TrgmState  *initstate;

    /* Initialize this stage's workspace in trgmNFA struct */
    trgmNFA->queue = NIL;
    trgmNFA->keysQueue = NIL;
    trgmNFA->arcsCount = 0;
    trgmNFA->overflowed = false;

    /* Create hashtable for states */
    hashCtl.keysize = sizeof(TrgmStateKey);
    hashCtl.entrysize = sizeof(TrgmState);
    hashCtl.hcxt = CurrentMemoryContext;
    hashCtl.hash = tag_hash;
    trgmNFA->states = hash_create("Trigram NFA",
                                  1024,
                                  &hashCtl,
                                  HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION);

    /* Create initial state: ambiguous prefix, NFA's initial state */
    MemSet(&initkey, 0, sizeof(initkey));
    initkey.prefix.colors[0] = COLOR_UNKNOWN;
    initkey.prefix.colors[1] = COLOR_UNKNOWN;
    initkey.nstate = pg_reg_getinitialstate(trgmNFA->regex);

    initstate = getState(trgmNFA, &initkey);
    initstate->init = true;
    trgmNFA->initState = initstate;

    /*
     * Recursively build the expanded graph by processing queue of states
     * (breadth-first search).  getState already put initstate in the queue.
     */
    while (trgmNFA->queue != NIL)
    {
        TrgmState  *state = (TrgmState *) linitial(trgmNFA->queue);

        trgmNFA->queue = list_delete_first(trgmNFA->queue);

        /*
         * If we overflowed then just mark state as final.  Otherwise do
         * actual processing.
         */
        if (trgmNFA->overflowed)
            state->fin = true;
        else
            processState(trgmNFA, state);

        /* Did we overflow? */
        if (trgmNFA->arcsCount > MAX_EXPANDED_ARCS ||
            hash_get_num_entries(trgmNFA->states) > MAX_EXPANDED_STATES)
            trgmNFA->overflowed = true;
    }
}

bool trigramsMatchGraph ( TrgmPackedGraph graph,
bool check 
)

Definition at line 604 of file trgm_regexp.c.

References TrgmPackedState::arcs, TrgmPackedState::arcsCount, TrgmPackedArc::colorTrgm, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, i, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, and TrgmPackedArc::targetState.

Referenced by gin_trgm_consistent(), and gtrgm_consistent().

{
    int         i,
                j,
                k,
                queueIn,
                queueOut;

    /*
     * Reset temporary working areas.
     */
    memset(graph->colorTrigramsActive, 0,
           sizeof(bool) * graph->colorTrigramsCount);
    memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);

    /*
     * Check which color trigrams were matched.  A match for any simple
     * trigram associated with a color trigram counts as a match of the color
     * trigram.
     */
    j = 0;
    for (i = 0; i < graph->colorTrigramsCount; i++)
    {
        int         cnt = graph->colorTrigramGroups[i];

        for (k = j; k < j + cnt; k++)
        {
            if (check[k])
            {
                /*
                 * Found one matched trigram in the group. Can skip the rest
                 * of them and go to the next group.
                 */
                graph->colorTrigramsActive[i] = true;
                break;
            }
        }
        j = j + cnt;
    }

    /*
     * Initialize the statesQueue to hold just the initial state.  Note:
     * statesQueue has room for statesCount entries, which is certainly enough
     * since no state will be put in the queue more than once. The
     * statesActive array marks which states have been queued.
     */
    graph->statesActive[0] = true;
    graph->statesQueue[0] = 0;
    queueIn = 0;
    queueOut = 1;

    /* Process queued states as long as there are any. */
    while (queueIn < queueOut)
    {
        int         stateno = graph->statesQueue[queueIn++];
        TrgmPackedState *state = &graph->states[stateno];
        int         cnt = state->arcsCount;

        /* Loop over state's out-arcs */
        for (i = 0; i < cnt; i++)
        {
            TrgmPackedArc *arc = &state->arcs[i];

            /*
             * If corresponding color trigram is present then activate the
             * corresponding state.  We're done if that's the final state,
             * otherwise queue the state if it's not been queued already.
             */
            if (graph->colorTrigramsActive[arc->colorTrgm])
            {
                int         nextstate = arc->targetState;

                if (nextstate == 1)
                    return true;    /* success: final state is reachable */

                if (!graph->statesActive[nextstate])
                {
                    graph->statesActive[nextstate] = true;
                    graph->statesQueue[queueOut++] = nextstate;
                }
            }
        }
    }

    /* Queue is empty, so match fails. */
    return false;
}

static bool validArcLabel ( TrgmStateKey key,
TrgmColor  co 
) [static]

Definition at line 1292 of file trgm_regexp.c.

References Assert, COLOR_BLANK, COLOR_UNKNOWN, TrgmPrefix::colors, and TrgmStateKey::prefix.

Referenced by addArc(), and addKey().

{
    /*
     * We have to know full trigram in order to add outgoing arc.  So we can't
     * do it if prefix is ambiguous.
     */
    if (key->prefix.colors[0] == COLOR_UNKNOWN)
        return false;

    /* If key->prefix.colors[0] isn't unknown, its second color isn't either */
    Assert(key->prefix.colors[1] != COLOR_UNKNOWN);
    /* And we should not be called with an unknown arc color anytime */
    Assert(co != COLOR_UNKNOWN);

    /*
     * We don't bother with making arcs representing three non-word
     * characters, since that's useless for trigram extraction.
     */
    if (key->prefix.colors[0] == COLOR_BLANK &&
        key->prefix.colors[1] == COLOR_BLANK &&
        co == COLOR_BLANK)
        return false;

    /*
     * We also reject nonblank-blank-anything.  The nonblank-blank-nonblank
     * case doesn't correspond to any trigram the trigram extraction code
     * would make.  The nonblank-blank-blank case is also not possible with
     * RPADDING = 1.  (Note that in many cases we'd fail to generate such a
     * trigram even if it were valid, for example processing "foo bar" will
     * not result in considering the trigram "o  ".  So if you want to support
     * RPADDING = 2, there's more to do than just twiddle this test.)
     */
    if (key->prefix.colors[0] != COLOR_BLANK &&
        key->prefix.colors[1] == COLOR_BLANK)
        return false;

    /*
     * Other combinations involving blank are valid, in particular we assume
     * blank-blank-nonblank is valid, which presumes that LPADDING is 2.
     *
     * Note: Using again the example "foo bar", we will not consider the
     * trigram "  b", though this trigram would be found by the trigram
     * extraction code.  Since we will find " ba", it doesn't seem worth
     * trying to hack the algorithm to generate the additional trigram.
     */

    /* arc label is valid */
    return true;
}