#include "postgres.h"#include "trgm.h"#include "regex/regexport.h"#include "tsearch/ts_locale.h"#include "utils/hsearch.h"#include "utils/memutils.h"
Go to the source code of this file.
| #define COLOR_BLANK (-2) |
Definition at line 256 of file trgm_regexp.c.
Referenced by addArcs(), addKey(), expandColorTrigrams(), selectColorTrigrams(), and validArcLabel().
| #define COLOR_COUNT_LIMIT 256 |
Definition at line 207 of file trgm_regexp.c.
Referenced by getColorInfo().
| #define COLOR_UNKNOWN (-1) |
Definition at line 255 of file trgm_regexp.c.
Referenced by prefixContains(), and validArcLabel().
| #define MAX_EXPANDED_ARCS 1024 |
Definition at line 205 of file trgm_regexp.c.
Referenced by transformGraph().
| #define MAX_EXPANDED_STATES 128 |
Definition at line 204 of file trgm_regexp.c.
Referenced by transformGraph().
| #define MAX_TRGM_COUNT 256 |
Definition at line 206 of file trgm_regexp.c.
Referenced by selectColorTrigrams().
| typedef int TrgmColor |
Definition at line 252 of file trgm_regexp.c.
| static void addArc | ( | TrgmNFA * | trgmNFA, | |
| TrgmState * | state, | |||
| TrgmStateKey * | key, | |||
| TrgmColor | co, | |||
| TrgmStateKey * | destKey | |||
| ) | [static] |
Definition at line 1250 of file trgm_regexp.c.
References TrgmState::arcs, TrgmNFA::arcsCount, TrgmPrefix::colors, ColorTrgm::colors, TrgmArc::ctrgm, TrgmState::enterKeys, lappend(), lfirst, TrgmStateKey::nstate, palloc(), TrgmStateKey::prefix, prefixContains(), TrgmArc::target, and validArcLabel().
Referenced by addArcs().
{
TrgmArc *arc;
ListCell *cell;
/* Do nothing if this wouldn't be a valid arc label trigram */
if (!validArcLabel(key, co))
return;
/*
* Check if we are going to reach key which is covered by a key which is
* already listed in this state. If so arc is useless: the NFA can bypass
* it through a path that doesn't require any predictable trigram, so
* whether the arc's trigram is present or not doesn't really matter.
*/
foreach(cell, state->enterKeys)
{
TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
if (existingKey->nstate == destKey->nstate &&
prefixContains(&existingKey->prefix, &destKey->prefix))
return;
}
/* Checks were successful, add new arc */
arc = (TrgmArc *) palloc(sizeof(TrgmArc));
arc->target = getState(trgmNFA, destKey);
arc->ctrgm.colors[0] = key->prefix.colors[0];
arc->ctrgm.colors[1] = key->prefix.colors[1];
arc->ctrgm.colors[2] = co;
state->arcs = lappend(state->arcs, arc);
trgmNFA->arcsCount++;
}
Definition at line 1158 of file trgm_regexp.c.
References addArc(), regex_arc_t::co, COLOR_BLANK, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmState::enterKeys, TrgmColorInfo::expandable, i, lfirst, MemSet, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, TrgmNFA::regex, regex_arc_t::to, and TrgmColorInfo::wordCharsCount.
Referenced by processState().
{
TrgmStateKey destKey;
ListCell *cell;
regex_arc_t *arcs;
int arcsCount,
i;
/*
* Ensure any pad bytes in destKey are zero, since it may get used as a
* hashtable key by getState.
*/
MemSet(&destKey, 0, sizeof(destKey));
/*
* Iterate over enter keys associated with this expanded-graph state. This
* includes both the state's own stateKey, and any enter keys we added to
* it during addKey (which represent expanded-graph states that are not
* distinguishable from this one by means of trigrams). For each such
* enter key, examine all the out-arcs of the key's underlying NFA state,
* and try to make a trigram arc leading to where the out-arc leads.
* (addArc will deal with whether the arc is valid or not.)
*/
foreach(cell, state->enterKeys)
{
TrgmStateKey *key = (TrgmStateKey *) lfirst(cell);
arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
for (i = 0; i < arcsCount; i++)
{
regex_arc_t *arc = &arcs[i];
TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
/*
* Ignore non-expandable colors; addKey already handled the case.
*
* We need no special check for begin/end pseudocolors here. We
* don't need to do any processing for them, and they will be
* marked non-expandable since the regex engine will have reported
* them that way.
*/
if (!colorInfo->expandable)
continue;
if (colorInfo->containsNonWord)
{
/*
* Color includes non-word character(s).
*
* Generate an arc, treating this transition as occurring on
* BLANK. This allows word-ending trigrams to be manufactured
* if possible.
*/
destKey.prefix.colors[0] = key->prefix.colors[1];
destKey.prefix.colors[1] = COLOR_BLANK;
destKey.nstate = arc->to;
addArc(trgmNFA, state, key, COLOR_BLANK, &destKey);
}
if (colorInfo->wordCharsCount > 0)
{
/*
* Color includes word character(s).
*
* Generate an arc. Color is pushed into prefix of target
* state.
*/
destKey.prefix.colors[0] = key->prefix.colors[1];
destKey.prefix.colors[1] = arc->co;
destKey.nstate = arc->to;
addArc(trgmNFA, state, key, arc->co, &destKey);
}
}
pfree(arcs);
}
}
| static void addKey | ( | TrgmNFA * | trgmNFA, | |
| TrgmState * | state, | |||
| TrgmStateKey * | key | |||
| ) | [static] |
Definition at line 975 of file trgm_regexp.c.
References addKeyToQueue(), regex_arc_t::co, COLOR_BLANK, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmState::enterKeys, TrgmColorInfo::expandable, TrgmState::fin, i, lappend(), lfirst, list_delete_cell(), list_head(), lnext, MemSet, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_colorisbegin(), pg_reg_colorisend(), pg_reg_getfinalstate(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, prefixContains(), TrgmNFA::regex, regex_arc_t::to, validArcLabel(), and TrgmColorInfo::wordCharsCount.
Referenced by processState().
{
regex_arc_t *arcs;
TrgmStateKey destKey;
ListCell *cell,
*prev,
*next;
int i,
arcsCount;
/*
* Ensure any pad bytes in destKey are zero, since it may get used as a
* hashtable key by getState.
*/
MemSet(&destKey, 0, sizeof(destKey));
/*
* Compare key to each existing enter key of the state to check for
* redundancy. We can drop either old key(s) or the new key if we find
* redundancy.
*/
prev = NULL;
cell = list_head(state->enterKeys);
while (cell)
{
TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
next = lnext(cell);
if (existingKey->nstate == key->nstate)
{
if (prefixContains(&existingKey->prefix, &key->prefix))
{
/* This old key already covers the new key. Nothing to do */
return;
}
if (prefixContains(&key->prefix, &existingKey->prefix))
{
/*
* The new key covers this old key. Remove the old key, it's
* no longer needed once we add this key to the list.
*/
state->enterKeys = list_delete_cell(state->enterKeys,
cell, prev);
}
else
prev = cell;
}
else
prev = cell;
cell = next;
}
/* No redundancy, so add this key to the state's list */
state->enterKeys = lappend(state->enterKeys, key);
/* If state is now known final, mark it and we're done */
if (key->nstate == pg_reg_getfinalstate(trgmNFA->regex))
{
state->fin = true;
return;
}
/*
* Loop through all outgoing arcs of the corresponding state in the
* original NFA.
*/
arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
for (i = 0; i < arcsCount; i++)
{
regex_arc_t *arc = &arcs[i];
if (pg_reg_colorisbegin(trgmNFA->regex, arc->co))
{
/*
* Start of line/string (^). Trigram extraction treats start of
* line same as start of word: double space prefix is added.
* Hence, make an enter key showing we can reach the arc
* destination with all-blank prefix.
*/
destKey.prefix.colors[0] = COLOR_BLANK;
destKey.prefix.colors[1] = COLOR_BLANK;
destKey.nstate = arc->to;
/* Add enter key to this state */
addKeyToQueue(trgmNFA, &destKey);
}
else if (pg_reg_colorisend(trgmNFA->regex, arc->co))
{
/*
* End of line/string ($). We must consider this arc as a
* transition that doesn't read anything. The reason for adding
* this enter key to the state is that if the arc leads to the
* NFA's final state, we must mark this expanded state as final.
*/
destKey.prefix.colors[0] = COLOR_UNKNOWN;
destKey.prefix.colors[1] = COLOR_UNKNOWN;
destKey.nstate = arc->to;
/* Add enter key to this state */
addKeyToQueue(trgmNFA, &destKey);
}
else
{
/* Regular color */
TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
if (colorInfo->expandable)
{
if (colorInfo->containsNonWord &&
!validArcLabel(key, COLOR_BLANK))
{
/*
* We can reach the arc destination after reading a
* non-word character, but the prefix is not something
* that addArc will accept with COLOR_BLANK, so no trigram
* arc can get made for this transition. We must make an
* enter key to show that the arc destination is
* reachable. Set it up with an all-blank prefix, since
* that corresponds to what the trigram extraction code
* will do at a word starting boundary.
*/
destKey.prefix.colors[0] = COLOR_BLANK;
destKey.prefix.colors[1] = COLOR_BLANK;
destKey.nstate = arc->to;
addKeyToQueue(trgmNFA, &destKey);
}
if (colorInfo->wordCharsCount > 0 &&
!validArcLabel(key, arc->co))
{
/*
* We can reach the arc destination after reading a word
* character, but the prefix is not something that addArc
* will accept, so no trigram arc can get made for this
* transition. We must make an enter key to show that the
* arc destination is reachable. The prefix for the enter
* key should reflect the info we have for this arc.
*/
destKey.prefix.colors[0] = key->prefix.colors[1];
destKey.prefix.colors[1] = arc->co;
destKey.nstate = arc->to;
addKeyToQueue(trgmNFA, &destKey);
}
}
else
{
/*
* Unexpandable color. Add enter key with ambiguous prefix,
* showing we can reach the destination from this state, but
* the preceding colors will be uncertain. (We do not set the
* first prefix color to key->prefix.colors[1], because a
* prefix of known followed by unknown is invalid.)
*/
destKey.prefix.colors[0] = COLOR_UNKNOWN;
destKey.prefix.colors[1] = COLOR_UNKNOWN;
destKey.nstate = arc->to;
addKeyToQueue(trgmNFA, &destKey);
}
}
}
pfree(arcs);
}
| static void addKeyToQueue | ( | TrgmNFA * | trgmNFA, | |
| TrgmStateKey * | key | |||
| ) | [static] |
Definition at line 1146 of file trgm_regexp.c.
References TrgmNFA::keysQueue, lappend(), and palloc().
Referenced by addKey().
{
TrgmStateKey *keyCopy = (TrgmStateKey *) palloc(sizeof(TrgmStateKey));
memcpy(keyCopy, key, sizeof(TrgmStateKey));
trgmNFA->keysQueue = lappend(trgmNFA->keysQueue, keyCopy);
}
| static int colorTrgmInfoCmp | ( | const void * | p1, | |
| const void * | p2 | |||
| ) | [static] |
Definition at line 1739 of file trgm_regexp.c.
References ColorTrgmInfo::ctrgm, and memcmp().
Referenced by selectColorTrigrams().
{
const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;
return memcmp(&c1->ctrgm, &c2->ctrgm, sizeof(ColorTrgm));
}
| static int colorTrgmInfoCountCmp | ( | const void * | p1, | |
| const void * | p2 | |||
| ) | [static] |
Definition at line 1752 of file trgm_regexp.c.
References ColorTrgmInfo::count.
Referenced by selectColorTrigrams().
{
const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;
if (c1->count < c2->count)
return 1;
else if (c1->count == c2->count)
return 0;
else
return -1;
}
| static bool convertPgWchar | ( | pg_wchar | c, | |
| trgm_mb_char * | result | |||
| ) | [static] |
Definition at line 804 of file trgm_regexp.c.
References trgm_mb_char::bytes, lowerstr(), MAX_MULTIBYTE_CHAR_LEN, pfree(), and pg_wchar2mb_with_len().
Referenced by getColorInfo().
{
/* "s" has enough space for a multibyte character and a trailing NUL */
char s[MAX_MULTIBYTE_CHAR_LEN + 1];
/*
* We can ignore the NUL character, since it can never appear in a PG text
* string. This avoids the need for various special cases when
* reconstructing trigrams.
*/
if (c == 0)
return false;
/* Do the conversion, making sure the result is NUL-terminated */
memset(s, 0, sizeof(s));
pg_wchar2mb_with_len(&c, s, 1);
/*
* In IGNORECASE mode, we can ignore uppercase characters. We assume that
* the regex engine generated both uppercase and lowercase equivalents
* within each color, since we used the REG_ICASE option; so there's no
* need to process the uppercase version.
*
* XXX this code is dependent on the assumption that lowerstr() works the
* same as the regex engine's internal case folding machinery. Might be
* wiser to expose pg_wc_tolower and test whether c == pg_wc_tolower(c).
* On the other hand, the trigrams in the index were created using
* lowerstr(), so we're probably screwed if there's any incompatibility
* anyway.
*/
#ifdef IGNORECASE
{
char *lowerCased = lowerstr(s);
if (strcmp(lowerCased, s) != 0)
{
pfree(lowerCased);
return false;
}
pfree(lowerCased);
}
#endif
/* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
strncpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
return true;
}
| TRGM* createTrgmNFA | ( | text * | text_re, | |
| Oid | collation, | |||
| TrgmPackedGraph ** | graph, | |||
| MemoryContext | rcontext | |||
| ) |
Definition at line 484 of file trgm_regexp.c.
References ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, ALLOCSET_DEFAULT_MINSIZE, AllocSetContextCreate(), createTrgmNFAInternal(), CurrentMemoryContext, MemoryContextDelete(), MemoryContextSwitchTo(), PG_CATCH, PG_END_TRY, PG_RE_THROW, pg_regfree(), PG_TRY, RE_compile(), REG_ADVANCED, and REG_ICASE.
Referenced by gin_extract_query_trgm(), and gtrgm_consistent().
{
TRGM *trg;
regex_t regex;
MemoryContext tmpcontext;
MemoryContext oldcontext;
/*
* This processing generates a great deal of cruft, which we'd like to
* clean up before returning (since this function may be called in a
* query-lifespan memory context). Make a temp context we can work in so
* that cleanup is easy.
*/
tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
"createTrgmNFA temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldcontext = MemoryContextSwitchTo(tmpcontext);
/*
* Stage 1: Compile the regexp into a NFA, using the regexp library.
*/
#ifdef IGNORECASE
RE_compile(®ex, text_re, REG_ADVANCED | REG_ICASE, collation);
#else
RE_compile(®ex, text_re, REG_ADVANCED, collation);
#endif
/*
* Since the regexp library allocates its internal data structures with
* malloc, we need to use a PG_TRY block to ensure that pg_regfree() gets
* done even if there's an error.
*/
PG_TRY();
{
trg = createTrgmNFAInternal(®ex, graph, rcontext);
}
PG_CATCH();
{
pg_regfree(®ex);
PG_RE_THROW();
}
PG_END_TRY();
pg_regfree(®ex);
/* Clean up all the cruft we created */
MemoryContextSwitchTo(oldcontext);
MemoryContextDelete(tmpcontext);
return trg;
}
| static TRGM * createTrgmNFAInternal | ( | regex_t * | regex, | |
| TrgmPackedGraph ** | graph, | |||
| MemoryContext | rcontext | |||
| ) | [static] |
Definition at line 543 of file trgm_regexp.c.
References TrgmNFA::colorInfo, expandColorTrigrams(), TrgmState::fin, getColorInfo(), TrgmNFA::initState, TrgmNFA::ncolors, packGraph(), TrgmNFA::regex, selectColorTrigrams(), and transformGraph().
Referenced by createTrgmNFA().
{
TRGM *trg;
TrgmNFA trgmNFA;
trgmNFA.regex = regex;
/* Collect color information from the regex */
getColorInfo(regex, &trgmNFA);
#ifdef TRGM_REGEXP_DEBUG
printSourceNFA(regex, trgmNFA.colorInfo, trgmNFA.ncolors);
#endif
/*
* Stage 2: Create an expanded graph from the source NFA.
*/
transformGraph(&trgmNFA);
#ifdef TRGM_REGEXP_DEBUG
printTrgmNFA(&trgmNFA);
#endif
/*
* Fail if we were unable to make a nontrivial graph, ie it is possible to
* get from the initial state to the final state without reading any
* predictable trigram.
*/
if (trgmNFA.initState->fin)
return NULL;
/*
* Stage 3: Select color trigrams to expand. Fail if too many trigrams.
*/
if (!selectColorTrigrams(&trgmNFA))
return NULL;
/*
* Stage 4: Expand color trigrams and pack graph into final
* representation.
*/
trg = expandColorTrigrams(&trgmNFA, rcontext);
*graph = packGraph(&trgmNFA, rcontext);
#ifdef TRGM_REGEXP_DEBUG
printTrgmPackedGraph(*graph, trg);
#endif
return trg;
}
| static TRGM * expandColorTrigrams | ( | TrgmNFA * | trgmNFA, | |
| MemoryContext | rcontext | |||
| ) | [static] |
Definition at line 1610 of file trgm_regexp.c.
References ARRKEY, trgm_mb_char::bytes, CALCGTSIZE, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, fillTrgm(), TRGM::flag, GETARR, i, MemoryContextAllocZero(), SET_VARSIZE, TrgmNFA::totalTrgmCount, TRGMHDRSIZE, TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.
Referenced by createTrgmNFAInternal().
{
TRGM *trg;
trgm *p;
int i;
TrgmColorInfo blankColor;
trgm_mb_char blankChar;
/* Set up "blank" color structure containing a single zero character */
memset(blankChar.bytes, 0, sizeof(blankChar.bytes));
blankColor.wordCharsCount = 1;
blankColor.wordChars = &blankChar;
/* Construct the trgm array */
trg = (TRGM *)
MemoryContextAllocZero(rcontext,
TRGMHDRSIZE +
trgmNFA->totalTrgmCount * sizeof(trgm));
trg->flag = ARRKEY;
SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, trgmNFA->totalTrgmCount));
p = GETARR(trg);
for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
{
ColorTrgmInfo *colorTrgm = &trgmNFA->colorTrgms[i];
TrgmColorInfo *c[3];
trgm_mb_char s[3];
int j,
i1,
i2,
i3;
/* Ignore any unexpanded trigrams ... */
if (!colorTrgm->expanded)
continue;
/* Get colors, substituting the dummy struct for COLOR_BLANK */
for (j = 0; j < 3; j++)
{
if (colorTrgm->ctrgm.colors[j] != COLOR_BLANK)
c[j] = &trgmNFA->colorInfo[colorTrgm->ctrgm.colors[j]];
else
c[j] = &blankColor;
}
/* Iterate over all possible combinations of colors' characters */
for (i1 = 0; i1 < c[0]->wordCharsCount; i1++)
{
s[0] = c[0]->wordChars[i1];
for (i2 = 0; i2 < c[1]->wordCharsCount; i2++)
{
s[1] = c[1]->wordChars[i2];
for (i3 = 0; i3 < c[2]->wordCharsCount; i3++)
{
s[2] = c[2]->wordChars[i3];
fillTrgm(p, s);
p++;
}
}
}
}
return trg;
}
| static void fillTrgm | ( | trgm * | ptrgm, | |
| trgm_mb_char | s[3] | |||
| ) | [static] |
Definition at line 1678 of file trgm_regexp.c.
References trgm_mb_char::bytes, compact_trigram(), i, and MAX_MULTIBYTE_CHAR_LEN.
Referenced by expandColorTrigrams().
{
char str[3 * MAX_MULTIBYTE_CHAR_LEN],
*p;
int i,
j;
/* Write multibyte string into "str" (we don't need null termination) */
p = str;
for (i = 0; i < 3; i++)
{
if (s[i].bytes[0] != 0)
{
for (j = 0; j < MAX_MULTIBYTE_CHAR_LEN && s[i].bytes[j]; j++)
*p++ = s[i].bytes[j];
}
else
{
/* Emit a space in place of COLOR_BLANK */
*p++ = ' ';
}
}
/* Convert "str" to a standard trigram (possibly hashing it) */
compact_trigram(ptrgm, str, p - str);
}
Definition at line 741 of file trgm_regexp.c.
References trgm_mb_char::bytes, chars, COLOR_COUNT_LIMIT, TrgmNFA::colorInfo, TrgmColorInfo::containsNonWord, convertPgWchar(), TrgmColorInfo::expandable, i, ISWORDCHR, TrgmNFA::ncolors, palloc(), palloc0(), pfree(), pg_reg_getcharacters(), pg_reg_getnumcharacters(), pg_reg_getnumcolors(), TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.
Referenced by createTrgmNFAInternal().
{
int colorsCount = pg_reg_getnumcolors(regex);
int i;
trgmNFA->ncolors = colorsCount;
trgmNFA->colorInfo = (TrgmColorInfo *)
palloc0(colorsCount * sizeof(TrgmColorInfo));
/*
* Loop over colors, filling TrgmColorInfo about each.
*/
for (i = 0; i < colorsCount; i++)
{
TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[i];
int charsCount = pg_reg_getnumcharacters(regex, i);
pg_wchar *chars;
int j;
if (charsCount < 0 || charsCount > COLOR_COUNT_LIMIT)
{
/* Non expandable, or too large to work with */
colorInfo->expandable = false;
continue;
}
colorInfo->expandable = true;
colorInfo->containsNonWord = false;
colorInfo->wordChars = (trgm_mb_char *)
palloc(sizeof(trgm_mb_char) * charsCount);
colorInfo->wordCharsCount = 0;
/* Extract all the chars in this color */
chars = (pg_wchar *) palloc(sizeof(pg_wchar) * charsCount);
pg_reg_getcharacters(regex, i, chars, charsCount);
/*
* Convert characters back to multibyte form, and save only those that
* are word characters. Set "containsNonWord" if any non-word
* character. (Note: it'd probably be nicer to keep the chars in
* pg_wchar format for now, but ISWORDCHR wants to see multibyte.)
*/
for (j = 0; j < charsCount; j++)
{
trgm_mb_char c;
if (!convertPgWchar(chars[j], &c))
continue; /* ok to ignore it altogether */
if (ISWORDCHR(c.bytes))
colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
else
colorInfo->containsNonWord = true;
}
pfree(chars);
}
}
| static TrgmState* getState | ( | TrgmNFA * | trgmNFA, | |
| TrgmStateKey * | key | |||
| ) | [static] |
Definition at line 1347 of file trgm_regexp.c.
References TrgmState::arcs, TrgmState::children, TrgmState::enterKeys, TrgmState::fin, HASH_ENTER, hash_search(), TrgmState::init, lappend(), TrgmState::number, TrgmState::parent, TrgmNFA::queue, and TrgmNFA::states.
{
TrgmState *state;
bool found;
state = (TrgmState *) hash_search(trgmNFA->states, key, HASH_ENTER,
&found);
if (!found)
{
/* New state: initialize and queue it */
state->arcs = NIL;
state->enterKeys = NIL;
state->init = false;
state->fin = false;
state->parent = NULL;
state->children = NIL;
state->number = -1;
trgmNFA->queue = lappend(trgmNFA->queue, state);
}
return state;
}
Definition at line 1710 of file trgm_regexp.c.
References Assert, TrgmState::children, TrgmState::fin, TrgmState::init, lappend(), lfirst, list_concat(), and TrgmState::parent.
Referenced by selectColorTrigrams().
{
ListCell *cell;
Assert(state1 != state2);
Assert(!state1->parent);
Assert(!state2->parent);
/* state1 absorbs state2's init/fin flags */
state1->init |= state2->init;
state1->fin |= state2->fin;
/* state2, and all its children, become children of state1 */
foreach(cell, state2->children)
{
TrgmState *state = (TrgmState *) lfirst(cell);
state->parent = state1;
}
state2->parent = state1;
state1->children = list_concat(state1->children, state2->children);
state1->children = lappend(state1->children, state2);
state2->children = NIL;
}
| static int packArcInfoCmp | ( | const void * | a1, | |
| const void * | a2 | |||
| ) | [static] |
Definition at line 1932 of file trgm_regexp.c.
References TrgmPackArcInfo::colorTrgm, TrgmPackArcInfo::sourceState, and TrgmPackArcInfo::targetState.
Referenced by packGraph().
{
const TrgmPackArcInfo *p1 = (const TrgmPackArcInfo *) a1;
const TrgmPackArcInfo *p2 = (const TrgmPackArcInfo *) a2;
if (p1->sourceState < p2->sourceState)
return -1;
if (p1->sourceState > p2->sourceState)
return 1;
if (p1->colorTrgm < p2->colorTrgm)
return -1;
if (p1->colorTrgm > p2->colorTrgm)
return 1;
if (p1->targetState < p2->targetState)
return -1;
if (p1->targetState > p2->targetState)
return 1;
return 0;
}
| static TrgmPackedGraph * packGraph | ( | TrgmNFA * | trgmNFA, | |
| MemoryContext | rcontext | |||
| ) | [static] |
Definition at line 1777 of file trgm_regexp.c.
References TrgmPackedState::arcs, TrgmState::arcs, TrgmPackedState::arcsCount, TrgmNFA::arcsCount, Assert, TrgmPackedArc::colorTrgm, TrgmPackArcInfo::colorTrgm, TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, ColorTrgmInfo::count, TrgmArc::ctrgm, ColorTrgmInfo::expanded, TrgmState::fin, hash_seq_init(), hash_seq_search(), i, TrgmState::init, lfirst, MemoryContextAlloc(), NULL, ColorTrgmInfo::number, TrgmState::number, packArcInfoCmp(), palloc(), TrgmState::parent, qsort, TrgmPackArcInfo::sourceState, TrgmPackedGraph::states, TrgmNFA::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, TrgmArc::target, TrgmPackedArc::targetState, and TrgmPackArcInfo::targetState.
Referenced by createTrgmNFAInternal().
{
int number = 2,
arcIndex,
arcsCount;
HASH_SEQ_STATUS scan_status;
TrgmState *state;
TrgmPackArcInfo *arcs,
*p1,
*p2;
TrgmPackedArc *packedArcs;
TrgmPackedGraph *result;
int i,
j;
/* Enumerate surviving states, giving init and fin reserved numbers */
hash_seq_init(&scan_status, trgmNFA->states);
while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
{
while (state->parent)
state = state->parent;
if (state->number < 0)
{
if (state->init)
state->number = 0;
else if (state->fin)
state->number = 1;
else
{
state->number = number;
number++;
}
}
}
/* Collect array of all arcs */
arcs = (TrgmPackArcInfo *)
palloc(sizeof(TrgmPackArcInfo) * trgmNFA->arcsCount);
arcIndex = 0;
hash_seq_init(&scan_status, trgmNFA->states);
while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
{
TrgmState *source = state;
ListCell *cell;
while (source->parent)
source = source->parent;
foreach(cell, state->arcs)
{
TrgmArc *arc = (TrgmArc *) lfirst(cell);
TrgmState *target = arc->target;
while (target->parent)
target = target->parent;
if (source->number != target->number)
{
ColorTrgmInfo *ctrgm;
ctrgm = (ColorTrgmInfo *) bsearch(&arc->ctrgm,
trgmNFA->colorTrgms,
trgmNFA->colorTrgmsCount,
sizeof(ColorTrgmInfo),
colorTrgmInfoCmp);
Assert(ctrgm != NULL);
Assert(ctrgm->expanded);
arcs[arcIndex].sourceState = source->number;
arcs[arcIndex].targetState = target->number;
arcs[arcIndex].colorTrgm = ctrgm->number;
arcIndex++;
}
}
}
/* Sort arcs to ease duplicate detection */
qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
/* We could have duplicates because states were merged. Remove them. */
/* p1 is probe point, p2 is last known non-duplicate. */
p2 = arcs;
for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
{
if (packArcInfoCmp(p1, p2) > 0)
{
p2++;
*p2 = *p1;
}
}
arcsCount = (p2 - arcs) + 1;
/* Create packed representation */
result = (TrgmPackedGraph *)
MemoryContextAlloc(rcontext, sizeof(TrgmPackedGraph));
/* Pack color trigrams information */
result->colorTrigramsCount = 0;
for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
{
if (trgmNFA->colorTrgms[i].expanded)
result->colorTrigramsCount++;
}
result->colorTrigramGroups = (int *)
MemoryContextAlloc(rcontext, sizeof(int) * result->colorTrigramsCount);
j = 0;
for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
{
if (trgmNFA->colorTrgms[i].expanded)
{
result->colorTrigramGroups[j] = trgmNFA->colorTrgms[i].count;
j++;
}
}
/* Pack states and arcs information */
result->statesCount = number;
result->states = (TrgmPackedState *)
MemoryContextAlloc(rcontext, number * sizeof(TrgmPackedState));
packedArcs = (TrgmPackedArc *)
MemoryContextAlloc(rcontext, arcsCount * sizeof(TrgmPackedArc));
j = 0;
for (i = 0; i < number; i++)
{
int cnt = 0;
result->states[i].arcs = &packedArcs[j];
while (j < arcsCount && arcs[j].sourceState == i)
{
packedArcs[j].targetState = arcs[j].targetState;
packedArcs[j].colorTrgm = arcs[j].colorTrgm;
cnt++;
j++;
}
result->states[i].arcsCount = cnt;
}
/* Allocate working memory for trigramsMatchGraph() */
result->colorTrigramsActive = (bool *)
MemoryContextAlloc(rcontext, sizeof(bool) * result->colorTrigramsCount);
result->statesActive = (bool *)
MemoryContextAlloc(rcontext, sizeof(bool) * result->statesCount);
result->statesQueue = (int *)
MemoryContextAlloc(rcontext, sizeof(int) * result->statesCount);
return result;
}
| static bool prefixContains | ( | TrgmPrefix * | prefix1, | |
| TrgmPrefix * | prefix2 | |||
| ) | [static] |
Definition at line 1377 of file trgm_regexp.c.
References COLOR_UNKNOWN, and TrgmPrefix::colors.
Referenced by addArc(), and addKey().
{
if (prefix1->colors[1] == COLOR_UNKNOWN)
{
/* Fully ambiguous prefix contains everything */
return true;
}
else if (prefix1->colors[0] == COLOR_UNKNOWN)
{
/*
* Prefix with only first unknown color contains every prefix with
* same second color.
*/
if (prefix1->colors[1] == prefix2->colors[1])
return true;
else
return false;
}
else
{
/* Exact prefix contains only the exact same prefix */
if (prefix1->colors[0] == prefix2->colors[0] &&
prefix1->colors[1] == prefix2->colors[1])
return true;
else
return false;
}
}
Definition at line 932 of file trgm_regexp.c.
References addArcs(), addKey(), TrgmState::fin, TrgmNFA::keysQueue, linitial, list_delete_first(), NIL, and TrgmState::stateKey.
Referenced by transformGraph().
{
/* keysQueue should be NIL already, but make sure */
trgmNFA->keysQueue = NIL;
/*
* Add state's own key, and then process all keys added to keysQueue until
* queue is empty. But we can quit if the state gets marked final.
*/
addKey(trgmNFA, state, &state->stateKey);
while (trgmNFA->keysQueue != NIL && !state->fin)
{
TrgmStateKey *key = (TrgmStateKey *) linitial(trgmNFA->keysQueue);
trgmNFA->keysQueue = list_delete_first(trgmNFA->keysQueue);
addKey(trgmNFA, state, key);
}
/*
* Add outgoing arcs only if state isn't final (we have no interest in
* outgoing arcs if we already match)
*/
if (!state->fin)
addArcs(trgmNFA, state);
}
Definition at line 697 of file trgm_regexp.c.
References ereport, errcode(), errmsg(), ERROR, palloc(), pfree(), pg_mb2wchar_with_len(), pg_regcomp(), pg_regerror(), REG_OKAY, VARDATA_ANY, and VARSIZE_ANY_EXHDR.
Referenced by createTrgmNFA().
{
int text_re_len = VARSIZE_ANY_EXHDR(text_re);
char *text_re_val = VARDATA_ANY(text_re);
pg_wchar *pattern;
int pattern_len;
int regcomp_result;
char errMsg[100];
/* Convert pattern string to wide characters */
pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
pattern_len = pg_mb2wchar_with_len(text_re_val,
pattern,
text_re_len);
/* Compile regex */
regcomp_result = pg_regcomp(regex,
pattern,
pattern_len,
cflags,
collation);
pfree(pattern);
if (regcomp_result != REG_OKAY)
{
/* re didn't compile (no need for pg_regfree, if so) */
pg_regerror(regcomp_result, regex, errMsg, sizeof(errMsg));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("invalid regular expression: %s", errMsg)));
}
}
Definition at line 1419 of file trgm_regexp.c.
References ColorTrgmInfo::arcs, TrgmState::arcs, TrgmNFA::arcsCount, Assert, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, colorTrgmInfoCmp(), colorTrgmInfoCountCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::count, TrgmArc::ctrgm, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, TrgmState::fin, hash_seq_init(), hash_seq_search(), i, TrgmState::init, lfirst, list_concat(), list_make1, MAX_TRGM_COUNT, mergeStates(), NULL, ColorTrgmInfo::number, palloc(), TrgmState::parent, qsort, TrgmArcInfo::source, TrgmNFA::states, TrgmArc::target, TrgmArcInfo::target, TrgmNFA::totalTrgmCount, and TrgmColorInfo::wordCharsCount.
Referenced by createTrgmNFAInternal().
{
HASH_SEQ_STATUS scan_status;
int arcsCount = trgmNFA->arcsCount,
i;
TrgmState *state;
ColorTrgmInfo *colorTrgms;
int64 totalTrgmCount;
int number;
/* Collect color trigrams from all arcs */
colorTrgms = (ColorTrgmInfo *) palloc(sizeof(ColorTrgmInfo) * arcsCount);
trgmNFA->colorTrgms = colorTrgms;
i = 0;
hash_seq_init(&scan_status, trgmNFA->states);
while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
{
ListCell *cell;
foreach(cell, state->arcs)
{
TrgmArc *arc = (TrgmArc *) lfirst(cell);
TrgmArcInfo *arcInfo = (TrgmArcInfo *) palloc(sizeof(TrgmArcInfo));
arcInfo->source = state;
arcInfo->target = arc->target;
colorTrgms[i].arcs = list_make1(arcInfo);
colorTrgms[i].expanded = true;
colorTrgms[i].ctrgm = arc->ctrgm;
i++;
}
}
Assert(i == arcsCount);
/* Remove duplicates, merging their arcs lists */
if (arcsCount >= 2)
{
ColorTrgmInfo *p1,
*p2;
/* Sort trigrams to ease duplicate detection */
qsort(colorTrgms, arcsCount, sizeof(ColorTrgmInfo), colorTrgmInfoCmp);
/* p1 is probe point, p2 is last known non-duplicate. */
p2 = colorTrgms;
for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
{
if (colorTrgmInfoCmp(p1, p2) > 0)
{
p2++;
*p2 = *p1;
}
else
{
p2->arcs = list_concat(p2->arcs, p1->arcs);
}
}
trgmNFA->colorTrgmsCount = (p2 - colorTrgms) + 1;
}
else
{
trgmNFA->colorTrgmsCount = arcsCount;
}
/*
* Count number of simple trigrams generated by each color trigram.
*
* Note: per-color-trigram counts cannot overflow an int so long as
* COLOR_COUNT_LIMIT is not more than the cube root of INT_MAX, ie about
* 1290. However, the grand total totalTrgmCount might conceivably
* overflow an int, so we use int64 for that within this routine.
*/
totalTrgmCount = 0;
for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
{
ColorTrgmInfo *trgmInfo = &colorTrgms[i];
int j,
count = 1;
for (j = 0; j < 3; j++)
{
TrgmColor c = trgmInfo->ctrgm.colors[j];
if (c != COLOR_BLANK)
count *= trgmNFA->colorInfo[c].wordCharsCount;
}
trgmInfo->count = count;
totalTrgmCount += count;
}
/* Sort color trigrams in descending order of simple trigram counts */
qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
colorTrgmInfoCountCmp);
/*
* Remove color trigrams from the graph so long as total number of simple
* trigrams exceeds MAX_TRGM_COUNT. We prefer to remove color trigrams
* with the most associated simple trigrams, since those are the most
* promising for reducing the total number of simple trigrams. When
* removing a color trigram we have to merge states connected by arcs
* labeled with that trigram. It's necessary to not merge initial and
* final states, because our graph becomes useless if that happens; so we
* cannot always remove the trigram we'd prefer to.
*/
for (i = 0;
(i < trgmNFA->colorTrgmsCount) && (totalTrgmCount > MAX_TRGM_COUNT);
i++)
{
ColorTrgmInfo *trgmInfo = &colorTrgms[i];
bool canRemove = true;
ListCell *cell;
/*
* Does any arc of this color trigram connect initial and final
* states? If so we can't remove it.
*/
foreach(cell, trgmInfo->arcs)
{
TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
TrgmState *source = arcInfo->source,
*target = arcInfo->target;
/* examine parent states, if any merging has already happened */
while (source->parent)
source = source->parent;
while (target->parent)
target = target->parent;
if ((source->init || target->init) &&
(source->fin || target->fin))
{
canRemove = false;
break;
}
}
if (!canRemove)
continue;
/* OK, merge states linked by each arc labeled by the trigram */
foreach(cell, trgmInfo->arcs)
{
TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
TrgmState *source = arcInfo->source,
*target = arcInfo->target;
while (source->parent)
source = source->parent;
while (target->parent)
target = target->parent;
if (source != target)
mergeStates(source, target);
}
/* Mark trigram unexpanded, and update totalTrgmCount */
trgmInfo->expanded = false;
totalTrgmCount -= trgmInfo->count;
}
/* Did we succeed in fitting into MAX_TRGM_COUNT? */
if (totalTrgmCount > MAX_TRGM_COUNT)
return false;
trgmNFA->totalTrgmCount = (int) totalTrgmCount;
/*
* Sort color trigrams by colors (will be useful for bsearch in packGraph)
* and enumerate the color trigrams that are expanded.
*/
number = 0;
qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
colorTrgmInfoCmp);
for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
{
if (colorTrgms[i].expanded)
{
colorTrgms[i].number = number;
number++;
}
}
return true;
}
| static void transformGraph | ( | TrgmNFA * | trgmNFA | ) | [static] |
Definition at line 870 of file trgm_regexp.c.
References TrgmNFA::arcsCount, TrgmPrefix::colors, CurrentMemoryContext, HASHCTL::entrysize, TrgmState::fin, HASHCTL::hash, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_FUNCTION, hash_get_num_entries(), HASHCTL::hcxt, TrgmState::init, TrgmNFA::initState, HASHCTL::keysize, TrgmNFA::keysQueue, linitial, list_delete_first(), MAX_EXPANDED_ARCS, MAX_EXPANDED_STATES, MemSet, NIL, TrgmStateKey::nstate, TrgmNFA::overflowed, pg_reg_getinitialstate(), TrgmStateKey::prefix, processState(), TrgmNFA::queue, TrgmNFA::regex, and TrgmNFA::states.
Referenced by createTrgmNFAInternal().
{
HASHCTL hashCtl;
TrgmStateKey initkey;
TrgmState *initstate;
/* Initialize this stage's workspace in trgmNFA struct */
trgmNFA->queue = NIL;
trgmNFA->keysQueue = NIL;
trgmNFA->arcsCount = 0;
trgmNFA->overflowed = false;
/* Create hashtable for states */
hashCtl.keysize = sizeof(TrgmStateKey);
hashCtl.entrysize = sizeof(TrgmState);
hashCtl.hcxt = CurrentMemoryContext;
hashCtl.hash = tag_hash;
trgmNFA->states = hash_create("Trigram NFA",
1024,
&hashCtl,
HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION);
/* Create initial state: ambiguous prefix, NFA's initial state */
MemSet(&initkey, 0, sizeof(initkey));
initkey.prefix.colors[0] = COLOR_UNKNOWN;
initkey.prefix.colors[1] = COLOR_UNKNOWN;
initkey.nstate = pg_reg_getinitialstate(trgmNFA->regex);
initstate = getState(trgmNFA, &initkey);
initstate->init = true;
trgmNFA->initState = initstate;
/*
* Recursively build the expanded graph by processing queue of states
* (breadth-first search). getState already put initstate in the queue.
*/
while (trgmNFA->queue != NIL)
{
TrgmState *state = (TrgmState *) linitial(trgmNFA->queue);
trgmNFA->queue = list_delete_first(trgmNFA->queue);
/*
* If we overflowed then just mark state as final. Otherwise do
* actual processing.
*/
if (trgmNFA->overflowed)
state->fin = true;
else
processState(trgmNFA, state);
/* Did we overflow? */
if (trgmNFA->arcsCount > MAX_EXPANDED_ARCS ||
hash_get_num_entries(trgmNFA->states) > MAX_EXPANDED_STATES)
trgmNFA->overflowed = true;
}
}
| bool trigramsMatchGraph | ( | TrgmPackedGraph * | graph, | |
| bool * | check | |||
| ) |
Definition at line 604 of file trgm_regexp.c.
References TrgmPackedState::arcs, TrgmPackedState::arcsCount, TrgmPackedArc::colorTrgm, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, i, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, and TrgmPackedArc::targetState.
Referenced by gin_trgm_consistent(), and gtrgm_consistent().
{
int i,
j,
k,
queueIn,
queueOut;
/*
* Reset temporary working areas.
*/
memset(graph->colorTrigramsActive, 0,
sizeof(bool) * graph->colorTrigramsCount);
memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);
/*
* Check which color trigrams were matched. A match for any simple
* trigram associated with a color trigram counts as a match of the color
* trigram.
*/
j = 0;
for (i = 0; i < graph->colorTrigramsCount; i++)
{
int cnt = graph->colorTrigramGroups[i];
for (k = j; k < j + cnt; k++)
{
if (check[k])
{
/*
* Found one matched trigram in the group. Can skip the rest
* of them and go to the next group.
*/
graph->colorTrigramsActive[i] = true;
break;
}
}
j = j + cnt;
}
/*
* Initialize the statesQueue to hold just the initial state. Note:
* statesQueue has room for statesCount entries, which is certainly enough
* since no state will be put in the queue more than once. The
* statesActive array marks which states have been queued.
*/
graph->statesActive[0] = true;
graph->statesQueue[0] = 0;
queueIn = 0;
queueOut = 1;
/* Process queued states as long as there are any. */
while (queueIn < queueOut)
{
int stateno = graph->statesQueue[queueIn++];
TrgmPackedState *state = &graph->states[stateno];
int cnt = state->arcsCount;
/* Loop over state's out-arcs */
for (i = 0; i < cnt; i++)
{
TrgmPackedArc *arc = &state->arcs[i];
/*
* If corresponding color trigram is present then activate the
* corresponding state. We're done if that's the final state,
* otherwise queue the state if it's not been queued already.
*/
if (graph->colorTrigramsActive[arc->colorTrgm])
{
int nextstate = arc->targetState;
if (nextstate == 1)
return true; /* success: final state is reachable */
if (!graph->statesActive[nextstate])
{
graph->statesActive[nextstate] = true;
graph->statesQueue[queueOut++] = nextstate;
}
}
}
}
/* Queue is empty, so match fails. */
return false;
}
| static bool validArcLabel | ( | TrgmStateKey * | key, | |
| TrgmColor | co | |||
| ) | [static] |
Definition at line 1292 of file trgm_regexp.c.
References Assert, COLOR_BLANK, COLOR_UNKNOWN, TrgmPrefix::colors, and TrgmStateKey::prefix.
Referenced by addArc(), and addKey().
{
/*
* We have to know full trigram in order to add outgoing arc. So we can't
* do it if prefix is ambiguous.
*/
if (key->prefix.colors[0] == COLOR_UNKNOWN)
return false;
/* If key->prefix.colors[0] isn't unknown, its second color isn't either */
Assert(key->prefix.colors[1] != COLOR_UNKNOWN);
/* And we should not be called with an unknown arc color anytime */
Assert(co != COLOR_UNKNOWN);
/*
* We don't bother with making arcs representing three non-word
* characters, since that's useless for trigram extraction.
*/
if (key->prefix.colors[0] == COLOR_BLANK &&
key->prefix.colors[1] == COLOR_BLANK &&
co == COLOR_BLANK)
return false;
/*
* We also reject nonblank-blank-anything. The nonblank-blank-nonblank
* case doesn't correspond to any trigram the trigram extraction code
* would make. The nonblank-blank-blank case is also not possible with
* RPADDING = 1. (Note that in many cases we'd fail to generate such a
* trigram even if it were valid, for example processing "foo bar" will
* not result in considering the trigram "o ". So if you want to support
* RPADDING = 2, there's more to do than just twiddle this test.)
*/
if (key->prefix.colors[0] != COLOR_BLANK &&
key->prefix.colors[1] == COLOR_BLANK)
return false;
/*
* Other combinations involving blank are valid, in particular we assume
* blank-blank-nonblank is valid, which presumes that LPADDING is 2.
*
* Note: Using again the example "foo bar", we will not consider the
* trigram " b", though this trigram would be found by the trigram
* extraction code. Since we will find " ba", it doesn't seem worth
* trying to hack the algorithm to generate the additional trigram.
*/
/* arc label is valid */
return true;
}
1.7.1