#include "access/gist.h"#include "access/itup.h"#include "storage/bufpage.h"

Go to the source code of this file.
Data Structures | |
| struct | TRGM |
Defines | |
| #define | LPADDING 2 |
| #define | RPADDING 1 |
| #define | KEEPONLYALNUM |
| #define | IGNORECASE |
| #define | DIVUNION |
| #define | SimilarityStrategyNumber 1 |
| #define | DistanceStrategyNumber 2 |
| #define | LikeStrategyNumber 3 |
| #define | ILikeStrategyNumber 4 |
| #define | RegExpStrategyNumber 5 |
| #define | RegExpICaseStrategyNumber 6 |
| #define | CMPCHAR(a, b) ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) ) |
| #define | CMPPCHAR(a, b, i) CMPCHAR( *(((const char*)(a))+i), *(((const char*)(b))+i) ) |
| #define | CMPTRGM(a, b) ( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : ( CMPPCHAR(a,b,1) ? CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) ) |
| #define | CPTRGM(a, b) |
| #define | ISWORDCHR(c) (t_isalpha(c) || t_isdigit(c)) |
| #define | ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') ) |
| #define | ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) ) |
| #define | ISESCAPECHAR(x) (*(x) == '\\') |
| #define | ISWILDCARDCHAR(x) (*(x) == '_' || *(x) == '%') |
| #define | TRGMHDRSIZE (VARHDRSZ + sizeof(uint8)) |
| #define | BITBYTE 8 |
| #define | SIGLENINT 3 |
| #define | SIGLEN ( sizeof(int)*SIGLENINT ) |
| #define | SIGLENBIT (SIGLEN*BITBYTE - 1) |
| #define | LOOPBYTE for(i=0;i<SIGLEN;i++) |
| #define | GETBYTE(x, i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) ) |
| #define | GETBITBYTE(x, i) ( (((char)(x)) >> (i)) & 0x01 ) |
| #define | CLRBIT(x, i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) |
| #define | SETBIT(x, i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) ) |
| #define | GETBIT(x, i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 ) |
| #define | HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT) |
| #define | HASH(sign, val) SETBIT((sign), HASHVAL(val)) |
| #define | ARRKEY 0x01 |
| #define | SIGNKEY 0x02 |
| #define | ALLISTRUE 0x04 |
| #define | ISARRKEY(x) ( ((TRGM*)x)->flag & ARRKEY ) |
| #define | ISSIGNKEY(x) ( ((TRGM*)x)->flag & SIGNKEY ) |
| #define | ISALLTRUE(x) ( ((TRGM*)x)->flag & ALLISTRUE ) |
| #define | CALCGTSIZE(flag, len) ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) ) |
| #define | GETSIGN(x) ( (BITVECP)( (char*)x+TRGMHDRSIZE ) ) |
| #define | GETARR(x) ( (trgm*)( (char*)x+TRGMHDRSIZE ) ) |
| #define | ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) ) |
Typedefs | |
| typedef char | trgm [3] |
| typedef char | BITVEC [SIGLEN] |
| typedef char * | BITVECP |
| typedef struct TrgmPackedGraph | TrgmPackedGraph |
Functions | |
| uint32 | trgm2int (trgm *ptr) |
| void | compact_trigram (trgm *tptr, char *str, int bytelen) |
| TRGM * | generate_trgm (char *str, int slen) |
| TRGM * | generate_wildcard_trgm (const char *str, int slen) |
| float4 | cnt_sml (TRGM *trg1, TRGM *trg2) |
| bool | trgm_contained_by (TRGM *trg1, TRGM *trg2) |
| bool * | trgm_presence_map (TRGM *query, TRGM *key) |
| TRGM * | createTrgmNFA (text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext) |
| bool | trigramsMatchGraph (TrgmPackedGraph *graph, bool *check) |
Variables | |
| float4 | trgm_limit |
| #define ARRKEY 0x01 |
Definition at line 93 of file trgm.h.
Referenced by expandColorTrigrams(), generate_trgm(), and generate_wildcard_trgm().
| #define ARRNELEM | ( | x | ) | ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) ) |
Definition at line 104 of file trgm.h.
Referenced by cnt_sml(), cnt_sml_sign_common(), gin_extract_query_trgm(), gin_extract_value_trgm(), gtrgm_consistent(), gtrgm_distance(), gtrgm_same(), makesign(), show_trgm(), trgm_contained_by(), trgm_presence_map(), and unionkey().
| #define CALCGTSIZE | ( | flag, | ||
| len | ||||
| ) | ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) ) |
| #define CMPCHAR | ( | a, | ||
| b | ||||
| ) | ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) ) |
| #define CMPTRGM | ( | a, | ||
| b | ||||
| ) | ( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : ( CMPPCHAR(a,b,1) ? CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) ) |
Definition at line 41 of file trgm.h.
Referenced by cnt_sml(), comp_trgm(), gtrgm_same(), trgm_contained_by(), trgm_presence_map(), and unique_array().
| #define CPTRGM | ( | a, | ||
| b | ||||
| ) |
do { \ *(((char*)(a))+0) = *(((char*)(b))+0); \ *(((char*)(a))+1) = *(((char*)(b))+1); \ *(((char*)(a))+2) = *(((char*)(b))+2); \ } while(0);
Definition at line 43 of file trgm.h.
Referenced by cnt_sml_sign_common(), compact_trigram(), gtrgm_consistent(), make_trigrams(), makesign(), show_trgm(), unionkey(), and unique_array().
| #define DistanceStrategyNumber 2 |
Definition at line 30 of file trgm.h.
Referenced by gtrgm_distance().
| #define GETARR | ( | x | ) | ( (trgm*)( (char*)x+TRGMHDRSIZE ) ) |
Definition at line 103 of file trgm.h.
Referenced by cnt_sml(), cnt_sml_sign_common(), expandColorTrigrams(), generate_trgm(), generate_wildcard_trgm(), gin_extract_query_trgm(), gin_extract_value_trgm(), gtrgm_consistent(), gtrgm_same(), makesign(), show_trgm(), trgm_contained_by(), trgm_presence_map(), and unionkey().
| #define ILikeStrategyNumber 4 |
Definition at line 32 of file trgm.h.
Referenced by gin_extract_query_trgm(), gin_trgm_consistent(), and gtrgm_consistent().
| #define ISARRKEY | ( | x | ) | ( ((TRGM*)x)->flag & ARRKEY ) |
Definition at line 97 of file trgm.h.
Referenced by fillcache(), and gtrgm_penalty().
| #define ISESCAPECHAR | ( | x | ) | (*(x) == '\\') |
Definition at line 58 of file trgm.h.
Referenced by get_wildcard_part().
| #define ISPRINTABLECHAR | ( | a | ) | ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') ) |
| #define ISPRINTABLETRGM | ( | t | ) | ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) ) |
Definition at line 56 of file trgm.h.
Referenced by show_trgm().
| #define ISSIGNKEY | ( | x | ) | ( ((TRGM*)x)->flag & SIGNKEY ) |
Definition at line 98 of file trgm.h.
Referenced by gtrgm_compress(), gtrgm_same(), and unionkey().
| #define ISWILDCARDCHAR | ( | x | ) | (*(x) == '_' || *(x) == '%') |
Definition at line 59 of file trgm.h.
Referenced by get_wildcard_part().
Definition at line 50 of file trgm.h.
Referenced by find_word(), get_wildcard_part(), and getColorInfo().
| #define LikeStrategyNumber 3 |
Definition at line 31 of file trgm.h.
Referenced by gin_extract_query_trgm(), gin_trgm_consistent(), and gtrgm_consistent().
| #define LPADDING 2 |
Definition at line 15 of file trgm.h.
Referenced by generate_trgm(), generate_wildcard_trgm(), and get_wildcard_part().
| #define RegExpICaseStrategyNumber 6 |
Definition at line 34 of file trgm.h.
Referenced by gin_extract_query_trgm(), gin_trgm_consistent(), and gtrgm_consistent().
| #define RegExpStrategyNumber 5 |
Definition at line 33 of file trgm.h.
Referenced by gin_extract_query_trgm(), gin_trgm_consistent(), and gtrgm_consistent().
| #define RPADDING 1 |
Definition at line 16 of file trgm.h.
Referenced by generate_trgm(), generate_wildcard_trgm(), and get_wildcard_part().
| #define SIGNKEY 0x02 |
Definition at line 94 of file trgm.h.
Referenced by gtrgm_compress(), and gtrgm_picksplit().
| #define SimilarityStrategyNumber 1 |
Definition at line 29 of file trgm.h.
Referenced by gin_extract_query_trgm(), gin_trgm_consistent(), and gtrgm_consistent().
| #define TRGMHDRSIZE (VARHDRSZ + sizeof(uint8)) |
Definition at line 69 of file trgm.h.
Referenced by expandColorTrigrams(), generate_trgm(), and generate_wildcard_trgm().
| typedef struct TrgmPackedGraph TrgmPackedGraph |
Definition at line 539 of file trgm_op.c.
References ARRNELEM, CMPTRGM, and GETARR.
Referenced by gtrgm_consistent(), gtrgm_distance(), and similarity().
{
trgm *ptr1,
*ptr2;
int count = 0;
int len1,
len2;
ptr1 = GETARR(trg1);
ptr2 = GETARR(trg2);
len1 = ARRNELEM(trg1);
len2 = ARRNELEM(trg2);
/* explicit test is needed to avoid 0/0 division when both lengths are 0 */
if (len1 <= 0 || len2 <= 0)
return (float4) 0.0;
while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
{
int res = CMPTRGM(ptr1, ptr2);
if (res < 0)
ptr1++;
else if (res > 0)
ptr2++;
else
{
ptr1++;
ptr2++;
count++;
}
}
#ifdef DIVUNION
return ((float4) count) / ((float4) (len1 + len2 - count));
#else
return ((float4) count) / ((float4) ((len1 > len2) ? len1 : len2));
#endif
}
| void compact_trigram | ( | trgm * | tptr, | |
| char * | str, | |||
| int | bytelen | |||
| ) |
Definition at line 112 of file trgm_op.c.
References COMP_CRC32, CPTRGM, FIN_CRC32, and INIT_CRC32.
Referenced by fillTrgm(), and make_trigrams().
{
if (bytelen == 3)
{
CPTRGM(tptr, str);
}
else
{
pg_crc32 crc;
INIT_CRC32(crc);
COMP_CRC32(crc, str, bytelen);
FIN_CRC32(crc);
/*
* use only 3 upper bytes from crc, hope, it's good enough hashing
*/
CPTRGM(tptr, &crc);
}
}
| TRGM* createTrgmNFA | ( | text * | text_re, | |
| Oid | collation, | |||
| TrgmPackedGraph ** | graph, | |||
| MemoryContext | rcontext | |||
| ) |
Definition at line 484 of file trgm_regexp.c.
References ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, ALLOCSET_DEFAULT_MINSIZE, AllocSetContextCreate(), createTrgmNFAInternal(), CurrentMemoryContext, MemoryContextDelete(), MemoryContextSwitchTo(), PG_CATCH, PG_END_TRY, PG_RE_THROW, pg_regfree(), PG_TRY, RE_compile(), REG_ADVANCED, and REG_ICASE.
Referenced by gin_extract_query_trgm(), and gtrgm_consistent().
{
TRGM *trg;
regex_t regex;
MemoryContext tmpcontext;
MemoryContext oldcontext;
/*
* This processing generates a great deal of cruft, which we'd like to
* clean up before returning (since this function may be called in a
* query-lifespan memory context). Make a temp context we can work in so
* that cleanup is easy.
*/
tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
"createTrgmNFA temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldcontext = MemoryContextSwitchTo(tmpcontext);
/*
* Stage 1: Compile the regexp into a NFA, using the regexp library.
*/
#ifdef IGNORECASE
RE_compile(®ex, text_re, REG_ADVANCED | REG_ICASE, collation);
#else
RE_compile(®ex, text_re, REG_ADVANCED, collation);
#endif
/*
* Since the regexp library allocates its internal data structures with
* malloc, we need to use a PG_TRY block to ensure that pg_regfree() gets
* done even if there's an error.
*/
PG_TRY();
{
trg = createTrgmNFAInternal(®ex, graph, rcontext);
}
PG_CATCH();
{
pg_regfree(®ex);
PG_RE_THROW();
}
PG_END_TRY();
pg_regfree(®ex);
/* Clean up all the cruft we created */
MemoryContextSwitchTo(oldcontext);
MemoryContextDelete(tmpcontext);
return trg;
}
| TRGM* generate_trgm | ( | char * | str, | |
| int | slen | |||
| ) |
Definition at line 180 of file trgm_op.c.
References ARRKEY, buf, CALCGTSIZE, comp_trgm(), find_word(), TRGM::flag, GETARR, lowerstr_with_len(), LPADDING, make_trigrams(), palloc(), pfree(), qsort, RPADDING, SET_VARSIZE, TRGMHDRSIZE, and unique_array().
Referenced by gin_extract_query_trgm(), gin_extract_value_trgm(), gtrgm_compress(), gtrgm_consistent(), gtrgm_distance(), show_trgm(), and similarity().
{
TRGM *trg;
char *buf;
trgm *tptr;
int len,
charlen,
bytelen;
char *bword,
*eword;
trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3);
trg->flag = ARRKEY;
SET_VARSIZE(trg, TRGMHDRSIZE);
if (slen + LPADDING + RPADDING < 3 || slen == 0)
return trg;
tptr = GETARR(trg);
buf = palloc(sizeof(char) * (slen + 4));
if (LPADDING > 0)
{
*buf = ' ';
if (LPADDING > 1)
*(buf + 1) = ' ';
}
eword = str;
while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL)
{
#ifdef IGNORECASE
bword = lowerstr_with_len(bword, eword - bword);
bytelen = strlen(bword);
#else
bytelen = eword - bword;
#endif
memcpy(buf + LPADDING, bword, bytelen);
#ifdef IGNORECASE
pfree(bword);
#endif
buf[LPADDING + bytelen] = ' ';
buf[LPADDING + bytelen + 1] = ' ';
/*
* count trigrams
*/
tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING,
charlen + LPADDING + RPADDING);
}
pfree(buf);
if ((len = tptr - GETARR(trg)) == 0)
return trg;
if (len > 0)
{
qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
len = unique_array(GETARR(trg), len);
}
SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
return trg;
}
| TRGM* generate_wildcard_trgm | ( | const char * | str, | |
| int | slen | |||
| ) |
Definition at line 411 of file trgm_op.c.
References ARRKEY, buf, CALCGTSIZE, comp_trgm(), TRGM::flag, get_wildcard_part(), GETARR, lowerstr_with_len(), LPADDING, make_trigrams(), NULL, palloc(), pfree(), qsort, RPADDING, SET_VARSIZE, TRGMHDRSIZE, and unique_array().
Referenced by gin_extract_query_trgm(), and gtrgm_consistent().
{
TRGM *trg;
char *buf,
*buf2;
trgm *tptr;
int len,
charlen,
bytelen;
const char *eword;
trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3);
trg->flag = ARRKEY;
SET_VARSIZE(trg, TRGMHDRSIZE);
if (slen + LPADDING + RPADDING < 3 || slen == 0)
return trg;
tptr = GETARR(trg);
buf = palloc(sizeof(char) * (slen + 4));
/*
* Extract trigrams from each substring extracted by get_wildcard_part.
*/
eword = str;
while ((eword = get_wildcard_part(eword, slen - (eword - str),
buf, &bytelen, &charlen)) != NULL)
{
#ifdef IGNORECASE
buf2 = lowerstr_with_len(buf, bytelen);
bytelen = strlen(buf2);
#else
buf2 = buf;
#endif
/*
* count trigrams
*/
tptr = make_trigrams(tptr, buf2, bytelen, charlen);
#ifdef IGNORECASE
pfree(buf2);
#endif
}
pfree(buf);
if ((len = tptr - GETARR(trg)) == 0)
return trg;
/*
* Make trigrams unique.
*/
if (len > 0)
{
qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
len = unique_array(GETARR(trg), len);
}
SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
return trg;
}
Definition at line 476 of file trgm_op.c.
References val.
Referenced by gin_extract_query_trgm(), gin_extract_value_trgm(), and show_trgm().
Definition at line 586 of file trgm_op.c.
References ARRNELEM, CMPTRGM, and GETARR.
Referenced by gtrgm_consistent().
{
trgm *ptr1,
*ptr2;
int len1,
len2;
ptr1 = GETARR(trg1);
ptr2 = GETARR(trg2);
len1 = ARRNELEM(trg1);
len2 = ARRNELEM(trg2);
while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
{
int res = CMPTRGM(ptr1, ptr2);
if (res < 0)
return false;
else if (res > 0)
ptr2++;
else
{
ptr1++;
ptr2++;
}
}
if (ptr1 - GETARR(trg1) < len1)
return false;
else
return true;
}
Definition at line 625 of file trgm_op.c.
References ARRNELEM, CMPTRGM, GETARR, i, and palloc0().
Referenced by gtrgm_consistent().
{
bool *result;
trgm *ptrq = GETARR(query),
*ptrk = GETARR(key);
int lenq = ARRNELEM(query),
lenk = ARRNELEM(key),
i;
result = (bool *) palloc0(lenq * sizeof(bool));
/* for each query trigram, do a binary search in the key array */
for (i = 0; i < lenq; i++)
{
int lo = 0;
int hi = lenk;
while (lo < hi)
{
int mid = (lo + hi) / 2;
int res = CMPTRGM(ptrq, ptrk + mid);
if (res < 0)
hi = mid;
else if (res > 0)
lo = mid + 1;
else
{
result[i] = true;
break;
}
}
ptrq++;
}
return result;
}
| bool trigramsMatchGraph | ( | TrgmPackedGraph * | graph, | |
| bool * | check | |||
| ) |
Definition at line 604 of file trgm_regexp.c.
References TrgmPackedState::arcs, TrgmPackedState::arcsCount, TrgmPackedArc::colorTrgm, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, i, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, and TrgmPackedArc::targetState.
Referenced by gin_trgm_consistent(), and gtrgm_consistent().
{
int i,
j,
k,
queueIn,
queueOut;
/*
* Reset temporary working areas.
*/
memset(graph->colorTrigramsActive, 0,
sizeof(bool) * graph->colorTrigramsCount);
memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);
/*
* Check which color trigrams were matched. A match for any simple
* trigram associated with a color trigram counts as a match of the color
* trigram.
*/
j = 0;
for (i = 0; i < graph->colorTrigramsCount; i++)
{
int cnt = graph->colorTrigramGroups[i];
for (k = j; k < j + cnt; k++)
{
if (check[k])
{
/*
* Found one matched trigram in the group. Can skip the rest
* of them and go to the next group.
*/
graph->colorTrigramsActive[i] = true;
break;
}
}
j = j + cnt;
}
/*
* Initialize the statesQueue to hold just the initial state. Note:
* statesQueue has room for statesCount entries, which is certainly enough
* since no state will be put in the queue more than once. The
* statesActive array marks which states have been queued.
*/
graph->statesActive[0] = true;
graph->statesQueue[0] = 0;
queueIn = 0;
queueOut = 1;
/* Process queued states as long as there are any. */
while (queueIn < queueOut)
{
int stateno = graph->statesQueue[queueIn++];
TrgmPackedState *state = &graph->states[stateno];
int cnt = state->arcsCount;
/* Loop over state's out-arcs */
for (i = 0; i < cnt; i++)
{
TrgmPackedArc *arc = &state->arcs[i];
/*
* If corresponding color trigram is present then activate the
* corresponding state. We're done if that's the final state,
* otherwise queue the state if it's not been queued already.
*/
if (graph->colorTrigramsActive[arc->colorTrgm])
{
int nextstate = arc->targetState;
if (nextstate == 1)
return true; /* success: final state is reachable */
if (!graph->statesActive[nextstate])
{
graph->statesActive[nextstate] = true;
graph->statesQueue[queueOut++] = nextstate;
}
}
}
}
/* Queue is empty, so match fails. */
return false;
}
Definition at line 16 of file trgm_op.c.
Referenced by gin_trgm_consistent(), gtrgm_consistent(), set_limit(), show_limit(), and similarity_op().
1.7.1