#include "access/gist.h"
#include "access/itup.h"
#include "storage/bufpage.h"
Go to the source code of this file.
Data Structures | |
struct | TRGM |
Defines | |
#define | LPADDING 2 |
#define | RPADDING 1 |
#define | KEEPONLYALNUM |
#define | IGNORECASE |
#define | DIVUNION |
#define | SimilarityStrategyNumber 1 |
#define | DistanceStrategyNumber 2 |
#define | LikeStrategyNumber 3 |
#define | ILikeStrategyNumber 4 |
#define | RegExpStrategyNumber 5 |
#define | RegExpICaseStrategyNumber 6 |
#define | CMPCHAR(a, b) ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) ) |
#define | CMPPCHAR(a, b, i) CMPCHAR( *(((const char*)(a))+i), *(((const char*)(b))+i) ) |
#define | CMPTRGM(a, b) ( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : ( CMPPCHAR(a,b,1) ? CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) ) |
#define | CPTRGM(a, b) |
#define | ISWORDCHR(c) (t_isalpha(c) || t_isdigit(c)) |
#define | ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') ) |
#define | ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) ) |
#define | ISESCAPECHAR(x) (*(x) == '\\') |
#define | ISWILDCARDCHAR(x) (*(x) == '_' || *(x) == '%') |
#define | TRGMHDRSIZE (VARHDRSZ + sizeof(uint8)) |
#define | BITBYTE 8 |
#define | SIGLENINT 3 |
#define | SIGLEN ( sizeof(int)*SIGLENINT ) |
#define | SIGLENBIT (SIGLEN*BITBYTE - 1) |
#define | LOOPBYTE for(i=0;i<SIGLEN;i++) |
#define | GETBYTE(x, i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) ) |
#define | GETBITBYTE(x, i) ( (((char)(x)) >> (i)) & 0x01 ) |
#define | CLRBIT(x, i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) |
#define | SETBIT(x, i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) ) |
#define | GETBIT(x, i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 ) |
#define | HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT) |
#define | HASH(sign, val) SETBIT((sign), HASHVAL(val)) |
#define | ARRKEY 0x01 |
#define | SIGNKEY 0x02 |
#define | ALLISTRUE 0x04 |
#define | ISARRKEY(x) ( ((TRGM*)x)->flag & ARRKEY ) |
#define | ISSIGNKEY(x) ( ((TRGM*)x)->flag & SIGNKEY ) |
#define | ISALLTRUE(x) ( ((TRGM*)x)->flag & ALLISTRUE ) |
#define | CALCGTSIZE(flag, len) ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) ) |
#define | GETSIGN(x) ( (BITVECP)( (char*)x+TRGMHDRSIZE ) ) |
#define | GETARR(x) ( (trgm*)( (char*)x+TRGMHDRSIZE ) ) |
#define | ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) ) |
Typedefs | |
typedef char | trgm [3] |
typedef char | BITVEC [SIGLEN] |
typedef char * | BITVECP |
typedef struct TrgmPackedGraph | TrgmPackedGraph |
Functions | |
uint32 | trgm2int (trgm *ptr) |
void | compact_trigram (trgm *tptr, char *str, int bytelen) |
TRGM * | generate_trgm (char *str, int slen) |
TRGM * | generate_wildcard_trgm (const char *str, int slen) |
float4 | cnt_sml (TRGM *trg1, TRGM *trg2) |
bool | trgm_contained_by (TRGM *trg1, TRGM *trg2) |
bool * | trgm_presence_map (TRGM *query, TRGM *key) |
TRGM * | createTrgmNFA (text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext) |
bool | trigramsMatchGraph (TrgmPackedGraph *graph, bool *check) |
Variables | |
float4 | trgm_limit |
#define ARRKEY 0x01 |
Definition at line 93 of file trgm.h.
Referenced by expandColorTrigrams(), generate_trgm(), and generate_wildcard_trgm().
#define ARRNELEM | ( | x | ) | ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) ) |
Definition at line 104 of file trgm.h.
Referenced by cnt_sml(), cnt_sml_sign_common(), gin_extract_query_trgm(), gin_extract_value_trgm(), gtrgm_consistent(), gtrgm_distance(), gtrgm_same(), makesign(), show_trgm(), trgm_contained_by(), trgm_presence_map(), and unionkey().
#define CALCGTSIZE | ( | flag, | ||
len | ||||
) | ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) ) |
#define CMPCHAR | ( | a, | ||
b | ||||
) | ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) ) |
#define CMPTRGM | ( | a, | ||
b | ||||
) | ( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : ( CMPPCHAR(a,b,1) ? CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) ) |
Definition at line 41 of file trgm.h.
Referenced by cnt_sml(), comp_trgm(), gtrgm_same(), trgm_contained_by(), trgm_presence_map(), and unique_array().
#define CPTRGM | ( | a, | ||
b | ||||
) |
do { \ *(((char*)(a))+0) = *(((char*)(b))+0); \ *(((char*)(a))+1) = *(((char*)(b))+1); \ *(((char*)(a))+2) = *(((char*)(b))+2); \ } while(0);
Definition at line 43 of file trgm.h.
Referenced by cnt_sml_sign_common(), compact_trigram(), gtrgm_consistent(), make_trigrams(), makesign(), show_trgm(), unionkey(), and unique_array().
#define DistanceStrategyNumber 2 |
Definition at line 30 of file trgm.h.
Referenced by gtrgm_distance().
#define GETARR | ( | x | ) | ( (trgm*)( (char*)x+TRGMHDRSIZE ) ) |
Definition at line 103 of file trgm.h.
Referenced by cnt_sml(), cnt_sml_sign_common(), expandColorTrigrams(), generate_trgm(), generate_wildcard_trgm(), gin_extract_query_trgm(), gin_extract_value_trgm(), gtrgm_consistent(), gtrgm_same(), makesign(), show_trgm(), trgm_contained_by(), trgm_presence_map(), and unionkey().
#define ILikeStrategyNumber 4 |
Definition at line 32 of file trgm.h.
Referenced by gin_extract_query_trgm(), gin_trgm_consistent(), and gtrgm_consistent().
#define ISARRKEY | ( | x | ) | ( ((TRGM*)x)->flag & ARRKEY ) |
Definition at line 97 of file trgm.h.
Referenced by fillcache(), and gtrgm_penalty().
#define ISESCAPECHAR | ( | x | ) | (*(x) == '\\') |
Definition at line 58 of file trgm.h.
Referenced by get_wildcard_part().
#define ISPRINTABLECHAR | ( | a | ) | ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') ) |
#define ISPRINTABLETRGM | ( | t | ) | ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) ) |
Definition at line 56 of file trgm.h.
Referenced by show_trgm().
#define ISSIGNKEY | ( | x | ) | ( ((TRGM*)x)->flag & SIGNKEY ) |
Definition at line 98 of file trgm.h.
Referenced by gtrgm_compress(), gtrgm_same(), and unionkey().
#define ISWILDCARDCHAR | ( | x | ) | (*(x) == '_' || *(x) == '%') |
Definition at line 59 of file trgm.h.
Referenced by get_wildcard_part().
Definition at line 50 of file trgm.h.
Referenced by find_word(), get_wildcard_part(), and getColorInfo().
#define LikeStrategyNumber 3 |
Definition at line 31 of file trgm.h.
Referenced by gin_extract_query_trgm(), gin_trgm_consistent(), and gtrgm_consistent().
#define LPADDING 2 |
Definition at line 15 of file trgm.h.
Referenced by generate_trgm(), generate_wildcard_trgm(), and get_wildcard_part().
#define RegExpICaseStrategyNumber 6 |
Definition at line 34 of file trgm.h.
Referenced by gin_extract_query_trgm(), gin_trgm_consistent(), and gtrgm_consistent().
#define RegExpStrategyNumber 5 |
Definition at line 33 of file trgm.h.
Referenced by gin_extract_query_trgm(), gin_trgm_consistent(), and gtrgm_consistent().
#define RPADDING 1 |
Definition at line 16 of file trgm.h.
Referenced by generate_trgm(), generate_wildcard_trgm(), and get_wildcard_part().
#define SIGNKEY 0x02 |
Definition at line 94 of file trgm.h.
Referenced by gtrgm_compress(), and gtrgm_picksplit().
#define SimilarityStrategyNumber 1 |
Definition at line 29 of file trgm.h.
Referenced by gin_extract_query_trgm(), gin_trgm_consistent(), and gtrgm_consistent().
#define TRGMHDRSIZE (VARHDRSZ + sizeof(uint8)) |
Definition at line 69 of file trgm.h.
Referenced by expandColorTrigrams(), generate_trgm(), and generate_wildcard_trgm().
typedef struct TrgmPackedGraph TrgmPackedGraph |
Definition at line 539 of file trgm_op.c.
References ARRNELEM, CMPTRGM, and GETARR.
Referenced by gtrgm_consistent(), gtrgm_distance(), and similarity().
{ trgm *ptr1, *ptr2; int count = 0; int len1, len2; ptr1 = GETARR(trg1); ptr2 = GETARR(trg2); len1 = ARRNELEM(trg1); len2 = ARRNELEM(trg2); /* explicit test is needed to avoid 0/0 division when both lengths are 0 */ if (len1 <= 0 || len2 <= 0) return (float4) 0.0; while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2) { int res = CMPTRGM(ptr1, ptr2); if (res < 0) ptr1++; else if (res > 0) ptr2++; else { ptr1++; ptr2++; count++; } } #ifdef DIVUNION return ((float4) count) / ((float4) (len1 + len2 - count)); #else return ((float4) count) / ((float4) ((len1 > len2) ? len1 : len2)); #endif }
void compact_trigram | ( | trgm * | tptr, | |
char * | str, | |||
int | bytelen | |||
) |
Definition at line 112 of file trgm_op.c.
References COMP_CRC32, CPTRGM, FIN_CRC32, and INIT_CRC32.
Referenced by fillTrgm(), and make_trigrams().
{ if (bytelen == 3) { CPTRGM(tptr, str); } else { pg_crc32 crc; INIT_CRC32(crc); COMP_CRC32(crc, str, bytelen); FIN_CRC32(crc); /* * use only 3 upper bytes from crc, hope, it's good enough hashing */ CPTRGM(tptr, &crc); } }
TRGM* createTrgmNFA | ( | text * | text_re, | |
Oid | collation, | |||
TrgmPackedGraph ** | graph, | |||
MemoryContext | rcontext | |||
) |
Definition at line 484 of file trgm_regexp.c.
References ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, ALLOCSET_DEFAULT_MINSIZE, AllocSetContextCreate(), createTrgmNFAInternal(), CurrentMemoryContext, MemoryContextDelete(), MemoryContextSwitchTo(), PG_CATCH, PG_END_TRY, PG_RE_THROW, pg_regfree(), PG_TRY, RE_compile(), REG_ADVANCED, and REG_ICASE.
Referenced by gin_extract_query_trgm(), and gtrgm_consistent().
{ TRGM *trg; regex_t regex; MemoryContext tmpcontext; MemoryContext oldcontext; /* * This processing generates a great deal of cruft, which we'd like to * clean up before returning (since this function may be called in a * query-lifespan memory context). Make a temp context we can work in so * that cleanup is easy. */ tmpcontext = AllocSetContextCreate(CurrentMemoryContext, "createTrgmNFA temporary context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); oldcontext = MemoryContextSwitchTo(tmpcontext); /* * Stage 1: Compile the regexp into a NFA, using the regexp library. */ #ifdef IGNORECASE RE_compile(®ex, text_re, REG_ADVANCED | REG_ICASE, collation); #else RE_compile(®ex, text_re, REG_ADVANCED, collation); #endif /* * Since the regexp library allocates its internal data structures with * malloc, we need to use a PG_TRY block to ensure that pg_regfree() gets * done even if there's an error. */ PG_TRY(); { trg = createTrgmNFAInternal(®ex, graph, rcontext); } PG_CATCH(); { pg_regfree(®ex); PG_RE_THROW(); } PG_END_TRY(); pg_regfree(®ex); /* Clean up all the cruft we created */ MemoryContextSwitchTo(oldcontext); MemoryContextDelete(tmpcontext); return trg; }
TRGM* generate_trgm | ( | char * | str, | |
int | slen | |||
) |
Definition at line 180 of file trgm_op.c.
References ARRKEY, buf, CALCGTSIZE, comp_trgm(), find_word(), TRGM::flag, GETARR, lowerstr_with_len(), LPADDING, make_trigrams(), palloc(), pfree(), qsort, RPADDING, SET_VARSIZE, TRGMHDRSIZE, and unique_array().
Referenced by gin_extract_query_trgm(), gin_extract_value_trgm(), gtrgm_compress(), gtrgm_consistent(), gtrgm_distance(), show_trgm(), and similarity().
{ TRGM *trg; char *buf; trgm *tptr; int len, charlen, bytelen; char *bword, *eword; trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); trg->flag = ARRKEY; SET_VARSIZE(trg, TRGMHDRSIZE); if (slen + LPADDING + RPADDING < 3 || slen == 0) return trg; tptr = GETARR(trg); buf = palloc(sizeof(char) * (slen + 4)); if (LPADDING > 0) { *buf = ' '; if (LPADDING > 1) *(buf + 1) = ' '; } eword = str; while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL) { #ifdef IGNORECASE bword = lowerstr_with_len(bword, eword - bword); bytelen = strlen(bword); #else bytelen = eword - bword; #endif memcpy(buf + LPADDING, bword, bytelen); #ifdef IGNORECASE pfree(bword); #endif buf[LPADDING + bytelen] = ' '; buf[LPADDING + bytelen + 1] = ' '; /* * count trigrams */ tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING, charlen + LPADDING + RPADDING); } pfree(buf); if ((len = tptr - GETARR(trg)) == 0) return trg; if (len > 0) { qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm); len = unique_array(GETARR(trg), len); } SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); return trg; }
TRGM* generate_wildcard_trgm | ( | const char * | str, | |
int | slen | |||
) |
Definition at line 411 of file trgm_op.c.
References ARRKEY, buf, CALCGTSIZE, comp_trgm(), TRGM::flag, get_wildcard_part(), GETARR, lowerstr_with_len(), LPADDING, make_trigrams(), NULL, palloc(), pfree(), qsort, RPADDING, SET_VARSIZE, TRGMHDRSIZE, and unique_array().
Referenced by gin_extract_query_trgm(), and gtrgm_consistent().
{ TRGM *trg; char *buf, *buf2; trgm *tptr; int len, charlen, bytelen; const char *eword; trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); trg->flag = ARRKEY; SET_VARSIZE(trg, TRGMHDRSIZE); if (slen + LPADDING + RPADDING < 3 || slen == 0) return trg; tptr = GETARR(trg); buf = palloc(sizeof(char) * (slen + 4)); /* * Extract trigrams from each substring extracted by get_wildcard_part. */ eword = str; while ((eword = get_wildcard_part(eword, slen - (eword - str), buf, &bytelen, &charlen)) != NULL) { #ifdef IGNORECASE buf2 = lowerstr_with_len(buf, bytelen); bytelen = strlen(buf2); #else buf2 = buf; #endif /* * count trigrams */ tptr = make_trigrams(tptr, buf2, bytelen, charlen); #ifdef IGNORECASE pfree(buf2); #endif } pfree(buf); if ((len = tptr - GETARR(trg)) == 0) return trg; /* * Make trigrams unique. */ if (len > 0) { qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm); len = unique_array(GETARR(trg), len); } SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); return trg; }
Definition at line 476 of file trgm_op.c.
References val.
Referenced by gin_extract_query_trgm(), gin_extract_value_trgm(), and show_trgm().
Definition at line 586 of file trgm_op.c.
References ARRNELEM, CMPTRGM, and GETARR.
Referenced by gtrgm_consistent().
{ trgm *ptr1, *ptr2; int len1, len2; ptr1 = GETARR(trg1); ptr2 = GETARR(trg2); len1 = ARRNELEM(trg1); len2 = ARRNELEM(trg2); while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2) { int res = CMPTRGM(ptr1, ptr2); if (res < 0) return false; else if (res > 0) ptr2++; else { ptr1++; ptr2++; } } if (ptr1 - GETARR(trg1) < len1) return false; else return true; }
Definition at line 625 of file trgm_op.c.
References ARRNELEM, CMPTRGM, GETARR, i, and palloc0().
Referenced by gtrgm_consistent().
{ bool *result; trgm *ptrq = GETARR(query), *ptrk = GETARR(key); int lenq = ARRNELEM(query), lenk = ARRNELEM(key), i; result = (bool *) palloc0(lenq * sizeof(bool)); /* for each query trigram, do a binary search in the key array */ for (i = 0; i < lenq; i++) { int lo = 0; int hi = lenk; while (lo < hi) { int mid = (lo + hi) / 2; int res = CMPTRGM(ptrq, ptrk + mid); if (res < 0) hi = mid; else if (res > 0) lo = mid + 1; else { result[i] = true; break; } } ptrq++; } return result; }
bool trigramsMatchGraph | ( | TrgmPackedGraph * | graph, | |
bool * | check | |||
) |
Definition at line 604 of file trgm_regexp.c.
References TrgmPackedState::arcs, TrgmPackedState::arcsCount, TrgmPackedArc::colorTrgm, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, i, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, and TrgmPackedArc::targetState.
Referenced by gin_trgm_consistent(), and gtrgm_consistent().
{ int i, j, k, queueIn, queueOut; /* * Reset temporary working areas. */ memset(graph->colorTrigramsActive, 0, sizeof(bool) * graph->colorTrigramsCount); memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount); /* * Check which color trigrams were matched. A match for any simple * trigram associated with a color trigram counts as a match of the color * trigram. */ j = 0; for (i = 0; i < graph->colorTrigramsCount; i++) { int cnt = graph->colorTrigramGroups[i]; for (k = j; k < j + cnt; k++) { if (check[k]) { /* * Found one matched trigram in the group. Can skip the rest * of them and go to the next group. */ graph->colorTrigramsActive[i] = true; break; } } j = j + cnt; } /* * Initialize the statesQueue to hold just the initial state. Note: * statesQueue has room for statesCount entries, which is certainly enough * since no state will be put in the queue more than once. The * statesActive array marks which states have been queued. */ graph->statesActive[0] = true; graph->statesQueue[0] = 0; queueIn = 0; queueOut = 1; /* Process queued states as long as there are any. */ while (queueIn < queueOut) { int stateno = graph->statesQueue[queueIn++]; TrgmPackedState *state = &graph->states[stateno]; int cnt = state->arcsCount; /* Loop over state's out-arcs */ for (i = 0; i < cnt; i++) { TrgmPackedArc *arc = &state->arcs[i]; /* * If corresponding color trigram is present then activate the * corresponding state. We're done if that's the final state, * otherwise queue the state if it's not been queued already. */ if (graph->colorTrigramsActive[arc->colorTrgm]) { int nextstate = arc->targetState; if (nextstate == 1) return true; /* success: final state is reachable */ if (!graph->statesActive[nextstate]) { graph->statesActive[nextstate] = true; graph->statesQueue[queueOut++] = nextstate; } } } } /* Queue is empty, so match fails. */ return false; }
Definition at line 16 of file trgm_op.c.
Referenced by gin_trgm_consistent(), gtrgm_consistent(), set_limit(), show_limit(), and similarity_op().