Header And Logo

PostgreSQL
| The world's most advanced open source database.

trgm.h

Go to the documentation of this file.
00001 /*
00002  * contrib/pg_trgm/trgm.h
00003  */
00004 #ifndef __TRGM_H__
00005 #define __TRGM_H__
00006 
00007 #include "access/gist.h"
00008 #include "access/itup.h"
00009 #include "storage/bufpage.h"
00010 
00011 /*
00012  * Options ... but note that trgm_regexp.c effectively assumes these values
00013  * of LPADDING and RPADDING.
00014  */
/*
 * NOTE(review): LPADDING/RPADDING are not used by any macro visible in this
 * header; presumably they are the number of padding characters placed before
 * and after a word when trigrams are generated -- confirm in the generator.
 */
00015 #define LPADDING        2
00016 #define RPADDING        1
/*
 * KEEPONLYALNUM selects the alphanumeric-only definitions of ISWORDCHR and
 * ISPRINTABLECHAR further down in this header (see the #ifdef there).
 */
00017 #define KEEPONLYALNUM
00018 /*
00019  * Caution: IGNORECASE macro means that trigrams are case-insensitive.
00020  * If this macro is disabled, the ~* and ~~* operators must be removed from
00021  * the operator classes, because we can't handle case-insensitive wildcard
00022  * search with case-sensitive trigrams.  Failure to do this will result in
00023  * "cannot handle ~*(~~*) with case-sensitive trigrams" errors.
00024  */
00025 #define IGNORECASE
/*
 * NOTE(review): DIVUNION is not referenced by anything visible in this
 * header; its consumer must live in another file -- confirm what it toggles.
 */
00026 #define DIVUNION
00027 
00028 /* operator strategy numbers */
/*
 * Six distinct values, numbered consecutively from 1.
 * NOTE(review): ILikeStrategyNumber and RegExpICaseStrategyNumber look like
 * the case-insensitive strategies that the IGNORECASE caution refers to
 * (~~* and ~*) -- confirm against the operator class definitions.
 */
00029 #define SimilarityStrategyNumber    1
00030 #define DistanceStrategyNumber      2
00031 #define LikeStrategyNumber          3
00032 #define ILikeStrategyNumber         4
00033 #define RegExpStrategyNumber        5
00034 #define RegExpICaseStrategyNumber   6
00035 
00036 
/* A trigram is stored as exactly three chars. */
typedef char trgm[3];

/*
 * Three-way comparison helpers; each yields -1, 0 or 1.
 *
 * CMPCHAR(a,b)     compares two values using == and <.
 * CMPPCHAR(a,b,i)  compares the chars at offset i of a and b, both read
 *                  through a (const char *) cast.
 * CMPTRGM(a,b)     compares two trigrams char by char; the first differing
 *                  position decides the result.
 *
 * The index argument of CMPPCHAR is parenthesized so that any expression
 * (e.g. a conditional) can be passed as the offset.
 *
 * These are macros: arguments may be evaluated several times, so do not pass
 * expressions with side effects.
 */
#define CMPCHAR(a,b) ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) )
#define CMPPCHAR(a,b,i)  CMPCHAR( *(((const char*)(a))+(i)), *(((const char*)(b))+(i)) )
#define CMPTRGM(a,b) ( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : ( CMPPCHAR(a,b,1) ? CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) )
00042 
/*
 * Copy the three chars of trigram b into trigram a.
 *
 * The expansion is a do { ... } while(0) WITHOUT a trailing semicolon: the
 * call site writes the semicolon, which makes the macro behave like a single
 * statement and keeps it usable as the body of an unbraced if/else.
 *
 * Both arguments are evaluated three times; avoid side effects.
 */
#define CPTRGM(a,b) do {                \
    *(((char*)(a))+0) = *(((char*)(b))+0);  \
    *(((char*)(a))+1) = *(((char*)(b))+1);  \
    *(((char*)(a))+2) = *(((char*)(b))+2);  \
} while(0)
00048 
/*
 * Character classification helpers.
 *
 * ISWORDCHR(c)        with KEEPONLYALNUM: true when t_isalpha(c) or
 *                     t_isdigit(c) is true; otherwise: true when
 *                     t_isspace(c) is false.  The t_is* helpers are declared
 *                     outside this header.
 * ISPRINTABLECHAR(a)  a is a pointer; the byte it points at is read as
 *                     unsigned char.  With KEEPONLYALNUM the byte must be
 *                     ASCII and either alphanumeric or a space; otherwise it
 *                     must be ASCII and printable.
 * ISPRINTABLETRGM(t)  true when all three bytes starting at t satisfy
 *                     ISPRINTABLECHAR.
 *
 * NOTE(review): isascii/isalnum/isprint are used but no <ctype.h> include is
 * visible in this header -- presumably supplied by the including file;
 * confirm.
 */
00049 #ifdef KEEPONLYALNUM
00050 #define ISWORDCHR(c)    (t_isalpha(c) || t_isdigit(c))
00051 #define ISPRINTABLECHAR(a)  ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
00052 #else
00053 #define ISWORDCHR(c)    (!t_isspace(c))
00054 #define ISPRINTABLECHAR(a)  ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
00055 #endif
00056 #define ISPRINTABLETRGM(t)  ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
00057 
/*
 * Both macros dereference x, so x must be a pointer to the character being
 * tested.  ISWILDCARDCHAR may evaluate x twice.
 */
00058 #define ISESCAPECHAR(x) (*(x) == '\\')  /* Wildcard escape character */
00059 #define ISWILDCARDCHAR(x) (*(x) == '_' || *(x) == '%')  /* Wildcard
00060                                                          * meta-character */
00061 
/*
 * Variable-length trigram key.
 *
 * The flag byte is tested against ARRKEY / SIGNKEY / ALLISTRUE by the
 * ISARRKEY / ISSIGNKEY / ISALLTRUE macros later in this header.  The payload
 * is reached through GETSIGN / GETARR, which add TRGMHDRSIZE to the start of
 * the struct instead of going through the data member.
 *
 * NOTE(review): data[1] looks like the pre-C99 idiom for a trailing
 * variable-length array, so sizeof(TRGM) would not describe the real
 * allocation size -- confirm before relying on it.
 */
00062 typedef struct
00063 {
00064     int32       vl_len_;        /* varlena header (do not touch directly!) */
00065     uint8       flag;
00066     char        data[1];
00067 } TRGM;
00068 
/* Size of the fixed part in front of the payload: varlena header + flag byte. */
00069 #define TRGMHDRSIZE       (VARHDRSZ + sizeof(uint8))
00070 
/*
 * gist: fixed-size bit signature support
 *
 * BITBYTE    bits per byte.
 * SIGLEN     signature length in bytes (sizeof(int) * SIGLENINT).
 * SIGLENBIT  modulus used by HASHVAL: one less than the number of bits in a
 *            signature.
 * BITVEC     a signature; BITVECP points at its bytes.
 *
 * LOOPBYTE   loop header over every byte index of a signature; the caller
 *            must have a variable named i in scope and supplies the body.
 *
 * GETBYTE(x,i)     the byte of signature x that holds bit number i (lvalue).
 * GETBITBYTE(x,i)  bit i of the single byte value x.
 * CLRBIT / SETBIT  clear / set bit i of signature x.  The expansion is fully
 *                  parenthesized, so it can be used as a statement or inside
 *                  a larger expression; its value is the updated byte.
 * GETBIT(x,i)      bit i of signature x, as 0 or 1.
 *
 * HASHVAL(val)     maps val to a bit number in [0, SIGLENBIT).
 * HASH(sign,val)   sets the bit HASHVAL(val) in sign.
 *
 * Macro arguments can be evaluated more than once; avoid side effects.
 */
#define BITBYTE 8
#define SIGLENINT  3            /* >122 => key will toast, so very slow!!! */
#define SIGLEN  ( sizeof(int)*SIGLENINT )

#define SIGLENBIT (SIGLEN*BITBYTE - 1)  /* see makesign */

typedef char BITVEC[SIGLEN];
typedef char *BITVECP;

#define LOOPBYTE \
            for(i=0;i<SIGLEN;i++)

#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
#define GETBITBYTE(x,i) ( (((char)(x)) >> (i)) & 0x01 )
#define CLRBIT(x,i)   ( GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) )
#define SETBIT(x,i)   ( GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) ) )
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )

#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
00092 
/*
 * Key kind flags stored in TRGM.flag, and accessors for a TRGM key.
 *
 * ISARRKEY / ISSIGNKEY / ISALLTRUE  test the corresponding flag bit of the
 *                                   key x points at (non-zero when set).
 * CALCGTSIZE(flag, len)  total size in bytes of a key: the header plus
 *                        len trigrams when ARRKEY is set, nothing when
 *                        ALLISTRUE is set, or SIGLEN signature bytes
 *                        otherwise.
 * GETSIGN(x) / GETARR(x) pointer to the payload, TRGMHDRSIZE bytes past x,
 *                        typed as a signature or as a trigram array.
 * ARRNELEM(x)            number of trigrams in the payload, derived from
 *                        VARSIZE(x).
 *
 * The pointer argument is parenthesized before it is cast, so pointer
 * expressions such as "p + 1" are cast as a whole.
 */
#define ARRKEY          0x01
#define SIGNKEY         0x02
#define ALLISTRUE       0x04

#define ISARRKEY(x) ( ((TRGM*)(x))->flag & ARRKEY )
#define ISSIGNKEY(x)    ( ((TRGM*)(x))->flag & SIGNKEY )
#define ISALLTRUE(x)    ( ((TRGM*)(x))->flag & ALLISTRUE )

#define CALCGTSIZE(flag, len) ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
#define GETSIGN(x)      ( (BITVECP)( (char*)(x)+TRGMHDRSIZE ) )
#define GETARR(x)       ( (trgm*)( (char*)(x)+TRGMHDRSIZE ) )
#define ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) )
00105 
/*
 * Opaque in this header: only the forward declaration is visible.  It is
 * handled through pointers by createTrgmNFA and trigramsMatchGraph below.
 */
00106 typedef struct TrgmPackedGraph TrgmPackedGraph;
00107 
/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably the similarity threshold -- confirm at the
 * definition.
 */
00108 extern float4 trgm_limit;
00109 
/*
 * Prototypes only; the definitions are not part of this header, so the
 * notes here are limited to what the signatures show.
 *
 * - generate_wildcard_trgm takes its input string as const, whereas
 *   generate_trgm and compact_trigram take a non-const char *.
 * - createTrgmNFA returns a TRGM * and additionally hands back a
 *   TrgmPackedGraph through the graph out-parameter; it is given a
 *   MemoryContext (NOTE(review): presumably where the results are
 *   allocated -- confirm).
 * - trgm_presence_map returns a bool *; NOTE(review): ownership of the
 *   returned array is not visible here -- confirm who frees it.
 */
00110 extern uint32 trgm2int(trgm *ptr);
00111 extern void compact_trigram(trgm *tptr, char *str, int bytelen);
00112 extern TRGM *generate_trgm(char *str, int slen);
00113 extern TRGM *generate_wildcard_trgm(const char *str, int slen);
00114 extern float4 cnt_sml(TRGM *trg1, TRGM *trg2);
00115 extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2);
00116 extern bool *trgm_presence_map(TRGM *query, TRGM *key);
00117 extern TRGM *createTrgmNFA(text *text_re, Oid collation,
00118               TrgmPackedGraph **graph, MemoryContext rcontext);
00119 extern bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check);
00120 
00121 #endif   /* __TRGM_H__ */