Header And Logo

PostgreSQL
| The world's most advanced open source database.

ts_type.h

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * ts_type.h
00004  *    Definitions for the tsvector and tsquery types
00005  *
00006  * Copyright (c) 1998-2013, PostgreSQL Global Development Group
00007  *
00008  * src/include/tsearch/ts_type.h
00009  *
00010  *-------------------------------------------------------------------------
00011  */
00012 #ifndef _PG_TSTYPE_H_
00013 #define _PG_TSTYPE_H_
00014 
00015 #include "fmgr.h"
00016 #include "utils/pg_crc.h"
00017 
00018 
00019 /*
00020  * TSVector type.
00021  *
00022  * Structure of tsvector datatype:
00023  * 1) standard varlena header
00024  * 2) int32     size - number of lexemes (WordEntry array entries)
00025  * 3) Array of WordEntry - one per lexeme; must be sorted according to
00026  *              tsCompareString() (ie, memcmp of lexeme strings).
00027  *              WordEntry->pos gives the number of bytes from end of WordEntry
00028  *              array to start of lexeme's string, which is of length len.
00029  * 4) Per-lexeme data storage:
00030  *    lexeme string (not null-terminated)
00031  *    if haspos is true:
00032  *      padding byte if necessary to make the position data 2-byte aligned
00033  *      uint16          number of positions that follow
00034  *      WordEntryPos[]  positions
00035  *
00036  * The positions for each lexeme must be sorted.
00037  *
00038  * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4
00039  */
00040 /* Per-lexeme entry: three bit-fields sharing one uint32 (1 + 11 + 20 bits). */
00041 typedef struct
00042 {
00043     uint32
00044                 haspos:1,       /* true if position data follows the lexeme */
00045                 len:11,         /* length of lexeme string; MAX 2Kb */
00046                 pos:20;         /* bytes from end of WordEntry array to lexeme; MAX 1Mb */
00047 } WordEntry;
00048 
00049 #define MAXSTRLEN ( (1<<11) - 1)    /* largest value that fits WordEntry.len (11 bits) */
00050 #define MAXSTRPOS ( (1<<20) - 1)    /* largest value that fits WordEntry.pos (20 bits) */
00051 
00052 /*
00053  * WordEntryPos packs a weight and a position into 16 bits, equivalent to
00054  * typedef struct {
00055  *      uint16
00056  *          weight:2,
00057  *          pos:14;
00058  * } WordEntryPos;
00059  */
00060 
00061 typedef uint16 WordEntryPos;
00062 
00063 typedef struct
00064 {
00065     uint16      npos;           /* number of positions that follow */
00066     WordEntryPos pos[1];        /* positions, sorted; variable length */
00067 } WordEntryPosVector;
00068 
00069 
00070 #define WEP_GETWEIGHT(x)    ( (x) >> 14 )       /* weight is stored above the low 14 bits */
00071 #define WEP_GETPOS(x)       ( (x) & 0x3fff )    /* position is the low 14 bits */
00072 /* The setters below assign to x in place; note that x is evaluated twice. */
00073 #define WEP_SETWEIGHT(x,v)  ( (x) = ( (v) << 14 ) | ( (x) & 0x3fff ) )
00074 #define WEP_SETPOS(x,v)     ( (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) )
00075 
00076 #define MAXENTRYPOS (1<<14)     /* one more than the largest 14-bit position */
00077 #define MAXNUMPOS   (256)       /* NOTE(review): presumably max positions per lexeme -- confirm in callers */
00078 #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )  /* clamp x to MAXENTRYPOS-1; x is evaluated twice */
00079 
00080 /* This struct represents a complete tsvector datum */
00081 typedef struct
00082 {
00083     int32       vl_len_;        /* varlena header (do not touch directly!) */
00084     int32       size;           /* number of lexemes (WordEntry array entries) */
00085     WordEntry   entries[1];     /* one per lexeme, sorted; variable length */
00086     /* per-lexeme data (lexeme strings, optional positions) follows the entries[] array */
00087 } TSVectorData;
00088 
00089 typedef TSVectorData *TSVector;
00090 /* Header size, and total size of a tsvector with nentries entries and lenstr bytes of per-lexeme data */
00091 #define DATAHDRSIZE (offsetof(TSVectorData, entries))
00092 #define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) )
00093 
00094 /* pointer to start of a tsvector's WordEntry array */
00095 #define ARRPTR(x)   ( (x)->entries )
00096 
00097 /* pointer to start of a tsvector's lexeme storage */
00098 #define STRPTR(x)   ( (char *) &(x)->entries[(x)->size] )
00099 /* Position data of entry e in tsvector x: the vector itself, its count (0 if !haspos), and its array */
00100 #define _POSVECPTR(x, e)    ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
00101 #define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
00102 #define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
00103 
00104 /*
00105  * fmgr interface macros: Datum <-> TSVector conversion (detoasting on input)
00106  */
00107 
00108 #define DatumGetTSVector(X)         ((TSVector) PG_DETOAST_DATUM(X))
00109 #define DatumGetTSVectorCopy(X)     ((TSVector) PG_DETOAST_DATUM_COPY(X))
00110 #define TSVectorGetDatum(X)         PointerGetDatum(X)
00111 #define PG_GETARG_TSVECTOR(n)       DatumGetTSVector(PG_GETARG_DATUM(n))
00112 #define PG_GETARG_TSVECTOR_COPY(n)  DatumGetTSVectorCopy(PG_GETARG_DATUM(n))
00113 #define PG_RETURN_TSVECTOR(x)       return TSVectorGetDatum(x)  /* expands to a return statement */
00114 
00115 /*
00116  * I/O
00117  */
00118 extern Datum tsvectorin(PG_FUNCTION_ARGS);
00119 extern Datum tsvectorout(PG_FUNCTION_ARGS);
00120 extern Datum tsvectorsend(PG_FUNCTION_ARGS);
00121 extern Datum tsvectorrecv(PG_FUNCTION_ARGS);
00122 
00123 /*
00124  * operations with tsvector
00125  */
00126 extern Datum tsvector_lt(PG_FUNCTION_ARGS);
00127 extern Datum tsvector_le(PG_FUNCTION_ARGS);
00128 extern Datum tsvector_eq(PG_FUNCTION_ARGS);
00129 extern Datum tsvector_ne(PG_FUNCTION_ARGS);
00130 extern Datum tsvector_ge(PG_FUNCTION_ARGS);
00131 extern Datum tsvector_gt(PG_FUNCTION_ARGS);
00132 extern Datum tsvector_cmp(PG_FUNCTION_ARGS);
00133 
00134 extern Datum tsvector_length(PG_FUNCTION_ARGS);
00135 extern Datum tsvector_strip(PG_FUNCTION_ARGS);
00136 extern Datum tsvector_setweight(PG_FUNCTION_ARGS);
00137 extern Datum tsvector_concat(PG_FUNCTION_ARGS);
00138 extern Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS);
00139 extern Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS);
00140 
00141 extern Datum ts_match_vq(PG_FUNCTION_ARGS);
00142 extern Datum ts_match_qv(PG_FUNCTION_ARGS);
00143 extern Datum ts_match_tt(PG_FUNCTION_ARGS);
00144 extern Datum ts_match_tq(PG_FUNCTION_ARGS);
00145 
00146 extern Datum ts_stat1(PG_FUNCTION_ARGS);
00147 extern Datum ts_stat2(PG_FUNCTION_ARGS);
00148 
00149 extern Datum ts_rank_tt(PG_FUNCTION_ARGS);
00150 extern Datum ts_rank_wtt(PG_FUNCTION_ARGS);
00151 extern Datum ts_rank_ttf(PG_FUNCTION_ARGS);
00152 extern Datum ts_rank_wttf(PG_FUNCTION_ARGS);
00153 extern Datum ts_rankcd_tt(PG_FUNCTION_ARGS);
00154 extern Datum ts_rankcd_wtt(PG_FUNCTION_ARGS);
00155 extern Datum ts_rankcd_ttf(PG_FUNCTION_ARGS);
00156 extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS);
00157 
00158 extern Datum tsmatchsel(PG_FUNCTION_ARGS);
00159 extern Datum tsmatchjoinsel(PG_FUNCTION_ARGS);
00160 
00161 extern Datum ts_typanalyze(PG_FUNCTION_ARGS);
00162 
00163 
00164 /*
00165  * TSQuery
00166  *
00167  *
00168  */
00169 
00170 typedef int8 QueryItemType;     /* type of the "type" field in QueryItem and its members */
00171 
00172 /* Valid values for QueryItemType: */
00173 #define QI_VAL 1                /* presumably an operand node -- confirm */
00174 #define QI_OPR 2                /* presumably an operator node -- confirm */
00175 #define QI_VALSTOP 3            /* This is only used in an intermediate stack
00176                                  * representation in parse_tsquery. It's not a
00177                                  * legal type elsewhere. */
00178 
00179 /*
00180  * QueryOperand is the operand form of QueryItem (one node in a tsquery).
00181  */
00182 typedef struct
00183 {
00184     QueryItemType type;         /* operand or kind of operator (ts_tokentype) */
00185     uint8       weight;         /* weights of operand to search. It's a
00186                                  * bitmask of allowed weights. If it is 0
00187                                  * then any weight is allowed. Weights and
00188                                  * bit map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
00189     bool        prefix;         /* true if it's a prefix search */
00190     int32       valcrc;         /* XXX: pg_crc32 would be a more appropriate
00191                                  * data type, but we use comparisons to signed
00192                                  * integers in the code. They would need to be
00193                                  * changed as well. */
00194 
00195     /* pointer to text value of operand, must correlate with WordEntry */
00196     uint32
00197                 length:12,      /* presumably length of the operand text -- confirm */
00198                 distance:20;    /* presumably offset of the operand text -- confirm */
00199 } QueryOperand;
00200 
00201 
00202 /* Legal values for QueryOperator.oper */
00203 #define OP_NOT  1
00204 #define OP_AND  2
00205 #define OP_OR   3
00206 
00207 typedef struct
00208 {
00209     QueryItemType type;
00210     int8        oper;           /* one of the OP_* values above */
00211     uint32      left;           /* offset to left operand. Right operand is
00212                                  * at item + 1, left operand is at
00213                                  * item + item->left */
00214 } QueryOperator;
00215 
00216 /*
00217  * Note: TSQuery is 4-byte aligned, so make sure there are no fields
00218  * inside QueryItem requiring 8-byte alignment, like int64.
00219  */
00220 typedef union
00221 {
00222     QueryItemType type;         /* both structs below begin with this field */
00223     QueryOperator qoperator;
00224     QueryOperand qoperand;
00225 } QueryItem;
00226 
00227 /*
00228  * Storage layout of a tsquery datum:
00229  *  (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
00230  */
00231 
00232 typedef struct
00233 {
00234     int32       vl_len_;        /* varlena header (do not touch directly!) */
00235     int32       size;           /* number of QueryItems */
00236     char        data[1];        /* QueryItem array, then operand strings */
00237 } TSQueryData;
00238 
00239 typedef TSQueryData *TSQuery;
00240 
00241 #define HDRSIZETQ   ( VARHDRSZ + sizeof(int32) )    /* bytes before the QueryItem array */
00242 
00243 /* Computes the total size of a TSQuery (header, QueryItems and operands).
00244  * size is the number of QueryItems; lenofoperand is the total operand length.
00245  */
00246 #define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
00247 
00248 /* Returns a pointer to the first QueryItem in a TSQuery */
00249 #define GETQUERY(x)  ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
00250 
00251 /* Returns a pointer to the beginning of operands in a TSQuery */
00252 #define GETOPERAND(x)   ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
00253 
00254 /*
00255  * fmgr interface macros.  Note: TSQuery is marked as plain storage, so it
00256  * can't be toasted and DatumGetTSQuery skips detoasting; the _COPY variant
00257  * still uses PG_DETOAST_DATUM_COPY for simplicity.
00258  */
00259 
00260 #define DatumGetTSQuery(X)          ((TSQuery) DatumGetPointer(X))
00261 #define DatumGetTSQueryCopy(X)      ((TSQuery) PG_DETOAST_DATUM_COPY(X))
00262 #define TSQueryGetDatum(X)          PointerGetDatum(X)
00263 #define PG_GETARG_TSQUERY(n)        DatumGetTSQuery(PG_GETARG_DATUM(n))
00264 #define PG_GETARG_TSQUERY_COPY(n)   DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
00265 #define PG_RETURN_TSQUERY(x)        return TSQueryGetDatum(x)  /* expands to a return statement */
00266 
00267 /*
00268  * I/O
00269  */
00270 extern Datum tsqueryin(PG_FUNCTION_ARGS);
00271 extern Datum tsqueryout(PG_FUNCTION_ARGS);
00272 extern Datum tsquerysend(PG_FUNCTION_ARGS);
00273 extern Datum tsqueryrecv(PG_FUNCTION_ARGS);
00274 
00275 /*
00276  * operations with tsquery
00277  */
00278 extern Datum tsquery_lt(PG_FUNCTION_ARGS);
00279 extern Datum tsquery_le(PG_FUNCTION_ARGS);
00280 extern Datum tsquery_eq(PG_FUNCTION_ARGS);
00281 extern Datum tsquery_ne(PG_FUNCTION_ARGS);
00282 extern Datum tsquery_ge(PG_FUNCTION_ARGS);
00283 extern Datum tsquery_gt(PG_FUNCTION_ARGS);
00284 extern Datum tsquery_cmp(PG_FUNCTION_ARGS);
00285 
00286 extern Datum tsquerytree(PG_FUNCTION_ARGS);
00287 extern Datum tsquery_numnode(PG_FUNCTION_ARGS);
00288 
00289 extern Datum tsquery_and(PG_FUNCTION_ARGS);
00290 extern Datum tsquery_or(PG_FUNCTION_ARGS);
00291 extern Datum tsquery_not(PG_FUNCTION_ARGS);
00292 
00293 extern Datum tsquery_rewrite(PG_FUNCTION_ARGS);
00294 extern Datum tsquery_rewrite_query(PG_FUNCTION_ARGS);
00295 
00296 extern Datum tsq_mcontains(PG_FUNCTION_ARGS);
00297 extern Datum tsq_mcontained(PG_FUNCTION_ARGS);
00298 
00299 #endif   /* _PG_TSTYPE_H_ */