Header And Logo

PostgreSQL
| The world's most advanced open source database.

tsquery.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * tsquery.c
00004  *    I/O functions for tsquery
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  *
00008  *
00009  * IDENTIFICATION
00010  *    src/backend/utils/adt/tsquery.c
00011  *
00012  *-------------------------------------------------------------------------
00013  */
00014 
00015 #include "postgres.h"
00016 
00017 #include "libpq/pqformat.h"
00018 #include "miscadmin.h"
00019 #include "tsearch/ts_locale.h"
00020 #include "tsearch/ts_utils.h"
00021 #include "utils/builtins.h"
00022 #include "utils/memutils.h"
00023 
00024 
00025 struct TSQueryParserStateData
00026 {
00027     /* State for gettoken_query */
00028     char       *buffer;         /* entire string we are scanning */
00029     char       *buf;            /* current scan point */
00030     int         state;
00031     int         count;          /* nesting count, incremented by (,
00032                                  * decremented by ) */
00033 
00034     /* polish (prefix) notation in list, filled in by push* functions */
00035     List       *polstr;
00036 
00037     /*
00038      * Strings from operands are collected in op. curop is a pointer to the
00039      * end of used space of op.
00040      */
00041     char       *op;
00042     char       *curop;
00043     int         lenop;          /* allocated size of op */
00044     int         sumlen;         /* used size of op */
00045 
00046     /* state for value's parser */
00047     TSVectorParseState valstate;
00048 };
00049 
/* scanner states, stored in TSQueryParserStateData.state */
#define WAITOPERAND         1   /* expecting an operand, '!' or '(' */
#define WAITOPERATOR        2   /* expecting '&', '|', ')' or end of input */
#define WAITFIRSTOPERAND    3   /* as WAITOPERAND, but finding no operand
                                 * ends the scan instead of raising an error */
#define WAITSINGLEOPERAND   4   /* the whole remaining input is one operand */
00055 
00056 /*
00057  * subroutine to parse the modifiers (weight and prefix flag currently)
00058  * part, like ':1AB' of a query.
00059  */
00060 static char *
00061 get_modifiers(char *buf, int16 *weight, bool *prefix)
00062 {
00063     *weight = 0;
00064     *prefix = false;
00065 
00066     if (!t_iseq(buf, ':'))
00067         return buf;
00068 
00069     buf++;
00070     while (*buf && pg_mblen(buf) == 1)
00071     {
00072         switch (*buf)
00073         {
00074             case 'a':
00075             case 'A':
00076                 *weight |= 1 << 3;
00077                 break;
00078             case 'b':
00079             case 'B':
00080                 *weight |= 1 << 2;
00081                 break;
00082             case 'c':
00083             case 'C':
00084                 *weight |= 1 << 1;
00085                 break;
00086             case 'd':
00087             case 'D':
00088                 *weight |= 1;
00089                 break;
00090             case '*':
00091                 *prefix = true;
00092                 break;
00093             default:
00094                 return buf;
00095         }
00096         buf++;
00097     }
00098 
00099     return buf;
00100 }
00101 
/*
 * Kinds of token that gettoken_query can return
 */
typedef enum
{
    PT_END = 0,                 /* no more tokens */
    PT_ERR = 1,                 /* syntax error */
    PT_VAL = 2,                 /* an operand value */
    PT_OPR = 3,                 /* an operator; the OP_* code is returned
                                 * separately */
    PT_OPEN = 4,                /* opening parenthesis */
    PT_CLOSE = 5                /* closing parenthesis */
} ts_tokentype;
00114 
00115 /*
00116  * get token from query string
00117  *
00118  * *operator is filled in with OP_* when return values is PT_OPR
00119  * *strval, *lenval and *weight are filled in when return value is PT_VAL
00120  */
00121 static ts_tokentype
00122 gettoken_query(TSQueryParserState state,
00123                int8 *operator,
00124                int *lenval, char **strval, int16 *weight, bool *prefix)
00125 {
00126     *weight = 0;
00127     *prefix = false;
00128 
00129     while (1)
00130     {
00131         switch (state->state)
00132         {
00133             case WAITFIRSTOPERAND:
00134             case WAITOPERAND:
00135                 if (t_iseq(state->buf, '!'))
00136                 {
00137                     (state->buf)++;     /* can safely ++, t_iseq guarantee
00138                                          * that pg_mblen()==1 */
00139                     *operator = OP_NOT;
00140                     state->state = WAITOPERAND;
00141                     return PT_OPR;
00142                 }
00143                 else if (t_iseq(state->buf, '('))
00144                 {
00145                     state->count++;
00146                     (state->buf)++;
00147                     state->state = WAITOPERAND;
00148                     return PT_OPEN;
00149                 }
00150                 else if (t_iseq(state->buf, ':'))
00151                 {
00152                     ereport(ERROR,
00153                             (errcode(ERRCODE_SYNTAX_ERROR),
00154                              errmsg("syntax error in tsquery: \"%s\"",
00155                                     state->buffer)));
00156                 }
00157                 else if (!t_isspace(state->buf))
00158                 {
00159                     /*
00160                      * We rely on the tsvector parser to parse the value for
00161                      * us
00162                      */
00163                     reset_tsvector_parser(state->valstate, state->buf);
00164                     if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
00165                     {
00166                         state->buf = get_modifiers(state->buf, weight, prefix);
00167                         state->state = WAITOPERATOR;
00168                         return PT_VAL;
00169                     }
00170                     else if (state->state == WAITFIRSTOPERAND)
00171                         return PT_END;
00172                     else
00173                         ereport(ERROR,
00174                                 (errcode(ERRCODE_SYNTAX_ERROR),
00175                                  errmsg("no operand in tsquery: \"%s\"",
00176                                         state->buffer)));
00177                 }
00178                 break;
00179             case WAITOPERATOR:
00180                 if (t_iseq(state->buf, '&'))
00181                 {
00182                     state->state = WAITOPERAND;
00183                     *operator = OP_AND;
00184                     (state->buf)++;
00185                     return PT_OPR;
00186                 }
00187                 if (t_iseq(state->buf, '|'))
00188                 {
00189                     state->state = WAITOPERAND;
00190                     *operator = OP_OR;
00191                     (state->buf)++;
00192                     return PT_OPR;
00193                 }
00194                 else if (t_iseq(state->buf, ')'))
00195                 {
00196                     (state->buf)++;
00197                     state->count--;
00198                     return (state->count < 0) ? PT_ERR : PT_CLOSE;
00199                 }
00200                 else if (*(state->buf) == '\0')
00201                     return (state->count) ? PT_ERR : PT_END;
00202                 else if (!t_isspace(state->buf))
00203                     return PT_ERR;
00204                 break;
00205             case WAITSINGLEOPERAND:
00206                 if (*(state->buf) == '\0')
00207                     return PT_END;
00208                 *strval = state->buf;
00209                 *lenval = strlen(state->buf);
00210                 state->buf += strlen(state->buf);
00211                 state->count++;
00212                 return PT_VAL;
00213             default:
00214                 return PT_ERR;
00215                 break;
00216         }
00217         state->buf += pg_mblen(state->buf);
00218     }
00219 }
00220 
00221 /*
00222  * Push an operator to state->polstr
00223  */
00224 void
00225 pushOperator(TSQueryParserState state, int8 oper)
00226 {
00227     QueryOperator *tmp;
00228 
00229     Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
00230 
00231     tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
00232     tmp->type = QI_OPR;
00233     tmp->oper = oper;
00234     /* left is filled in later with findoprnd */
00235 
00236     state->polstr = lcons(tmp, state->polstr);
00237 }
00238 
00239 static void
00240 pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
00241 {
00242     QueryOperand *tmp;
00243 
00244     if (distance >= MAXSTRPOS)
00245         ereport(ERROR,
00246                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00247                  errmsg("value is too big in tsquery: \"%s\"",
00248                         state->buffer)));
00249     if (lenval >= MAXSTRLEN)
00250         ereport(ERROR,
00251                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00252                  errmsg("operand is too long in tsquery: \"%s\"",
00253                         state->buffer)));
00254 
00255     tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
00256     tmp->type = QI_VAL;
00257     tmp->weight = weight;
00258     tmp->prefix = prefix;
00259     tmp->valcrc = (int32) valcrc;
00260     tmp->length = lenval;
00261     tmp->distance = distance;
00262 
00263     state->polstr = lcons(tmp, state->polstr);
00264 }
00265 
00266 /*
00267  * Push an operand to state->polstr.
00268  *
00269  * strval must point to a string equal to state->curop. lenval is the length
00270  * of the string.
00271  */
00272 void
00273 pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
00274 {
00275     pg_crc32    valcrc;
00276 
00277     if (lenval >= MAXSTRLEN)
00278         ereport(ERROR,
00279                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00280                  errmsg("word is too long in tsquery: \"%s\"",
00281                         state->buffer)));
00282 
00283     INIT_CRC32(valcrc);
00284     COMP_CRC32(valcrc, strval, lenval);
00285     FIN_CRC32(valcrc);
00286     pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
00287 
00288     /* append the value string to state.op, enlarging buffer if needed first */
00289     while (state->curop - state->op + lenval + 1 >= state->lenop)
00290     {
00291         int         used = state->curop - state->op;
00292 
00293         state->lenop *= 2;
00294         state->op = (char *) repalloc((void *) state->op, state->lenop);
00295         state->curop = state->op + used;
00296     }
00297     memcpy((void *) state->curop, (void *) strval, lenval);
00298     state->curop += lenval;
00299     *(state->curop) = '\0';
00300     state->curop++;
00301     state->sumlen += lenval + 1 /* \0 */ ;
00302 }
00303 
00304 
00305 /*
00306  * Push a stopword placeholder to state->polstr
00307  */
00308 void
00309 pushStop(TSQueryParserState state)
00310 {
00311     QueryOperand *tmp;
00312 
00313     tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
00314     tmp->type = QI_VALSTOP;
00315 
00316     state->polstr = lcons(tmp, state->polstr);
00317 }
00318 
00319 
00320 #define STACKDEPTH  32
00321 
00322 /*
00323  * Make polish (prefix) notation of query.
00324  *
00325  * See parse_tsquery for explanation of pushval.
00326  */
00327 static void
00328 makepol(TSQueryParserState state,
00329         PushFunction pushval,
00330         Datum opaque)
00331 {
00332     int8        operator = 0;
00333     ts_tokentype type;
00334     int         lenval = 0;
00335     char       *strval = NULL;
00336     int8        opstack[STACKDEPTH];
00337     int         lenstack = 0;
00338     int16       weight = 0;
00339     bool        prefix;
00340 
00341     /* since this function recurses, it could be driven to stack overflow */
00342     check_stack_depth();
00343 
00344     while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
00345     {
00346         switch (type)
00347         {
00348             case PT_VAL:
00349                 pushval(opaque, state, strval, lenval, weight, prefix);
00350                 while (lenstack && (opstack[lenstack - 1] == OP_AND ||
00351                                     opstack[lenstack - 1] == OP_NOT))
00352                 {
00353                     lenstack--;
00354                     pushOperator(state, opstack[lenstack]);
00355                 }
00356                 break;
00357             case PT_OPR:
00358                 if (lenstack && operator == OP_OR)
00359                     pushOperator(state, OP_OR);
00360                 else
00361                 {
00362                     if (lenstack == STACKDEPTH) /* internal error */
00363                         elog(ERROR, "tsquery stack too small");
00364                     opstack[lenstack] = operator;
00365                     lenstack++;
00366                 }
00367                 break;
00368             case PT_OPEN:
00369                 makepol(state, pushval, opaque);
00370 
00371                 while (lenstack && (opstack[lenstack - 1] == OP_AND ||
00372                                     opstack[lenstack - 1] == OP_NOT))
00373                 {
00374                     lenstack--;
00375                     pushOperator(state, opstack[lenstack]);
00376                 }
00377                 break;
00378             case PT_CLOSE:
00379                 while (lenstack)
00380                 {
00381                     lenstack--;
00382                     pushOperator(state, opstack[lenstack]);
00383                 };
00384                 return;
00385             case PT_ERR:
00386             default:
00387                 ereport(ERROR,
00388                         (errcode(ERRCODE_SYNTAX_ERROR),
00389                          errmsg("syntax error in tsquery: \"%s\"",
00390                                 state->buffer)));
00391         }
00392     }
00393     while (lenstack)
00394     {
00395         lenstack--;
00396         pushOperator(state, opstack[lenstack]);
00397     }
00398 }
00399 
00400 static void
00401 findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
00402 {
00403     /* since this function recurses, it could be driven to stack overflow. */
00404     check_stack_depth();
00405 
00406     if (*pos >= nnodes)
00407         elog(ERROR, "malformed tsquery: operand not found");
00408 
00409     if (ptr[*pos].type == QI_VAL ||
00410         ptr[*pos].type == QI_VALSTOP)   /* need to handle VALSTOP here, they
00411                                          * haven't been cleaned away yet. */
00412     {
00413         (*pos)++;
00414     }
00415     else
00416     {
00417         Assert(ptr[*pos].type == QI_OPR);
00418 
00419         if (ptr[*pos].qoperator.oper == OP_NOT)
00420         {
00421             ptr[*pos].qoperator.left = 1;
00422             (*pos)++;
00423             findoprnd_recurse(ptr, pos, nnodes);
00424         }
00425         else
00426         {
00427             QueryOperator *curitem = &ptr[*pos].qoperator;
00428             int         tmp = *pos;
00429 
00430             Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
00431 
00432             (*pos)++;
00433             findoprnd_recurse(ptr, pos, nnodes);
00434             curitem->left = *pos - tmp;
00435             findoprnd_recurse(ptr, pos, nnodes);
00436         }
00437     }
00438 }
00439 
00440 
00441 /*
00442  * Fills in the left-fields previously left unfilled. The input
00443  * QueryItems must be in polish (prefix) notation.
00444  */
00445 static void
00446 findoprnd(QueryItem *ptr, int size)
00447 {
00448     uint32      pos;
00449 
00450     pos = 0;
00451     findoprnd_recurse(ptr, &pos, size);
00452 
00453     if (pos != size)
00454         elog(ERROR, "malformed tsquery: extra nodes");
00455 }
00456 
00457 
00458 /*
00459  * Each value (operand) in the query is be passed to pushval. pushval can
00460  * transform the simple value to an arbitrarily complex expression using
00461  * pushValue and pushOperator. It must push a single value with pushValue,
00462  * a complete expression with all operands, or a a stopword placeholder
00463  * with pushStop, otherwise the prefix notation representation will be broken,
00464  * having an operator with no operand.
00465  *
00466  * opaque is passed on to pushval as is, pushval can use it to store its
00467  * private state.
00468  *
00469  * The returned query might contain QI_STOPVAL nodes. The caller is responsible
00470  * for cleaning them up (with clean_fakeval)
00471  */
00472 TSQuery
00473 parse_tsquery(char *buf,
00474               PushFunction pushval,
00475               Datum opaque,
00476               bool isplain)
00477 {
00478     struct TSQueryParserStateData state;
00479     int         i;
00480     TSQuery     query;
00481     int         commonlen;
00482     QueryItem  *ptr;
00483     ListCell   *cell;
00484 
00485     /* init state */
00486     state.buffer = buf;
00487     state.buf = buf;
00488     state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
00489     state.count = 0;
00490     state.polstr = NIL;
00491 
00492     /* init value parser's state */
00493     state.valstate = init_tsvector_parser(state.buffer, true, true);
00494 
00495     /* init list of operand */
00496     state.sumlen = 0;
00497     state.lenop = 64;
00498     state.curop = state.op = (char *) palloc(state.lenop);
00499     *(state.curop) = '\0';
00500 
00501     /* parse query & make polish notation (postfix, but in reverse order) */
00502     makepol(&state, pushval, opaque);
00503 
00504     close_tsvector_parser(state.valstate);
00505 
00506     if (list_length(state.polstr) == 0)
00507     {
00508         ereport(NOTICE,
00509                 (errmsg("text-search query doesn't contain lexemes: \"%s\"",
00510                         state.buffer)));
00511         query = (TSQuery) palloc(HDRSIZETQ);
00512         SET_VARSIZE(query, HDRSIZETQ);
00513         query->size = 0;
00514         return query;
00515     }
00516 
00517     /* Pack the QueryItems in the final TSQuery struct to return to caller */
00518     commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
00519     query = (TSQuery) palloc0(commonlen);
00520     SET_VARSIZE(query, commonlen);
00521     query->size = list_length(state.polstr);
00522     ptr = GETQUERY(query);
00523 
00524     /* Copy QueryItems to TSQuery */
00525     i = 0;
00526     foreach(cell, state.polstr)
00527     {
00528         QueryItem  *item = (QueryItem *) lfirst(cell);
00529 
00530         switch (item->type)
00531         {
00532             case QI_VAL:
00533                 memcpy(&ptr[i], item, sizeof(QueryOperand));
00534                 break;
00535             case QI_VALSTOP:
00536                 ptr[i].type = QI_VALSTOP;
00537                 break;
00538             case QI_OPR:
00539                 memcpy(&ptr[i], item, sizeof(QueryOperator));
00540                 break;
00541             default:
00542                 elog(ERROR, "unrecognized QueryItem type: %d", item->type);
00543         }
00544         i++;
00545     }
00546 
00547     /* Copy all the operand strings to TSQuery */
00548     memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
00549     pfree(state.op);
00550 
00551     /* Set left operand pointers for every operator. */
00552     findoprnd(ptr, query->size);
00553 
00554     return query;
00555 }
00556 
00557 static void
00558 pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
00559              int16 weight, bool prefix)
00560 {
00561     pushValue(state, strval, lenval, weight, prefix);
00562 }
00563 
00564 /*
00565  * in without morphology
00566  */
00567 Datum
00568 tsqueryin(PG_FUNCTION_ARGS)
00569 {
00570     char       *in = PG_GETARG_CSTRING(0);
00571 
00572     PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
00573 }
00574 
00575 /*
00576  * out function
00577  */
00578 typedef struct
00579 {
00580     QueryItem  *curpol;
00581     char       *buf;
00582     char       *cur;
00583     char       *op;
00584     int         buflen;
00585 } INFIX;
00586 
00587 /* Makes sure inf->buf is large enough for adding 'addsize' bytes */
00588 #define RESIZEBUF(inf, addsize) \
00589 while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
00590 { \
00591     int len = (inf)->cur - (inf)->buf; \
00592     (inf)->buflen *= 2; \
00593     (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
00594     (inf)->cur = (inf)->buf + len; \
00595 }
00596 
00597 /*
00598  * recursive walk on tree and print it in
00599  * infix (human-readable) view
00600  */
00601 static void
00602 infix(INFIX *in, bool first)
00603 {
00604     /* since this function recurses, it could be driven to stack overflow. */
00605     check_stack_depth();
00606 
00607     if (in->curpol->type == QI_VAL)
00608     {
00609         QueryOperand *curpol = &in->curpol->qoperand;
00610         char       *op = in->op + curpol->distance;
00611         int         clen;
00612 
00613         RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
00614         *(in->cur) = '\'';
00615         in->cur++;
00616         while (*op)
00617         {
00618             if (t_iseq(op, '\''))
00619             {
00620                 *(in->cur) = '\'';
00621                 in->cur++;
00622             }
00623             else if (t_iseq(op, '\\'))
00624             {
00625                 *(in->cur) = '\\';
00626                 in->cur++;
00627             }
00628             COPYCHAR(in->cur, op);
00629 
00630             clen = pg_mblen(op);
00631             op += clen;
00632             in->cur += clen;
00633         }
00634         *(in->cur) = '\'';
00635         in->cur++;
00636         if (curpol->weight || curpol->prefix)
00637         {
00638             *(in->cur) = ':';
00639             in->cur++;
00640             if (curpol->prefix)
00641             {
00642                 *(in->cur) = '*';
00643                 in->cur++;
00644             }
00645             if (curpol->weight & (1 << 3))
00646             {
00647                 *(in->cur) = 'A';
00648                 in->cur++;
00649             }
00650             if (curpol->weight & (1 << 2))
00651             {
00652                 *(in->cur) = 'B';
00653                 in->cur++;
00654             }
00655             if (curpol->weight & (1 << 1))
00656             {
00657                 *(in->cur) = 'C';
00658                 in->cur++;
00659             }
00660             if (curpol->weight & 1)
00661             {
00662                 *(in->cur) = 'D';
00663                 in->cur++;
00664             }
00665         }
00666         *(in->cur) = '\0';
00667         in->curpol++;
00668     }
00669     else if (in->curpol->qoperator.oper == OP_NOT)
00670     {
00671         bool        isopr = false;
00672 
00673         RESIZEBUF(in, 1);
00674         *(in->cur) = '!';
00675         in->cur++;
00676         *(in->cur) = '\0';
00677         in->curpol++;
00678 
00679         if (in->curpol->type == QI_OPR)
00680         {
00681             isopr = true;
00682             RESIZEBUF(in, 2);
00683             sprintf(in->cur, "( ");
00684             in->cur = strchr(in->cur, '\0');
00685         }
00686 
00687         infix(in, isopr);
00688         if (isopr)
00689         {
00690             RESIZEBUF(in, 2);
00691             sprintf(in->cur, " )");
00692             in->cur = strchr(in->cur, '\0');
00693         }
00694     }
00695     else
00696     {
00697         int8        op = in->curpol->qoperator.oper;
00698         INFIX       nrm;
00699 
00700         in->curpol++;
00701         if (op == OP_OR && !first)
00702         {
00703             RESIZEBUF(in, 2);
00704             sprintf(in->cur, "( ");
00705             in->cur = strchr(in->cur, '\0');
00706         }
00707 
00708         nrm.curpol = in->curpol;
00709         nrm.op = in->op;
00710         nrm.buflen = 16;
00711         nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
00712 
00713         /* get right operand */
00714         infix(&nrm, false);
00715 
00716         /* get & print left operand */
00717         in->curpol = nrm.curpol;
00718         infix(in, false);
00719 
00720         /* print operator & right operand */
00721         RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
00722         switch (op)
00723         {
00724             case OP_OR:
00725                 sprintf(in->cur, " | %s", nrm.buf);
00726                 break;
00727             case OP_AND:
00728                 sprintf(in->cur, " & %s", nrm.buf);
00729                 break;
00730             default:
00731                 /* OP_NOT is handled in above if-branch */
00732                 elog(ERROR, "unrecognized operator type: %d", op);
00733         }
00734         in->cur = strchr(in->cur, '\0');
00735         pfree(nrm.buf);
00736 
00737         if (op == OP_OR && !first)
00738         {
00739             RESIZEBUF(in, 2);
00740             sprintf(in->cur, " )");
00741             in->cur = strchr(in->cur, '\0');
00742         }
00743     }
00744 }
00745 
00746 
00747 Datum
00748 tsqueryout(PG_FUNCTION_ARGS)
00749 {
00750     TSQuery     query = PG_GETARG_TSQUERY(0);
00751     INFIX       nrm;
00752 
00753     if (query->size == 0)
00754     {
00755         char       *b = palloc(1);
00756 
00757         *b = '\0';
00758         PG_RETURN_POINTER(b);
00759     }
00760     nrm.curpol = GETQUERY(query);
00761     nrm.buflen = 32;
00762     nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
00763     *(nrm.cur) = '\0';
00764     nrm.op = GETOPERAND(query);
00765     infix(&nrm, true);
00766 
00767     PG_FREE_IF_COPY(query, 0);
00768     PG_RETURN_CSTRING(nrm.buf);
00769 }
00770 
00771 /*
00772  * Binary Input / Output functions. The binary format is as follows:
00773  *
00774  * uint32    number of operators/operands in the query
00775  *
00776  * Followed by the operators and operands, in prefix notation. For each
00777  * operand:
00778  *
00779  * uint8    type, QI_VAL
00780  * uint8    weight
00781  *          operand text in client encoding, null-terminated
00782  * uint8    prefix
00783  *
00784  * For each operator:
00785  * uint8    type, QI_OPR
00786  * uint8    operator, one of OP_AND, OP_OR, OP_NOT.
00787  */
00788 Datum
00789 tsquerysend(PG_FUNCTION_ARGS)
00790 {
00791     TSQuery     query = PG_GETARG_TSQUERY(0);
00792     StringInfoData buf;
00793     int         i;
00794     QueryItem  *item = GETQUERY(query);
00795 
00796     pq_begintypsend(&buf);
00797 
00798     pq_sendint(&buf, query->size, sizeof(uint32));
00799     for (i = 0; i < query->size; i++)
00800     {
00801         pq_sendint(&buf, item->type, sizeof(item->type));
00802 
00803         switch (item->type)
00804         {
00805             case QI_VAL:
00806                 pq_sendint(&buf, item->qoperand.weight, sizeof(uint8));
00807                 pq_sendint(&buf, item->qoperand.prefix, sizeof(uint8));
00808                 pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
00809                 break;
00810             case QI_OPR:
00811                 pq_sendint(&buf, item->qoperator.oper, sizeof(item->qoperator.oper));
00812                 break;
00813             default:
00814                 elog(ERROR, "unrecognized tsquery node type: %d", item->type);
00815         }
00816         item++;
00817     }
00818 
00819     PG_FREE_IF_COPY(query, 0);
00820 
00821     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
00822 }
00823 
00824 Datum
00825 tsqueryrecv(PG_FUNCTION_ARGS)
00826 {
00827     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
00828     TSQuery     query;
00829     int         i,
00830                 len;
00831     QueryItem  *item;
00832     int         datalen;
00833     char       *ptr;
00834     uint32      size;
00835     const char **operands;
00836 
00837     size = pq_getmsgint(buf, sizeof(uint32));
00838     if (size > (MaxAllocSize / sizeof(QueryItem)))
00839         elog(ERROR, "invalid size of tsquery");
00840 
00841     /* Allocate space to temporarily hold operand strings */
00842     operands = palloc(size * sizeof(char *));
00843 
00844     /* Allocate space for all the QueryItems. */
00845     len = HDRSIZETQ + sizeof(QueryItem) * size;
00846     query = (TSQuery) palloc0(len);
00847     query->size = size;
00848     item = GETQUERY(query);
00849 
00850     datalen = 0;
00851     for (i = 0; i < size; i++)
00852     {
00853         item->type = (int8) pq_getmsgint(buf, sizeof(int8));
00854 
00855         if (item->type == QI_VAL)
00856         {
00857             size_t      val_len;    /* length after recoding to server encoding */
00858             uint8       weight;
00859             uint8       prefix;
00860             const char *val;
00861             pg_crc32    valcrc;
00862 
00863             weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
00864             prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
00865             val = pq_getmsgstring(buf);
00866             val_len = strlen(val);
00867 
00868             /* Sanity checks */
00869 
00870             if (weight > 0xF)
00871                 elog(ERROR, "invalid tsquery: invalid weight bitmap");
00872 
00873             if (val_len > MAXSTRLEN)
00874                 elog(ERROR, "invalid tsquery: operand too long");
00875 
00876             if (datalen > MAXSTRPOS)
00877                 elog(ERROR, "invalid tsquery: total operand length exceeded");
00878 
00879             /* Looks valid. */
00880 
00881             INIT_CRC32(valcrc);
00882             COMP_CRC32(valcrc, val, val_len);
00883             FIN_CRC32(valcrc);
00884 
00885             item->qoperand.weight = weight;
00886             item->qoperand.prefix = (prefix) ? true : false;
00887             item->qoperand.valcrc = (int32) valcrc;
00888             item->qoperand.length = val_len;
00889             item->qoperand.distance = datalen;
00890 
00891             /*
00892              * Operand strings are copied to the final struct after this loop;
00893              * here we just collect them to an array
00894              */
00895             operands[i] = val;
00896 
00897             datalen += val_len + 1;     /* + 1 for the '\0' terminator */
00898         }
00899         else if (item->type == QI_OPR)
00900         {
00901             int8        oper;
00902 
00903             oper = (int8) pq_getmsgint(buf, sizeof(int8));
00904             if (oper != OP_NOT && oper != OP_OR && oper != OP_AND)
00905                 elog(ERROR, "invalid tsquery: unrecognized operator type %d",
00906                      (int) oper);
00907             if (i == size - 1)
00908                 elog(ERROR, "invalid pointer to right operand");
00909 
00910             item->qoperator.oper = oper;
00911         }
00912         else
00913             elog(ERROR, "unrecognized tsquery node type: %d", item->type);
00914 
00915         item++;
00916     }
00917 
00918     /* Enlarge buffer to make room for the operand values. */
00919     query = (TSQuery) repalloc(query, len + datalen);
00920     item = GETQUERY(query);
00921     ptr = GETOPERAND(query);
00922 
00923     /*
00924      * Fill in the left-pointers. Checks that the tree is well-formed as a
00925      * side-effect.
00926      */
00927     findoprnd(item, size);
00928 
00929     /* Copy operands to output struct */
00930     for (i = 0; i < size; i++)
00931     {
00932         if (item->type == QI_VAL)
00933         {
00934             memcpy(ptr, operands[i], item->qoperand.length + 1);
00935             ptr += item->qoperand.length + 1;
00936         }
00937         item++;
00938     }
00939 
00940     pfree(operands);
00941 
00942     Assert(ptr - GETOPERAND(query) == datalen);
00943 
00944     SET_VARSIZE(query, len + datalen);
00945 
00946     PG_RETURN_TSVECTOR(query);
00947 }
00948 
00949 /*
00950  * debug function, used only for view query
00951  * which will be executed in non-leaf pages in index
00952  */
00953 Datum
00954 tsquerytree(PG_FUNCTION_ARGS)
00955 {
00956     TSQuery     query = PG_GETARG_TSQUERY(0);
00957     INFIX       nrm;
00958     text       *res;
00959     QueryItem  *q;
00960     int         len;
00961 
00962     if (query->size == 0)
00963     {
00964         res = (text *) palloc(VARHDRSZ);
00965         SET_VARSIZE(res, VARHDRSZ);
00966         PG_RETURN_POINTER(res);
00967     }
00968 
00969     q = clean_NOT(GETQUERY(query), &len);
00970 
00971     if (!q)
00972     {
00973         res = cstring_to_text("T");
00974     }
00975     else
00976     {
00977         nrm.curpol = q;
00978         nrm.buflen = 32;
00979         nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
00980         *(nrm.cur) = '\0';
00981         nrm.op = GETOPERAND(query);
00982         infix(&nrm, true);
00983         res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
00984         pfree(q);
00985     }
00986 
00987     PG_FREE_IF_COPY(query, 0);
00988 
00989     PG_RETURN_TEXT_P(res);
00990 }