Header And Logo

PostgreSQL
| The world's most advanced open source database.

ltxtquery_io.c

Go to the documentation of this file.
00001 /*
00002  * txtquery io
00003  * Teodor Sigaev <[email protected]>
00004  * contrib/ltree/ltxtquery_io.c
00005  */
#include "postgres.h"

#include <ctype.h>

#include "crc32.h"
#include "ltree.h"
#include "miscadmin.h"
00012 
00013 PG_FUNCTION_INFO_V1(ltxtq_in);
00014 Datum       ltxtq_in(PG_FUNCTION_ARGS);
00015 
00016 PG_FUNCTION_INFO_V1(ltxtq_out);
00017 Datum       ltxtq_out(PG_FUNCTION_ARGS);
00018 
00019 
/*
 * Tokenizer states; the "state" field of the parser state always holds one
 * of these values.
 */
enum
{
    WAITOPERAND = 1,            /* expecting '!', '(' or the start of an
                                 * operand */
    INOPERAND = 2,              /* inside an operand or its trailing
                                 * modifiers */
    WAITOPERATOR = 3            /* expecting '&', '|', ')' or end of input */
};
00024 
00025 /*
00026  * node of query tree, also used
00027  * for storing polish notation in parser
00028  */
00029 typedef struct NODE
00030 {
00031     int32       type;
00032     int32       val;
00033     int16       distance;
00034     int16       length;
00035     uint16      flag;
00036     struct NODE *next;
00037 } NODE;
00038 
00039 typedef struct
00040 {
00041     char       *buf;
00042     int32       state;
00043     int32       count;
00044     /* reverse polish notation in list (for temporary usage) */
00045     NODE       *str;
00046     /* number in str */
00047     int32       num;
00048 
00049     /* user-friendly operand */
00050     int32       lenop;
00051     int32       sumlen;
00052     char       *op;
00053     char       *curop;
00054 } QPRS_STATE;
00055 
00056 /*
00057  * get token from query string
00058  */
00059 static int32
00060 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
00061 {
00062     int         charlen;
00063 
00064     for (;;)
00065     {
00066         charlen = pg_mblen(state->buf);
00067 
00068         switch (state->state)
00069         {
00070             case WAITOPERAND:
00071                 if (charlen == 1 && t_iseq(state->buf, '!'))
00072                 {
00073                     (state->buf)++;
00074                     *val = (int32) '!';
00075                     return OPR;
00076                 }
00077                 else if (charlen == 1 && t_iseq(state->buf, '('))
00078                 {
00079                     state->count++;
00080                     (state->buf)++;
00081                     return OPEN;
00082                 }
00083                 else if (ISALNUM(state->buf))
00084                 {
00085                     state->state = INOPERAND;
00086                     *strval = state->buf;
00087                     *lenval = charlen;
00088                     *flag = 0;
00089                 }
00090                 else if (!t_isspace(state->buf))
00091                     ereport(ERROR,
00092                             (errcode(ERRCODE_SYNTAX_ERROR),
00093                              errmsg("operand syntax error")));
00094                 break;
00095             case INOPERAND:
00096                 if (ISALNUM(state->buf))
00097                 {
00098                     if (*flag)
00099                         ereport(ERROR,
00100                                 (errcode(ERRCODE_SYNTAX_ERROR),
00101                                  errmsg("modificators syntax error")));
00102                     *lenval += charlen;
00103                 }
00104                 else if (charlen == 1 && t_iseq(state->buf, '%'))
00105                     *flag |= LVAR_SUBLEXEME;
00106                 else if (charlen == 1 && t_iseq(state->buf, '@'))
00107                     *flag |= LVAR_INCASE;
00108                 else if (charlen == 1 && t_iseq(state->buf, '*'))
00109                     *flag |= LVAR_ANYEND;
00110                 else
00111                 {
00112                     state->state = WAITOPERATOR;
00113                     return VAL;
00114                 }
00115                 break;
00116             case WAITOPERATOR:
00117                 if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
00118                 {
00119                     state->state = WAITOPERAND;
00120                     *val = (int32) *(state->buf);
00121                     (state->buf)++;
00122                     return OPR;
00123                 }
00124                 else if (charlen == 1 && t_iseq(state->buf, ')'))
00125                 {
00126                     (state->buf)++;
00127                     state->count--;
00128                     return (state->count < 0) ? ERR : CLOSE;
00129                 }
00130                 else if (*(state->buf) == '\0')
00131                     return (state->count) ? ERR : END;
00132                 else if (charlen == 1 && !t_iseq(state->buf, ' '))
00133                     return ERR;
00134                 break;
00135             default:
00136                 return ERR;
00137                 break;
00138         }
00139 
00140         state->buf += charlen;
00141     }
00142 }
00143 
00144 /*
00145  * push new one in polish notation reverse view
00146  */
00147 static void
00148 pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
00149 {
00150     NODE       *tmp = (NODE *) palloc(sizeof(NODE));
00151 
00152     tmp->type = type;
00153     tmp->val = val;
00154     tmp->flag = flag;
00155     if (distance > 0xffff)
00156         ereport(ERROR,
00157                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00158                  errmsg("value is too big")));
00159     if (lenval > 0xff)
00160         ereport(ERROR,
00161                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00162                  errmsg("operand is too long")));
00163     tmp->distance = distance;
00164     tmp->length = lenval;
00165     tmp->next = state->str;
00166     state->str = tmp;
00167     state->num++;
00168 }
00169 
00170 /*
00171  * This function is used for query_txt parsing
00172  */
00173 static void
00174 pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
00175 {
00176     if (lenval > 0xffff)
00177         ereport(ERROR,
00178                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00179                  errmsg("word is too long")));
00180 
00181     pushquery(state, type, ltree_crc32_sz(strval, lenval),
00182               state->curop - state->op, lenval, flag);
00183 
00184     while (state->curop - state->op + lenval + 1 >= state->lenop)
00185     {
00186         int32       tmp = state->curop - state->op;
00187 
00188         state->lenop *= 2;
00189         state->op = (char *) repalloc((void *) state->op, state->lenop);
00190         state->curop = state->op + tmp;
00191     }
00192     memcpy((void *) state->curop, (void *) strval, lenval);
00193     state->curop += lenval;
00194     *(state->curop) = '\0';
00195     state->curop++;
00196     state->sumlen += lenval + 1;
00197     return;
00198 }
00199 
00200 #define STACKDEPTH      32
00201 /*
00202  * make polish notaion of query
00203  */
00204 static int32
00205 makepol(QPRS_STATE *state)
00206 {
00207     int32       val = 0,
00208                 type;
00209     int32       lenval = 0;
00210     char       *strval = NULL;
00211     int32       stack[STACKDEPTH];
00212     int32       lenstack = 0;
00213     uint16      flag = 0;
00214 
00215     while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
00216     {
00217         switch (type)
00218         {
00219             case VAL:
00220                 pushval_asis(state, VAL, strval, lenval, flag);
00221                 while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
00222                                     stack[lenstack - 1] == (int32) '!'))
00223                 {
00224                     lenstack--;
00225                     pushquery(state, OPR, stack[lenstack], 0, 0, 0);
00226                 }
00227                 break;
00228             case OPR:
00229                 if (lenstack && val == (int32) '|')
00230                     pushquery(state, OPR, val, 0, 0, 0);
00231                 else
00232                 {
00233                     if (lenstack == STACKDEPTH)
00234                         /* internal error */
00235                         elog(ERROR, "stack too short");
00236                     stack[lenstack] = val;
00237                     lenstack++;
00238                 }
00239                 break;
00240             case OPEN:
00241                 if (makepol(state) == ERR)
00242                     return ERR;
00243                 while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
00244                                     stack[lenstack - 1] == (int32) '!'))
00245                 {
00246                     lenstack--;
00247                     pushquery(state, OPR, stack[lenstack], 0, 0, 0);
00248                 }
00249                 break;
00250             case CLOSE:
00251                 while (lenstack)
00252                 {
00253                     lenstack--;
00254                     pushquery(state, OPR, stack[lenstack], 0, 0, 0);
00255                 };
00256                 return END;
00257                 break;
00258             case ERR:
00259             default:
00260                 ereport(ERROR,
00261                         (errcode(ERRCODE_SYNTAX_ERROR),
00262                          errmsg("syntax error")));
00263 
00264                 return ERR;
00265 
00266         }
00267     }
00268     while (lenstack)
00269     {
00270         lenstack--;
00271         pushquery(state, OPR, stack[lenstack], 0, 0, 0);
00272     };
00273     return END;
00274 }
00275 
00276 static void
00277 findoprnd(ITEM *ptr, int32 *pos)
00278 {
00279     if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
00280     {
00281         ptr[*pos].left = 0;
00282         (*pos)++;
00283     }
00284     else if (ptr[*pos].val == (int32) '!')
00285     {
00286         ptr[*pos].left = 1;
00287         (*pos)++;
00288         findoprnd(ptr, pos);
00289     }
00290     else
00291     {
00292         ITEM       *curitem = &ptr[*pos];
00293         int32       tmp = *pos;
00294 
00295         (*pos)++;
00296         findoprnd(ptr, pos);
00297         curitem->left = *pos - tmp;
00298         findoprnd(ptr, pos);
00299     }
00300 }
00301 
00302 
00303 /*
00304  * input
00305  */
00306 static ltxtquery *
00307 queryin(char *buf)
00308 {
00309     QPRS_STATE  state;
00310     int32       i;
00311     ltxtquery  *query;
00312     int32       commonlen;
00313     ITEM       *ptr;
00314     NODE       *tmp;
00315     int32       pos = 0;
00316 
00317 #ifdef BS_DEBUG
00318     char        pbuf[16384],
00319                *cur;
00320 #endif
00321 
00322     /* init state */
00323     state.buf = buf;
00324     state.state = WAITOPERAND;
00325     state.count = 0;
00326     state.num = 0;
00327     state.str = NULL;
00328 
00329     /* init list of operand */
00330     state.sumlen = 0;
00331     state.lenop = 64;
00332     state.curop = state.op = (char *) palloc(state.lenop);
00333     *(state.curop) = '\0';
00334 
00335     /* parse query & make polish notation (postfix, but in reverse order) */
00336     makepol(&state);
00337     if (!state.num)
00338         ereport(ERROR,
00339                 (errcode(ERRCODE_SYNTAX_ERROR),
00340                  errmsg("syntax error"),
00341                  errdetail("Empty query.")));
00342 
00343     /* make finish struct */
00344     commonlen = COMPUTESIZE(state.num, state.sumlen);
00345     query = (ltxtquery *) palloc(commonlen);
00346     SET_VARSIZE(query, commonlen);
00347     query->size = state.num;
00348     ptr = GETQUERY(query);
00349 
00350     /* set item in polish notation */
00351     for (i = 0; i < state.num; i++)
00352     {
00353         ptr[i].type = state.str->type;
00354         ptr[i].val = state.str->val;
00355         ptr[i].distance = state.str->distance;
00356         ptr[i].length = state.str->length;
00357         ptr[i].flag = state.str->flag;
00358         tmp = state.str->next;
00359         pfree(state.str);
00360         state.str = tmp;
00361     }
00362 
00363     /* set user friendly-operand view */
00364     memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
00365     pfree(state.op);
00366 
00367     /* set left operand's position for every operator */
00368     pos = 0;
00369     findoprnd(ptr, &pos);
00370 
00371     return query;
00372 }
00373 
00374 /*
00375  * in without morphology
00376  */
00377 Datum
00378 ltxtq_in(PG_FUNCTION_ARGS)
00379 {
00380     PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0)));
00381 }
00382 
00383 /*
00384  * out function
00385  */
00386 typedef struct
00387 {
00388     ITEM       *curpol;
00389     char       *buf;
00390     char       *cur;
00391     char       *op;
00392     int32       buflen;
00393 } INFIX;
00394 
/*
 * Ensure that addsize more bytes plus a terminating NUL fit at inf->cur,
 * doubling the buffer with repalloc() as often as needed and keeping
 * inf->cur at the same offset in the (possibly moved) buffer.
 *
 * The body is wrapped in do { } while (0) so that the macro acts as a
 * single statement, e.g. as the unbraced branch of an if/else.  Both
 * arguments are evaluated more than once, so they must be free of side
 * effects.
 */
#define RESIZEBUF(inf,addsize) \
do { \
    while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
    { \
        int32 len = (inf)->cur - (inf)->buf; \
        (inf)->buflen *= 2; \
        (inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
        (inf)->cur = (inf)->buf + len; \
    } \
} while (0)
00403 
00404 /*
00405  * recursive walk on tree and print it in
00406  * infix (human-readable) view
00407  */
00408 static void
00409 infix(INFIX *in, bool first)
00410 {
00411     if (in->curpol->type == VAL)
00412     {
00413         char       *op = in->op + in->curpol->distance;
00414 
00415         RESIZEBUF(in, in->curpol->length * 2 + 5);
00416         while (*op)
00417         {
00418             *(in->cur) = *op;
00419             op++;
00420             in->cur++;
00421         }
00422         if (in->curpol->flag & LVAR_SUBLEXEME)
00423         {
00424             *(in->cur) = '%';
00425             in->cur++;
00426         }
00427         if (in->curpol->flag & LVAR_INCASE)
00428         {
00429             *(in->cur) = '@';
00430             in->cur++;
00431         }
00432         if (in->curpol->flag & LVAR_ANYEND)
00433         {
00434             *(in->cur) = '*';
00435             in->cur++;
00436         }
00437         *(in->cur) = '\0';
00438         in->curpol++;
00439     }
00440     else if (in->curpol->val == (int32) '!')
00441     {
00442         bool        isopr = false;
00443 
00444         RESIZEBUF(in, 1);
00445         *(in->cur) = '!';
00446         in->cur++;
00447         *(in->cur) = '\0';
00448         in->curpol++;
00449         if (in->curpol->type == OPR)
00450         {
00451             isopr = true;
00452             RESIZEBUF(in, 2);
00453             sprintf(in->cur, "( ");
00454             in->cur = strchr(in->cur, '\0');
00455         }
00456         infix(in, isopr);
00457         if (isopr)
00458         {
00459             RESIZEBUF(in, 2);
00460             sprintf(in->cur, " )");
00461             in->cur = strchr(in->cur, '\0');
00462         }
00463     }
00464     else
00465     {
00466         int32       op = in->curpol->val;
00467         INFIX       nrm;
00468 
00469         in->curpol++;
00470         if (op == (int32) '|' && !first)
00471         {
00472             RESIZEBUF(in, 2);
00473             sprintf(in->cur, "( ");
00474             in->cur = strchr(in->cur, '\0');
00475         }
00476 
00477         nrm.curpol = in->curpol;
00478         nrm.op = in->op;
00479         nrm.buflen = 16;
00480         nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
00481 
00482         /* get right operand */
00483         infix(&nrm, false);
00484 
00485         /* get & print left operand */
00486         in->curpol = nrm.curpol;
00487         infix(in, false);
00488 
00489         /* print operator & right operand */
00490         RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
00491         sprintf(in->cur, " %c %s", op, nrm.buf);
00492         in->cur = strchr(in->cur, '\0');
00493         pfree(nrm.buf);
00494 
00495         if (op == (int32) '|' && !first)
00496         {
00497             RESIZEBUF(in, 2);
00498             sprintf(in->cur, " )");
00499             in->cur = strchr(in->cur, '\0');
00500         }
00501     }
00502 }
00503 
00504 Datum
00505 ltxtq_out(PG_FUNCTION_ARGS)
00506 {
00507     ltxtquery  *query = PG_GETARG_LTXTQUERY(0);
00508     INFIX       nrm;
00509 
00510     if (query->size == 0)
00511         ereport(ERROR,
00512                 (errcode(ERRCODE_SYNTAX_ERROR),
00513                  errmsg("syntax error"),
00514                  errdetail("Empty query.")));
00515 
00516     nrm.curpol = GETQUERY(query);
00517     nrm.buflen = 32;
00518     nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
00519     *(nrm.cur) = '\0';
00520     nrm.op = GETOPERAND(query);
00521     infix(&nrm, true);
00522 
00523     PG_FREE_IF_COPY(query, 0);
00524     PG_RETURN_POINTER(nrm.buf);
00525 }