00001
00002
00003
00004
00005
#include "postgres.h"

#include <ctype.h>

#include "crc32.h"
#include "ltree.h"
#include "miscadmin.h"
00012
/* Text input function for ltxtquery: parses a C string via queryin(). */
PG_FUNCTION_INFO_V1(ltxtq_in);
Datum ltxtq_in(PG_FUNCTION_ARGS);

/* Text output function for ltxtquery: prints the stored query in infix form. */
PG_FUNCTION_INFO_V1(ltxtq_out);
Datum ltxtq_out(PG_FUNCTION_ARGS);
00018
00019
00020
/* Tokenizer states stored in QPRS_STATE.state and switched on in gettoken_query() */
#define WAITOPERAND 1	/* expecting '!', '(' or the first character of an operand */
#define INOPERAND 2		/* inside an operand or its trailing modifiers */
#define WAITOPERATOR 3	/* expecting '&', '|', ')' or the end of the input */
00024
00025
00026
00027
00028
00029 typedef struct NODE
00030 {
00031 int32 type;
00032 int32 val;
00033 int16 distance;
00034 int16 length;
00035 uint16 flag;
00036 struct NODE *next;
00037 } NODE;
00038
/* Working state of the ltxtquery parser. */
typedef struct
{
	char *buf;		/* next unread byte of the input text */
	int32 state;	/* tokenizer state: WAITOPERAND, INOPERAND or WAITOPERATOR */
	int32 count;	/* number of currently open parentheses */

	NODE *str;		/* parsed elements, most recently pushed first */

	int32 num;		/* number of elements in the str list */


	int32 lenop;	/* allocated size of the op buffer, in bytes */
	int32 sumlen;	/* bytes of op in use, counting each operand's trailing NUL */
	char *op;		/* buffer collecting the NUL-terminated operand texts */
	char *curop;	/* position in op where the next operand will be written */
} QPRS_STATE;
00055
00056
00057
00058
00059 static int32
00060 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
00061 {
00062 int charlen;
00063
00064 for (;;)
00065 {
00066 charlen = pg_mblen(state->buf);
00067
00068 switch (state->state)
00069 {
00070 case WAITOPERAND:
00071 if (charlen == 1 && t_iseq(state->buf, '!'))
00072 {
00073 (state->buf)++;
00074 *val = (int32) '!';
00075 return OPR;
00076 }
00077 else if (charlen == 1 && t_iseq(state->buf, '('))
00078 {
00079 state->count++;
00080 (state->buf)++;
00081 return OPEN;
00082 }
00083 else if (ISALNUM(state->buf))
00084 {
00085 state->state = INOPERAND;
00086 *strval = state->buf;
00087 *lenval = charlen;
00088 *flag = 0;
00089 }
00090 else if (!t_isspace(state->buf))
00091 ereport(ERROR,
00092 (errcode(ERRCODE_SYNTAX_ERROR),
00093 errmsg("operand syntax error")));
00094 break;
00095 case INOPERAND:
00096 if (ISALNUM(state->buf))
00097 {
00098 if (*flag)
00099 ereport(ERROR,
00100 (errcode(ERRCODE_SYNTAX_ERROR),
00101 errmsg("modificators syntax error")));
00102 *lenval += charlen;
00103 }
00104 else if (charlen == 1 && t_iseq(state->buf, '%'))
00105 *flag |= LVAR_SUBLEXEME;
00106 else if (charlen == 1 && t_iseq(state->buf, '@'))
00107 *flag |= LVAR_INCASE;
00108 else if (charlen == 1 && t_iseq(state->buf, '*'))
00109 *flag |= LVAR_ANYEND;
00110 else
00111 {
00112 state->state = WAITOPERATOR;
00113 return VAL;
00114 }
00115 break;
00116 case WAITOPERATOR:
00117 if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
00118 {
00119 state->state = WAITOPERAND;
00120 *val = (int32) *(state->buf);
00121 (state->buf)++;
00122 return OPR;
00123 }
00124 else if (charlen == 1 && t_iseq(state->buf, ')'))
00125 {
00126 (state->buf)++;
00127 state->count--;
00128 return (state->count < 0) ? ERR : CLOSE;
00129 }
00130 else if (*(state->buf) == '\0')
00131 return (state->count) ? ERR : END;
00132 else if (charlen == 1 && !t_iseq(state->buf, ' '))
00133 return ERR;
00134 break;
00135 default:
00136 return ERR;
00137 break;
00138 }
00139
00140 state->buf += charlen;
00141 }
00142 }
00143
00144
00145
00146
00147 static void
00148 pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
00149 {
00150 NODE *tmp = (NODE *) palloc(sizeof(NODE));
00151
00152 tmp->type = type;
00153 tmp->val = val;
00154 tmp->flag = flag;
00155 if (distance > 0xffff)
00156 ereport(ERROR,
00157 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00158 errmsg("value is too big")));
00159 if (lenval > 0xff)
00160 ereport(ERROR,
00161 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00162 errmsg("operand is too long")));
00163 tmp->distance = distance;
00164 tmp->length = lenval;
00165 tmp->next = state->str;
00166 state->str = tmp;
00167 state->num++;
00168 }
00169
00170
00171
00172
00173 static void
00174 pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
00175 {
00176 if (lenval > 0xffff)
00177 ereport(ERROR,
00178 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00179 errmsg("word is too long")));
00180
00181 pushquery(state, type, ltree_crc32_sz(strval, lenval),
00182 state->curop - state->op, lenval, flag);
00183
00184 while (state->curop - state->op + lenval + 1 >= state->lenop)
00185 {
00186 int32 tmp = state->curop - state->op;
00187
00188 state->lenop *= 2;
00189 state->op = (char *) repalloc((void *) state->op, state->lenop);
00190 state->curop = state->op + tmp;
00191 }
00192 memcpy((void *) state->curop, (void *) strval, lenval);
00193 state->curop += lenval;
00194 *(state->curop) = '\0';
00195 state->curop++;
00196 state->sumlen += lenval + 1;
00197 return;
00198 }
00199
/* Capacity of the pending-operator stack kept by each makepol() invocation */
#define STACKDEPTH 32
00201
00202
00203
00204 static int32
00205 makepol(QPRS_STATE *state)
00206 {
00207 int32 val = 0,
00208 type;
00209 int32 lenval = 0;
00210 char *strval = NULL;
00211 int32 stack[STACKDEPTH];
00212 int32 lenstack = 0;
00213 uint16 flag = 0;
00214
00215 while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
00216 {
00217 switch (type)
00218 {
00219 case VAL:
00220 pushval_asis(state, VAL, strval, lenval, flag);
00221 while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
00222 stack[lenstack - 1] == (int32) '!'))
00223 {
00224 lenstack--;
00225 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
00226 }
00227 break;
00228 case OPR:
00229 if (lenstack && val == (int32) '|')
00230 pushquery(state, OPR, val, 0, 0, 0);
00231 else
00232 {
00233 if (lenstack == STACKDEPTH)
00234
00235 elog(ERROR, "stack too short");
00236 stack[lenstack] = val;
00237 lenstack++;
00238 }
00239 break;
00240 case OPEN:
00241 if (makepol(state) == ERR)
00242 return ERR;
00243 while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
00244 stack[lenstack - 1] == (int32) '!'))
00245 {
00246 lenstack--;
00247 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
00248 }
00249 break;
00250 case CLOSE:
00251 while (lenstack)
00252 {
00253 lenstack--;
00254 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
00255 };
00256 return END;
00257 break;
00258 case ERR:
00259 default:
00260 ereport(ERROR,
00261 (errcode(ERRCODE_SYNTAX_ERROR),
00262 errmsg("syntax error")));
00263
00264 return ERR;
00265
00266 }
00267 }
00268 while (lenstack)
00269 {
00270 lenstack--;
00271 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
00272 };
00273 return END;
00274 }
00275
00276 static void
00277 findoprnd(ITEM *ptr, int32 *pos)
00278 {
00279 if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
00280 {
00281 ptr[*pos].left = 0;
00282 (*pos)++;
00283 }
00284 else if (ptr[*pos].val == (int32) '!')
00285 {
00286 ptr[*pos].left = 1;
00287 (*pos)++;
00288 findoprnd(ptr, pos);
00289 }
00290 else
00291 {
00292 ITEM *curitem = &ptr[*pos];
00293 int32 tmp = *pos;
00294
00295 (*pos)++;
00296 findoprnd(ptr, pos);
00297 curitem->left = *pos - tmp;
00298 findoprnd(ptr, pos);
00299 }
00300 }
00301
00302
00303
00304
00305
00306 static ltxtquery *
00307 queryin(char *buf)
00308 {
00309 QPRS_STATE state;
00310 int32 i;
00311 ltxtquery *query;
00312 int32 commonlen;
00313 ITEM *ptr;
00314 NODE *tmp;
00315 int32 pos = 0;
00316
00317 #ifdef BS_DEBUG
00318 char pbuf[16384],
00319 *cur;
00320 #endif
00321
00322
00323 state.buf = buf;
00324 state.state = WAITOPERAND;
00325 state.count = 0;
00326 state.num = 0;
00327 state.str = NULL;
00328
00329
00330 state.sumlen = 0;
00331 state.lenop = 64;
00332 state.curop = state.op = (char *) palloc(state.lenop);
00333 *(state.curop) = '\0';
00334
00335
00336 makepol(&state);
00337 if (!state.num)
00338 ereport(ERROR,
00339 (errcode(ERRCODE_SYNTAX_ERROR),
00340 errmsg("syntax error"),
00341 errdetail("Empty query.")));
00342
00343
00344 commonlen = COMPUTESIZE(state.num, state.sumlen);
00345 query = (ltxtquery *) palloc(commonlen);
00346 SET_VARSIZE(query, commonlen);
00347 query->size = state.num;
00348 ptr = GETQUERY(query);
00349
00350
00351 for (i = 0; i < state.num; i++)
00352 {
00353 ptr[i].type = state.str->type;
00354 ptr[i].val = state.str->val;
00355 ptr[i].distance = state.str->distance;
00356 ptr[i].length = state.str->length;
00357 ptr[i].flag = state.str->flag;
00358 tmp = state.str->next;
00359 pfree(state.str);
00360 state.str = tmp;
00361 }
00362
00363
00364 memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
00365 pfree(state.op);
00366
00367
00368 pos = 0;
00369 findoprnd(ptr, &pos);
00370
00371 return query;
00372 }
00373
00374
00375
00376
00377 Datum
00378 ltxtq_in(PG_FUNCTION_ARGS)
00379 {
00380 PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0)));
00381 }
00382
00383
00384
00385
/* Output state used by infix() while printing a query. */
typedef struct
{
	ITEM *curpol;	/* element to be printed next */
	char *buf;		/* start of the output buffer */
	char *cur;		/* current write position within buf */
	char *op;		/* base of the operand texts; an item's distance is an offset from here */
	int32 buflen;	/* allocated size of buf, in bytes */
} INFIX;
00394
/*
 * Make sure (inf)->buf has room for addsize more bytes plus a terminating
 * NUL beyond (inf)->cur, doubling the allocation as often as needed and
 * re-pointing (inf)->cur into the possibly moved buffer.
 *
 * The body is wrapped in do { } while (0) so that "RESIZEBUF(x, n);" is
 * exactly one statement and stays correct as an unbraced if/else arm.
 * Both arguments are evaluated more than once and must be free of side
 * effects.
 */
#define RESIZEBUF(inf,addsize) \
	do { \
		while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
		{ \
			int32		resizebuf_used = (inf)->cur - (inf)->buf; \
			(inf)->buflen *= 2; \
			(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
			(inf)->cur = (inf)->buf + resizebuf_used; \
		} \
	} while (0)
00403
00404
00405
00406
00407
00408 static void
00409 infix(INFIX *in, bool first)
00410 {
00411 if (in->curpol->type == VAL)
00412 {
00413 char *op = in->op + in->curpol->distance;
00414
00415 RESIZEBUF(in, in->curpol->length * 2 + 5);
00416 while (*op)
00417 {
00418 *(in->cur) = *op;
00419 op++;
00420 in->cur++;
00421 }
00422 if (in->curpol->flag & LVAR_SUBLEXEME)
00423 {
00424 *(in->cur) = '%';
00425 in->cur++;
00426 }
00427 if (in->curpol->flag & LVAR_INCASE)
00428 {
00429 *(in->cur) = '@';
00430 in->cur++;
00431 }
00432 if (in->curpol->flag & LVAR_ANYEND)
00433 {
00434 *(in->cur) = '*';
00435 in->cur++;
00436 }
00437 *(in->cur) = '\0';
00438 in->curpol++;
00439 }
00440 else if (in->curpol->val == (int32) '!')
00441 {
00442 bool isopr = false;
00443
00444 RESIZEBUF(in, 1);
00445 *(in->cur) = '!';
00446 in->cur++;
00447 *(in->cur) = '\0';
00448 in->curpol++;
00449 if (in->curpol->type == OPR)
00450 {
00451 isopr = true;
00452 RESIZEBUF(in, 2);
00453 sprintf(in->cur, "( ");
00454 in->cur = strchr(in->cur, '\0');
00455 }
00456 infix(in, isopr);
00457 if (isopr)
00458 {
00459 RESIZEBUF(in, 2);
00460 sprintf(in->cur, " )");
00461 in->cur = strchr(in->cur, '\0');
00462 }
00463 }
00464 else
00465 {
00466 int32 op = in->curpol->val;
00467 INFIX nrm;
00468
00469 in->curpol++;
00470 if (op == (int32) '|' && !first)
00471 {
00472 RESIZEBUF(in, 2);
00473 sprintf(in->cur, "( ");
00474 in->cur = strchr(in->cur, '\0');
00475 }
00476
00477 nrm.curpol = in->curpol;
00478 nrm.op = in->op;
00479 nrm.buflen = 16;
00480 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
00481
00482
00483 infix(&nrm, false);
00484
00485
00486 in->curpol = nrm.curpol;
00487 infix(in, false);
00488
00489
00490 RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
00491 sprintf(in->cur, " %c %s", op, nrm.buf);
00492 in->cur = strchr(in->cur, '\0');
00493 pfree(nrm.buf);
00494
00495 if (op == (int32) '|' && !first)
00496 {
00497 RESIZEBUF(in, 2);
00498 sprintf(in->cur, " )");
00499 in->cur = strchr(in->cur, '\0');
00500 }
00501 }
00502 }
00503
00504 Datum
00505 ltxtq_out(PG_FUNCTION_ARGS)
00506 {
00507 ltxtquery *query = PG_GETARG_LTXTQUERY(0);
00508 INFIX nrm;
00509
00510 if (query->size == 0)
00511 ereport(ERROR,
00512 (errcode(ERRCODE_SYNTAX_ERROR),
00513 errmsg("syntax error"),
00514 errdetail("Empty query.")));
00515
00516 nrm.curpol = GETQUERY(query);
00517 nrm.buflen = 32;
00518 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
00519 *(nrm.cur) = '\0';
00520 nrm.op = GETOPERAND(query);
00521 infix(&nrm, true);
00522
00523 PG_FREE_IF_COPY(query, 0);
00524 PG_RETURN_POINTER(nrm.buf);
00525 }