Header And Logo

PostgreSQL
| The world's most advanced open source database.

tsrank.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * tsrank.c
00004  *      rank tsvector by tsquery
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  *
00008  *
00009  * IDENTIFICATION
00010  *    src/backend/utils/adt/tsrank.c
00011  *
00012  *-------------------------------------------------------------------------
00013  */
00014 #include "postgres.h"
00015 
00016 #include <math.h>
00017 
00018 #include "tsearch/ts_utils.h"
00019 #include "utils/array.h"
00020 #include "miscadmin.h"
00021 
00022 
00023 static float weights[] = {0.1f, 0.2f, 0.4f, 1.0f};
00024 
00025 #define wpos(wep)   ( w[ WEP_GETWEIGHT(wep) ] )
00026 
00027 #define RANK_NO_NORM            0x00
00028 #define RANK_NORM_LOGLENGTH     0x01
00029 #define RANK_NORM_LENGTH        0x02
00030 #define RANK_NORM_EXTDIST       0x04
00031 #define RANK_NORM_UNIQ          0x08
00032 #define RANK_NORM_LOGUNIQ       0x10
00033 #define RANK_NORM_RDIVRPLUS1    0x20
00034 #define DEF_NORM_METHOD         RANK_NO_NORM
00035 
00036 static float calc_rank_or(float *w, TSVector t, TSQuery q);
00037 static float calc_rank_and(float *w, TSVector t, TSQuery q);
00038 
00039 /*
00040  * Returns a weight of a word collocation
00041  */
00042 static float4
00043 word_distance(int32 w)
00044 {
00045     if (w > 100)
00046         return 1e-30f;
00047 
00048     return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
00049 }
00050 
00051 static int
00052 cnt_length(TSVector t)
00053 {
00054     WordEntry  *ptr = ARRPTR(t),
00055                *end = (WordEntry *) STRPTR(t);
00056     int         len = 0;
00057 
00058     while (ptr < end)
00059     {
00060         int         clen = POSDATALEN(t, ptr);
00061 
00062         if (clen == 0)
00063             len += 1;
00064         else
00065             len += clen;
00066 
00067         ptr++;
00068     }
00069 
00070     return len;
00071 }
00072 
00073 
/*
 * Compare a query operand against a tsvector entry.
 *
 *  e - base of the tsvector's string area
 *  q - base of the query's operand area
 *  p - WordEntry to compare (supplies pos/len within e)
 *  i - QueryOperand to compare (supplies distance/length within q)
 *  m - passed through as the last argument of tsCompareString()
 */
#define WordECompareQueryItem(e,q,p,i,m) \
    tsCompareString((q) + (i)->distance, (i)->length, \
                    (e) + (p)->pos, (p)->len, \
                    (m))
00077 
00078 
00079 /*
00080  * Returns a pointer to a WordEntry's array corresponding to 'item' from
00081  * tsvector 't'. 'q' is the TSQuery containing 'item'.
00082  * Returns NULL if not found.
00083  */
00084 static WordEntry *
00085 find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
00086 {
00087     WordEntry  *StopLow = ARRPTR(t);
00088     WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
00089     WordEntry  *StopMiddle = StopHigh;
00090     int         difference;
00091 
00092     *nitem = 0;
00093 
00094     /* Loop invariant: StopLow <= item < StopHigh */
00095     while (StopLow < StopHigh)
00096     {
00097         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
00098         difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
00099         if (difference == 0)
00100         {
00101             StopHigh = StopMiddle;
00102             *nitem = 1;
00103             break;
00104         }
00105         else if (difference > 0)
00106             StopLow = StopMiddle + 1;
00107         else
00108             StopHigh = StopMiddle;
00109     }
00110 
00111     if (item->prefix)
00112     {
00113         if (StopLow >= StopHigh)
00114             StopMiddle = StopHigh;
00115 
00116         *nitem = 0;
00117 
00118         while (StopMiddle < (WordEntry *) STRPTR(t) &&
00119                WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
00120         {
00121             (*nitem)++;
00122             StopMiddle++;
00123         }
00124     }
00125 
00126     return (*nitem > 0) ? StopHigh : NULL;
00127 }
00128 
00129 
00130 /*
00131  * sort QueryOperands by (length, word)
00132  */
00133 static int
00134 compareQueryOperand(const void *a, const void *b, void *arg)
00135 {
00136     char       *operand = (char *) arg;
00137     QueryOperand *qa = (*(QueryOperand *const *) a);
00138     QueryOperand *qb = (*(QueryOperand *const *) b);
00139 
00140     return tsCompareString(operand + qa->distance, qa->length,
00141                            operand + qb->distance, qb->length,
00142                            false);
00143 }
00144 
00145 /*
00146  * Returns a sorted, de-duplicated array of QueryOperands in a query.
00147  * The returned QueryOperands are pointers to the original QueryOperands
00148  * in the query.
00149  *
00150  * Length of the returned array is stored in *size
00151  */
00152 static QueryOperand **
00153 SortAndUniqItems(TSQuery q, int *size)
00154 {
00155     char       *operand = GETOPERAND(q);
00156     QueryItem  *item = GETQUERY(q);
00157     QueryOperand **res,
00158               **ptr,
00159               **prevptr;
00160 
00161     ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
00162 
00163     /* Collect all operands from the tree to res */
00164     while ((*size)--)
00165     {
00166         if (item->type == QI_VAL)
00167         {
00168             *ptr = (QueryOperand *) item;
00169             ptr++;
00170         }
00171         item++;
00172     }
00173 
00174     *size = ptr - res;
00175     if (*size < 2)
00176         return res;
00177 
00178     qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, (void *) operand);
00179 
00180     ptr = res + 1;
00181     prevptr = res;
00182 
00183     /* remove duplicates */
00184     while (ptr - res < *size)
00185     {
00186         if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
00187         {
00188             prevptr++;
00189             *prevptr = *ptr;
00190         }
00191         ptr++;
00192     }
00193 
00194     *size = prevptr + 1 - res;
00195     return res;
00196 }
00197 
00198 /* A dummy WordEntryPos array to use when haspos is false */
00199 static WordEntryPosVector POSNULL = {
00200     1,                          /* Number of elements that follow */
00201     {0}
00202 };
00203 
00204 static float
00205 calc_rank_and(float *w, TSVector t, TSQuery q)
00206 {
00207     WordEntryPosVector **pos;
00208     int         i,
00209                 k,
00210                 l,
00211                 p;
00212     WordEntry  *entry,
00213                *firstentry;
00214     WordEntryPos *post,
00215                *ct;
00216     int32       dimt,
00217                 lenct,
00218                 dist,
00219                 nitem;
00220     float       res = -1.0;
00221     QueryOperand **item;
00222     int         size = q->size;
00223 
00224     item = SortAndUniqItems(q, &size);
00225     if (size < 2)
00226     {
00227         pfree(item);
00228         return calc_rank_or(w, t, q);
00229     }
00230     pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
00231     WEP_SETPOS(POSNULL.pos[0], MAXENTRYPOS - 1);
00232 
00233     for (i = 0; i < size; i++)
00234     {
00235         firstentry = entry = find_wordentry(t, q, item[i], &nitem);
00236         if (!entry)
00237             continue;
00238 
00239         while (entry - firstentry < nitem)
00240         {
00241             if (entry->haspos)
00242                 pos[i] = _POSVECPTR(t, entry);
00243             else
00244                 pos[i] = &POSNULL;
00245 
00246             dimt = pos[i]->npos;
00247             post = pos[i]->pos;
00248             for (k = 0; k < i; k++)
00249             {
00250                 if (!pos[k])
00251                     continue;
00252                 lenct = pos[k]->npos;
00253                 ct = pos[k]->pos;
00254                 for (l = 0; l < dimt; l++)
00255                 {
00256                     for (p = 0; p < lenct; p++)
00257                     {
00258                         dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
00259                         if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
00260                         {
00261                             float       curw;
00262 
00263                             if (!dist)
00264                                 dist = MAXENTRYPOS;
00265                             curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
00266                             res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
00267                         }
00268                     }
00269                 }
00270             }
00271 
00272             entry++;
00273         }
00274     }
00275     pfree(pos);
00276     pfree(item);
00277     return res;
00278 }
00279 
00280 static float
00281 calc_rank_or(float *w, TSVector t, TSQuery q)
00282 {
00283     WordEntry  *entry,
00284                *firstentry;
00285     WordEntryPos *post;
00286     int32       dimt,
00287                 j,
00288                 i,
00289                 nitem;
00290     float       res = 0.0;
00291     QueryOperand **item;
00292     int         size = q->size;
00293 
00294     item = SortAndUniqItems(q, &size);
00295 
00296     for (i = 0; i < size; i++)
00297     {
00298         float       resj,
00299                     wjm;
00300         int32       jm;
00301 
00302         firstentry = entry = find_wordentry(t, q, item[i], &nitem);
00303         if (!entry)
00304             continue;
00305 
00306         while (entry - firstentry < nitem)
00307         {
00308             if (entry->haspos)
00309             {
00310                 dimt = POSDATALEN(t, entry);
00311                 post = POSDATAPTR(t, entry);
00312             }
00313             else
00314             {
00315                 dimt = POSNULL.npos;
00316                 post = POSNULL.pos;
00317             }
00318 
00319             resj = 0.0;
00320             wjm = -1.0;
00321             jm = 0;
00322             for (j = 0; j < dimt; j++)
00323             {
00324                 resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
00325                 if (wpos(post[j]) > wjm)
00326                 {
00327                     wjm = wpos(post[j]);
00328                     jm = j;
00329                 }
00330             }
00331 /*
00332             limit (sum(i/i^2),i->inf) = pi^2/6
00333             resj = sum(wi/i^2),i=1,noccurence,
00334             wi - should be sorted desc,
00335             don't sort for now, just choose maximum weight. This should be corrected
00336             Oleg Bartunov
00337 */
00338             res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
00339 
00340             entry++;
00341         }
00342     }
00343     if (size > 0)
00344         res = res / size;
00345     pfree(item);
00346     return res;
00347 }
00348 
00349 static float
00350 calc_rank(float *w, TSVector t, TSQuery q, int32 method)
00351 {
00352     QueryItem  *item = GETQUERY(q);
00353     float       res = 0.0;
00354     int         len;
00355 
00356     if (!t->size || !q->size)
00357         return 0.0;
00358 
00359     /* XXX: What about NOT? */
00360     res = (item->type == QI_OPR && item->qoperator.oper == OP_AND) ?
00361         calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
00362 
00363     if (res < 0)
00364         res = 1e-20f;
00365 
00366     if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
00367         res /= log((double) (cnt_length(t) + 1)) / log(2.0);
00368 
00369     if (method & RANK_NORM_LENGTH)
00370     {
00371         len = cnt_length(t);
00372         if (len > 0)
00373             res /= (float) len;
00374     }
00375 
00376     /* RANK_NORM_EXTDIST not applicable */
00377 
00378     if ((method & RANK_NORM_UNIQ) && t->size > 0)
00379         res /= (float) (t->size);
00380 
00381     if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
00382         res /= log((double) (t->size + 1)) / log(2.0);
00383 
00384     if (method & RANK_NORM_RDIVRPLUS1)
00385         res /= (res + 1);
00386 
00387     return res;
00388 }
00389 
00390 static float *
00391 getWeights(ArrayType *win)
00392 {
00393     static float ws[lengthof(weights)];
00394     int         i;
00395     float4     *arrdata;
00396 
00397     if (win == NULL)
00398         return weights;
00399 
00400     if (ARR_NDIM(win) != 1)
00401         ereport(ERROR,
00402                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
00403                  errmsg("array of weight must be one-dimensional")));
00404 
00405     if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
00406         ereport(ERROR,
00407                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
00408                  errmsg("array of weight is too short")));
00409 
00410     if (array_contains_nulls(win))
00411         ereport(ERROR,
00412                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
00413                  errmsg("array of weight must not contain nulls")));
00414 
00415     arrdata = (float4 *) ARR_DATA_PTR(win);
00416     for (i = 0; i < lengthof(weights); i++)
00417     {
00418         ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
00419         if (ws[i] > 1.0)
00420             ereport(ERROR,
00421                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00422                      errmsg("weight out of range")));
00423     }
00424 
00425     return ws;
00426 }
00427 
00428 Datum
00429 ts_rank_wttf(PG_FUNCTION_ARGS)
00430 {
00431     ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
00432     TSVector    txt = PG_GETARG_TSVECTOR(1);
00433     TSQuery     query = PG_GETARG_TSQUERY(2);
00434     int         method = PG_GETARG_INT32(3);
00435     float       res;
00436 
00437     res = calc_rank(getWeights(win), txt, query, method);
00438 
00439     PG_FREE_IF_COPY(win, 0);
00440     PG_FREE_IF_COPY(txt, 1);
00441     PG_FREE_IF_COPY(query, 2);
00442     PG_RETURN_FLOAT4(res);
00443 }
00444 
00445 Datum
00446 ts_rank_wtt(PG_FUNCTION_ARGS)
00447 {
00448     ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
00449     TSVector    txt = PG_GETARG_TSVECTOR(1);
00450     TSQuery     query = PG_GETARG_TSQUERY(2);
00451     float       res;
00452 
00453     res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD);
00454 
00455     PG_FREE_IF_COPY(win, 0);
00456     PG_FREE_IF_COPY(txt, 1);
00457     PG_FREE_IF_COPY(query, 2);
00458     PG_RETURN_FLOAT4(res);
00459 }
00460 
00461 Datum
00462 ts_rank_ttf(PG_FUNCTION_ARGS)
00463 {
00464     TSVector    txt = PG_GETARG_TSVECTOR(0);
00465     TSQuery     query = PG_GETARG_TSQUERY(1);
00466     int         method = PG_GETARG_INT32(2);
00467     float       res;
00468 
00469     res = calc_rank(getWeights(NULL), txt, query, method);
00470 
00471     PG_FREE_IF_COPY(txt, 0);
00472     PG_FREE_IF_COPY(query, 1);
00473     PG_RETURN_FLOAT4(res);
00474 }
00475 
00476 Datum
00477 ts_rank_tt(PG_FUNCTION_ARGS)
00478 {
00479     TSVector    txt = PG_GETARG_TSVECTOR(0);
00480     TSQuery     query = PG_GETARG_TSQUERY(1);
00481     float       res;
00482 
00483     res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD);
00484 
00485     PG_FREE_IF_COPY(txt, 0);
00486     PG_FREE_IF_COPY(query, 1);
00487     PG_RETURN_FLOAT4(res);
00488 }
00489 
00490 typedef struct
00491 {
00492     QueryItem **item;
00493     int16       nitem;
00494     uint8       wclass;
00495     int32       pos;
00496 } DocRepresentation;
00497 
00498 static int
00499 compareDocR(const void *va, const void *vb)
00500 {
00501     const DocRepresentation *a = (const DocRepresentation *) va;
00502     const DocRepresentation *b = (const DocRepresentation *) vb;
00503 
00504     if (a->pos == b->pos)
00505         return 0;
00506     return (a->pos > b->pos) ? 1 : -1;
00507 }
00508 
00509 typedef struct
00510 {
00511     TSQuery     query;
00512     bool       *operandexist;
00513 } QueryRepresentation;
00514 
00515 #define QR_GET_OPERAND_EXISTS(q, v)     ( (q)->operandexist[ ((QueryItem*)(v)) - GETQUERY((q)->query) ] )
00516 #define QR_SET_OPERAND_EXISTS(q, v)  QR_GET_OPERAND_EXISTS(q,v) = true
00517 
00518 static bool
00519 checkcondition_QueryOperand(void *checkval, QueryOperand *val)
00520 {
00521     QueryRepresentation *qr = (QueryRepresentation *) checkval;
00522 
00523     return QR_GET_OPERAND_EXISTS(qr, val);
00524 }
00525 
00526 typedef struct
00527 {
00528     int         pos;
00529     int         p;
00530     int         q;
00531     DocRepresentation *begin;
00532     DocRepresentation *end;
00533 } Extention;
00534 
00535 
00536 static bool
00537 Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, Extention *ext)
00538 {
00539     DocRepresentation *ptr;
00540     int         lastpos = ext->pos;
00541     int         i;
00542     bool        found = false;
00543 
00544     /*
00545      * since this function recurses, it could be driven to stack overflow.
00546      * (though any decent compiler will optimize away the tail-recursion.
00547      */
00548     check_stack_depth();
00549 
00550     memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
00551 
00552     ext->p = 0x7fffffff;
00553     ext->q = 0;
00554     ptr = doc + ext->pos;
00555 
00556     /* find upper bound of cover from current position, move up */
00557     while (ptr - doc < len)
00558     {
00559         for (i = 0; i < ptr->nitem; i++)
00560         {
00561             if (ptr->item[i]->type == QI_VAL)
00562                 QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
00563         }
00564         if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
00565         {
00566             if (ptr->pos > ext->q)
00567             {
00568                 ext->q = ptr->pos;
00569                 ext->end = ptr;
00570                 lastpos = ptr - doc;
00571                 found = true;
00572             }
00573             break;
00574         }
00575         ptr++;
00576     }
00577 
00578     if (!found)
00579         return false;
00580 
00581     memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
00582 
00583     ptr = doc + lastpos;
00584 
00585     /* find lower bound of cover from found upper bound, move down */
00586     while (ptr >= doc + ext->pos)
00587     {
00588         for (i = 0; i < ptr->nitem; i++)
00589             if (ptr->item[i]->type == QI_VAL)
00590                 QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
00591         if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
00592         {
00593             if (ptr->pos < ext->p)
00594             {
00595                 ext->begin = ptr;
00596                 ext->p = ptr->pos;
00597             }
00598             break;
00599         }
00600         ptr--;
00601     }
00602 
00603     if (ext->p <= ext->q)
00604     {
00605         /*
00606          * set position for next try to next lexeme after beginning of found
00607          * cover
00608          */
00609         ext->pos = (ptr - doc) + 1;
00610         return true;
00611     }
00612 
00613     ext->pos++;
00614     return Cover(doc, len, qr, ext);
00615 }
00616 
00617 static DocRepresentation *
00618 get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
00619 {
00620     QueryItem  *item = GETQUERY(qr->query);
00621     WordEntry  *entry,
00622                *firstentry;
00623     WordEntryPos *post;
00624     int32       dimt,
00625                 j,
00626                 i,
00627                 nitem;
00628     int         len = qr->query->size * 4,
00629                 cur = 0;
00630     DocRepresentation *doc;
00631     char       *operand;
00632 
00633     doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
00634     operand = GETOPERAND(qr->query);
00635 
00636     for (i = 0; i < qr->query->size; i++)
00637     {
00638         QueryOperand *curoperand;
00639 
00640         if (item[i].type != QI_VAL)
00641             continue;
00642 
00643         curoperand = &item[i].qoperand;
00644 
00645         if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
00646             continue;
00647 
00648         firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
00649         if (!entry)
00650             continue;
00651 
00652         while (entry - firstentry < nitem)
00653         {
00654             if (entry->haspos)
00655             {
00656                 dimt = POSDATALEN(txt, entry);
00657                 post = POSDATAPTR(txt, entry);
00658             }
00659             else
00660             {
00661                 dimt = POSNULL.npos;
00662                 post = POSNULL.pos;
00663             }
00664 
00665             while (cur + dimt >= len)
00666             {
00667                 len *= 2;
00668                 doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
00669             }
00670 
00671             for (j = 0; j < dimt; j++)
00672             {
00673                 if (j == 0)
00674                 {
00675                     int         k;
00676 
00677                     doc[cur].nitem = 0;
00678                     doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
00679 
00680                     for (k = 0; k < qr->query->size; k++)
00681                     {
00682                         QueryOperand *kptr = &item[k].qoperand;
00683                         QueryOperand *iptr = &item[i].qoperand;
00684 
00685                         if (k == i ||
00686                             (item[k].type == QI_VAL &&
00687                              compareQueryOperand(&kptr, &iptr, operand) == 0))
00688                         {
00689                             /*
00690                              * if k == i, we've already checked above that
00691                              * it's type == Q_VAL
00692                              */
00693                             doc[cur].item[doc[cur].nitem] = item + k;
00694                             doc[cur].nitem++;
00695                             QR_SET_OPERAND_EXISTS(qr, item + k);
00696                         }
00697                     }
00698                 }
00699                 else
00700                 {
00701                     doc[cur].nitem = doc[cur - 1].nitem;
00702                     doc[cur].item = doc[cur - 1].item;
00703                 }
00704                 doc[cur].pos = WEP_GETPOS(post[j]);
00705                 doc[cur].wclass = WEP_GETWEIGHT(post[j]);
00706                 cur++;
00707             }
00708 
00709             entry++;
00710         }
00711     }
00712 
00713     *doclen = cur;
00714 
00715     if (cur > 0)
00716     {
00717         qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
00718         return doc;
00719     }
00720 
00721     pfree(doc);
00722     return NULL;
00723 }
00724 
00725 static float4
00726 calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method)
00727 {
00728     DocRepresentation *doc;
00729     int         len,
00730                 i,
00731                 doclen = 0;
00732     Extention   ext;
00733     double      Wdoc = 0.0;
00734     double      invws[lengthof(weights)];
00735     double      SumDist = 0.0,
00736                 PrevExtPos = 0.0,
00737                 CurExtPos = 0.0;
00738     int         NExtent = 0;
00739     QueryRepresentation qr;
00740 
00741 
00742     for (i = 0; i < lengthof(weights); i++)
00743     {
00744         invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
00745         if (invws[i] > 1.0)
00746             ereport(ERROR,
00747                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00748                      errmsg("weight out of range")));
00749         invws[i] = 1.0 / invws[i];
00750     }
00751 
00752     qr.query = query;
00753     qr.operandexist = (bool *) palloc0(sizeof(bool) * query->size);
00754 
00755     doc = get_docrep(txt, &qr, &doclen);
00756     if (!doc)
00757     {
00758         pfree(qr.operandexist);
00759         return 0.0;
00760     }
00761 
00762     MemSet(&ext, 0, sizeof(Extention));
00763     while (Cover(doc, doclen, &qr, &ext))
00764     {
00765         double      Cpos = 0.0;
00766         double      InvSum = 0.0;
00767         int         nNoise;
00768         DocRepresentation *ptr = ext.begin;
00769 
00770         while (ptr <= ext.end)
00771         {
00772             InvSum += invws[ptr->wclass];
00773             ptr++;
00774         }
00775 
00776         Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
00777 
00778         /*
00779          * if doc are big enough then ext.q may be equal to ext.p due to limit
00780          * of posional information. In this case we approximate number of
00781          * noise word as half cover's length
00782          */
00783         nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
00784         if (nNoise < 0)
00785             nNoise = (ext.end - ext.begin) / 2;
00786         Wdoc += Cpos / ((double) (1 + nNoise));
00787 
00788         CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
00789         if (NExtent > 0 && CurExtPos > PrevExtPos       /* prevent devision by
00790                                                          * zero in a case of
00791                 multiple lexize */ )
00792             SumDist += 1.0 / (CurExtPos - PrevExtPos);
00793 
00794         PrevExtPos = CurExtPos;
00795         NExtent++;
00796     }
00797 
00798     if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
00799         Wdoc /= log((double) (cnt_length(txt) + 1));
00800 
00801     if (method & RANK_NORM_LENGTH)
00802     {
00803         len = cnt_length(txt);
00804         if (len > 0)
00805             Wdoc /= (double) len;
00806     }
00807 
00808     if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
00809         Wdoc /= ((double) NExtent) / SumDist;
00810 
00811     if ((method & RANK_NORM_UNIQ) && txt->size > 0)
00812         Wdoc /= (double) (txt->size);
00813 
00814     if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
00815         Wdoc /= log((double) (txt->size + 1)) / log(2.0);
00816 
00817     if (method & RANK_NORM_RDIVRPLUS1)
00818         Wdoc /= (Wdoc + 1);
00819 
00820     pfree(doc);
00821 
00822     pfree(qr.operandexist);
00823 
00824     return (float4) Wdoc;
00825 }
00826 
00827 Datum
00828 ts_rankcd_wttf(PG_FUNCTION_ARGS)
00829 {
00830     ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
00831     TSVector    txt = PG_GETARG_TSVECTOR(1);
00832     TSQuery     query = PG_GETARG_TSQUERY(2);
00833     int         method = PG_GETARG_INT32(3);
00834     float       res;
00835 
00836     res = calc_rank_cd(getWeights(win), txt, query, method);
00837 
00838     PG_FREE_IF_COPY(win, 0);
00839     PG_FREE_IF_COPY(txt, 1);
00840     PG_FREE_IF_COPY(query, 2);
00841     PG_RETURN_FLOAT4(res);
00842 }
00843 
00844 Datum
00845 ts_rankcd_wtt(PG_FUNCTION_ARGS)
00846 {
00847     ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
00848     TSVector    txt = PG_GETARG_TSVECTOR(1);
00849     TSQuery     query = PG_GETARG_TSQUERY(2);
00850     float       res;
00851 
00852     res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
00853 
00854     PG_FREE_IF_COPY(win, 0);
00855     PG_FREE_IF_COPY(txt, 1);
00856     PG_FREE_IF_COPY(query, 2);
00857     PG_RETURN_FLOAT4(res);
00858 }
00859 
00860 Datum
00861 ts_rankcd_ttf(PG_FUNCTION_ARGS)
00862 {
00863     TSVector    txt = PG_GETARG_TSVECTOR(0);
00864     TSQuery     query = PG_GETARG_TSQUERY(1);
00865     int         method = PG_GETARG_INT32(2);
00866     float       res;
00867 
00868     res = calc_rank_cd(getWeights(NULL), txt, query, method);
00869 
00870     PG_FREE_IF_COPY(txt, 0);
00871     PG_FREE_IF_COPY(query, 1);
00872     PG_RETURN_FLOAT4(res);
00873 }
00874 
00875 Datum
00876 ts_rankcd_tt(PG_FUNCTION_ARGS)
00877 {
00878     TSVector    txt = PG_GETARG_TSVECTOR(0);
00879     TSQuery     query = PG_GETARG_TSQUERY(1);
00880     float       res;
00881 
00882     res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
00883 
00884     PG_FREE_IF_COPY(txt, 0);
00885     PG_FREE_IF_COPY(query, 1);
00886     PG_RETURN_FLOAT4(res);
00887 }