00001
00002
00003
00004 #include "postgres.h"
00005
00006 #include "trgm.h"
00007
00008 #include "access/skey.h"
00009
00010
00011 typedef struct
00012 {
00013
00014 StrategyNumber strategy;
00015 text *query;
00016
00017 TRGM *trigrams;
00018
00019 TrgmPackedGraph *graph;
00020
00021
00022
00023
00024
00025 } gtrgm_consistent_cache;
00026
00027 #define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
00028
00029
00030 PG_FUNCTION_INFO_V1(gtrgm_in);
00031 Datum gtrgm_in(PG_FUNCTION_ARGS);
00032
00033 PG_FUNCTION_INFO_V1(gtrgm_out);
00034 Datum gtrgm_out(PG_FUNCTION_ARGS);
00035
00036 PG_FUNCTION_INFO_V1(gtrgm_compress);
00037 Datum gtrgm_compress(PG_FUNCTION_ARGS);
00038
00039 PG_FUNCTION_INFO_V1(gtrgm_decompress);
00040 Datum gtrgm_decompress(PG_FUNCTION_ARGS);
00041
00042 PG_FUNCTION_INFO_V1(gtrgm_consistent);
00043 Datum gtrgm_consistent(PG_FUNCTION_ARGS);
00044
00045 PG_FUNCTION_INFO_V1(gtrgm_distance);
00046 Datum gtrgm_distance(PG_FUNCTION_ARGS);
00047
00048 PG_FUNCTION_INFO_V1(gtrgm_union);
00049 Datum gtrgm_union(PG_FUNCTION_ARGS);
00050
00051 PG_FUNCTION_INFO_V1(gtrgm_same);
00052 Datum gtrgm_same(PG_FUNCTION_ARGS);
00053
00054 PG_FUNCTION_INFO_V1(gtrgm_penalty);
00055 Datum gtrgm_penalty(PG_FUNCTION_ARGS);
00056
00057 PG_FUNCTION_INFO_V1(gtrgm_picksplit);
00058 Datum gtrgm_picksplit(PG_FUNCTION_ARGS);
00059
00060
00061 static const uint8 number_of_ones[256] = {
00062 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
00063 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00064 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00065 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00066 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00067 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00068 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00069 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00070 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00071 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00072 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00073 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00074 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00075 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00076 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00077 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
00078 };
00079
00080
00081 Datum
00082 gtrgm_in(PG_FUNCTION_ARGS)
00083 {
00084 elog(ERROR, "not implemented");
00085 PG_RETURN_DATUM(0);
00086 }
00087
00088 Datum
00089 gtrgm_out(PG_FUNCTION_ARGS)
00090 {
00091 elog(ERROR, "not implemented");
00092 PG_RETURN_DATUM(0);
00093 }
00094
00095 static void
00096 makesign(BITVECP sign, TRGM *a)
00097 {
00098 int32 k,
00099 len = ARRNELEM(a);
00100 trgm *ptr = GETARR(a);
00101 int32 tmp = 0;
00102
00103 MemSet((void *) sign, 0, sizeof(BITVEC));
00104 SETBIT(sign, SIGLENBIT);
00105 for (k = 0; k < len; k++)
00106 {
00107 CPTRGM(((char *) &tmp), ptr + k);
00108 HASH(sign, tmp);
00109 }
00110 }
00111
00112 Datum
00113 gtrgm_compress(PG_FUNCTION_ARGS)
00114 {
00115 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00116 GISTENTRY *retval = entry;
00117
00118 if (entry->leafkey)
00119 {
00120 TRGM *res;
00121 text *val = DatumGetTextP(entry->key);
00122
00123 res = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
00124 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00125 gistentryinit(*retval, PointerGetDatum(res),
00126 entry->rel, entry->page,
00127 entry->offset, FALSE);
00128 }
00129 else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
00130 !ISALLTRUE(DatumGetPointer(entry->key)))
00131 {
00132 int32 i,
00133 len;
00134 TRGM *res;
00135 BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
00136
00137 LOOPBYTE
00138 {
00139 if ((sign[i] & 0xff) != 0xff)
00140 PG_RETURN_POINTER(retval);
00141 }
00142
00143 len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
00144 res = (TRGM *) palloc(len);
00145 SET_VARSIZE(res, len);
00146 res->flag = SIGNKEY | ALLISTRUE;
00147
00148 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00149 gistentryinit(*retval, PointerGetDatum(res),
00150 entry->rel, entry->page,
00151 entry->offset, FALSE);
00152 }
00153 PG_RETURN_POINTER(retval);
00154 }
00155
00156 Datum
00157 gtrgm_decompress(PG_FUNCTION_ARGS)
00158 {
00159 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00160 GISTENTRY *retval;
00161 text *key;
00162
00163 key = DatumGetTextP(entry->key);
00164
00165 if (key != (text *) DatumGetPointer(entry->key))
00166 {
00167
00168 retval = palloc(sizeof(GISTENTRY));
00169 gistentryinit(*retval, PointerGetDatum(key),
00170 entry->rel, entry->page, entry->offset, entry->leafkey);
00171 PG_RETURN_POINTER(retval);
00172 }
00173 else
00174 {
00175
00176 PG_RETURN_POINTER(entry);
00177 }
00178 }
00179
00180 static int32
00181 cnt_sml_sign_common(TRGM *qtrg, BITVECP sign)
00182 {
00183 int32 count = 0;
00184 int32 k,
00185 len = ARRNELEM(qtrg);
00186 trgm *ptr = GETARR(qtrg);
00187 int32 tmp = 0;
00188
00189 for (k = 0; k < len; k++)
00190 {
00191 CPTRGM(((char *) &tmp), ptr + k);
00192 count += GETBIT(sign, HASHVAL(tmp));
00193 }
00194
00195 return count;
00196 }
00197
00198 Datum
00199 gtrgm_consistent(PG_FUNCTION_ARGS)
00200 {
00201 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00202 text *query = PG_GETARG_TEXT_P(1);
00203 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
00204
00205
00206 bool *recheck = (bool *) PG_GETARG_POINTER(4);
00207 TRGM *key = (TRGM *) DatumGetPointer(entry->key);
00208 TRGM *qtrg;
00209 bool res;
00210 Size querysize = VARSIZE(query);
00211 gtrgm_consistent_cache *cache;
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226 cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra;
00227 if (cache == NULL ||
00228 cache->strategy != strategy ||
00229 VARSIZE(cache->query) != querysize ||
00230 memcmp((char *) cache->query, (char *) query, querysize) != 0)
00231 {
00232 gtrgm_consistent_cache *newcache;
00233 TrgmPackedGraph *graph = NULL;
00234 Size qtrgsize;
00235
00236 switch (strategy)
00237 {
00238 case SimilarityStrategyNumber:
00239 qtrg = generate_trgm(VARDATA(query),
00240 querysize - VARHDRSZ);
00241 break;
00242 case ILikeStrategyNumber:
00243 #ifndef IGNORECASE
00244 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
00245 #endif
00246
00247 case LikeStrategyNumber:
00248 qtrg = generate_wildcard_trgm(VARDATA(query),
00249 querysize - VARHDRSZ);
00250 break;
00251 case RegExpICaseStrategyNumber:
00252 #ifndef IGNORECASE
00253 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
00254 #endif
00255
00256 case RegExpStrategyNumber:
00257 qtrg = createTrgmNFA(query, PG_GET_COLLATION(),
00258 &graph, fcinfo->flinfo->fn_mcxt);
00259
00260 if (qtrg && ARRNELEM(qtrg) <= 0)
00261 {
00262 pfree(qtrg);
00263 qtrg = NULL;
00264 }
00265 break;
00266 default:
00267 elog(ERROR, "unrecognized strategy number: %d", strategy);
00268 qtrg = NULL;
00269 break;
00270 }
00271
00272 qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
00273
00274 newcache = (gtrgm_consistent_cache *)
00275 MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
00276 MAXALIGN(sizeof(gtrgm_consistent_cache)) +
00277 MAXALIGN(querysize) +
00278 qtrgsize);
00279
00280 newcache->strategy = strategy;
00281 newcache->query = (text *)
00282 ((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache)));
00283 memcpy((char *) newcache->query, (char *) query, querysize);
00284 if (qtrg)
00285 {
00286 newcache->trigrams = (TRGM *)
00287 ((char *) newcache->query + MAXALIGN(querysize));
00288 memcpy((char *) newcache->trigrams, (char *) qtrg, qtrgsize);
00289
00290 pfree(qtrg);
00291 }
00292 else
00293 newcache->trigrams = NULL;
00294 newcache->graph = graph;
00295
00296 if (cache)
00297 pfree(cache);
00298 fcinfo->flinfo->fn_extra = (void *) newcache;
00299 cache = newcache;
00300 }
00301
00302 qtrg = cache->trigrams;
00303
00304 switch (strategy)
00305 {
00306 case SimilarityStrategyNumber:
00307
00308 *recheck = false;
00309
00310 if (GIST_LEAF(entry))
00311 {
00312 float4 tmpsml = cnt_sml(key, qtrg);
00313
00314
00315 res = (*(int *) &tmpsml == *(int *) &trgm_limit || tmpsml > trgm_limit) ? true : false;
00316 }
00317 else if (ISALLTRUE(key))
00318 {
00319 res = true;
00320 }
00321 else
00322 {
00323 int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
00324 int32 len = ARRNELEM(qtrg);
00325
00326 if (len == 0)
00327 res = false;
00328 else
00329 res = (((((float8) count) / ((float8) len))) >= trgm_limit) ? true : false;
00330 }
00331 break;
00332 case ILikeStrategyNumber:
00333 #ifndef IGNORECASE
00334 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
00335 #endif
00336
00337 case LikeStrategyNumber:
00338
00339 *recheck = true;
00340
00341
00342
00343
00344
00345 if (GIST_LEAF(entry))
00346 {
00347 res = trgm_contained_by(qtrg, key);
00348 }
00349 else if (ISALLTRUE(key))
00350 {
00351 res = true;
00352 }
00353 else
00354 {
00355 int32 k,
00356 tmp = 0,
00357 len = ARRNELEM(qtrg);
00358 trgm *ptr = GETARR(qtrg);
00359 BITVECP sign = GETSIGN(key);
00360
00361 res = true;
00362 for (k = 0; k < len; k++)
00363 {
00364 CPTRGM(((char *) &tmp), ptr + k);
00365 if (!GETBIT(sign, HASHVAL(tmp)))
00366 {
00367 res = false;
00368 break;
00369 }
00370 }
00371 }
00372 break;
00373 case RegExpICaseStrategyNumber:
00374 #ifndef IGNORECASE
00375 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
00376 #endif
00377
00378 case RegExpStrategyNumber:
00379
00380 *recheck = true;
00381
00382
00383 if (qtrg)
00384 {
00385 if (GIST_LEAF(entry))
00386 {
00387 bool *check;
00388
00389 check = trgm_presence_map(qtrg, key);
00390 res = trigramsMatchGraph(cache->graph, check);
00391 pfree(check);
00392 }
00393 else if (ISALLTRUE(key))
00394 {
00395 res = true;
00396 }
00397 else
00398 {
00399 int32 k,
00400 tmp = 0,
00401 len = ARRNELEM(qtrg);
00402 trgm *ptr = GETARR(qtrg);
00403 BITVECP sign = GETSIGN(key);
00404 bool *check;
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414 check = (bool *) palloc(len * sizeof(bool));
00415 for (k = 0; k < len; k++)
00416 {
00417 CPTRGM(((char *) &tmp), ptr + k);
00418 check[k] = GETBIT(sign, HASHVAL(tmp));
00419 }
00420 res = trigramsMatchGraph(cache->graph, check);
00421 pfree(check);
00422 }
00423 }
00424 else
00425 {
00426
00427 res = true;
00428 }
00429 break;
00430 default:
00431 elog(ERROR, "unrecognized strategy number: %d", strategy);
00432 res = false;
00433 break;
00434 }
00435
00436 PG_RETURN_BOOL(res);
00437 }
00438
00439 Datum
00440 gtrgm_distance(PG_FUNCTION_ARGS)
00441 {
00442 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00443 text *query = PG_GETARG_TEXT_P(1);
00444 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
00445
00446
00447 TRGM *key = (TRGM *) DatumGetPointer(entry->key);
00448 TRGM *qtrg;
00449 float8 res;
00450 Size querysize = VARSIZE(query);
00451 char *cache = (char *) fcinfo->flinfo->fn_extra;
00452
00453
00454
00455
00456 if (cache == NULL ||
00457 VARSIZE(cache) != querysize ||
00458 memcmp(cache, query, querysize) != 0)
00459 {
00460 char *newcache;
00461
00462 qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ);
00463
00464 newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
00465 MAXALIGN(querysize) +
00466 VARSIZE(qtrg));
00467
00468 memcpy(newcache, query, querysize);
00469 memcpy(newcache + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));
00470
00471 if (cache)
00472 pfree(cache);
00473 fcinfo->flinfo->fn_extra = newcache;
00474 cache = newcache;
00475 }
00476
00477 qtrg = (TRGM *) (cache + MAXALIGN(querysize));
00478
00479 switch (strategy)
00480 {
00481 case DistanceStrategyNumber:
00482 if (GIST_LEAF(entry))
00483 {
00484 res = 1.0 - cnt_sml(key, qtrg);
00485 }
00486 else if (ISALLTRUE(key))
00487 {
00488 res = 0.0;
00489 }
00490 else
00491 {
00492 int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
00493 int32 len = ARRNELEM(qtrg);
00494
00495 res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
00496 }
00497 break;
00498 default:
00499 elog(ERROR, "unrecognized strategy number: %d", strategy);
00500 res = 0;
00501 break;
00502 }
00503
00504 PG_RETURN_FLOAT8(res);
00505 }
00506
00507 static int32
00508 unionkey(BITVECP sbase, TRGM *add)
00509 {
00510 int32 i;
00511
00512 if (ISSIGNKEY(add))
00513 {
00514 BITVECP sadd = GETSIGN(add);
00515
00516 if (ISALLTRUE(add))
00517 return 1;
00518
00519 LOOPBYTE
00520 sbase[i] |= sadd[i];
00521 }
00522 else
00523 {
00524 trgm *ptr = GETARR(add);
00525 int32 tmp = 0;
00526
00527 for (i = 0; i < ARRNELEM(add); i++)
00528 {
00529 CPTRGM(((char *) &tmp), ptr + i);
00530 HASH(sbase, tmp);
00531 }
00532 }
00533 return 0;
00534 }
00535
00536
00537 Datum
00538 gtrgm_union(PG_FUNCTION_ARGS)
00539 {
00540 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00541 int32 len = entryvec->n;
00542 int *size = (int *) PG_GETARG_POINTER(1);
00543 BITVEC base;
00544 int32 i;
00545 int32 flag = 0;
00546 TRGM *result;
00547
00548 MemSet((void *) base, 0, sizeof(BITVEC));
00549 for (i = 0; i < len; i++)
00550 {
00551 if (unionkey(base, GETENTRY(entryvec, i)))
00552 {
00553 flag = ALLISTRUE;
00554 break;
00555 }
00556 }
00557
00558 flag |= SIGNKEY;
00559 len = CALCGTSIZE(flag, 0);
00560 result = (TRGM *) palloc(len);
00561 SET_VARSIZE(result, len);
00562 result->flag = flag;
00563 if (!ISALLTRUE(result))
00564 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
00565 *size = len;
00566
00567 PG_RETURN_POINTER(result);
00568 }
00569
00570 Datum
00571 gtrgm_same(PG_FUNCTION_ARGS)
00572 {
00573 TRGM *a = (TRGM *) PG_GETARG_POINTER(0);
00574 TRGM *b = (TRGM *) PG_GETARG_POINTER(1);
00575 bool *result = (bool *) PG_GETARG_POINTER(2);
00576
00577 if (ISSIGNKEY(a))
00578 {
00579 if (ISALLTRUE(a) && ISALLTRUE(b))
00580 *result = true;
00581 else if (ISALLTRUE(a))
00582 *result = false;
00583 else if (ISALLTRUE(b))
00584 *result = false;
00585 else
00586 {
00587 int32 i;
00588 BITVECP sa = GETSIGN(a),
00589 sb = GETSIGN(b);
00590
00591 *result = true;
00592 LOOPBYTE
00593 {
00594 if (sa[i] != sb[i])
00595 {
00596 *result = false;
00597 break;
00598 }
00599 }
00600 }
00601 }
00602 else
00603 {
00604 int32 lena = ARRNELEM(a),
00605 lenb = ARRNELEM(b);
00606
00607 if (lena != lenb)
00608 *result = false;
00609 else
00610 {
00611 trgm *ptra = GETARR(a),
00612 *ptrb = GETARR(b);
00613 int32 i;
00614
00615 *result = true;
00616 for (i = 0; i < lena; i++)
00617 if (CMPTRGM(ptra + i, ptrb + i))
00618 {
00619 *result = false;
00620 break;
00621 }
00622 }
00623 }
00624
00625 PG_RETURN_POINTER(result);
00626 }
00627
00628 static int32
00629 sizebitvec(BITVECP sign)
00630 {
00631 int32 size = 0,
00632 i;
00633
00634 LOOPBYTE
00635 size += number_of_ones[(unsigned char) sign[i]];
00636 return size;
00637 }
00638
00639 static int
00640 hemdistsign(BITVECP a, BITVECP b)
00641 {
00642 int i,
00643 diff,
00644 dist = 0;
00645
00646 LOOPBYTE
00647 {
00648 diff = (unsigned char) (a[i] ^ b[i]);
00649 dist += number_of_ones[diff];
00650 }
00651 return dist;
00652 }
00653
00654 static int
00655 hemdist(TRGM *a, TRGM *b)
00656 {
00657 if (ISALLTRUE(a))
00658 {
00659 if (ISALLTRUE(b))
00660 return 0;
00661 else
00662 return SIGLENBIT - sizebitvec(GETSIGN(b));
00663 }
00664 else if (ISALLTRUE(b))
00665 return SIGLENBIT - sizebitvec(GETSIGN(a));
00666
00667 return hemdistsign(GETSIGN(a), GETSIGN(b));
00668 }
00669
00670 Datum
00671 gtrgm_penalty(PG_FUNCTION_ARGS)
00672 {
00673 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
00674 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
00675 float *penalty = (float *) PG_GETARG_POINTER(2);
00676 TRGM *origval = (TRGM *) DatumGetPointer(origentry->key);
00677 TRGM *newval = (TRGM *) DatumGetPointer(newentry->key);
00678 BITVECP orig = GETSIGN(origval);
00679
00680 *penalty = 0.0;
00681
00682 if (ISARRKEY(newval))
00683 {
00684 char *cache = (char *) fcinfo->flinfo->fn_extra;
00685 TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(sizeof(BITVEC)));
00686 Size newvalsize = VARSIZE(newval);
00687 BITVECP sign;
00688
00689
00690
00691
00692 if (cache == NULL ||
00693 VARSIZE(cachedVal) != newvalsize ||
00694 memcmp(cachedVal, newval, newvalsize) != 0)
00695 {
00696 char *newcache;
00697
00698 newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
00699 MAXALIGN(sizeof(BITVEC)) +
00700 newvalsize);
00701
00702 makesign((BITVECP) newcache, newval);
00703
00704 cachedVal = (TRGM *) (newcache + MAXALIGN(sizeof(BITVEC)));
00705 memcpy(cachedVal, newval, newvalsize);
00706
00707 if (cache)
00708 pfree(cache);
00709 fcinfo->flinfo->fn_extra = newcache;
00710 cache = newcache;
00711 }
00712
00713 sign = (BITVECP) cache;
00714
00715 if (ISALLTRUE(origval))
00716 *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
00717 else
00718 *penalty = hemdistsign(sign, orig);
00719 }
00720 else
00721 *penalty = hemdist(origval, newval);
00722 PG_RETURN_POINTER(penalty);
00723 }
00724
00725 typedef struct
00726 {
00727 bool allistrue;
00728 BITVEC sign;
00729 } CACHESIGN;
00730
00731 static void
00732 fillcache(CACHESIGN *item, TRGM *key)
00733 {
00734 item->allistrue = false;
00735 if (ISARRKEY(key))
00736 makesign(item->sign, key);
00737 else if (ISALLTRUE(key))
00738 item->allistrue = true;
00739 else
00740 memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
00741 }
00742
00743 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
00744 typedef struct
00745 {
00746 OffsetNumber pos;
00747 int32 cost;
00748 } SPLITCOST;
00749
00750 static int
00751 comparecost(const void *a, const void *b)
00752 {
00753 if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
00754 return 0;
00755 else
00756 return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
00757 }
00758
00759
00760 static int
00761 hemdistcache(CACHESIGN *a, CACHESIGN *b)
00762 {
00763 if (a->allistrue)
00764 {
00765 if (b->allistrue)
00766 return 0;
00767 else
00768 return SIGLENBIT - sizebitvec(b->sign);
00769 }
00770 else if (b->allistrue)
00771 return SIGLENBIT - sizebitvec(a->sign);
00772
00773 return hemdistsign(a->sign, b->sign);
00774 }
00775
00776 Datum
00777 gtrgm_picksplit(PG_FUNCTION_ARGS)
00778 {
00779 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00780 OffsetNumber maxoff = entryvec->n - 2;
00781 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
00782 OffsetNumber k,
00783 j;
00784 TRGM *datum_l,
00785 *datum_r;
00786 BITVECP union_l,
00787 union_r;
00788 int32 size_alpha,
00789 size_beta;
00790 int32 size_waste,
00791 waste = -1;
00792 int32 nbytes;
00793 OffsetNumber seed_1 = 0,
00794 seed_2 = 0;
00795 OffsetNumber *left,
00796 *right;
00797 BITVECP ptr;
00798 int i;
00799 CACHESIGN *cache;
00800 SPLITCOST *costvector;
00801
00802
00803 cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
00804 for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k))
00805 fillcache(&cache[k], GETENTRY(entryvec, k));
00806
00807
00808 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
00809 {
00810 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
00811 {
00812 size_waste = hemdistcache(&(cache[j]), &(cache[k]));
00813 if (size_waste > waste)
00814 {
00815 waste = size_waste;
00816 seed_1 = k;
00817 seed_2 = j;
00818 }
00819 }
00820 }
00821
00822
00823 if (seed_1 == 0 || seed_2 == 0)
00824 {
00825 seed_1 = 1;
00826 seed_2 = 2;
00827 }
00828
00829
00830 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
00831 v->spl_left = left = (OffsetNumber *) palloc(nbytes);
00832 v->spl_right = right = (OffsetNumber *) palloc(nbytes);
00833 v->spl_nleft = 0;
00834 v->spl_nright = 0;
00835
00836
00837 if (cache[seed_1].allistrue)
00838 {
00839 datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
00840 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
00841 datum_l->flag = SIGNKEY | ALLISTRUE;
00842 }
00843 else
00844 {
00845 datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
00846 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
00847 datum_l->flag = SIGNKEY;
00848 memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
00849 }
00850 if (cache[seed_2].allistrue)
00851 {
00852 datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
00853 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
00854 datum_r->flag = SIGNKEY | ALLISTRUE;
00855 }
00856 else
00857 {
00858 datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
00859 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
00860 datum_r->flag = SIGNKEY;
00861 memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
00862 }
00863
00864 union_l = GETSIGN(datum_l);
00865 union_r = GETSIGN(datum_r);
00866 maxoff = OffsetNumberNext(maxoff);
00867 fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
00868
00869 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
00870 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
00871 {
00872 costvector[j - 1].pos = j;
00873 size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
00874 size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
00875 costvector[j - 1].cost = abs(size_alpha - size_beta);
00876 }
00877 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
00878
00879 for (k = 0; k < maxoff; k++)
00880 {
00881 j = costvector[k].pos;
00882 if (j == seed_1)
00883 {
00884 *left++ = j;
00885 v->spl_nleft++;
00886 continue;
00887 }
00888 else if (j == seed_2)
00889 {
00890 *right++ = j;
00891 v->spl_nright++;
00892 continue;
00893 }
00894
00895 if (ISALLTRUE(datum_l) || cache[j].allistrue)
00896 {
00897 if (ISALLTRUE(datum_l) && cache[j].allistrue)
00898 size_alpha = 0;
00899 else
00900 size_alpha = SIGLENBIT - sizebitvec(
00901 (cache[j].allistrue) ? GETSIGN(datum_l) : GETSIGN(cache[j].sign)
00902 );
00903 }
00904 else
00905 size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
00906
00907 if (ISALLTRUE(datum_r) || cache[j].allistrue)
00908 {
00909 if (ISALLTRUE(datum_r) && cache[j].allistrue)
00910 size_beta = 0;
00911 else
00912 size_beta = SIGLENBIT - sizebitvec(
00913 (cache[j].allistrue) ? GETSIGN(datum_r) : GETSIGN(cache[j].sign)
00914 );
00915 }
00916 else
00917 size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
00918
00919 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
00920 {
00921 if (ISALLTRUE(datum_l) || cache[j].allistrue)
00922 {
00923 if (!ISALLTRUE(datum_l))
00924 MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
00925 }
00926 else
00927 {
00928 ptr = cache[j].sign;
00929 LOOPBYTE
00930 union_l[i] |= ptr[i];
00931 }
00932 *left++ = j;
00933 v->spl_nleft++;
00934 }
00935 else
00936 {
00937 if (ISALLTRUE(datum_r) || cache[j].allistrue)
00938 {
00939 if (!ISALLTRUE(datum_r))
00940 MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
00941 }
00942 else
00943 {
00944 ptr = cache[j].sign;
00945 LOOPBYTE
00946 union_r[i] |= ptr[i];
00947 }
00948 *right++ = j;
00949 v->spl_nright++;
00950 }
00951 }
00952
00953 *right = *left = FirstOffsetNumber;
00954 v->spl_ldatum = PointerGetDatum(datum_l);
00955 v->spl_rdatum = PointerGetDatum(datum_r);
00956
00957 PG_RETURN_POINTER(v);
00958 }