00001
00002
00003
00004 #include "postgres.h"
00005
00006 #include "access/gist.h"
00007 #include "access/skey.h"
00008
00009 #include "_int.h"
00010
00011 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
00012
00013
00014
00015 PG_FUNCTION_INFO_V1(g_intbig_consistent);
00016 PG_FUNCTION_INFO_V1(g_intbig_compress);
00017 PG_FUNCTION_INFO_V1(g_intbig_decompress);
00018 PG_FUNCTION_INFO_V1(g_intbig_penalty);
00019 PG_FUNCTION_INFO_V1(g_intbig_picksplit);
00020 PG_FUNCTION_INFO_V1(g_intbig_union);
00021 PG_FUNCTION_INFO_V1(g_intbig_same);
00022
00023 Datum g_intbig_consistent(PG_FUNCTION_ARGS);
00024 Datum g_intbig_compress(PG_FUNCTION_ARGS);
00025 Datum g_intbig_decompress(PG_FUNCTION_ARGS);
00026 Datum g_intbig_penalty(PG_FUNCTION_ARGS);
00027 Datum g_intbig_picksplit(PG_FUNCTION_ARGS);
00028 Datum g_intbig_union(PG_FUNCTION_ARGS);
00029 Datum g_intbig_same(PG_FUNCTION_ARGS);
00030
00031
00032 static const uint8 number_of_ones[256] = {
00033 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
00034 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00035 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00036 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00037 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00038 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00039 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00040 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00041 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00042 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00043 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00044 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00045 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00046 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00047 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00048 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
00049 };
00050
00051 PG_FUNCTION_INFO_V1(_intbig_in);
00052 Datum _intbig_in(PG_FUNCTION_ARGS);
00053
00054 PG_FUNCTION_INFO_V1(_intbig_out);
00055 Datum _intbig_out(PG_FUNCTION_ARGS);
00056
00057
00058 Datum
00059 _intbig_in(PG_FUNCTION_ARGS)
00060 {
00061 ereport(ERROR,
00062 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00063 errmsg("_intbig_in() not implemented")));
00064 PG_RETURN_DATUM(0);
00065 }
00066
00067 Datum
00068 _intbig_out(PG_FUNCTION_ARGS)
00069 {
00070 ereport(ERROR,
00071 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00072 errmsg("_intbig_out() not implemented")));
00073 PG_RETURN_DATUM(0);
00074 }
00075
00076
00077
00078
00079
00080 static bool
00081 _intbig_overlap(GISTTYPE *a, ArrayType *b)
00082 {
00083 int num = ARRNELEMS(b);
00084 int32 *ptr = ARRPTR(b);
00085
00086 CHECKARRVALID(b);
00087
00088 while (num--)
00089 {
00090 if (GETBIT(GETSIGN(a), HASHVAL(*ptr)))
00091 return true;
00092 ptr++;
00093 }
00094
00095 return false;
00096 }
00097
00098 static bool
00099 _intbig_contains(GISTTYPE *a, ArrayType *b)
00100 {
00101 int num = ARRNELEMS(b);
00102 int32 *ptr = ARRPTR(b);
00103
00104 CHECKARRVALID(b);
00105
00106 while (num--)
00107 {
00108 if (!GETBIT(GETSIGN(a), HASHVAL(*ptr)))
00109 return false;
00110 ptr++;
00111 }
00112
00113 return true;
00114 }
00115
00116 Datum
00117 g_intbig_same(PG_FUNCTION_ARGS)
00118 {
00119 GISTTYPE *a = (GISTTYPE *) PG_GETARG_POINTER(0);
00120 GISTTYPE *b = (GISTTYPE *) PG_GETARG_POINTER(1);
00121 bool *result = (bool *) PG_GETARG_POINTER(2);
00122
00123 if (ISALLTRUE(a) && ISALLTRUE(b))
00124 *result = true;
00125 else if (ISALLTRUE(a))
00126 *result = false;
00127 else if (ISALLTRUE(b))
00128 *result = false;
00129 else
00130 {
00131 int32 i;
00132 BITVECP sa = GETSIGN(a),
00133 sb = GETSIGN(b);
00134
00135 *result = true;
00136 LOOPBYTE
00137 {
00138 if (sa[i] != sb[i])
00139 {
00140 *result = false;
00141 break;
00142 }
00143 }
00144 }
00145 PG_RETURN_POINTER(result);
00146 }
00147
00148 Datum
00149 g_intbig_compress(PG_FUNCTION_ARGS)
00150 {
00151 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00152
00153 if (entry->leafkey)
00154 {
00155 GISTENTRY *retval;
00156 ArrayType *in = DatumGetArrayTypeP(entry->key);
00157 int32 *ptr;
00158 int num;
00159 GISTTYPE *res = (GISTTYPE *) palloc0(CALCGTSIZE(0));
00160
00161 CHECKARRVALID(in);
00162 if (ARRISEMPTY(in))
00163 {
00164 ptr = NULL;
00165 num = 0;
00166 }
00167 else
00168 {
00169 ptr = ARRPTR(in);
00170 num = ARRNELEMS(in);
00171 }
00172 SET_VARSIZE(res, CALCGTSIZE(0));
00173
00174 while (num--)
00175 {
00176 HASH(GETSIGN(res), *ptr);
00177 ptr++;
00178 }
00179
00180 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00181 gistentryinit(*retval, PointerGetDatum(res),
00182 entry->rel, entry->page,
00183 entry->offset, FALSE);
00184
00185 if (in != DatumGetArrayTypeP(entry->key))
00186 pfree(in);
00187
00188 PG_RETURN_POINTER(retval);
00189 }
00190 else if (!ISALLTRUE(DatumGetPointer(entry->key)))
00191 {
00192 GISTENTRY *retval;
00193 int i;
00194 BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
00195 GISTTYPE *res;
00196
00197 LOOPBYTE
00198 {
00199 if ((sign[i] & 0xff) != 0xff)
00200 PG_RETURN_POINTER(entry);
00201 }
00202
00203 res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE));
00204 SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE));
00205 res->flag = ALLISTRUE;
00206
00207 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00208 gistentryinit(*retval, PointerGetDatum(res),
00209 entry->rel, entry->page,
00210 entry->offset, FALSE);
00211
00212 PG_RETURN_POINTER(retval);
00213 }
00214
00215 PG_RETURN_POINTER(entry);
00216 }
00217
00218
00219 static int32
00220 sizebitvec(BITVECP sign)
00221 {
00222 int32 size = 0,
00223 i;
00224
00225 LOOPBYTE
00226 size += number_of_ones[(unsigned char) sign[i]];
00227 return size;
00228 }
00229
00230 static int
00231 hemdistsign(BITVECP a, BITVECP b)
00232 {
00233 int i,
00234 diff,
00235 dist = 0;
00236
00237 LOOPBYTE
00238 {
00239 diff = (unsigned char) (a[i] ^ b[i]);
00240 dist += number_of_ones[diff];
00241 }
00242 return dist;
00243 }
00244
00245 static int
00246 hemdist(GISTTYPE *a, GISTTYPE *b)
00247 {
00248 if (ISALLTRUE(a))
00249 {
00250 if (ISALLTRUE(b))
00251 return 0;
00252 else
00253 return SIGLENBIT - sizebitvec(GETSIGN(b));
00254 }
00255 else if (ISALLTRUE(b))
00256 return SIGLENBIT - sizebitvec(GETSIGN(a));
00257
00258 return hemdistsign(GETSIGN(a), GETSIGN(b));
00259 }
00260
00261 Datum
00262 g_intbig_decompress(PG_FUNCTION_ARGS)
00263 {
00264 PG_RETURN_DATUM(PG_GETARG_DATUM(0));
00265 }
00266
00267 static int32
00268 unionkey(BITVECP sbase, GISTTYPE *add)
00269 {
00270 int32 i;
00271 BITVECP sadd = GETSIGN(add);
00272
00273 if (ISALLTRUE(add))
00274 return 1;
00275 LOOPBYTE
00276 sbase[i] |= sadd[i];
00277 return 0;
00278 }
00279
00280 Datum
00281 g_intbig_union(PG_FUNCTION_ARGS)
00282 {
00283 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00284 int *size = (int *) PG_GETARG_POINTER(1);
00285 BITVEC base;
00286 int32 i,
00287 len;
00288 int32 flag = 0;
00289 GISTTYPE *result;
00290
00291 MemSet((void *) base, 0, sizeof(BITVEC));
00292 for (i = 0; i < entryvec->n; i++)
00293 {
00294 if (unionkey(base, GETENTRY(entryvec, i)))
00295 {
00296 flag = ALLISTRUE;
00297 break;
00298 }
00299 }
00300
00301 len = CALCGTSIZE(flag);
00302 result = (GISTTYPE *) palloc(len);
00303 SET_VARSIZE(result, len);
00304 result->flag = flag;
00305 if (!ISALLTRUE(result))
00306 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
00307 *size = len;
00308
00309 PG_RETURN_POINTER(result);
00310 }
00311
00312 Datum
00313 g_intbig_penalty(PG_FUNCTION_ARGS)
00314 {
00315 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
00316 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
00317 float *penalty = (float *) PG_GETARG_POINTER(2);
00318 GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
00319 GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
00320
00321 *penalty = hemdist(origval, newval);
00322 PG_RETURN_POINTER(penalty);
00323 }
00324
00325
00326 typedef struct
00327 {
00328 OffsetNumber pos;
00329 int32 cost;
00330 } SPLITCOST;
00331
00332 static int
00333 comparecost(const void *a, const void *b)
00334 {
00335 return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
00336 }
00337
00338
00339 Datum
00340 g_intbig_picksplit(PG_FUNCTION_ARGS)
00341 {
00342 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00343 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
00344 OffsetNumber k,
00345 j;
00346 GISTTYPE *datum_l,
00347 *datum_r;
00348 BITVECP union_l,
00349 union_r;
00350 int32 size_alpha,
00351 size_beta;
00352 int32 size_waste,
00353 waste = -1;
00354 int32 nbytes;
00355 OffsetNumber seed_1 = 0,
00356 seed_2 = 0;
00357 OffsetNumber *left,
00358 *right;
00359 OffsetNumber maxoff;
00360 BITVECP ptr;
00361 int i;
00362 SPLITCOST *costvector;
00363 GISTTYPE *_k,
00364 *_j;
00365
00366 maxoff = entryvec->n - 2;
00367 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
00368 v->spl_left = (OffsetNumber *) palloc(nbytes);
00369 v->spl_right = (OffsetNumber *) palloc(nbytes);
00370
00371 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
00372 {
00373 _k = GETENTRY(entryvec, k);
00374 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
00375 {
00376 size_waste = hemdist(_k, GETENTRY(entryvec, j));
00377 if (size_waste > waste)
00378 {
00379 waste = size_waste;
00380 seed_1 = k;
00381 seed_2 = j;
00382 }
00383 }
00384 }
00385
00386 left = v->spl_left;
00387 v->spl_nleft = 0;
00388 right = v->spl_right;
00389 v->spl_nright = 0;
00390
00391 if (seed_1 == 0 || seed_2 == 0)
00392 {
00393 seed_1 = 1;
00394 seed_2 = 2;
00395 }
00396
00397
00398 if (ISALLTRUE(GETENTRY(entryvec, seed_1)))
00399 {
00400 datum_l = (GISTTYPE *) palloc(GTHDRSIZE);
00401 SET_VARSIZE(datum_l, GTHDRSIZE);
00402 datum_l->flag = ALLISTRUE;
00403 }
00404 else
00405 {
00406 datum_l = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
00407 SET_VARSIZE(datum_l, GTHDRSIZE + SIGLEN);
00408 datum_l->flag = 0;
00409 memcpy((void *) GETSIGN(datum_l), (void *) GETSIGN(GETENTRY(entryvec, seed_1)), sizeof(BITVEC));
00410 }
00411 if (ISALLTRUE(GETENTRY(entryvec, seed_2)))
00412 {
00413 datum_r = (GISTTYPE *) palloc(GTHDRSIZE);
00414 SET_VARSIZE(datum_r, GTHDRSIZE);
00415 datum_r->flag = ALLISTRUE;
00416 }
00417 else
00418 {
00419 datum_r = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
00420 SET_VARSIZE(datum_r, GTHDRSIZE + SIGLEN);
00421 datum_r->flag = 0;
00422 memcpy((void *) GETSIGN(datum_r), (void *) GETSIGN(GETENTRY(entryvec, seed_2)), sizeof(BITVEC));
00423 }
00424
00425 maxoff = OffsetNumberNext(maxoff);
00426
00427 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
00428 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
00429 {
00430 costvector[j - 1].pos = j;
00431 _j = GETENTRY(entryvec, j);
00432 size_alpha = hemdist(datum_l, _j);
00433 size_beta = hemdist(datum_r, _j);
00434 costvector[j - 1].cost = Abs(size_alpha - size_beta);
00435 }
00436 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
00437
00438 union_l = GETSIGN(datum_l);
00439 union_r = GETSIGN(datum_r);
00440
00441 for (k = 0; k < maxoff; k++)
00442 {
00443 j = costvector[k].pos;
00444 if (j == seed_1)
00445 {
00446 *left++ = j;
00447 v->spl_nleft++;
00448 continue;
00449 }
00450 else if (j == seed_2)
00451 {
00452 *right++ = j;
00453 v->spl_nright++;
00454 continue;
00455 }
00456 _j = GETENTRY(entryvec, j);
00457 size_alpha = hemdist(datum_l, _j);
00458 size_beta = hemdist(datum_r, _j);
00459
00460 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
00461 {
00462 if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
00463 {
00464 if (!ISALLTRUE(datum_l))
00465 MemSet((void *) union_l, 0xff, sizeof(BITVEC));
00466 }
00467 else
00468 {
00469 ptr = GETSIGN(_j);
00470 LOOPBYTE
00471 union_l[i] |= ptr[i];
00472 }
00473 *left++ = j;
00474 v->spl_nleft++;
00475 }
00476 else
00477 {
00478 if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
00479 {
00480 if (!ISALLTRUE(datum_r))
00481 MemSet((void *) union_r, 0xff, sizeof(BITVEC));
00482 }
00483 else
00484 {
00485 ptr = GETSIGN(_j);
00486 LOOPBYTE
00487 union_r[i] |= ptr[i];
00488 }
00489 *right++ = j;
00490 v->spl_nright++;
00491 }
00492 }
00493
00494 *right = *left = FirstOffsetNumber;
00495 pfree(costvector);
00496
00497 v->spl_ldatum = PointerGetDatum(datum_l);
00498 v->spl_rdatum = PointerGetDatum(datum_r);
00499
00500 PG_RETURN_POINTER(v);
00501 }
00502
00503 Datum
00504 g_intbig_consistent(PG_FUNCTION_ARGS)
00505 {
00506 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00507 ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
00508 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
00509
00510
00511 bool *recheck = (bool *) PG_GETARG_POINTER(4);
00512 bool retval;
00513
00514
00515 *recheck = true;
00516
00517 if (ISALLTRUE(DatumGetPointer(entry->key)))
00518 PG_RETURN_BOOL(true);
00519
00520 if (strategy == BooleanSearchStrategy)
00521 {
00522 retval = signconsistent((QUERYTYPE *) query,
00523 GETSIGN(DatumGetPointer(entry->key)),
00524 false);
00525 PG_FREE_IF_COPY(query, 1);
00526 PG_RETURN_BOOL(retval);
00527 }
00528
00529 CHECKARRVALID(query);
00530
00531 switch (strategy)
00532 {
00533 case RTOverlapStrategyNumber:
00534 retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key), query);
00535 break;
00536 case RTSameStrategyNumber:
00537 if (GIST_LEAF(entry))
00538 {
00539 int i,
00540 num = ARRNELEMS(query);
00541 int32 *ptr = ARRPTR(query);
00542 BITVEC qp;
00543 BITVECP dq,
00544 de;
00545
00546 memset(qp, 0, sizeof(BITVEC));
00547
00548 while (num--)
00549 {
00550 HASH(qp, *ptr);
00551 ptr++;
00552 }
00553
00554 de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
00555 dq = qp;
00556 retval = true;
00557 LOOPBYTE
00558 {
00559 if (de[i] != dq[i])
00560 {
00561 retval = false;
00562 break;
00563 }
00564 }
00565
00566 }
00567 else
00568 retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key), query);
00569 break;
00570 case RTContainsStrategyNumber:
00571 case RTOldContainsStrategyNumber:
00572 retval = _intbig_contains((GISTTYPE *) DatumGetPointer(entry->key), query);
00573 break;
00574 case RTContainedByStrategyNumber:
00575 case RTOldContainedByStrategyNumber:
00576 if (GIST_LEAF(entry))
00577 {
00578 int i,
00579 num = ARRNELEMS(query);
00580 int32 *ptr = ARRPTR(query);
00581 BITVEC qp;
00582 BITVECP dq,
00583 de;
00584
00585 memset(qp, 0, sizeof(BITVEC));
00586
00587 while (num--)
00588 {
00589 HASH(qp, *ptr);
00590 ptr++;
00591 }
00592
00593 de = GETSIGN((GISTTYPE *) DatumGetPointer(entry->key));
00594 dq = qp;
00595 retval = true;
00596 LOOPBYTE
00597 {
00598 if (de[i] & ~dq[i])
00599 {
00600 retval = false;
00601 break;
00602 }
00603 }
00604 }
00605 else
00606 retval = _intbig_overlap((GISTTYPE *) DatumGetPointer(entry->key), query);
00607 break;
00608 default:
00609 retval = FALSE;
00610 }
00611 PG_FREE_IF_COPY(query, 1);
00612 PG_RETURN_BOOL(retval);
00613 }