00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "postgres.h"
00016
00017 #include "access/gin_private.h"
00018 #include "utils/rel.h"
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 IndexTuple
00036 GinFormTuple(GinState *ginstate,
00037 OffsetNumber attnum, Datum key, GinNullCategory category,
00038 ItemPointerData *ipd, uint32 nipd,
00039 bool errorTooBig)
00040 {
00041 Datum datums[2];
00042 bool isnull[2];
00043 IndexTuple itup;
00044 uint32 newsize;
00045
00046
00047 if (ginstate->oneCol)
00048 {
00049 datums[0] = key;
00050 isnull[0] = (category != GIN_CAT_NORM_KEY);
00051 }
00052 else
00053 {
00054 datums[0] = UInt16GetDatum(attnum);
00055 isnull[0] = false;
00056 datums[1] = key;
00057 isnull[1] = (category != GIN_CAT_NORM_KEY);
00058 }
00059
00060 itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull);
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072 newsize = IndexTupleSize(itup);
00073
00074 if (IndexTupleHasNulls(itup))
00075 {
00076 uint32 minsize;
00077
00078 Assert(category != GIN_CAT_NORM_KEY);
00079 minsize = GinCategoryOffset(itup, ginstate) + sizeof(GinNullCategory);
00080 newsize = Max(newsize, minsize);
00081 }
00082
00083 newsize = SHORTALIGN(newsize);
00084
00085 GinSetPostingOffset(itup, newsize);
00086
00087 GinSetNPosting(itup, nipd);
00088
00089
00090
00091
00092
00093 newsize += sizeof(ItemPointerData) * nipd;
00094 newsize = MAXALIGN(newsize);
00095 if (newsize > Min(INDEX_SIZE_MASK, GinMaxItemSize))
00096 {
00097 if (errorTooBig)
00098 ereport(ERROR,
00099 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
00100 errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
00101 (unsigned long) newsize,
00102 (unsigned long) Min(INDEX_SIZE_MASK,
00103 GinMaxItemSize),
00104 RelationGetRelationName(ginstate->index))));
00105 pfree(itup);
00106 return NULL;
00107 }
00108
00109
00110
00111
00112 if (newsize != IndexTupleSize(itup))
00113 {
00114 itup = repalloc(itup, newsize);
00115
00116
00117 itup->t_info &= ~INDEX_SIZE_MASK;
00118 itup->t_info |= newsize;
00119 }
00120
00121
00122
00123
00124 if (category != GIN_CAT_NORM_KEY)
00125 {
00126 Assert(IndexTupleHasNulls(itup));
00127 GinSetNullCategory(itup, ginstate, category);
00128 }
00129
00130
00131
00132
00133 if (ipd)
00134 memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd);
00135
00136 return itup;
00137 }
00138
00139
00140
00141
00142
00143
00144 void
00145 GinShortenTuple(IndexTuple itup, uint32 nipd)
00146 {
00147 uint32 newsize;
00148
00149 Assert(nipd <= GinGetNPosting(itup));
00150
00151 newsize = GinGetPostingOffset(itup) + sizeof(ItemPointerData) * nipd;
00152 newsize = MAXALIGN(newsize);
00153
00154 Assert(newsize <= (itup->t_info & INDEX_SIZE_MASK));
00155
00156 itup->t_info &= ~INDEX_SIZE_MASK;
00157 itup->t_info |= newsize;
00158
00159 GinSetNPosting(itup, nipd);
00160 }
00161
00162
00163
00164
00165
00166
00167
00168
00169 static IndexTuple
00170 GinFormInteriorTuple(IndexTuple itup, Page page, BlockNumber childblk)
00171 {
00172 IndexTuple nitup;
00173
00174 if (GinPageIsLeaf(page) && !GinIsPostingTree(itup))
00175 {
00176
00177 uint32 origsize = GinGetPostingOffset(itup);
00178
00179 origsize = MAXALIGN(origsize);
00180 nitup = (IndexTuple) palloc(origsize);
00181 memcpy(nitup, itup, origsize);
00182
00183 nitup->t_info &= ~INDEX_SIZE_MASK;
00184 nitup->t_info |= origsize;
00185 }
00186 else
00187 {
00188
00189 nitup = (IndexTuple) palloc(IndexTupleSize(itup));
00190 memcpy(nitup, itup, IndexTupleSize(itup));
00191 }
00192
00193
00194 GinSetDownlink(nitup, childblk);
00195
00196 return nitup;
00197 }
00198
00199
00200
00201
00202
00203 static IndexTuple
00204 getRightMostTuple(Page page)
00205 {
00206 OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
00207
00208 return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff));
00209 }
00210
00211 static bool
00212 entryIsMoveRight(GinBtree btree, Page page)
00213 {
00214 IndexTuple itup;
00215 OffsetNumber attnum;
00216 Datum key;
00217 GinNullCategory category;
00218
00219 if (GinPageRightMost(page))
00220 return FALSE;
00221
00222 itup = getRightMostTuple(page);
00223 attnum = gintuple_get_attrnum(btree->ginstate, itup);
00224 key = gintuple_get_key(btree->ginstate, itup, &category);
00225
00226 if (ginCompareAttEntries(btree->ginstate,
00227 btree->entryAttnum, btree->entryKey, btree->entryCategory,
00228 attnum, key, category) > 0)
00229 return TRUE;
00230
00231 return FALSE;
00232 }
00233
00234
00235
00236
00237
00238 static BlockNumber
00239 entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
00240 {
00241 OffsetNumber low,
00242 high,
00243 maxoff;
00244 IndexTuple itup = NULL;
00245 int result;
00246 Page page = BufferGetPage(stack->buffer);
00247
00248 Assert(!GinPageIsLeaf(page));
00249 Assert(!GinPageIsData(page));
00250
00251 if (btree->fullScan)
00252 {
00253 stack->off = FirstOffsetNumber;
00254 stack->predictNumber *= PageGetMaxOffsetNumber(page);
00255 return btree->getLeftMostPage(btree, page);
00256 }
00257
00258 low = FirstOffsetNumber;
00259 maxoff = high = PageGetMaxOffsetNumber(page);
00260 Assert(high >= low);
00261
00262 high++;
00263
00264 while (high > low)
00265 {
00266 OffsetNumber mid = low + ((high - low) / 2);
00267
00268 if (mid == maxoff && GinPageRightMost(page))
00269 {
00270
00271 result = -1;
00272 }
00273 else
00274 {
00275 OffsetNumber attnum;
00276 Datum key;
00277 GinNullCategory category;
00278
00279 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
00280 attnum = gintuple_get_attrnum(btree->ginstate, itup);
00281 key = gintuple_get_key(btree->ginstate, itup, &category);
00282 result = ginCompareAttEntries(btree->ginstate,
00283 btree->entryAttnum,
00284 btree->entryKey,
00285 btree->entryCategory,
00286 attnum, key, category);
00287 }
00288
00289 if (result == 0)
00290 {
00291 stack->off = mid;
00292 Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO);
00293 return GinGetDownlink(itup);
00294 }
00295 else if (result > 0)
00296 low = mid + 1;
00297 else
00298 high = mid;
00299 }
00300
00301 Assert(high >= FirstOffsetNumber && high <= maxoff);
00302
00303 stack->off = high;
00304 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high));
00305 Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO);
00306 return GinGetDownlink(itup);
00307 }
00308
00309
00310
00311
00312
00313
00314 static bool
00315 entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack)
00316 {
00317 Page page = BufferGetPage(stack->buffer);
00318 OffsetNumber low,
00319 high;
00320
00321 Assert(GinPageIsLeaf(page));
00322 Assert(!GinPageIsData(page));
00323
00324 if (btree->fullScan)
00325 {
00326 stack->off = FirstOffsetNumber;
00327 return TRUE;
00328 }
00329
00330 low = FirstOffsetNumber;
00331 high = PageGetMaxOffsetNumber(page);
00332
00333 if (high < low)
00334 {
00335 stack->off = FirstOffsetNumber;
00336 return false;
00337 }
00338
00339 high++;
00340
00341 while (high > low)
00342 {
00343 OffsetNumber mid = low + ((high - low) / 2);
00344 IndexTuple itup;
00345 OffsetNumber attnum;
00346 Datum key;
00347 GinNullCategory category;
00348 int result;
00349
00350 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
00351 attnum = gintuple_get_attrnum(btree->ginstate, itup);
00352 key = gintuple_get_key(btree->ginstate, itup, &category);
00353 result = ginCompareAttEntries(btree->ginstate,
00354 btree->entryAttnum,
00355 btree->entryKey,
00356 btree->entryCategory,
00357 attnum, key, category);
00358 if (result == 0)
00359 {
00360 stack->off = mid;
00361 return true;
00362 }
00363 else if (result > 0)
00364 low = mid + 1;
00365 else
00366 high = mid;
00367 }
00368
00369 stack->off = high;
00370 return false;
00371 }
00372
00373 static OffsetNumber
00374 entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
00375 {
00376 OffsetNumber i,
00377 maxoff = PageGetMaxOffsetNumber(page);
00378 IndexTuple itup;
00379
00380 Assert(!GinPageIsLeaf(page));
00381 Assert(!GinPageIsData(page));
00382
00383
00384 if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
00385 {
00386 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff));
00387 if (GinGetDownlink(itup) == blkno)
00388 return storedOff;
00389
00390
00391
00392
00393
00394 for (i = storedOff + 1; i <= maxoff; i++)
00395 {
00396 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
00397 if (GinGetDownlink(itup) == blkno)
00398 return i;
00399 }
00400 maxoff = storedOff - 1;
00401 }
00402
00403
00404 for (i = FirstOffsetNumber; i <= maxoff; i++)
00405 {
00406 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
00407 if (GinGetDownlink(itup) == blkno)
00408 return i;
00409 }
00410
00411 return InvalidOffsetNumber;
00412 }
00413
00414 static BlockNumber
00415 entryGetLeftMostPage(GinBtree btree, Page page)
00416 {
00417 IndexTuple itup;
00418
00419 Assert(!GinPageIsLeaf(page));
00420 Assert(!GinPageIsData(page));
00421 Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
00422
00423 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
00424 return GinGetDownlink(itup);
00425 }
00426
00427 static bool
00428 entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
00429 {
00430 Size itupsz = 0;
00431 Page page = BufferGetPage(buf);
00432
00433 Assert(btree->entry);
00434 Assert(!GinPageIsData(page));
00435
00436 if (btree->isDelete)
00437 {
00438 IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
00439
00440 itupsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
00441 }
00442
00443 if (PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData))
00444 return true;
00445
00446 return false;
00447 }
00448
00449
00450
00451
00452
00453
00454 static BlockNumber
00455 entryPreparePage(GinBtree btree, Page page, OffsetNumber off)
00456 {
00457 BlockNumber ret = InvalidBlockNumber;
00458
00459 Assert(btree->entry);
00460 Assert(!GinPageIsData(page));
00461
00462 if (btree->isDelete)
00463 {
00464 Assert(GinPageIsLeaf(page));
00465 PageIndexTupleDelete(page, off);
00466 }
00467
00468 if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
00469 {
00470 IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
00471
00472 GinSetDownlink(itup, btree->rightblkno);
00473 ret = btree->rightblkno;
00474 }
00475
00476 btree->rightblkno = InvalidBlockNumber;
00477
00478 return ret;
00479 }
00480
00481
00482
00483
00484 static void
00485 entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
00486 {
00487 Page page = BufferGetPage(buf);
00488 OffsetNumber placed;
00489 int cnt = 0;
00490
00491
00492 static XLogRecData rdata[3];
00493 static ginxlogInsert data;
00494
00495 *prdata = rdata;
00496 data.updateBlkno = entryPreparePage(btree, page, off);
00497
00498 placed = PageAddItem(page, (Item) btree->entry, IndexTupleSize(btree->entry), off, false, false);
00499 if (placed != off)
00500 elog(ERROR, "failed to add item to index page in \"%s\"",
00501 RelationGetRelationName(btree->index));
00502
00503 data.node = btree->index->rd_node;
00504 data.blkno = BufferGetBlockNumber(buf);
00505 data.offset = off;
00506 data.nitem = 1;
00507 data.isDelete = btree->isDelete;
00508 data.isData = false;
00509 data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
00510
00511
00512
00513
00514
00515
00516
00517
00518 if (data.updateBlkno == InvalidBlockNumber)
00519 {
00520 rdata[0].buffer = buf;
00521 rdata[0].buffer_std = TRUE;
00522 rdata[0].data = NULL;
00523 rdata[0].len = 0;
00524 rdata[0].next = &rdata[1];
00525 cnt++;
00526 }
00527
00528 rdata[cnt].buffer = InvalidBuffer;
00529 rdata[cnt].data = (char *) &data;
00530 rdata[cnt].len = sizeof(ginxlogInsert);
00531 rdata[cnt].next = &rdata[cnt + 1];
00532 cnt++;
00533
00534 rdata[cnt].buffer = InvalidBuffer;
00535 rdata[cnt].data = (char *) btree->entry;
00536 rdata[cnt].len = IndexTupleSize(btree->entry);
00537 rdata[cnt].next = NULL;
00538
00539 btree->entry = NULL;
00540 }
00541
00542
00543
00544
00545
00546
00547
00548 static Page
00549 entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
00550 {
00551 OffsetNumber i,
00552 maxoff,
00553 separator = InvalidOffsetNumber;
00554 Size totalsize = 0;
00555 Size lsize = 0,
00556 size;
00557 char *ptr;
00558 IndexTuple itup,
00559 leftrightmost = NULL;
00560 Page page;
00561 Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf));
00562 Page rpage = BufferGetPage(rbuf);
00563 Size pageSize = PageGetPageSize(lpage);
00564
00565
00566 static XLogRecData rdata[2];
00567 static ginxlogSplit data;
00568 static char tupstore[2 * BLCKSZ];
00569
00570 *prdata = rdata;
00571 data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
00572 InvalidOffsetNumber : GinGetDownlink(btree->entry);
00573 data.updateBlkno = entryPreparePage(btree, lpage, off);
00574
00575 maxoff = PageGetMaxOffsetNumber(lpage);
00576 ptr = tupstore;
00577
00578 for (i = FirstOffsetNumber; i <= maxoff; i++)
00579 {
00580 if (i == off)
00581 {
00582 size = MAXALIGN(IndexTupleSize(btree->entry));
00583 memcpy(ptr, btree->entry, size);
00584 ptr += size;
00585 totalsize += size + sizeof(ItemIdData);
00586 }
00587
00588 itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i));
00589 size = MAXALIGN(IndexTupleSize(itup));
00590 memcpy(ptr, itup, size);
00591 ptr += size;
00592 totalsize += size + sizeof(ItemIdData);
00593 }
00594
00595 if (off == maxoff + 1)
00596 {
00597 size = MAXALIGN(IndexTupleSize(btree->entry));
00598 memcpy(ptr, btree->entry, size);
00599 ptr += size;
00600 totalsize += size + sizeof(ItemIdData);
00601 }
00602
00603 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
00604 GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
00605
00606 ptr = tupstore;
00607 maxoff++;
00608 lsize = 0;
00609
00610 page = lpage;
00611 for (i = FirstOffsetNumber; i <= maxoff; i++)
00612 {
00613 itup = (IndexTuple) ptr;
00614
00615 if (lsize > totalsize / 2)
00616 {
00617 if (separator == InvalidOffsetNumber)
00618 separator = i - 1;
00619 page = rpage;
00620 }
00621 else
00622 {
00623 leftrightmost = itup;
00624 lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
00625 }
00626
00627 if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
00628 elog(ERROR, "failed to add item to index page in \"%s\"",
00629 RelationGetRelationName(btree->index));
00630 ptr += MAXALIGN(IndexTupleSize(itup));
00631 }
00632
00633 btree->entry = GinFormInteriorTuple(leftrightmost, lpage,
00634 BufferGetBlockNumber(lbuf));
00635
00636 btree->rightblkno = BufferGetBlockNumber(rbuf);
00637
00638 data.node = btree->index->rd_node;
00639 data.rootBlkno = InvalidBlockNumber;
00640 data.lblkno = BufferGetBlockNumber(lbuf);
00641 data.rblkno = BufferGetBlockNumber(rbuf);
00642 data.separator = separator;
00643 data.nitem = maxoff;
00644 data.isData = FALSE;
00645 data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
00646 data.isRootSplit = FALSE;
00647
00648 rdata[0].buffer = InvalidBuffer;
00649 rdata[0].data = (char *) &data;
00650 rdata[0].len = sizeof(ginxlogSplit);
00651 rdata[0].next = &rdata[1];
00652
00653 rdata[1].buffer = InvalidBuffer;
00654 rdata[1].data = tupstore;
00655 rdata[1].len = MAXALIGN(totalsize);
00656 rdata[1].next = NULL;
00657
00658 return lpage;
00659 }
00660
00661
00662
00663
00664 IndexTuple
00665 ginPageGetLinkItup(Buffer buf)
00666 {
00667 IndexTuple itup,
00668 nitup;
00669 Page page = BufferGetPage(buf);
00670
00671 itup = getRightMostTuple(page);
00672 nitup = GinFormInteriorTuple(itup, page, BufferGetBlockNumber(buf));
00673
00674 return nitup;
00675 }
00676
00677
00678
00679
00680
00681 void
00682 ginEntryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
00683 {
00684 Page page;
00685 IndexTuple itup;
00686
00687 page = BufferGetPage(root);
00688
00689 itup = ginPageGetLinkItup(lbuf);
00690 if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
00691 elog(ERROR, "failed to add item to index root page");
00692 pfree(itup);
00693
00694 itup = ginPageGetLinkItup(rbuf);
00695 if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
00696 elog(ERROR, "failed to add item to index root page");
00697 pfree(itup);
00698 }
00699
00700
00701
00702
00703
00704
00705
00706 void
00707 ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
00708 Datum key, GinNullCategory category,
00709 GinState *ginstate)
00710 {
00711 memset(btree, 0, sizeof(GinBtreeData));
00712
00713 btree->index = ginstate->index;
00714 btree->ginstate = ginstate;
00715
00716 btree->findChildPage = entryLocateEntry;
00717 btree->isMoveRight = entryIsMoveRight;
00718 btree->findItem = entryLocateLeafEntry;
00719 btree->findChildPtr = entryFindChildPtr;
00720 btree->getLeftMostPage = entryGetLeftMostPage;
00721 btree->isEnoughSpace = entryIsEnoughSpace;
00722 btree->placeToPage = entryPlaceToPage;
00723 btree->splitPage = entrySplitPage;
00724 btree->fillRoot = ginEntryFillRoot;
00725
00726 btree->isData = FALSE;
00727 btree->searchMode = FALSE;
00728 btree->fullScan = FALSE;
00729 btree->isBuild = FALSE;
00730
00731 btree->entryAttnum = attnum;
00732 btree->entryKey = key;
00733 btree->entryCategory = category;
00734 btree->isDelete = FALSE;
00735 }