00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "postgres.h"
00016
00017 #include "access/gin_private.h"
00018 #include "utils/rel.h"
00019
00020 int
00021 ginCompareItemPointers(ItemPointer a, ItemPointer b)
00022 {
00023 BlockNumber ba = GinItemPointerGetBlockNumber(a);
00024 BlockNumber bb = GinItemPointerGetBlockNumber(b);
00025
00026 if (ba == bb)
00027 {
00028 OffsetNumber oa = GinItemPointerGetOffsetNumber(a);
00029 OffsetNumber ob = GinItemPointerGetOffsetNumber(b);
00030
00031 if (oa == ob)
00032 return 0;
00033 return (oa > ob) ? 1 : -1;
00034 }
00035
00036 return (ba > bb) ? 1 : -1;
00037 }
00038
00039
00040
00041
00042
00043
00044 uint32
00045 ginMergeItemPointers(ItemPointerData *dst,
00046 ItemPointerData *a, uint32 na,
00047 ItemPointerData *b, uint32 nb)
00048 {
00049 ItemPointerData *dptr = dst;
00050 ItemPointerData *aptr = a,
00051 *bptr = b;
00052
00053 while (aptr - a < na && bptr - b < nb)
00054 {
00055 int cmp = ginCompareItemPointers(aptr, bptr);
00056
00057 if (cmp > 0)
00058 *dptr++ = *bptr++;
00059 else if (cmp == 0)
00060 {
00061
00062 *dptr++ = *bptr++;
00063 aptr++;
00064 }
00065 else
00066 *dptr++ = *aptr++;
00067 }
00068
00069 while (aptr - a < na)
00070 *dptr++ = *aptr++;
00071
00072 while (bptr - b < nb)
00073 *dptr++ = *bptr++;
00074
00075 return dptr - dst;
00076 }
00077
00078
00079
00080
00081
00082 static bool
00083 dataIsMoveRight(GinBtree btree, Page page)
00084 {
00085 ItemPointer iptr = GinDataPageGetRightBound(page);
00086
00087 if (GinPageRightMost(page))
00088 return FALSE;
00089
00090 return (ginCompareItemPointers(btree->items + btree->curitem, iptr) > 0) ? TRUE : FALSE;
00091 }
00092
00093
00094
00095
00096
00097 static BlockNumber
00098 dataLocateItem(GinBtree btree, GinBtreeStack *stack)
00099 {
00100 OffsetNumber low,
00101 high,
00102 maxoff;
00103 PostingItem *pitem = NULL;
00104 int result;
00105 Page page = BufferGetPage(stack->buffer);
00106
00107 Assert(!GinPageIsLeaf(page));
00108 Assert(GinPageIsData(page));
00109
00110 if (btree->fullScan)
00111 {
00112 stack->off = FirstOffsetNumber;
00113 stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
00114 return btree->getLeftMostPage(btree, page);
00115 }
00116
00117 low = FirstOffsetNumber;
00118 maxoff = high = GinPageGetOpaque(page)->maxoff;
00119 Assert(high >= low);
00120
00121 high++;
00122
00123 while (high > low)
00124 {
00125 OffsetNumber mid = low + ((high - low) / 2);
00126
00127 pitem = (PostingItem *) GinDataPageGetItem(page, mid);
00128
00129 if (mid == maxoff)
00130 {
00131
00132
00133
00134
00135 result = -1;
00136 }
00137 else
00138 {
00139 pitem = (PostingItem *) GinDataPageGetItem(page, mid);
00140 result = ginCompareItemPointers(btree->items + btree->curitem, &(pitem->key));
00141 }
00142
00143 if (result == 0)
00144 {
00145 stack->off = mid;
00146 return PostingItemGetBlockNumber(pitem);
00147 }
00148 else if (result > 0)
00149 low = mid + 1;
00150 else
00151 high = mid;
00152 }
00153
00154 Assert(high >= FirstOffsetNumber && high <= maxoff);
00155
00156 stack->off = high;
00157 pitem = (PostingItem *) GinDataPageGetItem(page, high);
00158 return PostingItemGetBlockNumber(pitem);
00159 }
00160
00161
00162
00163
00164
00165
00166 static bool
00167 dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack)
00168 {
00169 Page page = BufferGetPage(stack->buffer);
00170 OffsetNumber low,
00171 high;
00172 int result;
00173
00174 Assert(GinPageIsLeaf(page));
00175 Assert(GinPageIsData(page));
00176
00177 if (btree->fullScan)
00178 {
00179 stack->off = FirstOffsetNumber;
00180 return TRUE;
00181 }
00182
00183 low = FirstOffsetNumber;
00184 high = GinPageGetOpaque(page)->maxoff;
00185
00186 if (high < low)
00187 {
00188 stack->off = FirstOffsetNumber;
00189 return false;
00190 }
00191
00192 high++;
00193
00194 while (high > low)
00195 {
00196 OffsetNumber mid = low + ((high - low) / 2);
00197
00198 result = ginCompareItemPointers(btree->items + btree->curitem, (ItemPointer) GinDataPageGetItem(page, mid));
00199
00200 if (result == 0)
00201 {
00202 stack->off = mid;
00203 return true;
00204 }
00205 else if (result > 0)
00206 low = mid + 1;
00207 else
00208 high = mid;
00209 }
00210
00211 stack->off = high;
00212 return false;
00213 }
00214
00215
00216
00217
00218
00219 static OffsetNumber
00220 dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
00221 {
00222 OffsetNumber i,
00223 maxoff = GinPageGetOpaque(page)->maxoff;
00224 PostingItem *pitem;
00225
00226 Assert(!GinPageIsLeaf(page));
00227 Assert(GinPageIsData(page));
00228
00229
00230 if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
00231 {
00232 pitem = (PostingItem *) GinDataPageGetItem(page, storedOff);
00233 if (PostingItemGetBlockNumber(pitem) == blkno)
00234 return storedOff;
00235
00236
00237
00238
00239
00240 for (i = storedOff + 1; i <= maxoff; i++)
00241 {
00242 pitem = (PostingItem *) GinDataPageGetItem(page, i);
00243 if (PostingItemGetBlockNumber(pitem) == blkno)
00244 return i;
00245 }
00246
00247 maxoff = storedOff - 1;
00248 }
00249
00250
00251 for (i = FirstOffsetNumber; i <= maxoff; i++)
00252 {
00253 pitem = (PostingItem *) GinDataPageGetItem(page, i);
00254 if (PostingItemGetBlockNumber(pitem) == blkno)
00255 return i;
00256 }
00257
00258 return InvalidOffsetNumber;
00259 }
00260
00261
00262
00263
00264 static BlockNumber
00265 dataGetLeftMostPage(GinBtree btree, Page page)
00266 {
00267 PostingItem *pitem;
00268
00269 Assert(!GinPageIsLeaf(page));
00270 Assert(GinPageIsData(page));
00271 Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
00272
00273 pitem = (PostingItem *) GinDataPageGetItem(page, FirstOffsetNumber);
00274 return PostingItemGetBlockNumber(pitem);
00275 }
00276
00277
00278
00279
00280
00281 void
00282 GinDataPageAddItem(Page page, void *data, OffsetNumber offset)
00283 {
00284 OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
00285 char *ptr;
00286
00287 if (offset == InvalidOffsetNumber)
00288 {
00289 ptr = GinDataPageGetItem(page, maxoff + 1);
00290 }
00291 else
00292 {
00293 ptr = GinDataPageGetItem(page, offset);
00294 if (maxoff + 1 - offset != 0)
00295 memmove(ptr + GinSizeOfDataPageItem(page),
00296 ptr,
00297 (maxoff - offset + 1) * GinSizeOfDataPageItem(page));
00298 }
00299 memcpy(ptr, data, GinSizeOfDataPageItem(page));
00300
00301 GinPageGetOpaque(page)->maxoff++;
00302 }
00303
00304
00305
00306
00307 void
00308 GinPageDeletePostingItem(Page page, OffsetNumber offset)
00309 {
00310 OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
00311
00312 Assert(!GinPageIsLeaf(page));
00313 Assert(offset >= FirstOffsetNumber && offset <= maxoff);
00314
00315 if (offset != maxoff)
00316 memmove(GinDataPageGetItem(page, offset), GinDataPageGetItem(page, offset + 1),
00317 sizeof(PostingItem) * (maxoff - offset));
00318
00319 GinPageGetOpaque(page)->maxoff--;
00320 }
00321
00322
00323
00324
00325
00326 static bool
00327 dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
00328 {
00329 Page page = BufferGetPage(buf);
00330
00331 Assert(GinPageIsData(page));
00332 Assert(!btree->isDelete);
00333
00334 if (GinPageIsLeaf(page))
00335 {
00336 if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
00337 {
00338 if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
00339 return true;
00340 }
00341 else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
00342 return true;
00343 }
00344 else if (sizeof(PostingItem) <= GinDataPageGetFreeSpace(page))
00345 return true;
00346
00347 return false;
00348 }
00349
00350
00351
00352
00353
00354
00355 static BlockNumber
00356 dataPrepareData(GinBtree btree, Page page, OffsetNumber off)
00357 {
00358 BlockNumber ret = InvalidBlockNumber;
00359
00360 Assert(GinPageIsData(page));
00361
00362 if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
00363 {
00364 PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, off);
00365
00366 PostingItemSetBlockNumber(pitem, btree->rightblkno);
00367 ret = btree->rightblkno;
00368 }
00369
00370 btree->rightblkno = InvalidBlockNumber;
00371
00372 return ret;
00373 }
00374
00375
00376
00377
00378
00379 static void
00380 dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
00381 {
00382 Page page = BufferGetPage(buf);
00383 int sizeofitem = GinSizeOfDataPageItem(page);
00384 int cnt = 0;
00385
00386
00387 static XLogRecData rdata[3];
00388 static ginxlogInsert data;
00389
00390 *prdata = rdata;
00391 Assert(GinPageIsData(page));
00392
00393 data.updateBlkno = dataPrepareData(btree, page, off);
00394
00395 data.node = btree->index->rd_node;
00396 data.blkno = BufferGetBlockNumber(buf);
00397 data.offset = off;
00398 data.nitem = 1;
00399 data.isDelete = FALSE;
00400 data.isData = TRUE;
00401 data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
00402
00403
00404
00405
00406
00407
00408
00409
00410 if (data.updateBlkno == InvalidBlockNumber)
00411 {
00412 rdata[0].buffer = buf;
00413 rdata[0].buffer_std = FALSE;
00414 rdata[0].data = NULL;
00415 rdata[0].len = 0;
00416 rdata[0].next = &rdata[1];
00417 cnt++;
00418 }
00419
00420 rdata[cnt].buffer = InvalidBuffer;
00421 rdata[cnt].data = (char *) &data;
00422 rdata[cnt].len = sizeof(ginxlogInsert);
00423 rdata[cnt].next = &rdata[cnt + 1];
00424 cnt++;
00425
00426 rdata[cnt].buffer = InvalidBuffer;
00427 rdata[cnt].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem));
00428 rdata[cnt].len = sizeofitem;
00429 rdata[cnt].next = NULL;
00430
00431 if (GinPageIsLeaf(page))
00432 {
00433 if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
00434 {
00435
00436 uint32 savedPos = btree->curitem;
00437
00438 while (btree->curitem < btree->nitem)
00439 {
00440 GinDataPageAddItem(page, btree->items + btree->curitem, off);
00441 off++;
00442 btree->curitem++;
00443 }
00444 data.nitem = btree->curitem - savedPos;
00445 rdata[cnt].len = sizeofitem * data.nitem;
00446 }
00447 else
00448 {
00449 GinDataPageAddItem(page, btree->items + btree->curitem, off);
00450 btree->curitem++;
00451 }
00452 }
00453 else
00454 GinDataPageAddItem(page, &(btree->pitem), off);
00455 }
00456
00457
00458
00459
00460
00461
00462
00463 static Page
00464 dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
00465 {
00466 char *ptr;
00467 OffsetNumber separator;
00468 ItemPointer bound;
00469 Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf));
00470 ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
00471 int sizeofitem = GinSizeOfDataPageItem(lpage);
00472 OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
00473 Page rpage = BufferGetPage(rbuf);
00474 Size pageSize = PageGetPageSize(lpage);
00475 Size freeSpace;
00476 uint32 nCopied = 1;
00477
00478
00479 static ginxlogSplit data;
00480 static XLogRecData rdata[4];
00481 static char vector[2 * BLCKSZ];
00482
00483 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
00484 freeSpace = GinDataPageGetFreeSpace(rpage);
00485
00486 *prdata = rdata;
00487 data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
00488 InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem));
00489 data.updateBlkno = dataPrepareData(btree, lpage, off);
00490
00491 memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber),
00492 maxoff * sizeofitem);
00493
00494 if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
00495 {
00496 nCopied = 0;
00497 while (btree->curitem < btree->nitem &&
00498 maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
00499 {
00500 memcpy(vector + maxoff * sizeof(ItemPointerData),
00501 btree->items + btree->curitem,
00502 sizeof(ItemPointerData));
00503 maxoff++;
00504 nCopied++;
00505 btree->curitem++;
00506 }
00507 }
00508 else
00509 {
00510 ptr = vector + (off - 1) * sizeofitem;
00511 if (maxoff + 1 - off != 0)
00512 memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
00513 if (GinPageIsLeaf(lpage))
00514 {
00515 memcpy(ptr, btree->items + btree->curitem, sizeofitem);
00516 btree->curitem++;
00517 }
00518 else
00519 memcpy(ptr, &(btree->pitem), sizeofitem);
00520
00521 maxoff++;
00522 }
00523
00524
00525
00526
00527
00528 if (btree->isBuild && GinPageRightMost(lpage))
00529 separator = freeSpace / sizeofitem;
00530 else
00531 separator = maxoff / 2;
00532
00533 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
00534 GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
00535
00536 memcpy(GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem);
00537 GinPageGetOpaque(lpage)->maxoff = separator;
00538 memcpy(GinDataPageGetItem(rpage, FirstOffsetNumber),
00539 vector + separator * sizeofitem, (maxoff - separator) * sizeofitem);
00540 GinPageGetOpaque(rpage)->maxoff = maxoff - separator;
00541
00542 PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
00543 if (GinPageIsLeaf(lpage))
00544 btree->pitem.key = *(ItemPointerData *) GinDataPageGetItem(lpage,
00545 GinPageGetOpaque(lpage)->maxoff);
00546 else
00547 btree->pitem.key = ((PostingItem *) GinDataPageGetItem(lpage,
00548 GinPageGetOpaque(lpage)->maxoff))->key;
00549 btree->rightblkno = BufferGetBlockNumber(rbuf);
00550
00551
00552 bound = GinDataPageGetRightBound(lpage);
00553 *bound = btree->pitem.key;
00554
00555
00556 bound = GinDataPageGetRightBound(rpage);
00557 *bound = oldbound;
00558
00559 data.node = btree->index->rd_node;
00560 data.rootBlkno = InvalidBlockNumber;
00561 data.lblkno = BufferGetBlockNumber(lbuf);
00562 data.rblkno = BufferGetBlockNumber(rbuf);
00563 data.separator = separator;
00564 data.nitem = maxoff;
00565 data.isData = TRUE;
00566 data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
00567 data.isRootSplit = FALSE;
00568 data.rightbound = oldbound;
00569
00570 rdata[0].buffer = InvalidBuffer;
00571 rdata[0].data = (char *) &data;
00572 rdata[0].len = sizeof(ginxlogSplit);
00573 rdata[0].next = &rdata[1];
00574
00575 rdata[1].buffer = InvalidBuffer;
00576 rdata[1].data = vector;
00577 rdata[1].len = MAXALIGN(maxoff * sizeofitem);
00578 rdata[1].next = NULL;
00579
00580 return lpage;
00581 }
00582
00583
00584
00585
00586
00587 void
00588 ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
00589 {
00590 Page page = BufferGetPage(root),
00591 lpage = BufferGetPage(lbuf),
00592 rpage = BufferGetPage(rbuf);
00593 PostingItem li,
00594 ri;
00595
00596 li.key = *GinDataPageGetRightBound(lpage);
00597 PostingItemSetBlockNumber(&li, BufferGetBlockNumber(lbuf));
00598 GinDataPageAddItem(page, &li, InvalidOffsetNumber);
00599
00600 ri.key = *GinDataPageGetRightBound(rpage);
00601 PostingItemSetBlockNumber(&ri, BufferGetBlockNumber(rbuf));
00602 GinDataPageAddItem(page, &ri, InvalidOffsetNumber);
00603 }
00604
00605 void
00606 ginPrepareDataScan(GinBtree btree, Relation index)
00607 {
00608 memset(btree, 0, sizeof(GinBtreeData));
00609
00610 btree->index = index;
00611
00612 btree->findChildPage = dataLocateItem;
00613 btree->isMoveRight = dataIsMoveRight;
00614 btree->findItem = dataLocateLeafItem;
00615 btree->findChildPtr = dataFindChildPtr;
00616 btree->getLeftMostPage = dataGetLeftMostPage;
00617 btree->isEnoughSpace = dataIsEnoughSpace;
00618 btree->placeToPage = dataPlaceToPage;
00619 btree->splitPage = dataSplitPage;
00620 btree->fillRoot = ginDataFillRoot;
00621
00622 btree->isData = TRUE;
00623 btree->searchMode = FALSE;
00624 btree->isDelete = FALSE;
00625 btree->fullScan = FALSE;
00626 btree->isBuild = FALSE;
00627 }
00628
00629 GinPostingTreeScan *
00630 ginPrepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode)
00631 {
00632 GinPostingTreeScan *gdi = (GinPostingTreeScan *) palloc0(sizeof(GinPostingTreeScan));
00633
00634 ginPrepareDataScan(&gdi->btree, index);
00635
00636 gdi->btree.searchMode = searchMode;
00637 gdi->btree.fullScan = searchMode;
00638
00639 gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
00640
00641 return gdi;
00642 }
00643
00644
00645
00646
00647 void
00648 ginInsertItemPointers(GinPostingTreeScan *gdi,
00649 ItemPointerData *items, uint32 nitem,
00650 GinStatsData *buildStats)
00651 {
00652 BlockNumber rootBlkno = gdi->stack->blkno;
00653
00654 gdi->btree.items = items;
00655 gdi->btree.nitem = nitem;
00656 gdi->btree.curitem = 0;
00657
00658 while (gdi->btree.curitem < gdi->btree.nitem)
00659 {
00660 if (!gdi->stack)
00661 gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
00662
00663 gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
00664
00665 if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
00666 {
00667
00668
00669
00670 gdi->btree.curitem++;
00671 LockBuffer(gdi->stack->buffer, GIN_UNLOCK);
00672 freeGinBtreeStack(gdi->stack);
00673 }
00674 else
00675 ginInsertValue(&(gdi->btree), gdi->stack, buildStats);
00676
00677 gdi->stack = NULL;
00678 }
00679 }
00680
00681 Buffer
00682 ginScanBeginPostingTree(GinPostingTreeScan *gdi)
00683 {
00684 gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
00685 return gdi->stack->buffer;
00686 }