00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "postgres.h"
00016
00017 #include "access/genam.h"
00018 #include "access/gist_private.h"
00019 #include "catalog/index.h"
00020 #include "miscadmin.h"
00021 #include "storage/buffile.h"
00022 #include "storage/bufmgr.h"
00023 #include "utils/memutils.h"
00024 #include "utils/rel.h"
00025
00026 static GISTNodeBufferPage *gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb);
00027 static void gistAddLoadedBuffer(GISTBuildBuffers *gfbb,
00028 GISTNodeBuffer *nodeBuffer);
00029 static void gistLoadNodeBuffer(GISTBuildBuffers *gfbb,
00030 GISTNodeBuffer *nodeBuffer);
00031 static void gistUnloadNodeBuffer(GISTBuildBuffers *gfbb,
00032 GISTNodeBuffer *nodeBuffer);
00033 static void gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer,
00034 IndexTuple item);
00035 static void gistGetItupFromPage(GISTNodeBufferPage *pageBuffer,
00036 IndexTuple *item);
00037 static long gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb);
00038 static void gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum);
00039
00040 static void ReadTempFileBlock(BufFile *file, long blknum, void *ptr);
00041 static void WriteTempFileBlock(BufFile *file, long blknum, void *ptr);
00042
00043
00044
00045
00046
00047 GISTBuildBuffers *
00048 gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel)
00049 {
00050 GISTBuildBuffers *gfbb;
00051 HASHCTL hashCtl;
00052
00053 gfbb = palloc(sizeof(GISTBuildBuffers));
00054 gfbb->pagesPerBuffer = pagesPerBuffer;
00055 gfbb->levelStep = levelStep;
00056
00057
00058
00059
00060
00061 gfbb->pfile = BufFileCreateTemp(false);
00062 gfbb->nFileBlocks = 0;
00063
00064
00065 gfbb->nFreeBlocks = 0;
00066 gfbb->freeBlocksLen = 32;
00067 gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long));
00068
00069
00070
00071
00072
00073 gfbb->context = CurrentMemoryContext;
00074
00075
00076
00077
00078
00079 hashCtl.keysize = sizeof(BlockNumber);
00080 hashCtl.entrysize = sizeof(GISTNodeBuffer);
00081 hashCtl.hcxt = CurrentMemoryContext;
00082 hashCtl.hash = tag_hash;
00083 hashCtl.match = memcmp;
00084 gfbb->nodeBuffersTab = hash_create("gistbuildbuffers",
00085 1024,
00086 &hashCtl,
00087 HASH_ELEM | HASH_CONTEXT
00088 | HASH_FUNCTION | HASH_COMPARE);
00089
00090 gfbb->bufferEmptyingQueue = NIL;
00091
00092
00093
00094
00095
00096 gfbb->buffersOnLevelsLen = 1;
00097 gfbb->buffersOnLevels = (List **) palloc(sizeof(List *) *
00098 gfbb->buffersOnLevelsLen);
00099 gfbb->buffersOnLevels[0] = NIL;
00100
00101
00102
00103
00104
00105 gfbb->loadedBuffersLen = 32;
00106 gfbb->loadedBuffers = (GISTNodeBuffer **) palloc(gfbb->loadedBuffersLen *
00107 sizeof(GISTNodeBuffer *));
00108 gfbb->loadedBuffersCount = 0;
00109
00110 gfbb->rootlevel = maxLevel;
00111
00112 return gfbb;
00113 }
00114
00115
00116
00117
00118
00119 GISTNodeBuffer *
00120 gistGetNodeBuffer(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
00121 BlockNumber nodeBlocknum, int level)
00122 {
00123 GISTNodeBuffer *nodeBuffer;
00124 bool found;
00125
00126
00127 nodeBuffer = (GISTNodeBuffer *) hash_search(gfbb->nodeBuffersTab,
00128 (const void *) &nodeBlocknum,
00129 HASH_ENTER,
00130 &found);
00131 if (!found)
00132 {
00133
00134
00135
00136 MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
00137
00138
00139 nodeBuffer->blocksCount = 0;
00140 nodeBuffer->pageBlocknum = InvalidBlockNumber;
00141 nodeBuffer->pageBuffer = NULL;
00142 nodeBuffer->queuedForEmptying = false;
00143 nodeBuffer->level = level;
00144
00145
00146
00147
00148
00149 if (level >= gfbb->buffersOnLevelsLen)
00150 {
00151 int i;
00152
00153 gfbb->buffersOnLevels =
00154 (List **) repalloc(gfbb->buffersOnLevels,
00155 (level + 1) * sizeof(List *));
00156
00157
00158 for (i = gfbb->buffersOnLevelsLen; i <= level; i++)
00159 gfbb->buffersOnLevels[i] = NIL;
00160 gfbb->buffersOnLevelsLen = level + 1;
00161 }
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174 gfbb->buffersOnLevels[level] = lcons(nodeBuffer,
00175 gfbb->buffersOnLevels[level]);
00176
00177 MemoryContextSwitchTo(oldcxt);
00178 }
00179
00180 return nodeBuffer;
00181 }
00182
00183
00184
00185
00186 static GISTNodeBufferPage *
00187 gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb)
00188 {
00189 GISTNodeBufferPage *pageBuffer;
00190
00191 pageBuffer = (GISTNodeBufferPage *) MemoryContextAlloc(gfbb->context,
00192 BLCKSZ);
00193 pageBuffer->prev = InvalidBlockNumber;
00194
00195
00196 PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - BUFFER_PAGE_DATA_OFFSET;
00197 return pageBuffer;
00198 }
00199
00200
00201
00202
00203 static void
00204 gistAddLoadedBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
00205 {
00206
00207 if (nodeBuffer->isTemp)
00208 return;
00209
00210
00211 if (gfbb->loadedBuffersCount >= gfbb->loadedBuffersLen)
00212 {
00213 gfbb->loadedBuffersLen *= 2;
00214 gfbb->loadedBuffers = (GISTNodeBuffer **)
00215 repalloc(gfbb->loadedBuffers,
00216 gfbb->loadedBuffersLen * sizeof(GISTNodeBuffer *));
00217 }
00218
00219 gfbb->loadedBuffers[gfbb->loadedBuffersCount] = nodeBuffer;
00220 gfbb->loadedBuffersCount++;
00221 }
00222
00223
00224
00225
00226 static void
00227 gistLoadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
00228 {
00229
00230 if (!nodeBuffer->pageBuffer && nodeBuffer->blocksCount > 0)
00231 {
00232
00233 nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
00234
00235
00236 ReadTempFileBlock(gfbb->pfile, nodeBuffer->pageBlocknum,
00237 nodeBuffer->pageBuffer);
00238
00239
00240 gistBuffersReleaseBlock(gfbb, nodeBuffer->pageBlocknum);
00241
00242
00243 gistAddLoadedBuffer(gfbb, nodeBuffer);
00244 nodeBuffer->pageBlocknum = InvalidBlockNumber;
00245 }
00246 }
00247
00248
00249
00250
00251 static void
00252 gistUnloadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
00253 {
00254
00255 if (nodeBuffer->pageBuffer)
00256 {
00257 BlockNumber blkno;
00258
00259
00260 blkno = gistBuffersGetFreeBlock(gfbb);
00261
00262
00263 WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
00264
00265
00266 pfree(nodeBuffer->pageBuffer);
00267 nodeBuffer->pageBuffer = NULL;
00268
00269
00270 nodeBuffer->pageBlocknum = blkno;
00271 }
00272 }
00273
00274
00275
00276
00277 void
00278 gistUnloadNodeBuffers(GISTBuildBuffers *gfbb)
00279 {
00280 int i;
00281
00282
00283 for (i = 0; i < gfbb->loadedBuffersCount; i++)
00284 gistUnloadNodeBuffer(gfbb, gfbb->loadedBuffers[i]);
00285
00286
00287 gfbb->loadedBuffersCount = 0;
00288 }
00289
00290
00291
00292
00293 static void
00294 gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, IndexTuple itup)
00295 {
00296 Size itupsz = IndexTupleSize(itup);
00297 char *ptr;
00298
00299
00300 Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(itupsz));
00301
00302
00303 PAGE_FREE_SPACE(pageBuffer) -= MAXALIGN(itupsz);
00304
00305
00306 ptr = (char *) pageBuffer + BUFFER_PAGE_DATA_OFFSET
00307 + PAGE_FREE_SPACE(pageBuffer);
00308
00309
00310 memcpy(ptr, itup, itupsz);
00311 }
00312
00313
00314
00315
00316 static void
00317 gistGetItupFromPage(GISTNodeBufferPage *pageBuffer, IndexTuple *itup)
00318 {
00319 IndexTuple ptr;
00320 Size itupsz;
00321
00322 Assert(!PAGE_IS_EMPTY(pageBuffer));
00323
00324
00325 ptr = (IndexTuple) ((char *) pageBuffer
00326 + BUFFER_PAGE_DATA_OFFSET
00327 + PAGE_FREE_SPACE(pageBuffer));
00328 itupsz = IndexTupleSize(ptr);
00329
00330
00331 *itup = (IndexTuple) palloc(itupsz);
00332 memcpy(*itup, ptr, itupsz);
00333
00334
00335 PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(itupsz);
00336 }
00337
00338
00339
00340
00341 void
00342 gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
00343 IndexTuple itup)
00344 {
00345
00346
00347
00348
00349 MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
00350
00351
00352
00353
00354 if (nodeBuffer->blocksCount == 0)
00355 {
00356 nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
00357 nodeBuffer->blocksCount = 1;
00358 gistAddLoadedBuffer(gfbb, nodeBuffer);
00359 }
00360
00361
00362 if (!nodeBuffer->pageBuffer)
00363 gistLoadNodeBuffer(gfbb, nodeBuffer);
00364
00365
00366
00367
00368 if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup))
00369 {
00370
00371
00372
00373 BlockNumber blkno;
00374
00375
00376 blkno = gistBuffersGetFreeBlock(gfbb);
00377 WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
00378
00379
00380
00381
00382
00383 PAGE_FREE_SPACE(nodeBuffer->pageBuffer) =
00384 BLCKSZ - MAXALIGN(offsetof(GISTNodeBufferPage, tupledata));
00385 nodeBuffer->pageBuffer->prev = blkno;
00386
00387
00388 nodeBuffer->blocksCount++;
00389 }
00390
00391 gistPlaceItupToPage(nodeBuffer->pageBuffer, itup);
00392
00393
00394
00395
00396 if (BUFFER_HALF_FILLED(nodeBuffer, gfbb) && !nodeBuffer->queuedForEmptying)
00397 {
00398 gfbb->bufferEmptyingQueue = lcons(nodeBuffer,
00399 gfbb->bufferEmptyingQueue);
00400 nodeBuffer->queuedForEmptying = true;
00401 }
00402
00403
00404 MemoryContextSwitchTo(oldcxt);
00405 }
00406
00407
00408
00409
00410
00411 bool
00412 gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
00413 IndexTuple *itup)
00414 {
00415
00416
00417
00418 if (nodeBuffer->blocksCount <= 0)
00419 return false;
00420
00421
00422 if (!nodeBuffer->pageBuffer)
00423 gistLoadNodeBuffer(gfbb, nodeBuffer);
00424
00425
00426
00427
00428 gistGetItupFromPage(nodeBuffer->pageBuffer, itup);
00429
00430
00431
00432
00433
00434 if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer))
00435 {
00436 BlockNumber prevblkno;
00437
00438
00439
00440
00441 nodeBuffer->blocksCount--;
00442
00443
00444
00445
00446 prevblkno = nodeBuffer->pageBuffer->prev;
00447 if (prevblkno != InvalidBlockNumber)
00448 {
00449
00450 Assert(nodeBuffer->blocksCount > 0);
00451 ReadTempFileBlock(gfbb->pfile, prevblkno, nodeBuffer->pageBuffer);
00452
00453
00454
00455
00456
00457 gistBuffersReleaseBlock(gfbb, prevblkno);
00458 }
00459 else
00460 {
00461
00462 Assert(nodeBuffer->blocksCount == 0);
00463 pfree(nodeBuffer->pageBuffer);
00464 nodeBuffer->pageBuffer = NULL;
00465 }
00466 }
00467 return true;
00468 }
00469
00470
00471
00472
00473 static long
00474 gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb)
00475 {
00476
00477
00478
00479
00480
00481 if (gfbb->nFreeBlocks > 0)
00482 return gfbb->freeBlocks[--gfbb->nFreeBlocks];
00483 else
00484 return gfbb->nFileBlocks++;
00485 }
00486
00487
00488
00489
00490 static void
00491 gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum)
00492 {
00493 int ndx;
00494
00495
00496 if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen)
00497 {
00498 gfbb->freeBlocksLen *= 2;
00499 gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks,
00500 gfbb->freeBlocksLen *
00501 sizeof(long));
00502 }
00503
00504
00505 ndx = gfbb->nFreeBlocks++;
00506 gfbb->freeBlocks[ndx] = blocknum;
00507 }
00508
00509
00510
00511
00512 void
00513 gistFreeBuildBuffers(GISTBuildBuffers *gfbb)
00514 {
00515
00516 BufFileClose(gfbb->pfile);
00517
00518
00519 }
00520
00521
00522
00523
00524
00525 typedef struct
00526 {
00527 GISTENTRY entry[INDEX_MAX_KEYS];
00528 bool isnull[INDEX_MAX_KEYS];
00529 GISTPageSplitInfo *splitinfo;
00530 GISTNodeBuffer *nodeBuffer;
00531 } RelocationBufferInfo;
00532
00533
00534
00535
00536
00537
00538 void
00539 gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
00540 Relation r, int level,
00541 Buffer buffer, List *splitinfo)
00542 {
00543 RelocationBufferInfo *relocationBuffersInfos;
00544 bool found;
00545 GISTNodeBuffer *nodeBuffer;
00546 BlockNumber blocknum;
00547 IndexTuple itup;
00548 int splitPagesCount = 0,
00549 i;
00550 GISTENTRY entry[INDEX_MAX_KEYS];
00551 bool isnull[INDEX_MAX_KEYS];
00552 GISTNodeBuffer oldBuf;
00553 ListCell *lc;
00554
00555
00556 if (!LEVEL_HAS_BUFFERS(level, gfbb))
00557 return;
00558
00559
00560
00561
00562 blocknum = BufferGetBlockNumber(buffer);
00563 nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum,
00564 HASH_FIND, &found);
00565 if (!found)
00566 {
00567
00568 return;
00569 }
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579 Assert(blocknum != GIST_ROOT_BLKNO);
00580 memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer));
00581 oldBuf.isTemp = true;
00582
00583
00584 nodeBuffer->blocksCount = 0;
00585 nodeBuffer->pageBuffer = NULL;
00586 nodeBuffer->pageBlocknum = InvalidBlockNumber;
00587
00588
00589
00590
00591 splitPagesCount = list_length(splitinfo);
00592 relocationBuffersInfos =
00593 (RelocationBufferInfo *) palloc(sizeof(RelocationBufferInfo) *
00594 splitPagesCount);
00595
00596
00597
00598
00599
00600 i = 0;
00601 foreach(lc, splitinfo)
00602 {
00603 GISTPageSplitInfo *si = (GISTPageSplitInfo *) lfirst(lc);
00604 GISTNodeBuffer *newNodeBuffer;
00605
00606
00607 gistDeCompressAtt(giststate, r,
00608 si->downlink, NULL, (OffsetNumber) 0,
00609 relocationBuffersInfos[i].entry,
00610 relocationBuffersInfos[i].isnull);
00611
00612
00613
00614
00615
00616
00617
00618
00619 newNodeBuffer = gistGetNodeBuffer(gfbb, giststate, BufferGetBlockNumber(si->buf), level);
00620
00621 relocationBuffersInfos[i].nodeBuffer = newNodeBuffer;
00622 relocationBuffersInfos[i].splitinfo = si;
00623
00624 i++;
00625 }
00626
00627
00628
00629
00630
00631
00632
00633
00634
00635
00636 while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
00637 {
00638 float best_penalty[INDEX_MAX_KEYS];
00639 int i,
00640 which;
00641 IndexTuple newtup;
00642 RelocationBufferInfo *targetBufferInfo;
00643
00644 gistDeCompressAtt(giststate, r,
00645 itup, NULL, (OffsetNumber) 0, entry, isnull);
00646
00647
00648 which = 0;
00649
00650
00651
00652
00653
00654
00655 best_penalty[0] = -1;
00656
00657
00658
00659
00660
00661 for (i = 0; i < splitPagesCount; i++)
00662 {
00663 RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
00664 bool zero_penalty;
00665 int j;
00666
00667 zero_penalty = true;
00668
00669
00670 for (j = 0; j < r->rd_att->natts; j++)
00671 {
00672 float usize;
00673
00674
00675 usize = gistpenalty(giststate, j,
00676 &splitPageInfo->entry[j],
00677 splitPageInfo->isnull[j],
00678 &entry[j], isnull[j]);
00679 if (usize > 0)
00680 zero_penalty = false;
00681
00682 if (best_penalty[j] < 0 || usize < best_penalty[j])
00683 {
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693 which = i;
00694 best_penalty[j] = usize;
00695
00696 if (j < r->rd_att->natts - 1)
00697 best_penalty[j + 1] = -1;
00698 }
00699 else if (best_penalty[j] == usize)
00700 {
00701
00702
00703
00704
00705
00706 }
00707 else
00708 {
00709
00710
00711
00712
00713
00714 zero_penalty = false;
00715 break;
00716 }
00717 }
00718
00719
00720
00721
00722
00723
00724 if (zero_penalty)
00725 break;
00726 }
00727
00728
00729 targetBufferInfo = &relocationBuffersInfos[which];
00730
00731
00732 gistPushItupToNodeBuffer(gfbb, targetBufferInfo->nodeBuffer, itup);
00733
00734
00735 newtup = gistgetadjusted(r, targetBufferInfo->splitinfo->downlink,
00736 itup, giststate);
00737 if (newtup)
00738 {
00739 gistDeCompressAtt(giststate, r,
00740 newtup, NULL, (OffsetNumber) 0,
00741 targetBufferInfo->entry,
00742 targetBufferInfo->isnull);
00743
00744 targetBufferInfo->splitinfo->downlink = newtup;
00745 }
00746 }
00747
00748 pfree(relocationBuffersInfos);
00749 }
00750
00751
00752
00753
00754
00755
00756
00757
00758 static void
00759 ReadTempFileBlock(BufFile *file, long blknum, void *ptr)
00760 {
00761 if (BufFileSeekBlock(file, blknum) != 0)
00762 elog(ERROR, "could not seek temporary file: %m");
00763 if (BufFileRead(file, ptr, BLCKSZ) != BLCKSZ)
00764 elog(ERROR, "could not read temporary file: %m");
00765 }
00766
00767 static void
00768 WriteTempFileBlock(BufFile *file, long blknum, void *ptr)
00769 {
00770 if (BufFileSeekBlock(file, blknum) != 0)
00771 elog(ERROR, "could not seek temporary file: %m");
00772 if (BufFileWrite(file, ptr, BLCKSZ) != BLCKSZ)
00773 {
00774
00775
00776
00777
00778 ereport(ERROR,
00779 (errcode_for_file_access(),
00780 errmsg("could not write block %ld of temporary file: %m",
00781 blknum)));
00782 }
00783 }