Header And Logo

PostgreSQL
| The world's most advanced open source database.

localbuf.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * localbuf.c
00004  *    local buffer manager. Fast buffer manager for temporary tables,
00005  *    which never need to be WAL-logged or checkpointed, etc.
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994-5, Regents of the University of California
00009  *
00010  *
00011  * IDENTIFICATION
00012  *    src/backend/storage/buffer/localbuf.c
00013  *
00014  *-------------------------------------------------------------------------
00015  */
00016 #include "postgres.h"
00017 
00018 #include "catalog/catalog.h"
00019 #include "common/relpath.h"
00020 #include "executor/instrument.h"
00021 #include "storage/buf_internals.h"
00022 #include "storage/bufmgr.h"
00023 #include "utils/guc.h"
00024 #include "utils/memutils.h"
00025 #include "utils/resowner_private.h"
00026 
00027 
00028 /*#define LBDEBUG*/
00029 
00030 /* entry for buffer lookup hashtable */
00031 typedef struct
00032 {
00033     BufferTag   key;            /* Tag of a disk page */
00034     int         id;             /* Associated local buffer's index */
00035 } LocalBufferLookupEnt;
00036 
/*
 * Block-pointer slot belonging to a local buffer header.  Local buffer
 * headers carry buf_id values -2, -3, ... (see InitLocalBuffers), so the
 * array index is -2 - buf_id.  The expansion is an lvalue, so it can be
 * assigned to as well as read.
 *
 * Note: this macro only works on local buffers, not shared ones!
 */
#define LocalBufHdrGetBlock(bufHdr) \
    LocalBufferBlockPointers[-2 - (bufHdr)->buf_id]
00040 
00041 int         NLocBuffer = 0;     /* until buffers are initialized */
00042 
00043 BufferDesc *LocalBufferDescriptors = NULL;
00044 Block      *LocalBufferBlockPointers = NULL;
00045 int32      *LocalRefCount = NULL;
00046 
00047 static int  nextFreeLocalBuf = 0;
00048 
00049 static HTAB *LocalBufHash = NULL;
00050 
00051 
00052 static void InitLocalBuffers(void);
00053 static Block GetLocalBufferStorage(void);
00054 
00055 
00056 /*
00057  * LocalPrefetchBuffer -
00058  *    initiate asynchronous read of a block of a relation
00059  *
00060  * Do PrefetchBuffer's work for temporary relations.
00061  * No-op if prefetching isn't compiled in.
00062  */
00063 void
00064 LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
00065                     BlockNumber blockNum)
00066 {
00067 #ifdef USE_PREFETCH
00068     BufferTag   newTag;         /* identity of requested block */
00069     LocalBufferLookupEnt *hresult;
00070 
00071     INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
00072 
00073     /* Initialize local buffers if first request in this session */
00074     if (LocalBufHash == NULL)
00075         InitLocalBuffers();
00076 
00077     /* See if the desired buffer already exists */
00078     hresult = (LocalBufferLookupEnt *)
00079         hash_search(LocalBufHash, (void *) &newTag, HASH_FIND, NULL);
00080 
00081     if (hresult)
00082     {
00083         /* Yes, so nothing to do */
00084         return;
00085     }
00086 
00087     /* Not in buffers, so initiate prefetch */
00088     smgrprefetch(smgr, forkNum, blockNum);
00089 #endif   /* USE_PREFETCH */
00090 }
00091 
00092 
00093 /*
00094  * LocalBufferAlloc -
00095  *    Find or create a local buffer for the given page of the given relation.
00096  *
00097  * API is similar to bufmgr.c's BufferAlloc, except that we do not need
00098  * to do any locking since this is all local.   Also, IO_IN_PROGRESS
00099  * does not get set.  Lastly, we support only default access strategy
00100  * (hence, usage_count is always advanced).
00101  */
00102 BufferDesc *
00103 LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
00104                  bool *foundPtr)
00105 {
00106     BufferTag   newTag;         /* identity of requested block */
00107     LocalBufferLookupEnt *hresult;
00108     BufferDesc *bufHdr;
00109     int         b;
00110     int         trycounter;
00111     bool        found;
00112 
00113     INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
00114 
00115     /* Initialize local buffers if first request in this session */
00116     if (LocalBufHash == NULL)
00117         InitLocalBuffers();
00118 
00119     /* See if the desired buffer already exists */
00120     hresult = (LocalBufferLookupEnt *)
00121         hash_search(LocalBufHash, (void *) &newTag, HASH_FIND, NULL);
00122 
00123     if (hresult)
00124     {
00125         b = hresult->id;
00126         bufHdr = &LocalBufferDescriptors[b];
00127         Assert(BUFFERTAGS_EQUAL(bufHdr->tag, newTag));
00128 #ifdef LBDEBUG
00129         fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
00130                 smgr->smgr_rnode.node.relNode, forkNum, blockNum, -b - 1);
00131 #endif
00132         /* this part is equivalent to PinBuffer for a shared buffer */
00133         if (LocalRefCount[b] == 0)
00134         {
00135             if (bufHdr->usage_count < BM_MAX_USAGE_COUNT)
00136                 bufHdr->usage_count++;
00137         }
00138         LocalRefCount[b]++;
00139         ResourceOwnerRememberBuffer(CurrentResourceOwner,
00140                                     BufferDescriptorGetBuffer(bufHdr));
00141         if (bufHdr->flags & BM_VALID)
00142             *foundPtr = TRUE;
00143         else
00144         {
00145             /* Previous read attempt must have failed; try again */
00146             *foundPtr = FALSE;
00147         }
00148         return bufHdr;
00149     }
00150 
00151 #ifdef LBDEBUG
00152     fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
00153             smgr->smgr_rnode.node.relNode, forkNum, blockNum,
00154             -nextFreeLocalBuf - 1);
00155 #endif
00156 
00157     /*
00158      * Need to get a new buffer.  We use a clock sweep algorithm (essentially
00159      * the same as what freelist.c does now...)
00160      */
00161     trycounter = NLocBuffer;
00162     for (;;)
00163     {
00164         b = nextFreeLocalBuf;
00165 
00166         if (++nextFreeLocalBuf >= NLocBuffer)
00167             nextFreeLocalBuf = 0;
00168 
00169         bufHdr = &LocalBufferDescriptors[b];
00170 
00171         if (LocalRefCount[b] == 0)
00172         {
00173             if (bufHdr->usage_count > 0)
00174             {
00175                 bufHdr->usage_count--;
00176                 trycounter = NLocBuffer;
00177             }
00178             else
00179             {
00180                 /* Found a usable buffer */
00181                 LocalRefCount[b]++;
00182                 ResourceOwnerRememberBuffer(CurrentResourceOwner,
00183                                           BufferDescriptorGetBuffer(bufHdr));
00184                 break;
00185             }
00186         }
00187         else if (--trycounter == 0)
00188             ereport(ERROR,
00189                     (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
00190                      errmsg("no empty local buffer available")));
00191     }
00192 
00193     /*
00194      * this buffer is not referenced but it might still be dirty. if that's
00195      * the case, write it out before reusing it!
00196      */
00197     if (bufHdr->flags & BM_DIRTY)
00198     {
00199         SMgrRelation    oreln;
00200         Page            localpage = (char *) LocalBufHdrGetBlock(bufHdr);
00201 
00202         /* Find smgr relation for buffer */
00203         oreln = smgropen(bufHdr->tag.rnode, MyBackendId);
00204 
00205         PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
00206 
00207         /* And write... */
00208         smgrwrite(oreln,
00209                   bufHdr->tag.forkNum,
00210                   bufHdr->tag.blockNum,
00211                   localpage,
00212                   false);
00213 
00214         /* Mark not-dirty now in case we error out below */
00215         bufHdr->flags &= ~BM_DIRTY;
00216 
00217         pgBufferUsage.local_blks_written++;
00218     }
00219 
00220     /*
00221      * lazy memory allocation: allocate space on first use of a buffer.
00222      */
00223     if (LocalBufHdrGetBlock(bufHdr) == NULL)
00224     {
00225         /* Set pointer for use by BufferGetBlock() macro */
00226         LocalBufHdrGetBlock(bufHdr) = GetLocalBufferStorage();
00227     }
00228 
00229     /*
00230      * Update the hash table: remove old entry, if any, and make new one.
00231      */
00232     if (bufHdr->flags & BM_TAG_VALID)
00233     {
00234         hresult = (LocalBufferLookupEnt *)
00235             hash_search(LocalBufHash, (void *) &bufHdr->tag,
00236                         HASH_REMOVE, NULL);
00237         if (!hresult)           /* shouldn't happen */
00238             elog(ERROR, "local buffer hash table corrupted");
00239         /* mark buffer invalid just in case hash insert fails */
00240         CLEAR_BUFFERTAG(bufHdr->tag);
00241         bufHdr->flags &= ~(BM_VALID | BM_TAG_VALID);
00242     }
00243 
00244     hresult = (LocalBufferLookupEnt *)
00245         hash_search(LocalBufHash, (void *) &newTag, HASH_ENTER, &found);
00246     if (found)                  /* shouldn't happen */
00247         elog(ERROR, "local buffer hash table corrupted");
00248     hresult->id = b;
00249 
00250     /*
00251      * it's all ours now.
00252      */
00253     bufHdr->tag = newTag;
00254     bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
00255     bufHdr->flags |= BM_TAG_VALID;
00256     bufHdr->usage_count = 1;
00257 
00258     *foundPtr = FALSE;
00259     return bufHdr;
00260 }
00261 
00262 /*
00263  * MarkLocalBufferDirty -
00264  *    mark a local buffer dirty
00265  */
00266 void
00267 MarkLocalBufferDirty(Buffer buffer)
00268 {
00269     int         bufid;
00270     BufferDesc *bufHdr;
00271 
00272     Assert(BufferIsLocal(buffer));
00273 
00274 #ifdef LBDEBUG
00275     fprintf(stderr, "LB DIRTY %d\n", buffer);
00276 #endif
00277 
00278     bufid = -(buffer + 1);
00279 
00280     Assert(LocalRefCount[bufid] > 0);
00281 
00282     bufHdr = &LocalBufferDescriptors[bufid];
00283 
00284     if (!(bufHdr->flags & BM_DIRTY))
00285         pgBufferUsage.local_blks_dirtied++;
00286 
00287     bufHdr->flags |= BM_DIRTY;
00288 }
00289 
00290 /*
00291  * DropRelFileNodeLocalBuffers
00292  *      This function removes from the buffer pool all the pages of the
00293  *      specified relation that have block numbers >= firstDelBlock.
00294  *      (In particular, with firstDelBlock = 0, all pages are removed.)
00295  *      Dirty pages are simply dropped, without bothering to write them
00296  *      out first.  Therefore, this is NOT rollback-able, and so should be
00297  *      used only with extreme caution!
00298  *
00299  *      See DropRelFileNodeBuffers in bufmgr.c for more notes.
00300  */
00301 void
00302 DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
00303                             BlockNumber firstDelBlock)
00304 {
00305     int         i;
00306 
00307     for (i = 0; i < NLocBuffer; i++)
00308     {
00309         BufferDesc *bufHdr = &LocalBufferDescriptors[i];
00310         LocalBufferLookupEnt *hresult;
00311 
00312         if ((bufHdr->flags & BM_TAG_VALID) &&
00313             RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
00314             bufHdr->tag.forkNum == forkNum &&
00315             bufHdr->tag.blockNum >= firstDelBlock)
00316         {
00317             if (LocalRefCount[i] != 0)
00318                 elog(ERROR, "block %u of %s is still referenced (local %u)",
00319                      bufHdr->tag.blockNum,
00320                      relpathbackend(bufHdr->tag.rnode, MyBackendId,
00321                                     bufHdr->tag.forkNum),
00322                      LocalRefCount[i]);
00323             /* Remove entry from hashtable */
00324             hresult = (LocalBufferLookupEnt *)
00325                 hash_search(LocalBufHash, (void *) &bufHdr->tag,
00326                             HASH_REMOVE, NULL);
00327             if (!hresult)       /* shouldn't happen */
00328                 elog(ERROR, "local buffer hash table corrupted");
00329             /* Mark buffer invalid */
00330             CLEAR_BUFFERTAG(bufHdr->tag);
00331             bufHdr->flags = 0;
00332             bufHdr->usage_count = 0;
00333         }
00334     }
00335 }
00336 
00337 /*
00338  * DropRelFileNodeAllLocalBuffers
00339  *      This function removes from the buffer pool all pages of all forks
00340  *      of the specified relation.
00341  *
00342  *      See DropRelFileNodeAllBuffers in bufmgr.c for more notes.
00343  */
00344 void
00345 DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
00346 {
00347     int         i;
00348 
00349     for (i = 0; i < NLocBuffer; i++)
00350     {
00351         BufferDesc *bufHdr = &LocalBufferDescriptors[i];
00352         LocalBufferLookupEnt *hresult;
00353 
00354         if ((bufHdr->flags & BM_TAG_VALID) &&
00355             RelFileNodeEquals(bufHdr->tag.rnode, rnode))
00356         {
00357             if (LocalRefCount[i] != 0)
00358                 elog(ERROR, "block %u of %s is still referenced (local %u)",
00359                      bufHdr->tag.blockNum,
00360                      relpathbackend(bufHdr->tag.rnode, MyBackendId,
00361                                     bufHdr->tag.forkNum),
00362                      LocalRefCount[i]);
00363             /* Remove entry from hashtable */
00364             hresult = (LocalBufferLookupEnt *)
00365                 hash_search(LocalBufHash, (void *) &bufHdr->tag,
00366                             HASH_REMOVE, NULL);
00367             if (!hresult)       /* shouldn't happen */
00368                 elog(ERROR, "local buffer hash table corrupted");
00369             /* Mark buffer invalid */
00370             CLEAR_BUFFERTAG(bufHdr->tag);
00371             bufHdr->flags = 0;
00372             bufHdr->usage_count = 0;
00373         }
00374     }
00375 }
00376 
00377 /*
00378  * InitLocalBuffers -
00379  *    init the local buffer cache. Since most queries (esp. multi-user ones)
00380  *    don't involve local buffers, we delay allocating actual memory for the
00381  *    buffers until we need them; just make the buffer headers here.
00382  */
00383 static void
00384 InitLocalBuffers(void)
00385 {
00386     int         nbufs = num_temp_buffers;
00387     HASHCTL     info;
00388     int         i;
00389 
00390     /* Allocate and zero buffer headers and auxiliary arrays */
00391     LocalBufferDescriptors = (BufferDesc *) calloc(nbufs, sizeof(BufferDesc));
00392     LocalBufferBlockPointers = (Block *) calloc(nbufs, sizeof(Block));
00393     LocalRefCount = (int32 *) calloc(nbufs, sizeof(int32));
00394     if (!LocalBufferDescriptors || !LocalBufferBlockPointers || !LocalRefCount)
00395         ereport(FATAL,
00396                 (errcode(ERRCODE_OUT_OF_MEMORY),
00397                  errmsg("out of memory")));
00398 
00399     nextFreeLocalBuf = 0;
00400 
00401     /* initialize fields that need to start off nonzero */
00402     for (i = 0; i < nbufs; i++)
00403     {
00404         BufferDesc *buf = &LocalBufferDescriptors[i];
00405 
00406         /*
00407          * negative to indicate local buffer. This is tricky: shared buffers
00408          * start with 0. We have to start with -2. (Note that the routine
00409          * BufferDescriptorGetBuffer adds 1 to buf_id so our first buffer id
00410          * is -1.)
00411          */
00412         buf->buf_id = -i - 2;
00413     }
00414 
00415     /* Create the lookup hash table */
00416     MemSet(&info, 0, sizeof(info));
00417     info.keysize = sizeof(BufferTag);
00418     info.entrysize = sizeof(LocalBufferLookupEnt);
00419     info.hash = tag_hash;
00420 
00421     LocalBufHash = hash_create("Local Buffer Lookup Table",
00422                                nbufs,
00423                                &info,
00424                                HASH_ELEM | HASH_FUNCTION);
00425 
00426     if (!LocalBufHash)
00427         elog(ERROR, "could not initialize local buffer hash table");
00428 
00429     /* Initialization done, mark buffers allocated */
00430     NLocBuffer = nbufs;
00431 }
00432 
00433 /*
00434  * GetLocalBufferStorage - allocate memory for a local buffer
00435  *
00436  * The idea of this function is to aggregate our requests for storage
00437  * so that the memory manager doesn't see a whole lot of relatively small
00438  * requests.  Since we'll never give back a local buffer once it's created
00439  * within a particular process, no point in burdening memmgr with separately
00440  * managed chunks.
00441  */
00442 static Block
00443 GetLocalBufferStorage(void)
00444 {
00445     static char *cur_block = NULL;
00446     static int  next_buf_in_block = 0;
00447     static int  num_bufs_in_block = 0;
00448     static int  total_bufs_allocated = 0;
00449     static MemoryContext LocalBufferContext = NULL;
00450 
00451     char       *this_buf;
00452 
00453     Assert(total_bufs_allocated < NLocBuffer);
00454 
00455     if (next_buf_in_block >= num_bufs_in_block)
00456     {
00457         /* Need to make a new request to memmgr */
00458         int         num_bufs;
00459 
00460         /*
00461          * We allocate local buffers in a context of their own, so that the
00462          * space eaten for them is easily recognizable in MemoryContextStats
00463          * output.  Create the context on first use.
00464          */
00465         if (LocalBufferContext == NULL)
00466             LocalBufferContext =
00467                 AllocSetContextCreate(TopMemoryContext,
00468                                       "LocalBufferContext",
00469                                       ALLOCSET_DEFAULT_MINSIZE,
00470                                       ALLOCSET_DEFAULT_INITSIZE,
00471                                       ALLOCSET_DEFAULT_MAXSIZE);
00472 
00473         /* Start with a 16-buffer request; subsequent ones double each time */
00474         num_bufs = Max(num_bufs_in_block * 2, 16);
00475         /* But not more than what we need for all remaining local bufs */
00476         num_bufs = Min(num_bufs, NLocBuffer - total_bufs_allocated);
00477         /* And don't overflow MaxAllocSize, either */
00478         num_bufs = Min(num_bufs, MaxAllocSize / BLCKSZ);
00479 
00480         cur_block = (char *) MemoryContextAlloc(LocalBufferContext,
00481                                                 num_bufs * BLCKSZ);
00482         next_buf_in_block = 0;
00483         num_bufs_in_block = num_bufs;
00484     }
00485 
00486     /* Allocate next buffer in current memory block */
00487     this_buf = cur_block + next_buf_in_block * BLCKSZ;
00488     next_buf_in_block++;
00489     total_bufs_allocated++;
00490 
00491     return (Block) this_buf;
00492 }
00493 
00494 /*
00495  * AtEOXact_LocalBuffers - clean up at end of transaction.
00496  *
00497  * This is just like AtEOXact_Buffers, but for local buffers.
00498  */
00499 void
00500 AtEOXact_LocalBuffers(bool isCommit)
00501 {
00502 #ifdef USE_ASSERT_CHECKING
00503     if (assert_enabled && LocalRefCount)
00504     {
00505         int         RefCountErrors = 0;
00506         int         i;
00507 
00508         for (i = 0; i < NLocBuffer; i++)
00509         {
00510             if (LocalRefCount[i] != 0)
00511             {
00512                 Buffer  b = -i - 1;
00513 
00514                 PrintBufferLeakWarning(b);
00515                 RefCountErrors++;
00516             }
00517         }
00518         Assert(RefCountErrors == 0);
00519     }
00520 #endif
00521 }
00522 
00523 /*
00524  * AtProcExit_LocalBuffers - ensure we have dropped pins during backend exit.
00525  *
00526  * This is just like AtProcExit_Buffers, but for local buffers.  We shouldn't
00527  * be holding any remaining pins; if we are, and assertions aren't enabled,
00528  * we'll fail later in DropRelFileNodeBuffers while trying to drop the temp
00529  * rels.
00530  */
00531 void
00532 AtProcExit_LocalBuffers(void)
00533 {
00534 #ifdef USE_ASSERT_CHECKING
00535     if (assert_enabled && LocalRefCount)
00536     {
00537         int         RefCountErrors = 0;
00538         int         i;
00539 
00540         for (i = 0; i < NLocBuffer; i++)
00541         {
00542             if (LocalRefCount[i] != 0)
00543             {
00544                 Buffer  b = -i - 1;
00545 
00546                 PrintBufferLeakWarning(b);
00547                 RefCountErrors++;
00548             }
00549         }
00550         Assert(RefCountErrors == 0);
00551     }
00552 #endif
00553 }