/*-------------------------------------------------------------------------
 *
 * visibilitymap.c
 *	  bitmap for tracking visibility of heap tuples
 *
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/heap/visibilitymap.c
 *
 * INTERFACE ROUTINES
 *		visibilitymap_clear  - clear a bit in the visibility map
 *		visibilitymap_pin	 - pin a map page for setting a bit
 *		visibilitymap_pin_ok - check whether correct map page is already pinned
 *		visibilitymap_set	 - set a bit in a previously pinned page
 *		visibilitymap_test	 - test if a bit is set
 *		visibilitymap_count  - count number of bits set in visibility map
 *		visibilitymap_truncate	- truncate the visibility map
 *
 * NOTES
 *
 * The visibility map is a bitmap with one bit per heap page. A set bit means
 * that all tuples on the page are known visible to all transactions, and
 * therefore the page doesn't need to be vacuumed. The map is conservative in
 * the sense that we make sure that whenever a bit is set, we know the
 * condition is true, but if a bit is not set, it might or might not be true.
 *
 * Clearing a visibility map bit is not separately WAL-logged. The callers
 * must make sure that whenever a bit is cleared, the bit is cleared on WAL
 * replay of the updating operation as well.
 *
 * When we *set* a visibility map bit during VACUUM, we must write WAL. This
 * may seem counterintuitive, since the bit is basically a hint: if it is
 * clear, it may still be the case that every tuple on the page is visible to
 * all transactions; we just don't know that for certain. The difficulty is
 * that there are two bits which are typically set together: the
 * PD_ALL_VISIBLE bit on the page itself, and the visibility map bit. If a
 * crash occurs after the visibility map page makes it to disk and before the
 * updated heap page makes it to disk, redo must set the bit on the heap page.
 * Otherwise, the next insert, update, or delete on the heap page will fail to
 * realize that the visibility map bit must be cleared, possibly causing
 * index-only scans to return wrong answers.
 *
 * VACUUM will normally skip pages for which the visibility map bit is set;
 * such pages can't contain any dead tuples and therefore don't need
 * vacuuming. The visibility map is not used for anti-wraparound vacuums,
 * because an anti-wraparound vacuum needs to freeze tuples and observe the
 * latest xid present in the table, even on pages that don't have any dead
 * tuples.
 *
 * LOCKING
 *
 * In heapam.c, whenever a page is modified so that not all tuples on the
 * page are visible to everyone anymore, the corresponding bit in the
 * visibility map is cleared. In order to be crash-safe, we need to do this
 * while still holding a lock on the heap page and in the same critical
 * section that logs the page modification. However, we don't want to hold
 * the buffer lock over any I/O that may be required to read in the
 * visibility map page. To avoid this, we examine the heap page before
 * locking it; if the page-level PD_ALL_VISIBLE bit is set, we pin the
 * visibility map page. Then, we lock the buffer. But this creates a race
 * condition: there is a possibility that in the time it takes to lock the
 * buffer, the PD_ALL_VISIBLE bit gets set. If that happens, we have to
 * unlock the buffer, pin the visibility map page, and relock the buffer.
 * This shouldn't happen often, because only VACUUM currently sets visibility
 * map bits, and the race will only occur if VACUUM processes a given page at
 * almost exactly the same time that someone tries to further modify it.
 *
 * To set a bit, you need to hold a lock on the heap page. That prevents
 * the race condition where VACUUM sees that all tuples on the page are
 * visible to everyone, but another backend modifies the page before VACUUM
 * sets the bit in the visibility map.
 *
 * When a bit is set, the LSN of the visibility map page is updated to make
 * sure that the visibility map update doesn't get written to disk before the
 * WAL record of the changes that made it possible to set the bit is flushed.
 * But when a bit is cleared, we don't have to do that because it's always
 * safe to clear a bit in the map from a correctness point of view.
 *
 *-------------------------------------------------------------------------
 */
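
/*
 * Illustrative clearing-side call sequence, following the LOCKING notes
 * above (a simplified sketch, not the exact code in heapam.c; "relation",
 * "buffer", "blkno" and "vmbuffer" are hypothetical local variables):
 *
 *		vmbuffer = InvalidBuffer;
 *		if (PageIsAllVisible(BufferGetPage(buffer)))
 *			visibilitymap_pin(relation, blkno, &vmbuffer);	(may do I/O)
 *		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 *		(if PD_ALL_VISIBLE became set meanwhile: unlock, pin, relock)
 *		START_CRIT_SECTION();
 *		(modify the heap page and emit WAL for that modification)
 *		PageClearAllVisible(BufferGetPage(buffer));
 *		MarkBufferDirty(buffer);
 *		visibilitymap_clear(relation, blkno, vmbuffer);
 *		END_CRIT_SECTION();
 */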

#include "postgres.h"

#include "access/heapam_xlog.h"
#include "access/visibilitymap.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/inval.h"


/*#define TRACE_VISIBILITYMAP */

/*
 * Size of the bitmap on each visibility map page, in bytes. There are no
 * extra headers, so the whole page minus the standard page header is
 * used for the bitmap.
 */
#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))

/* Number of bits allocated for each heap block. */
#define BITS_PER_HEAPBLOCK 1

/* Number of heap blocks we can represent in one byte. */
#define HEAPBLOCKS_PER_BYTE 8

/* Number of heap blocks we can represent in one visibility map page. */
#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)

/* Mapping from heap block number to the right bit in the visibility map */
#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
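
/*
 * Worked example of the mapping (a sketch of the arithmetic, assuming the
 * default 8 kB BLCKSZ and a 24-byte MAXALIGN'd page header; other build
 * options change the numbers): MAPSIZE is then 8192 - 24 = 8168 bytes, so
 * HEAPBLOCKS_PER_PAGE is 8168 * 8 = 65344. Heap block 100000 therefore maps
 * to map block 100000 / 65344 = 1, byte (100000 % 65344) / 8 = 4332, and
 * bit 100000 % 8 = 0 within that byte.
 */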

/* table for fast counting of set bits */
static const uint8 number_of_ones[256] = {
	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};

/* prototypes for internal routines */
static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
static void vm_extend(Relation rel, BlockNumber nvmblocks);


/*
 * visibilitymap_clear - clear a bit in visibility map
 *
 * You must pass a buffer containing the correct map page to this function.
 * Call visibilitymap_pin first to pin the right one. This function doesn't
 * do any I/O.
 */
void
visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
{
	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
	int			mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
	int			mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
	uint8		mask = 1 << mapBit;
	char	   *map;

#ifdef TRACE_VISIBILITYMAP
	elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk);
#endif

	if (!BufferIsValid(buf) || BufferGetBlockNumber(buf) != mapBlock)
		elog(ERROR, "wrong buffer passed to visibilitymap_clear");

	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
	map = PageGetContents(BufferGetPage(buf));

	if (map[mapByte] & mask)
	{
		map[mapByte] &= ~mask;

		MarkBufferDirty(buf);
	}

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}

/*
 * visibilitymap_pin - pin a map page for setting a bit
 *
 * Setting a bit in the visibility map is a two-phase operation. First, call
 * visibilitymap_pin, to pin the visibility map page containing the bit for
 * the heap page. Because that can require I/O to read the map page, you
 * shouldn't hold a lock on the heap page while doing that. Then, call
 * visibilitymap_set to actually set the bit.
 *
 * On entry, *buf should be InvalidBuffer or a valid buffer returned by
 * an earlier call to visibilitymap_pin or visibilitymap_test on the same
 * relation. On return, *buf is a valid buffer with the map page containing
 * the bit for heapBlk.
 *
 * If the page doesn't exist in the map file yet, it is extended.
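 *
 * Illustrative setting-side call sequence (a sketch only, not the exact
 * code of any real caller; "relation", "buffer", "blkno", "vmbuffer" and
 * "visibility_cutoff_xid" are hypothetical variables):
 *
 *		visibilitymap_pin(relation, blkno, &vmbuffer);		(may do I/O)
 *		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 *		(check that every tuple on the page is visible to all transactions)
 *		PageSetAllVisible(BufferGetPage(buffer));
 *		MarkBufferDirty(buffer);
 *		visibilitymap_set(relation, blkno, buffer, InvalidXLogRecPtr,
 *						  vmbuffer, visibility_cutoff_xid);
 *		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 *		ReleaseBuffer(vmbuffer);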
 */
void
visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
{
	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);

	/* Reuse the old pinned buffer if possible */
	if (BufferIsValid(*buf))
	{
		if (BufferGetBlockNumber(*buf) == mapBlock)
			return;

		ReleaseBuffer(*buf);
	}
	*buf = vm_readbuf(rel, mapBlock, true);
}

/*
 * visibilitymap_pin_ok - do we already have the correct page pinned?
 *
 * On entry, buf should be InvalidBuffer or a valid buffer returned by
 * an earlier call to visibilitymap_pin or visibilitymap_test on the same
 * relation. The return value indicates whether the buffer covers the
 * given heapBlk.
 */
bool
visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
{
	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);

	return BufferIsValid(buf) && BufferGetBlockNumber(buf) == mapBlock;
}

/*
 * visibilitymap_set - set a bit on a previously pinned page
 *
 * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
 * or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
 * one provided; in normal running, we generate a new XLOG record and set the
 * page LSN to that value. cutoff_xid is the largest xmin on the page being
 * marked all-visible; it is needed for Hot Standby, and can be
 * InvalidTransactionId if the page contains no tuples.
 *
 * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
 * this function. Except in recovery, caller should also pass the heap
 * buffer. When checksums are enabled and we're not in recovery, we must add
 * the heap buffer to the WAL chain to protect it from being torn.
 *
 * You must pass a buffer containing the correct map page to this function.
 * Call visibilitymap_pin first to pin the right one. This function doesn't do
 * any I/O.
 */
void
visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
				  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid)
{
	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
	uint32		mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
	uint8		mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
	Page		page;
	char	   *map;

#ifdef TRACE_VISIBILITYMAP
	elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
#endif

	Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
	Assert(InRecovery || BufferIsValid(heapBuf));

	/* Check that we have the right heap page pinned, if present */
	if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk)
		elog(ERROR, "wrong heap buffer passed to visibilitymap_set");

	/* Check that we have the right VM page pinned */
	if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
		elog(ERROR, "wrong VM buffer passed to visibilitymap_set");

	page = BufferGetPage(vmBuf);
	map = PageGetContents(page);
	LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);

	if (!(map[mapByte] & (1 << mapBit)))
	{
		START_CRIT_SECTION();

		map[mapByte] |= (1 << mapBit);
		MarkBufferDirty(vmBuf);

		if (RelationNeedsWAL(rel))
		{
			if (XLogRecPtrIsInvalid(recptr))
			{
				Assert(!InRecovery);
				recptr = log_heap_visible(rel->rd_node, heapBuf, vmBuf,
										  cutoff_xid);

				/*
				 * If data checksums are enabled, we need to protect the heap
				 * page from being torn.
				 */
				if (DataChecksumsEnabled())
				{
					Page		heapPage = BufferGetPage(heapBuf);

					/* caller is expected to set PD_ALL_VISIBLE first */
					Assert(PageIsAllVisible(heapPage));
					PageSetLSN(heapPage, recptr);
				}
			}
			PageSetLSN(page, recptr);
		}

		END_CRIT_SECTION();
	}

	LockBuffer(vmBuf, BUFFER_LOCK_UNLOCK);
}

/*
 * visibilitymap_test - test if a bit is set
 *
 * Are all tuples on heapBlk visible to all, according to the visibility map?
 *
 * On entry, *buf should be InvalidBuffer or a valid buffer returned by an
 * earlier call to visibilitymap_pin or visibilitymap_test on the same
 * relation. On return, *buf is a valid buffer with the map page containing
 * the bit for heapBlk, or InvalidBuffer. The caller is responsible for
 * releasing *buf after it's done testing and setting bits.
 *
 * NOTE: This function is typically called without a lock on the heap page,
 * so somebody else could change the bit just after we look at it. In fact,
 * since we don't lock the visibility map page either, it's even possible
 * that someone else could have changed the bit just before we look at it,
 * and yet we might still see the old value. It is the caller's
 * responsibility to deal with all concurrency issues!
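 *
 * Illustrative use by an index-only-scan-style caller (a sketch only; the
 * real executor code differs in detail, and "tid" and "vmbuffer" are
 * hypothetical variables):
 *
 *		if (visibilitymap_test(rel, ItemPointerGetBlockNumber(tid), &vmbuffer))
 *			(all tuples on that heap page are visible; skip the heap fetch)
 *		else
 *			(fetch the heap tuple and check its visibility the usual way)
 *		...
 *		if (BufferIsValid(vmbuffer))
 *			ReleaseBuffer(vmbuffer);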
 */
bool
visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
{
	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
	uint32		mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
	uint8		mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
	bool		result;
	char	   *map;

#ifdef TRACE_VISIBILITYMAP
	elog(DEBUG1, "vm_test %s %d", RelationGetRelationName(rel), heapBlk);
#endif

	/* Reuse the old pinned buffer if possible */
	if (BufferIsValid(*buf))
	{
		if (BufferGetBlockNumber(*buf) != mapBlock)
		{
			ReleaseBuffer(*buf);
			*buf = InvalidBuffer;
		}
	}

	if (!BufferIsValid(*buf))
	{
		*buf = vm_readbuf(rel, mapBlock, false);
		if (!BufferIsValid(*buf))
			return false;
	}

	map = PageGetContents(BufferGetPage(*buf));

	/*
	 * A single-bit read is atomic. There could be memory-ordering effects
	 * here, but for performance reasons we make it the caller's job to worry
	 * about that.
	 */
	result = (map[mapByte] & (1 << mapBit)) ? true : false;

	return result;
}

/*
 * visibilitymap_count - count number of bits set in visibility map
 *
 * Note: we ignore the possibility of race conditions when the table is being
 * extended concurrently with the call. New pages added to the table aren't
 * going to be marked all-visible, so they won't affect the result.
 */
BlockNumber
visibilitymap_count(Relation rel)
{
	BlockNumber result = 0;
	BlockNumber mapBlock;

	for (mapBlock = 0;; mapBlock++)
	{
		Buffer		mapBuffer;
		unsigned char *map;
		int			i;

		/*
		 * Read till we fall off the end of the map. We assume that any extra
		 * bytes in the last page are zeroed, so we don't bother excluding
		 * them from the count.
		 */
		mapBuffer = vm_readbuf(rel, mapBlock, false);
		if (!BufferIsValid(mapBuffer))
			break;

		/*
		 * We choose not to lock the page, since the result is going to be
		 * immediately stale anyway if anyone is concurrently setting or
		 * clearing bits, and we only really need an approximate value.
		 */
		map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer));

		for (i = 0; i < MAPSIZE; i++)
		{
			result += number_of_ones[map[i]];
		}

		ReleaseBuffer(mapBuffer);
	}

	return result;
}

/*
 * visibilitymap_truncate - truncate the visibility map
 *
 * The caller must hold AccessExclusiveLock on the relation, to ensure that
 * other backends receive the smgr invalidation event that this function sends
 * before they access the VM again.
 *
 * nheapblocks is the new size of the heap.
 */
void
visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
{
	BlockNumber newnblocks;

	/* last remaining block, byte, and bit */
	BlockNumber truncBlock = HEAPBLK_TO_MAPBLOCK(nheapblocks);
	uint32		truncByte = HEAPBLK_TO_MAPBYTE(nheapblocks);
	uint8		truncBit = HEAPBLK_TO_MAPBIT(nheapblocks);

#ifdef TRACE_VISIBILITYMAP
	elog(DEBUG1, "vm_truncate %s %d", RelationGetRelationName(rel), nheapblocks);
#endif

	RelationOpenSmgr(rel);

	/*
	 * If no visibility map has been created yet for this relation, there's
	 * nothing to truncate.
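	 *
	 * Otherwise, as a worked example of the block/byte/bit arithmetic above
	 * (assuming the default HEAPBLOCKS_PER_PAGE of 65344): truncating the
	 * heap to nheapblocks = 1000 gives truncBlock = 0, truncByte = 125 and
	 * truncBit = 0, so we keep map block 0, zero out bytes 126 .. MAPSIZE - 1,
	 * and clear byte 125 entirely, dropping the bits for heap blocks
	 * 1000 .. 1007.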
	 */
	if (!smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM))
		return;

	/*
	 * Unless the new size is exactly at a visibility map page boundary, the
	 * tail bits in the last remaining map page, representing truncated heap
	 * blocks, need to be cleared. This is not only tidy, but also necessary
	 * because we don't get a chance to clear the bits if the heap is extended
	 * again.
	 */
	if (truncByte != 0 || truncBit != 0)
	{
		Buffer		mapBuffer;
		Page		page;
		char	   *map;

		newnblocks = truncBlock + 1;

		mapBuffer = vm_readbuf(rel, truncBlock, false);
		if (!BufferIsValid(mapBuffer))
		{
			/* nothing to do, the file was already smaller */
			return;
		}

		page = BufferGetPage(mapBuffer);
		map = PageGetContents(page);

		LockBuffer(mapBuffer, BUFFER_LOCK_EXCLUSIVE);

		/* Clear out the unwanted bytes. */
		MemSet(&map[truncByte + 1], 0, MAPSIZE - (truncByte + 1));

		/*
		 * Mask out the unwanted bits of the last remaining byte.
		 *
		 * ((1 << 0) - 1) = 00000000
		 * ((1 << 1) - 1) = 00000001
		 * ...
		 * ((1 << 6) - 1) = 00111111
		 * ((1 << 7) - 1) = 01111111
		 */
		map[truncByte] &= (1 << truncBit) - 1;

		MarkBufferDirty(mapBuffer);
		UnlockReleaseBuffer(mapBuffer);
	}
	else
		newnblocks = truncBlock;

	if (smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM) <= newnblocks)
	{
		/* nothing to do, the file was already smaller than requested size */
		return;
	}

	/* Truncate the unused VM pages, and send smgr inval message */
	smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks);

	/*
	 * We might as well update the local smgr_vm_nblocks setting. smgrtruncate
	 * sent an smgr cache inval message, which will cause other backends to
	 * invalidate their copy of smgr_vm_nblocks, and this one too at the next
	 * command boundary. But this ensures it isn't outright wrong until then.
	 */
	if (rel->rd_smgr)
		rel->rd_smgr->smgr_vm_nblocks = newnblocks;
}

/*
 * Read a visibility map page.
 *
 * If the page doesn't exist, InvalidBuffer is returned, unless 'extend' is
 * true, in which case the visibility map file is extended first.
 */
static Buffer
vm_readbuf(Relation rel, BlockNumber blkno, bool extend)
{
	Buffer		buf;

	/*
	 * We might not have opened the relation at the smgr level yet, or we
	 * might have been forced to close it by a sinval message. The code below
	 * won't necessarily notice relation extension immediately when extend =
	 * false, so we rely on sinval messages to ensure that our ideas about the
	 * size of the map aren't too far out of date.
	 */
	RelationOpenSmgr(rel);

	/*
	 * If we haven't cached the size of the visibility map fork yet, check it
	 * first.
00535 */ 00536 if (rel->rd_smgr->smgr_vm_nblocks == InvalidBlockNumber) 00537 { 00538 if (smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM)) 00539 rel->rd_smgr->smgr_vm_nblocks = smgrnblocks(rel->rd_smgr, 00540 VISIBILITYMAP_FORKNUM); 00541 else 00542 rel->rd_smgr->smgr_vm_nblocks = 0; 00543 } 00544 00545 /* Handle requests beyond EOF */ 00546 if (blkno >= rel->rd_smgr->smgr_vm_nblocks) 00547 { 00548 if (extend) 00549 vm_extend(rel, blkno + 1); 00550 else 00551 return InvalidBuffer; 00552 } 00553 00554 /* 00555 * Use ZERO_ON_ERROR mode, and initialize the page if necessary. It's 00556 * always safe to clear bits, so it's better to clear corrupt pages than 00557 * error out. 00558 */ 00559 buf = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, blkno, 00560 RBM_ZERO_ON_ERROR, NULL); 00561 if (PageIsNew(BufferGetPage(buf))) 00562 PageInit(BufferGetPage(buf), BLCKSZ, 0); 00563 return buf; 00564 } 00565 00566 /* 00567 * Ensure that the visibility map fork is at least vm_nblocks long, extending 00568 * it if necessary with zeroed pages. 00569 */ 00570 static void 00571 vm_extend(Relation rel, BlockNumber vm_nblocks) 00572 { 00573 BlockNumber vm_nblocks_now; 00574 Page pg; 00575 00576 pg = (Page) palloc(BLCKSZ); 00577 PageInit(pg, BLCKSZ, 0); 00578 00579 /* 00580 * We use the relation extension lock to lock out other backends trying to 00581 * extend the visibility map at the same time. It also locks out extension 00582 * of the main fork, unnecessarily, but extending the visibility map 00583 * happens seldom enough that it doesn't seem worthwhile to have a 00584 * separate lock tag type for it. 00585 * 00586 * Note that another backend might have extended or created the relation 00587 * by the time we get the lock. 00588 */ 00589 LockRelationForExtension(rel, ExclusiveLock); 00590 00591 /* Might have to re-open if a cache flush happened */ 00592 RelationOpenSmgr(rel); 00593 00594 /* 00595 * Create the file first if it doesn't exist. If smgr_vm_nblocks is 00596 * positive then it must exist, no need for an smgrexists call. 00597 */ 00598 if ((rel->rd_smgr->smgr_vm_nblocks == 0 || 00599 rel->rd_smgr->smgr_vm_nblocks == InvalidBlockNumber) && 00600 !smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM)) 00601 smgrcreate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, false); 00602 00603 vm_nblocks_now = smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM); 00604 00605 /* Now extend the file */ 00606 while (vm_nblocks_now < vm_nblocks) 00607 { 00608 PageSetChecksumInplace(pg, vm_nblocks_now); 00609 00610 smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now, 00611 (char *) pg, false); 00612 vm_nblocks_now++; 00613 } 00614 00615 /* 00616 * Send a shared-inval message to force other backends to close any smgr 00617 * references they may have for this rel, which we are about to change. 00618 * This is a useful optimization because it means that backends don't have 00619 * to keep checking for creation or extension of the file, which happens 00620 * infrequently. 00621 */ 00622 CacheInvalidateSmgr(rel->rd_smgr->smgr_rnode); 00623 00624 /* Update local cache with the up-to-date size */ 00625 rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now; 00626 00627 UnlockRelationForExtension(rel, ExclusiveLock); 00628 00629 pfree(pg); 00630 }