00001 /*------------------------------------------------------------------------- 00002 * 00003 * checkpointer.c 00004 * 00005 * The checkpointer is new as of Postgres 9.2. It handles all checkpoints. 00006 * Checkpoints are automatically dispatched after a certain amount of time has 00007 * elapsed since the last one, and it can be signaled to perform requested 00008 * checkpoints as well. (The GUC parameter that mandates a checkpoint every 00009 * so many WAL segments is implemented by having backends signal when they 00010 * fill WAL segments; the checkpointer itself doesn't watch for the 00011 * condition.) 00012 * 00013 * The checkpointer is started by the postmaster as soon as the startup 00014 * subprocess finishes, or as soon as recovery begins if we are doing archive 00015 * recovery. It remains alive until the postmaster commands it to terminate. 00016 * Normal termination is by SIGUSR2, which instructs the checkpointer to 00017 * execute a shutdown checkpoint and then exit(0). (All backends must be 00018 * stopped before SIGUSR2 is issued!) Emergency termination is by SIGQUIT; 00019 * like any backend, the checkpointer will simply abort and exit on SIGQUIT. 00020 * 00021 * If the checkpointer exits unexpectedly, the postmaster treats that the same 00022 * as a backend crash: shared memory may be corrupted, so remaining backends 00023 * should be killed by SIGQUIT and then a recovery cycle started. (Even if 00024 * shared memory isn't corrupted, we have lost information about which 00025 * files need to be fsync'd for the next checkpoint, and so a system 00026 * restart needs to be forced.) 
00027 * 00028 * 00029 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group 00030 * 00031 * 00032 * IDENTIFICATION 00033 * src/backend/postmaster/checkpointer.c 00034 * 00035 *------------------------------------------------------------------------- 00036 */ 00037 #include "postgres.h" 00038 00039 #include <signal.h> 00040 #include <sys/time.h> 00041 #include <time.h> 00042 #include <unistd.h> 00043 00044 #include "access/xlog.h" 00045 #include "access/xlog_internal.h" 00046 #include "libpq/pqsignal.h" 00047 #include "miscadmin.h" 00048 #include "pgstat.h" 00049 #include "postmaster/bgwriter.h" 00050 #include "replication/syncrep.h" 00051 #include "storage/bufmgr.h" 00052 #include "storage/fd.h" 00053 #include "storage/ipc.h" 00054 #include "storage/lwlock.h" 00055 #include "storage/proc.h" 00056 #include "storage/shmem.h" 00057 #include "storage/smgr.h" 00058 #include "storage/spin.h" 00059 #include "utils/guc.h" 00060 #include "utils/memutils.h" 00061 #include "utils/resowner.h" 00062 00063 00064 /*---------- 00065 * Shared memory area for communication between checkpointer and backends 00066 * 00067 * The ckpt counters allow backends to watch for completion of a checkpoint 00068 * request they send. Here's how it works: 00069 * * At start of a checkpoint, checkpointer reads (and clears) the request 00070 * flags and increments ckpt_started, while holding ckpt_lck. 00071 * * On completion of a checkpoint, checkpointer sets ckpt_done to 00072 * equal ckpt_started. 00073 * * On failure of a checkpoint, checkpointer increments ckpt_failed 00074 * and sets ckpt_done to equal ckpt_started. 00075 * 00076 * The algorithm for backends is: 00077 * 1. Record current values of ckpt_failed and ckpt_started, and 00078 * set request flags, while holding ckpt_lck. 00079 * 2. Send signal to request checkpoint. 00080 * 3. Sleep until ckpt_started changes. 
Now you know a checkpoint has 00081 * begun since you started this algorithm (although *not* that it was 00082 * specifically initiated by your signal), and that it is using your flags. 00083 * 4. Record new value of ckpt_started. 00084 * 5. Sleep until ckpt_done >= saved value of ckpt_started. (Use modulo 00085 * arithmetic here in case counters wrap around.) Now you know a 00086 * checkpoint has started and completed, but not whether it was 00087 * successful. 00088 * 6. If ckpt_failed is different from the originally saved value, 00089 * assume request failed; otherwise it was definitely successful. 00090 * 00091 * ckpt_flags holds the OR of the checkpoint request flags sent by all 00092 * requesting backends since the last checkpoint start. The flags are 00093 * chosen so that OR'ing is the correct way to combine multiple requests. 00094 * 00095 * num_backend_writes is used to count the number of buffer writes performed 00096 * by user backend processes. This counter should be wide enough that it 00097 * can't overflow during a single processing cycle. num_backend_fsync 00098 * counts the subset of those writes that also had to do their own fsync, 00099 * because the checkpointer failed to absorb their request. 00100 * 00101 * The requests array holds fsync requests sent by backends and not yet 00102 * absorbed by the checkpointer. 00103 * 00104 * Unlike the checkpoint fields, num_backend_writes, num_backend_fsync, and 00105 * the requests fields are protected by CheckpointerCommLock. 
 *----------
 */

/*
 * A single backend fsync request, queued in shared memory until the
 * checkpointer absorbs it.
 */
typedef struct
{
	RelFileNode rnode;			/* physical relation identifier */
	ForkNumber	forknum;		/* which fork of the relation */
	BlockNumber segno;			/* see md.c for special values */
	/* might add a real request-type field later; not needed yet */
} CheckpointerRequest;

/*
 * Shared-memory communication area; see the protocol description in the
 * long comment above.  ckpt_* fields are protected by ckpt_lck; the write
 * counters and request queue are protected by CheckpointerCommLock.
 */
typedef struct
{
	pid_t		checkpointer_pid;		/* PID (0 if not started) */

	slock_t		ckpt_lck;		/* protects all the ckpt_* fields */

	int			ckpt_started;	/* advances when checkpoint starts */
	int			ckpt_done;		/* advances when checkpoint done */
	int			ckpt_failed;	/* advances when checkpoint fails */

	int			ckpt_flags;		/* checkpoint flags, as defined in xlog.h */

	uint32		num_backend_writes;		/* counts user backend buffer writes */
	uint32		num_backend_fsync;		/* counts user backend fsync calls */

	int			num_requests;	/* current # of requests */
	int			max_requests;	/* allocated array size */
	CheckpointerRequest requests[1];	/* VARIABLE LENGTH ARRAY */
} CheckpointerShmemStruct;

/* Pointer to the shared-memory area, set by CheckpointerShmemInit() */
static CheckpointerShmemStruct *CheckpointerShmem;

/* interval for calling AbsorbFsyncRequests in CheckpointWriteDelay */
#define WRITES_PER_ABSORB		1000

/*
 * GUC parameters
 */
int			CheckPointTimeout = 300;	/* checkpoint_timeout, in seconds */
int			CheckPointWarning = 30;		/* checkpoint_warning, in seconds */
double		CheckPointCompletionTarget = 0.5;	/* fraction of interval to
												 * spread writes over */

/*
 * Flags set by interrupt handlers for later service in the main loop.
 */
static volatile sig_atomic_t got_SIGHUP = false;
static volatile sig_atomic_t checkpoint_requested = false;
static volatile sig_atomic_t shutdown_requested = false;

/*
 * Private state
 */
static bool ckpt_active = false;	/* true while a checkpoint/restartpoint
									 * is in progress in this process */

/* these values are valid when ckpt_active is true: */
static pg_time_t ckpt_start_time;		/* when the active checkpoint began */
static XLogRecPtr ckpt_start_recptr;	/* WAL insert pointer at start (not
										 * set for restartpoints) */
static double ckpt_cached_elapsed;		/* cached progress threshold, see
										 * IsCheckpointOnSchedule() */

static pg_time_t last_checkpoint_time;	/* start time of last checkpoint */
static pg_time_t last_xlog_switch_time; /* last forced xlog segment switch */

/* Prototypes for private functions */

static void CheckArchiveTimeout(void);
static bool IsCheckpointOnSchedule(double progress);
static bool ImmediateCheckpointRequested(void);
static bool CompactCheckpointerRequestQueue(void);
static void UpdateSharedMemoryConfig(void);

/* Signal handlers */

static void chkpt_quickdie(SIGNAL_ARGS);
static void ChkptSigHupHandler(SIGNAL_ARGS);
static void ReqCheckpointHandler(SIGNAL_ARGS);
static void chkpt_sigusr1_handler(SIGNAL_ARGS);
static void ReqShutdownHandler(SIGNAL_ARGS);


/*
 * Main entry point for checkpointer process
 *
 * This is invoked from AuxiliaryProcessMain, which has already created the
 * basic execution environment, but not enabled signals yet.
 *
 * Never returns: exits via proc_exit(0) on SIGUSR2, or exit(1)/exit(2) on
 * postmaster death or SIGQUIT.
 */
void
CheckpointerMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext checkpointer_context;

	/* Advertise our PID so backends can signal us (see RequestCheckpoint) */
	CheckpointerShmem->checkpointer_pid = MyProcPid;

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.  (checkpointer probably never has
	 * any child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * Note: we deliberately ignore SIGTERM, because during a standard Unix
	 * system shutdown cycle, init will SIGTERM all processes at once.  We
	 * want to wait for the backends to exit, whereupon the postmaster will
	 * tell us it's okay to shut down (via SIGUSR2).
	 */
	pqsignal(SIGHUP, ChkptSigHupHandler);		/* set flag to read config
												 * file */
	pqsignal(SIGINT, ReqCheckpointHandler);		/* request checkpoint */
	pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
	pqsignal(SIGQUIT, chkpt_quickdie);	/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, chkpt_sigusr1_handler);	/* latch wakeup */
	pqsignal(SIGUSR2, ReqShutdownHandler);		/* request shutdown */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Initialize so that first time-driven event happens at the correct time.
	 */
	last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL);

	/*
	 * Create a resource owner to keep track of our resources (currently only
	 * buffer pins).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Checkpointer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	checkpointer_context = AllocSetContextCreate(TopMemoryContext,
												 "Checkpointer",
												 ALLOCSET_DEFAULT_MINSIZE,
												 ALLOCSET_DEFAULT_INITSIZE,
												 ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(checkpointer_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().  We don't have very many resources to worry
		 * about in checkpointer, but we do have LWLocks, buffers, and temp
		 * files.
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_SMgr();
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/* Warn any waiting backends that the checkpoint failed. */
		if (ckpt_active)
		{
			/* use volatile pointer to prevent code rearrangement */
			volatile CheckpointerShmemStruct *cps = CheckpointerShmem;

			/* bump ckpt_failed and release waiters (see protocol comment) */
			SpinLockAcquire(&cps->ckpt_lck);
			cps->ckpt_failed++;
			cps->ckpt_done = cps->ckpt_started;
			SpinLockRelease(&cps->ckpt_lck);

			ckpt_active = false;
		}

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(checkpointer_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(checkpointer_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Ensure all shared memory values are set correctly for the config. Doing
	 * this here ensures no race conditions from other concurrent updaters.
	 */
	UpdateSharedMemoryConfig();

	/*
	 * Advertise our latch that backends can use to wake us up while we're
	 * sleeping.
	 */
	ProcGlobal->checkpointerLatch = &MyProc->procLatch;

	/*
	 * Loop forever
	 */
	for (;;)
	{
		bool		do_checkpoint = false;
		int			flags = 0;
		pg_time_t	now;
		int			elapsed_secs;
		int			cur_timeout;
		int			rc;

		/* Clear any already-pending wakeups */
		ResetLatch(&MyProc->procLatch);

		/*
		 * Process any requests or signals received recently.
		 */
		AbsorbFsyncRequests();

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);

			/*
			 * Checkpointer is the last process to shut down, so we ask it to
			 * hold the keys for a range of other tasks required most of which
			 * have nothing to do with checkpointing at all.
			 *
			 * For various reasons, some config values can change dynamically
			 * so the primary copy of them is held in shared memory to make
			 * sure all backends see the same value.  We make Checkpointer
			 * responsible for updating the shared memory copy if the
			 * parameter setting changes because of SIGHUP.
			 */
			UpdateSharedMemoryConfig();
		}
		if (checkpoint_requested)
		{
			checkpoint_requested = false;
			do_checkpoint = true;
			BgWriterStats.m_requested_checkpoints++;
		}
		if (shutdown_requested)
		{
			/*
			 * From here on, elog(ERROR) should end with exit(1), not send
			 * control back to the sigsetjmp block above
			 */
			ExitOnAnyError = true;
			/* Close down the database */
			ShutdownXLOG(0, 0);
			/* Normal exit from the checkpointer is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Force a checkpoint if too much time has elapsed since the last one.
		 * Note that we count a timed checkpoint in stats only when this
		 * occurs without an external request, but we set the CAUSE_TIME flag
		 * bit even if there is also an external request.
		 */
		now = (pg_time_t) time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
		{
			if (!do_checkpoint)
				BgWriterStats.m_timed_checkpoints++;
			do_checkpoint = true;
			flags |= CHECKPOINT_CAUSE_TIME;
		}

		/*
		 * Do a checkpoint if requested.
		 */
		if (do_checkpoint)
		{
			bool		ckpt_performed = false;
			bool		do_restartpoint;

			/* use volatile pointer to prevent code rearrangement */
			volatile CheckpointerShmemStruct *cps = CheckpointerShmem;

			/*
			 * Check if we should perform a checkpoint or a restartpoint. As a
			 * side-effect, RecoveryInProgress() initializes TimeLineID if
			 * it's not set yet.
			 */
			do_restartpoint = RecoveryInProgress();

			/*
			 * Atomically fetch the request flags to figure out what kind of a
			 * checkpoint we should perform, and increase the started-counter
			 * to acknowledge that we've started a new checkpoint.
			 */
			SpinLockAcquire(&cps->ckpt_lck);
			flags |= cps->ckpt_flags;
			cps->ckpt_flags = 0;
			cps->ckpt_started++;
			SpinLockRelease(&cps->ckpt_lck);

			/*
			 * The end-of-recovery checkpoint is a real checkpoint that's
			 * performed while we're still in recovery.
			 */
			if (flags & CHECKPOINT_END_OF_RECOVERY)
				do_restartpoint = false;

			/*
			 * We will warn if (a) too soon since last checkpoint (whatever
			 * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag
			 * since the last checkpoint start.  Note in particular that this
			 * implementation will not generate warnings caused by
			 * CheckPointTimeout < CheckPointWarning.
			 */
			if (!do_restartpoint &&
				(flags & CHECKPOINT_CAUSE_XLOG) &&
				elapsed_secs < CheckPointWarning)
				ereport(LOG,
						(errmsg_plural("checkpoints are occurring too frequently (%d second apart)",
									   "checkpoints are occurring too frequently (%d seconds apart)",
									   elapsed_secs,
									   elapsed_secs),
						 errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));

			/*
			 * Initialize checkpointer-private variables used during
			 * checkpoint
			 */
			ckpt_active = true;
			if (!do_restartpoint)
				ckpt_start_recptr = GetInsertRecPtr();
			ckpt_start_time = now;
			ckpt_cached_elapsed = 0;

			/*
			 * Do the checkpoint.  CreateCheckPoint raises ERROR on failure;
			 * CreateRestartPoint returns false if it had nothing to do.
			 */
			if (!do_restartpoint)
			{
				CreateCheckPoint(flags);
				ckpt_performed = true;
			}
			else
				ckpt_performed = CreateRestartPoint(flags);

			/*
			 * After any checkpoint, close all smgr files.  This is so we
			 * won't hang onto smgr references to deleted files indefinitely.
			 */
			smgrcloseall();

			/*
			 * Indicate checkpoint completion to any waiting backends.
			 */
			SpinLockAcquire(&cps->ckpt_lck);
			cps->ckpt_done = cps->ckpt_started;
			SpinLockRelease(&cps->ckpt_lck);

			if (ckpt_performed)
			{
				/*
				 * Note we record the checkpoint start time not end time as
				 * last_checkpoint_time.  This is so that time-driven
				 * checkpoints happen at a predictable spacing.
				 */
				last_checkpoint_time = now;
			}
			else
			{
				/*
				 * We were not able to perform the restartpoint (checkpoints
				 * throw an ERROR in case of error).  Most likely because we
				 * have not received any new checkpoint WAL records since the
				 * last restartpoint.  Try again in 15 s.
				 */
				last_checkpoint_time = now - CheckPointTimeout + 15;
			}

			ckpt_active = false;
		}

		/* Check for archive_timeout and switch xlog files if necessary. */
		CheckArchiveTimeout();

		/*
		 * Send off activity statistics to the stats collector.  (The reason
		 * why we re-use bgwriter-related code for this is that the bgwriter
		 * and checkpointer used to be just one process.  It's probably not
		 * worth the trouble to split the stats support into two independent
		 * stats message types.)
		 */
		pgstat_send_bgwriter();

		/*
		 * Sleep until we are signaled or it's time for another checkpoint or
		 * xlog file switch.
		 */
		now = (pg_time_t) time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
			continue;			/* no sleep for us ... */
		cur_timeout = CheckPointTimeout - elapsed_secs;
		if (XLogArchiveTimeout > 0 && !RecoveryInProgress())
		{
			elapsed_secs = now - last_xlog_switch_time;
			if (elapsed_secs >= XLogArchiveTimeout)
				continue;		/* no sleep for us ... */
			cur_timeout = Min(cur_timeout, XLogArchiveTimeout - elapsed_secs);
		}

		rc = WaitLatch(&MyProc->procLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   cur_timeout * 1000L /* convert to ms */ );

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (rc & WL_POSTMASTER_DEATH)
			exit(1);
	}
}

/*
 * CheckArchiveTimeout -- check for archive_timeout and switch xlog files
 *
 * This will switch to a new WAL file and force an archive file write
 * if any activity is recorded in the current WAL file, including just
 * a single checkpoint record.
 */
static void
CheckArchiveTimeout(void)
{
	pg_time_t	now;
	pg_time_t	last_time;

	/* Nothing to do if archiving by timeout is disabled, or in recovery */
	if (XLogArchiveTimeout <= 0 || RecoveryInProgress())
		return;

	now = (pg_time_t) time(NULL);

	/* First we do a quick check using possibly-stale local state. */
	if ((int) (now - last_xlog_switch_time) < XLogArchiveTimeout)
		return;

	/*
	 * Update local state ... note that last_xlog_switch_time is the last time
	 * a switch was performed *or requested*.
	 */
	last_time = GetLastSegSwitchTime();

	last_xlog_switch_time = Max(last_xlog_switch_time, last_time);

	/* Now we can do the real check */
	if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout)
	{
		XLogRecPtr	switchpoint;

		/* OK, it's time to switch */
		switchpoint = RequestXLogSwitch();

		/*
		 * If the returned pointer points exactly to a segment boundary,
		 * assume nothing happened.
		 */
		if ((switchpoint % XLogSegSize) != 0)
			ereport(DEBUG1,
				(errmsg("transaction log switch forced (archive_timeout=%d)",
						XLogArchiveTimeout)));

		/*
		 * Update state in any case, so we don't retry constantly when the
		 * system is idle.
		 */
		last_xlog_switch_time = now;
	}
}

/*
 * Returns true if an immediate checkpoint request is pending.  (Note that
 * this does not check the *current* checkpoint's IMMEDIATE flag, but whether
 * there is one pending behind it.)
 */
static bool
ImmediateCheckpointRequested(void)
{
	if (checkpoint_requested)
	{
		volatile CheckpointerShmemStruct *cps = CheckpointerShmem;

		/*
		 * We don't need to acquire the ckpt_lck in this case because we're
		 * only looking at a single flag bit.
		 */
		if (cps->ckpt_flags & CHECKPOINT_IMMEDIATE)
			return true;
	}
	return false;
}

/*
 * CheckpointWriteDelay -- control rate of checkpoint
 *
 * This function is called after each page write performed by BufferSync().
 * It is responsible for throttling BufferSync()'s write rate to hit
 * checkpoint_completion_target.
 *
 * The checkpoint request flags should be passed in; currently the only one
 * examined is CHECKPOINT_IMMEDIATE, which disables delays between writes.
 *
 * 'progress' is an estimate of how much of the work has been done, as a
 * fraction between 0.0 meaning none, and 1.0 meaning all done.
 */
void
CheckpointWriteDelay(int flags, double progress)
{
	static int	absorb_counter = WRITES_PER_ABSORB;

	/* Do nothing if checkpoint is being executed by non-checkpointer process */
	if (!AmCheckpointerProcess())
		return;

	/*
	 * Perform the usual duties and take a nap, unless we're behind schedule,
	 * in which case we just try to catch up as quickly as possible.
	 */
	if (!(flags & CHECKPOINT_IMMEDIATE) &&
		!shutdown_requested &&
		!ImmediateCheckpointRequested() &&
		IsCheckpointOnSchedule(progress))
	{
		/* Service a pending config-reload request before napping */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
			/* update shmem copies of config variables */
			UpdateSharedMemoryConfig();
		}

		AbsorbFsyncRequests();
		absorb_counter = WRITES_PER_ABSORB;

		CheckArchiveTimeout();

		/*
		 * Report interim activity statistics to the stats collector.
		 */
		pgstat_send_bgwriter();

		/*
		 * This sleep used to be connected to bgwriter_delay, typically 200ms.
		 * That resulted in more frequent wakeups if not much work to do.
		 * Checkpointer and bgwriter are no longer related so take the Big
		 * Sleep.
		 */
		pg_usleep(100000L);
	}
	else if (--absorb_counter <= 0)
	{
		/*
		 * Absorb pending fsync requests after each WRITES_PER_ABSORB write
		 * operations even when we don't sleep, to prevent overflow of the
		 * fsync request queue.
		 */
		AbsorbFsyncRequests();
		absorb_counter = WRITES_PER_ABSORB;
	}
}

/*
 * IsCheckpointOnSchedule -- are we on schedule to finish this checkpoint
 *		in time?
 *
 * Compares the current progress against the time/segments elapsed since last
 * checkpoint, and returns true if the progress we've made this far is greater
 * than the elapsed time/segments.
 */
static bool
IsCheckpointOnSchedule(double progress)
{
	XLogRecPtr	recptr;
	struct timeval now;
	double		elapsed_xlogs,
				elapsed_time;

	Assert(ckpt_active);

	/* Scale progress according to checkpoint_completion_target. */
	progress *= CheckPointCompletionTarget;

	/*
	 * Check against the cached value first.  Only do the more expensive
	 * calculations once we reach the target previously calculated.  Since
	 * neither time nor the WAL insert pointer moves backwards, a freshly
	 * calculated value can only be greater than or equal to the cached value.
	 */
	if (progress < ckpt_cached_elapsed)
		return false;

	/*
	 * Check progress against WAL segments written and checkpoint_segments.
	 *
	 * We compare the current WAL insert location against the location
	 * computed before calling CreateCheckPoint. The code in XLogInsert that
	 * actually triggers a checkpoint when checkpoint_segments is exceeded
	 * compares against RedoRecptr, so this is not completely accurate.
	 * However, it's good enough for our purposes, we're only calculating an
	 * estimate anyway.
	 */
	if (!RecoveryInProgress())
	{
		recptr = GetInsertRecPtr();
		/* fraction of the checkpoint_segments budget consumed so far */
		elapsed_xlogs = (((double) (recptr - ckpt_start_recptr)) / XLogSegSize) / CheckPointSegments;

		if (progress < elapsed_xlogs)
		{
			ckpt_cached_elapsed = elapsed_xlogs;
			return false;
		}
	}

	/*
	 * Check progress against time elapsed and checkpoint_timeout.
	 */
	gettimeofday(&now, NULL);
	elapsed_time = ((double) ((pg_time_t) now.tv_sec - ckpt_start_time) +
					now.tv_usec / 1000000.0) / CheckPointTimeout;

	if (progress < elapsed_time)
	{
		ckpt_cached_elapsed = elapsed_time;
		return false;
	}

	/* It looks like we're on schedule. */
	return true;
}


/* --------------------------------
 *		signal handler routines
 * --------------------------------
 */

/*
 * chkpt_quickdie() occurs when signalled SIGQUIT by the postmaster.
 *
 * Some backend has bought the farm,
 * so we need to stop what we're doing and exit.
 */
static void
chkpt_quickdie(SIGNAL_ARGS)
{
	PG_SETMASK(&BlockSig);

	/*
	 * We DO NOT want to run proc_exit() callbacks -- we're here because
	 * shared memory may be corrupted, so we don't want to try to clean up our
	 * transaction.  Just nail the windows shut and get out of town.  Now that
	 * there's an atexit callback to prevent third-party code from breaking
	 * things by calling exit() directly, we have to reset the callbacks
	 * explicitly to make this work as intended.
	 */
	on_exit_reset();

	/*
	 * Note we do exit(2) not exit(0).	This is to force the postmaster into a
	 * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
	 * backend.  This is necessary precisely because we don't clean up our
	 * shared memory state.
	 * (The "dead man switch" mechanism in pmsignal.c
	 * should ensure the postmaster sees this as a crash, too, but no harm in
	 * being doubly sure.)
	 */
	exit(2);
}

/* SIGHUP: set flag to re-read config file at next convenient time */
static void
ChkptSigHupHandler(SIGNAL_ARGS)
{
	/* preserve errno across the handler, per signal-safety convention */
	int			save_errno = errno;

	got_SIGHUP = true;
	if (MyProc)
		SetLatch(&MyProc->procLatch);

	errno = save_errno;
}

/* SIGINT: set flag to run a normal checkpoint right away */
static void
ReqCheckpointHandler(SIGNAL_ARGS)
{
	int			save_errno = errno;

	checkpoint_requested = true;
	if (MyProc)
		SetLatch(&MyProc->procLatch);

	errno = save_errno;
}

/* SIGUSR1: used for latch wakeups */
static void
chkpt_sigusr1_handler(SIGNAL_ARGS)
{
	int			save_errno = errno;

	latch_sigusr1_handler();

	errno = save_errno;
}

/* SIGUSR2: set flag to run a shutdown checkpoint and exit */
static void
ReqShutdownHandler(SIGNAL_ARGS)
{
	int			save_errno = errno;

	shutdown_requested = true;
	if (MyProc)
		SetLatch(&MyProc->procLatch);

	errno = save_errno;
}


/* --------------------------------
 *		communication with backends
 * --------------------------------
 */

/*
 * CheckpointerShmemSize
 *		Compute space needed for checkpointer-related shared memory
 */
Size
CheckpointerShmemSize(void)
{
	Size		size;

	/*
	 * Currently, the size of the requests[] array is arbitrarily set equal to
	 * NBuffers.  This may prove too large or small ...
	 */
	size = offsetof(CheckpointerShmemStruct, requests);
	size = add_size(size, mul_size(NBuffers, sizeof(CheckpointerRequest)));

	return size;
}

/*
 * CheckpointerShmemInit
 *		Allocate and initialize checkpointer-related shared memory
 */
void
CheckpointerShmemInit(void)
{
	Size		size = CheckpointerShmemSize();
	bool		found;

	CheckpointerShmem = (CheckpointerShmemStruct *)
		ShmemInitStruct("Checkpointer Data",
						size,
						&found);

	if (!found)
	{
		/*
		 * First time through, so initialize.  Note that we zero the whole
		 * requests array; this is so that CompactCheckpointerRequestQueue
		 * can assume that any pad bytes in the request structs are zeroes.
		 */
		MemSet(CheckpointerShmem, 0, size);
		SpinLockInit(&CheckpointerShmem->ckpt_lck);
		CheckpointerShmem->max_requests = NBuffers;
	}
}

/*
 * RequestCheckpoint
 *		Called in backend processes to request a checkpoint
 *
 * flags is a bitwise OR of the following:
 *	CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown.
 *	CHECKPOINT_END_OF_RECOVERY: checkpoint is for end of WAL recovery.
 *	CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP,
 *		ignoring checkpoint_completion_target parameter.
 *	CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occurred
 *		since the last one (implied by CHECKPOINT_IS_SHUTDOWN or
 *		CHECKPOINT_END_OF_RECOVERY).
 *	CHECKPOINT_WAIT: wait for completion before returning (otherwise,
 *		just signal checkpointer to do it, and return).
 *	CHECKPOINT_CAUSE_XLOG: checkpoint is requested due to xlog filling.
 *		(This affects logging, and in particular enables CheckPointWarning.)
 */
void
RequestCheckpoint(int flags)
{
	/* use volatile pointer to prevent code rearrangement */
	volatile CheckpointerShmemStruct *cps = CheckpointerShmem;
	int			ntries;
	int			old_failed,
				old_started;

	/*
	 * If in a standalone backend, just do it ourselves.
	 */
	if (!IsPostmasterEnvironment)
	{
		/*
		 * There's no point in doing slow checkpoints in a standalone backend,
		 * because there's no other backends the checkpoint could disrupt.
		 */
		CreateCheckPoint(flags | CHECKPOINT_IMMEDIATE);

		/*
		 * After any checkpoint, close all smgr files.  This is so we won't
		 * hang onto smgr references to deleted files indefinitely.
		 */
		smgrcloseall();

		return;
	}

	/*
	 * Atomically set the request flags, and take a snapshot of the counters.
	 * When we see ckpt_started > old_started, we know the flags we set here
	 * have been seen by checkpointer.
	 *
	 * Note that we OR the flags with any existing flags, to avoid overriding
	 * a "stronger" request by another backend.  The flag senses must be
	 * chosen to make this work!
	 */
	SpinLockAcquire(&cps->ckpt_lck);

	old_failed = cps->ckpt_failed;
	old_started = cps->ckpt_started;
	cps->ckpt_flags |= flags;

	SpinLockRelease(&cps->ckpt_lck);

	/*
	 * Send signal to request checkpoint.  It's possible that the checkpointer
	 * hasn't started yet, or is in process of restarting, so we will retry a
	 * few times if needed.  Also, if not told to wait for the checkpoint to
	 * occur, we consider failure to send the signal to be nonfatal and merely
	 * LOG it.
	 */
	for (ntries = 0;; ntries++)
	{
		if (CheckpointerShmem->checkpointer_pid == 0)
		{
			if (ntries >= 20)	/* max wait 2.0 sec */
			{
				elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
					 "could not request checkpoint because checkpointer not running");
				break;
			}
		}
		else if (kill(CheckpointerShmem->checkpointer_pid, SIGINT) != 0)
		{
			if (ntries >= 20)	/* max wait 2.0 sec */
			{
				elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
					 "could not signal for checkpoint: %m");
				break;
			}
		}
		else
			break;				/* signal sent successfully */

		CHECK_FOR_INTERRUPTS();
		pg_usleep(100000L);		/* wait 0.1 sec, then retry */
	}

	/*
	 * If requested, wait for completion.  We detect completion according to
	 * the algorithm given above.
	 */
	if (flags & CHECKPOINT_WAIT)
	{
		int			new_started,
					new_failed;

		/* Wait for a new checkpoint to start. */
		for (;;)
		{
			SpinLockAcquire(&cps->ckpt_lck);
			new_started = cps->ckpt_started;
			SpinLockRelease(&cps->ckpt_lck);

			if (new_started != old_started)
				break;

			CHECK_FOR_INTERRUPTS();
			pg_usleep(100000L);
		}

		/*
		 * We are waiting for ckpt_done >= new_started, in a modulo sense.
		 * The signed subtraction makes the comparison robust against the
		 * counters wrapping around.
		 */
		for (;;)
		{
			int			new_done;

			SpinLockAcquire(&cps->ckpt_lck);
			new_done = cps->ckpt_done;
			new_failed = cps->ckpt_failed;
			SpinLockRelease(&cps->ckpt_lck);

			if (new_done - new_started >= 0)
				break;

			CHECK_FOR_INTERRUPTS();
			pg_usleep(100000L);
		}

		if (new_failed != old_failed)
			ereport(ERROR,
					(errmsg("checkpoint request failed"),
					 errhint("Consult recent messages in the server log for details.")));
	}
}

/*
 * ForwardFsyncRequest
 *		Forward a file-fsync request from a backend to the checkpointer
 *
 * Whenever a backend is compelled to write directly to a relation
 * (which should be seldom, if the background writer is getting its job done),
 * the backend calls this routine to pass over knowledge that the relation
 * is dirty and must be fsync'd before next checkpoint.  We also use this
 * opportunity to count such writes for statistical purposes.
 *
 * This functionality is only supported for regular (not backend-local)
 * relations, so the rnode argument is intentionally RelFileNode not
 * RelFileNodeBackend.
 *
 * segno specifies which segment (not block!) of the relation needs to be
 * fsync'd.  (Since the valid range is much less than BlockNumber, we can
 * use high values for special flags; that's all internal to md.c, which
 * see for details.)
 *
 * To avoid holding the lock for longer than necessary, we normally write
 * to the requests[] queue without checking for duplicates.  The checkpointer
 * will have to eliminate dups internally anyway.  However, if we discover
 * that the queue is full, we make a pass over the entire queue to compact
 * it.  This is somewhat expensive, but the alternative is for the backend
 * to perform its own fsync, which is far more expensive in practice.
It
 * is theoretically possible a backend fsync might still be necessary, if
 * the queue is full and contains no duplicate entries.  In that case, we
 * let the backend know by returning false.
 */
bool
ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
{
	CheckpointerRequest *request;
	bool		too_full;

	if (!IsUnderPostmaster)
		return false;			/* probably shouldn't even get here */

	if (AmCheckpointerProcess())
		elog(ERROR, "ForwardFsyncRequest must not be called in checkpointer");

	LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE);

	/* Count all backend writes regardless of if they fit in the queue */
	if (!AmBackgroundWriterProcess())
		CheckpointerShmem->num_backend_writes++;

	/*
	 * If the checkpointer isn't running or the request queue is full, the
	 * backend will have to perform its own fsync request.  But before forcing
	 * that to happen, we can try to compact the request queue.
	 */
	if (CheckpointerShmem->checkpointer_pid == 0 ||
		(CheckpointerShmem->num_requests >= CheckpointerShmem->max_requests &&
		 !CompactCheckpointerRequestQueue()))
	{
		/*
		 * Count the subset of writes where backends have to do their own
		 * fsync
		 */
		if (!AmBackgroundWriterProcess())
			CheckpointerShmem->num_backend_fsync++;
		LWLockRelease(CheckpointerCommLock);
		return false;
	}

	/* OK, insert request */
	request = &CheckpointerShmem->requests[CheckpointerShmem->num_requests++];
	request->rnode = rnode;
	request->forknum = forknum;
	request->segno = segno;

	/* If queue is more than half full, nudge the checkpointer to empty it */
	too_full = (CheckpointerShmem->num_requests >=
				CheckpointerShmem->max_requests / 2);

	LWLockRelease(CheckpointerCommLock);

	/* ... but not till after we release the lock */
	if (too_full && ProcGlobal->checkpointerLatch)
		SetLatch(ProcGlobal->checkpointerLatch);

	return true;
}

/*
 * CompactCheckpointerRequestQueue
 *		Remove duplicates from the request queue to avoid backend fsyncs.
 *		Returns "true" if any entries were removed.
 *
 * Although a full fsync request queue is not common, it can lead to severe
 * performance problems when it does happen.  So far, this situation has
 * only been observed to occur when the system is under heavy write load,
 * and especially during the "sync" phase of a checkpoint.  Without this
 * logic, each backend begins doing an fsync for every block written, which
 * gets very expensive and can slow down the whole system.
 *
 * Trying to do this every time the queue is full could lose if there
 * aren't any removable entries.  But that should be vanishingly rare in
 * practice: there's one queue entry per shared buffer.
 */
static bool
CompactCheckpointerRequestQueue(void)
{
	struct CheckpointerSlotMapping
	{
		CheckpointerRequest request;
		int			slot;
	};

	int			n,
				preserve_count;
	int			num_skipped = 0;
	HASHCTL		ctl;
	HTAB	   *htab;
	bool	   *skip_slot;

	/* must hold CheckpointerCommLock in exclusive mode */
	Assert(LWLockHeldByMe(CheckpointerCommLock));

	/* Initialize skip_slot array */
	skip_slot = palloc0(sizeof(bool) * CheckpointerShmem->num_requests);

	/* Initialize temporary hash table */
	MemSet(&ctl, 0, sizeof(ctl));
	ctl.keysize = sizeof(CheckpointerRequest);
	ctl.entrysize = sizeof(struct CheckpointerSlotMapping);
	ctl.hash = tag_hash;
	ctl.hcxt = CurrentMemoryContext;

	htab = hash_create("CompactCheckpointerRequestQueue",
					   CheckpointerShmem->num_requests,
					   &ctl,
					   HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

	/*
	 * The basic idea here is that a request can be skipped if it's followed
	 * by a later, identical request.  It might seem more sensible to work
	 * backwards from the end of the queue and check whether a request is
	 * *preceded* by an earlier, identical request, in the hopes of doing less
	 * copying.  But that might change the semantics, if there's an
	 * intervening FORGET_RELATION_FSYNC or FORGET_DATABASE_FSYNC request, so
	 * we do it this way.  It would be possible to be even smarter if we made
	 * the code below understand the specific semantics of such requests (it
	 * could blow away preceding entries that would end up being canceled
	 * anyhow), but it's not clear that the extra complexity would buy us
	 * anything.
	 */
	for (n = 0; n < CheckpointerShmem->num_requests; n++)
	{
		CheckpointerRequest *request;
		struct CheckpointerSlotMapping *slotmap;
		bool		found;

		/*
		 * We use the request struct directly as a hashtable key.  This
		 * assumes that any padding bytes in the structs are consistently the
		 * same, which should be okay because we zeroed them in
		 * CheckpointerShmemInit.  Note also that RelFileNode had better
		 * contain no pad bytes.
		 */
		request = &CheckpointerShmem->requests[n];
		slotmap = hash_search(htab, request, HASH_ENTER, &found);
		if (found)
		{
			/* Duplicate, so mark the previous occurrence as skippable */
			skip_slot[slotmap->slot] = true;
			num_skipped++;
		}
		/* Remember slot containing latest occurrence of this request value */
		slotmap->slot = n;
	}

	/* Done with the hash table. */
	hash_destroy(htab);

	/* If no duplicates, we're out of luck. */
	if (!num_skipped)
	{
		pfree(skip_slot);
		return false;
	}

	/* We found some duplicates; remove them. */
	preserve_count = 0;
	for (n = 0; n < CheckpointerShmem->num_requests; n++)
	{
		if (skip_slot[n])
			continue;
		CheckpointerShmem->requests[preserve_count++] = CheckpointerShmem->requests[n];
	}
	ereport(DEBUG1,
			(errmsg("compacted fsync request queue from %d entries to %d entries",
					CheckpointerShmem->num_requests, preserve_count)));
	CheckpointerShmem->num_requests = preserve_count;

	/* Cleanup. */
	pfree(skip_slot);
	return true;
}

/*
 * AbsorbFsyncRequests
 *		Retrieve queued fsync requests and pass them to local smgr.
 *
 * This is exported because it must be called during CreateCheckPoint;
 * we have to be sure we have accepted all pending requests just before
 * we start fsync'ing.
Since CreateCheckPoint sometimes runs in 01296 * non-checkpointer processes, do nothing if not checkpointer. 01297 */ 01298 void 01299 AbsorbFsyncRequests(void) 01300 { 01301 CheckpointerRequest *requests = NULL; 01302 CheckpointerRequest *request; 01303 int n; 01304 01305 if (!AmCheckpointerProcess()) 01306 return; 01307 01308 /* 01309 * We have to PANIC if we fail to absorb all the pending requests (eg, 01310 * because our hashtable runs out of memory). This is because the system 01311 * cannot run safely if we are unable to fsync what we have been told to 01312 * fsync. Fortunately, the hashtable is so small that the problem is 01313 * quite unlikely to arise in practice. 01314 */ 01315 START_CRIT_SECTION(); 01316 01317 /* 01318 * We try to avoid holding the lock for a long time by copying the request 01319 * array. 01320 */ 01321 LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE); 01322 01323 /* Transfer stats counts into pending pgstats message */ 01324 BgWriterStats.m_buf_written_backend += CheckpointerShmem->num_backend_writes; 01325 BgWriterStats.m_buf_fsync_backend += CheckpointerShmem->num_backend_fsync; 01326 01327 CheckpointerShmem->num_backend_writes = 0; 01328 CheckpointerShmem->num_backend_fsync = 0; 01329 01330 n = CheckpointerShmem->num_requests; 01331 if (n > 0) 01332 { 01333 requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest)); 01334 memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest)); 01335 } 01336 CheckpointerShmem->num_requests = 0; 01337 01338 LWLockRelease(CheckpointerCommLock); 01339 01340 for (request = requests; n > 0; request++, n--) 01341 RememberFsyncRequest(request->rnode, request->forknum, request->segno); 01342 01343 if (requests) 01344 pfree(requests); 01345 01346 END_CRIT_SECTION(); 01347 } 01348 01349 /* 01350 * Update any shared memory configurations based on config parameters 01351 */ 01352 static void 01353 UpdateSharedMemoryConfig(void) 01354 { 01355 /* update global shmem state 
for sync rep */ 01356 SyncRepUpdateSyncStandbysDefined(); 01357 01358 /* 01359 * If full_page_writes has been changed by SIGHUP, we update it in shared 01360 * memory and write an XLOG_FPW_CHANGE record. 01361 */ 01362 UpdateFullPageWrites(); 01363 01364 elog(DEBUG2, "checkpointer updated shared memory configuration values"); 01365 } 01366 01367 /* 01368 * FirstCallSinceLastCheckpoint allows a process to take an action once 01369 * per checkpoint cycle by asynchronously checking for checkpoint completion. 01370 */ 01371 bool 01372 FirstCallSinceLastCheckpoint(void) 01373 { 01374 /* use volatile pointer to prevent code rearrangement */ 01375 volatile CheckpointerShmemStruct *cps = CheckpointerShmem; 01376 static int ckpt_done = 0; 01377 int new_done; 01378 bool FirstCall = false; 01379 01380 SpinLockAcquire(&cps->ckpt_lck); 01381 new_done = cps->ckpt_done; 01382 SpinLockRelease(&cps->ckpt_lck); 01383 01384 if (new_done != ckpt_done) 01385 FirstCall = true; 01386 01387 ckpt_done = new_done; 01388 01389 return FirstCall; 01390 }