00001 /*------------------------------------------------------------------------- 00002 * 00003 * bgwriter.c 00004 * 00005 * The background writer (bgwriter) is new as of Postgres 8.0. It attempts 00006 * to keep regular backends from having to write out dirty shared buffers 00007 * (which they would only do when needing to free a shared buffer to read in 00008 * another page). In the best scenario all writes from shared buffers will 00009 * be issued by the background writer process. However, regular backends are 00010 * still empowered to issue writes if the bgwriter fails to maintain enough 00011 * clean shared buffers. 00012 * 00013 * As of Postgres 9.2 the bgwriter no longer handles checkpoints. 00014 * 00015 * The bgwriter is started by the postmaster as soon as the startup subprocess 00016 * finishes, or as soon as recovery begins if we are doing archive recovery. 00017 * It remains alive until the postmaster commands it to terminate. 00018 * Normal termination is by SIGTERM, which instructs the bgwriter to exit(0). 00019 * Emergency termination is by SIGQUIT; like any backend, the bgwriter will 00020 * simply abort and exit on SIGQUIT. 00021 * 00022 * If the bgwriter exits unexpectedly, the postmaster treats that the same 00023 * as a backend crash: shared memory may be corrupted, so remaining backends 00024 * should be killed by SIGQUIT and then a recovery cycle started. 00025 * 00026 * 00027 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group 00028 * 00029 * 00030 * IDENTIFICATION 00031 * src/backend/postmaster/bgwriter.c 00032 * 00033 *------------------------------------------------------------------------- 00034 */ 00035 #include "postgres.h" 00036 00037 #include <signal.h> 00038 #include <sys/time.h> 00039 #include <time.h> 00040 #include <unistd.h> 00041 00042 #include "access/xlog.h" 00043 #include "access/xlog_internal.h" 00044 #include "libpq/pqsignal.h" 00045 #include "miscadmin.h" 00046 #include "pgstat.h" 00047 #include "postmaster/bgwriter.h" 00048 #include "storage/bufmgr.h" 00049 #include "storage/buf_internals.h" 00050 #include "storage/fd.h" 00051 #include "storage/ipc.h" 00052 #include "storage/lwlock.h" 00053 #include "storage/proc.h" 00054 #include "storage/shmem.h" 00055 #include "storage/smgr.h" 00056 #include "storage/spin.h" 00057 #include "utils/guc.h" 00058 #include "utils/memutils.h" 00059 #include "utils/resowner.h" 00060 00061 00062 /* 00063 * GUC parameters 00064 */ 00065 int BgWriterDelay = 200; 00066 00067 /* 00068 * Multiplier to apply to BgWriterDelay when we decide to hibernate. 00069 * (Perhaps this needs to be configurable?) 00070 */ 00071 #define HIBERNATE_FACTOR 50 00072 00073 /* 00074 * Flags set by interrupt handlers for later service in the main loop. 00075 */ 00076 static volatile sig_atomic_t got_SIGHUP = false; 00077 static volatile sig_atomic_t shutdown_requested = false; 00078 00079 /* Signal handlers */ 00080 00081 static void bg_quickdie(SIGNAL_ARGS); 00082 static void BgSigHupHandler(SIGNAL_ARGS); 00083 static void ReqShutdownHandler(SIGNAL_ARGS); 00084 static void bgwriter_sigusr1_handler(SIGNAL_ARGS); 00085 00086 00087 /* 00088 * Main entry point for bgwriter process 00089 * 00090 * This is invoked from AuxiliaryProcessMain, which has already created the 00091 * basic execution environment, but not enabled signals yet. 00092 */ 00093 void 00094 BackgroundWriterMain(void) 00095 { 00096 sigjmp_buf local_sigjmp_buf; 00097 MemoryContext bgwriter_context; 00098 bool prev_hibernate; 00099 00100 /* 00101 * If possible, make this process a group leader, so that the postmaster 00102 * can signal any child processes too. (bgwriter probably never has any 00103 * child processes, but for consistency we make all postmaster child 00104 * processes do this.) 00105 */ 00106 #ifdef HAVE_SETSID 00107 if (setsid() < 0) 00108 elog(FATAL, "setsid() failed: %m"); 00109 #endif 00110 00111 /* 00112 * Properly accept or ignore signals the postmaster might send us. 00113 * 00114 * bgwriter doesn't participate in ProcSignal signalling, but a SIGUSR1 00115 * handler is still needed for latch wakeups. 00116 */ 00117 pqsignal(SIGHUP, BgSigHupHandler); /* set flag to read config file */ 00118 pqsignal(SIGINT, SIG_IGN); 00119 pqsignal(SIGTERM, ReqShutdownHandler); /* shutdown */ 00120 pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */ 00121 pqsignal(SIGALRM, SIG_IGN); 00122 pqsignal(SIGPIPE, SIG_IGN); 00123 pqsignal(SIGUSR1, bgwriter_sigusr1_handler); 00124 pqsignal(SIGUSR2, SIG_IGN); 00125 00126 /* 00127 * Reset some signals that are accepted by postmaster but not here 00128 */ 00129 pqsignal(SIGCHLD, SIG_DFL); 00130 pqsignal(SIGTTIN, SIG_DFL); 00131 pqsignal(SIGTTOU, SIG_DFL); 00132 pqsignal(SIGCONT, SIG_DFL); 00133 pqsignal(SIGWINCH, SIG_DFL); 00134 00135 /* We allow SIGQUIT (quickdie) at all times */ 00136 sigdelset(&BlockSig, SIGQUIT); 00137 00138 /* 00139 * Create a resource owner to keep track of our resources (currently only 00140 * buffer pins). 00141 */ 00142 CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer"); 00143 00144 /* 00145 * Create a memory context that we will do all our work in. We do this so 00146 * that we can reset the context during error recovery and thereby avoid 00147 * possible memory leaks. Formerly this code just ran in 00148 * TopMemoryContext, but resetting that would be a really bad idea. 00149 */ 00150 bgwriter_context = AllocSetContextCreate(TopMemoryContext, 00151 "Background Writer", 00152 ALLOCSET_DEFAULT_MINSIZE, 00153 ALLOCSET_DEFAULT_INITSIZE, 00154 ALLOCSET_DEFAULT_MAXSIZE); 00155 MemoryContextSwitchTo(bgwriter_context); 00156 00157 /* 00158 * If an exception is encountered, processing resumes here. 00159 * 00160 * See notes in postgres.c about the design of this coding. 00161 */ 00162 if (sigsetjmp(local_sigjmp_buf, 1) != 0) 00163 { 00164 /* Since not using PG_TRY, must reset error stack by hand */ 00165 error_context_stack = NULL; 00166 00167 /* Prevent interrupts while cleaning up */ 00168 HOLD_INTERRUPTS(); 00169 00170 /* Report the error to the server log */ 00171 EmitErrorReport(); 00172 00173 /* 00174 * These operations are really just a minimal subset of 00175 * AbortTransaction(). We don't have very many resources to worry 00176 * about in bgwriter, but we do have LWLocks, buffers, and temp files. 00177 */ 00178 LWLockReleaseAll(); 00179 AbortBufferIO(); 00180 UnlockBuffers(); 00181 /* buffer pins are released here: */ 00182 ResourceOwnerRelease(CurrentResourceOwner, 00183 RESOURCE_RELEASE_BEFORE_LOCKS, 00184 false, true); 00185 /* we needn't bother with the other ResourceOwnerRelease phases */ 00186 AtEOXact_Buffers(false); 00187 AtEOXact_SMgr(); 00188 AtEOXact_Files(); 00189 AtEOXact_HashTables(false); 00190 00191 /* 00192 * Now return to normal top-level context and clear ErrorContext for 00193 * next time. 00194 */ 00195 MemoryContextSwitchTo(bgwriter_context); 00196 FlushErrorState(); 00197 00198 /* Flush any leaked data in the top-level context */ 00199 MemoryContextResetAndDeleteChildren(bgwriter_context); 00200 00201 /* Now we can allow interrupts again */ 00202 RESUME_INTERRUPTS(); 00203 00204 /* 00205 * Sleep at least 1 second after any error. A write error is likely 00206 * to be repeated, and we don't want to be filling the error logs as 00207 * fast as we can. 00208 */ 00209 pg_usleep(1000000L); 00210 00211 /* 00212 * Close all open files after any error. This is helpful on Windows, 00213 * where holding deleted files open causes various strange errors. 00214 * It's not clear we need it elsewhere, but shouldn't hurt. 00215 */ 00216 smgrcloseall(); 00217 } 00218 00219 /* We can now handle ereport(ERROR) */ 00220 PG_exception_stack = &local_sigjmp_buf; 00221 00222 /* 00223 * Unblock signals (they were blocked when the postmaster forked us) 00224 */ 00225 PG_SETMASK(&UnBlockSig); 00226 00227 /* 00228 * Reset hibernation state after any error. 00229 */ 00230 prev_hibernate = false; 00231 00232 /* 00233 * Loop forever 00234 */ 00235 for (;;) 00236 { 00237 bool can_hibernate; 00238 int rc; 00239 00240 /* Clear any already-pending wakeups */ 00241 ResetLatch(&MyProc->procLatch); 00242 00243 if (got_SIGHUP) 00244 { 00245 got_SIGHUP = false; 00246 ProcessConfigFile(PGC_SIGHUP); 00247 } 00248 if (shutdown_requested) 00249 { 00250 /* 00251 * From here on, elog(ERROR) should end with exit(1), not send 00252 * control back to the sigsetjmp block above 00253 */ 00254 ExitOnAnyError = true; 00255 /* Normal exit from the bgwriter is here */ 00256 proc_exit(0); /* done */ 00257 } 00258 00259 /* 00260 * Do one cycle of dirty-buffer writing. 00261 */ 00262 can_hibernate = BgBufferSync(); 00263 00264 /* 00265 * Send off activity statistics to the stats collector 00266 */ 00267 pgstat_send_bgwriter(); 00268 00269 if (FirstCallSinceLastCheckpoint()) 00270 { 00271 /* 00272 * After any checkpoint, close all smgr files. This is so we 00273 * won't hang onto smgr references to deleted files indefinitely. 00274 */ 00275 smgrcloseall(); 00276 } 00277 00278 /* 00279 * Sleep until we are signaled or BgWriterDelay has elapsed. 00280 * 00281 * Note: the feedback control loop in BgBufferSync() expects that we 00282 * will call it every BgWriterDelay msec. While it's not critical for 00283 * correctness that that be exact, the feedback loop might misbehave 00284 * if we stray too far from that. Hence, avoid loading this process 00285 * down with latch events that are likely to happen frequently during 00286 * normal operation. 00287 */ 00288 rc = WaitLatch(&MyProc->procLatch, 00289 WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, 00290 BgWriterDelay /* ms */ ); 00291 00292 /* 00293 * If no latch event and BgBufferSync says nothing's happening, extend 00294 * the sleep in "hibernation" mode, where we sleep for much longer 00295 * than bgwriter_delay says. Fewer wakeups save electricity. When a 00296 * backend starts using buffers again, it will wake us up by setting 00297 * our latch. Because the extra sleep will persist only as long as no 00298 * buffer allocations happen, this should not distort the behavior of 00299 * BgBufferSync's control loop too badly; essentially, it will think 00300 * that the system-wide idle interval didn't exist. 00301 * 00302 * There is a race condition here, in that a backend might allocate a 00303 * buffer between the time BgBufferSync saw the alloc count as zero 00304 * and the time we call StrategyNotifyBgWriter. While it's not 00305 * critical that we not hibernate anyway, we try to reduce the odds of 00306 * that by only hibernating when BgBufferSync says nothing's happening 00307 * for two consecutive cycles. Also, we mitigate any possible 00308 * consequences of a missed wakeup by not hibernating forever. 00309 */ 00310 if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate) 00311 { 00312 /* Ask for notification at next buffer allocation */ 00313 StrategyNotifyBgWriter(&MyProc->procLatch); 00314 /* Sleep ... */ 00315 rc = WaitLatch(&MyProc->procLatch, 00316 WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, 00317 BgWriterDelay * HIBERNATE_FACTOR); 00318 /* Reset the notification request in case we timed out */ 00319 StrategyNotifyBgWriter(NULL); 00320 } 00321 00322 /* 00323 * Emergency bailout if postmaster has died. This is to avoid the 00324 * necessity for manual cleanup of all postmaster children. 00325 */ 00326 if (rc & WL_POSTMASTER_DEATH) 00327 exit(1); 00328 00329 prev_hibernate = can_hibernate; 00330 } 00331 } 00332 00333 00334 /* -------------------------------- 00335 * signal handler routines 00336 * -------------------------------- 00337 */ 00338 00339 /* 00340 * bg_quickdie() occurs when signalled SIGQUIT by the postmaster. 00341 * 00342 * Some backend has bought the farm, 00343 * so we need to stop what we're doing and exit. 00344 */ 00345 static void 00346 bg_quickdie(SIGNAL_ARGS) 00347 { 00348 PG_SETMASK(&BlockSig); 00349 00350 /* 00351 * We DO NOT want to run proc_exit() callbacks -- we're here because 00352 * shared memory may be corrupted, so we don't want to try to clean up our 00353 * transaction. Just nail the windows shut and get out of town. Now that 00354 * there's an atexit callback to prevent third-party code from breaking 00355 * things by calling exit() directly, we have to reset the callbacks 00356 * explicitly to make this work as intended. 00357 */ 00358 on_exit_reset(); 00359 00360 /* 00361 * Note we do exit(2) not exit(0). This is to force the postmaster into a 00362 * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random 00363 * backend. This is necessary precisely because we don't clean up our 00364 * shared memory state. (The "dead man switch" mechanism in pmsignal.c 00365 * should ensure the postmaster sees this as a crash, too, but no harm in 00366 * being doubly sure.) 00367 */ 00368 exit(2); 00369 } 00370 00371 /* SIGHUP: set flag to re-read config file at next convenient time */ 00372 static void 00373 BgSigHupHandler(SIGNAL_ARGS) 00374 { 00375 int save_errno = errno; 00376 00377 got_SIGHUP = true; 00378 if (MyProc) 00379 SetLatch(&MyProc->procLatch); 00380 00381 errno = save_errno; 00382 } 00383 00384 /* SIGTERM: set flag to shutdown and exit */ 00385 static void 00386 ReqShutdownHandler(SIGNAL_ARGS) 00387 { 00388 int save_errno = errno; 00389 00390 shutdown_requested = true; 00391 if (MyProc) 00392 SetLatch(&MyProc->procLatch); 00393 00394 errno = save_errno; 00395 } 00396 00397 /* SIGUSR1: used for latch wakeups */ 00398 static void 00399 bgwriter_sigusr1_handler(SIGNAL_ARGS) 00400 { 00401 int save_errno = errno; 00402 00403 latch_sigusr1_handler(); 00404 00405 errno = save_errno; 00406 }