Header And Logo

PostgreSQL
| The world's most advanced open source database.

bgwriter.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * bgwriter.c
00004  *
00005  * The background writer (bgwriter) is new as of Postgres 8.0.  It attempts
00006  * to keep regular backends from having to write out dirty shared buffers
00007  * (which they would only do when needing to free a shared buffer to read in
00008  * another page).  In the best scenario all writes from shared buffers will
00009  * be issued by the background writer process.  However, regular backends are
00010  * still empowered to issue writes if the bgwriter fails to maintain enough
00011  * clean shared buffers.
00012  *
00013  * As of Postgres 9.2 the bgwriter no longer handles checkpoints.
00014  *
00015  * The bgwriter is started by the postmaster as soon as the startup subprocess
00016  * finishes, or as soon as recovery begins if we are doing archive recovery.
00017  * It remains alive until the postmaster commands it to terminate.
00018  * Normal termination is by SIGTERM, which instructs the bgwriter to exit(0).
00019  * Emergency termination is by SIGQUIT; like any backend, the bgwriter will
00020  * simply abort and exit on SIGQUIT.
00021  *
00022  * If the bgwriter exits unexpectedly, the postmaster treats that the same
00023  * as a backend crash: shared memory may be corrupted, so remaining backends
00024  * should be killed by SIGQUIT and then a recovery cycle started.
00025  *
00026  *
00027  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00028  *
00029  *
00030  * IDENTIFICATION
00031  *    src/backend/postmaster/bgwriter.c
00032  *
00033  *-------------------------------------------------------------------------
00034  */
00035 #include "postgres.h"
00036 
00037 #include <signal.h>
00038 #include <sys/time.h>
00039 #include <time.h>
00040 #include <unistd.h>
00041 
00042 #include "access/xlog.h"
00043 #include "access/xlog_internal.h"
00044 #include "libpq/pqsignal.h"
00045 #include "miscadmin.h"
00046 #include "pgstat.h"
00047 #include "postmaster/bgwriter.h"
00048 #include "storage/bufmgr.h"
00049 #include "storage/buf_internals.h"
00050 #include "storage/fd.h"
00051 #include "storage/ipc.h"
00052 #include "storage/lwlock.h"
00053 #include "storage/proc.h"
00054 #include "storage/shmem.h"
00055 #include "storage/smgr.h"
00056 #include "storage/spin.h"
00057 #include "utils/guc.h"
00058 #include "utils/memutils.h"
00059 #include "utils/resowner.h"
00060 
00061 
/*
 * How many BgWriterDelay intervals one "hibernation" sleep lasts.
 * (Possibly this ought to be a configurable setting.)
 */
#define HIBERNATE_FACTOR            50

/*
 * GUC parameters
 *
 * BgWriterDelay is the nominal sleep between bgwriter cycles, in
 * milliseconds.
 */
int         BgWriterDelay = 200;

/*
 * Flags raised by the signal handlers; the main loop polls them and does
 * the actual work.
 */
static volatile sig_atomic_t shutdown_requested = false;
static volatile sig_atomic_t got_SIGHUP = false;

/* Prototypes for the signal handlers defined later in this file */
static void BgSigHupHandler(SIGNAL_ARGS);
static void ReqShutdownHandler(SIGNAL_ARGS);
static void bgwriter_sigusr1_handler(SIGNAL_ARGS);
static void bg_quickdie(SIGNAL_ARGS);
00085 
00086 
00087 /*
00088  * Main entry point for bgwriter process
00089  *
00090  * This is invoked from AuxiliaryProcessMain, which has already created the
00091  * basic execution environment, but not enabled signals yet.
00092  */
00093 void
00094 BackgroundWriterMain(void)
00095 {
00096     sigjmp_buf  local_sigjmp_buf;
00097     MemoryContext bgwriter_context;
00098     bool        prev_hibernate;
00099 
00100     /*
00101      * If possible, make this process a group leader, so that the postmaster
00102      * can signal any child processes too.  (bgwriter probably never has any
00103      * child processes, but for consistency we make all postmaster child
00104      * processes do this.)
00105      */
00106 #ifdef HAVE_SETSID
00107     if (setsid() < 0)
00108         elog(FATAL, "setsid() failed: %m");
00109 #endif
00110 
00111     /*
00112      * Properly accept or ignore signals the postmaster might send us.
00113      *
00114      * bgwriter doesn't participate in ProcSignal signalling, but a SIGUSR1
00115      * handler is still needed for latch wakeups.
00116      */
00117     pqsignal(SIGHUP, BgSigHupHandler);  /* set flag to read config file */
00118     pqsignal(SIGINT, SIG_IGN);
00119     pqsignal(SIGTERM, ReqShutdownHandler);      /* shutdown */
00120     pqsignal(SIGQUIT, bg_quickdie);     /* hard crash time */
00121     pqsignal(SIGALRM, SIG_IGN);
00122     pqsignal(SIGPIPE, SIG_IGN);
00123     pqsignal(SIGUSR1, bgwriter_sigusr1_handler);
00124     pqsignal(SIGUSR2, SIG_IGN);
00125 
00126     /*
00127      * Reset some signals that are accepted by postmaster but not here
00128      */
00129     pqsignal(SIGCHLD, SIG_DFL);
00130     pqsignal(SIGTTIN, SIG_DFL);
00131     pqsignal(SIGTTOU, SIG_DFL);
00132     pqsignal(SIGCONT, SIG_DFL);
00133     pqsignal(SIGWINCH, SIG_DFL);
00134 
00135     /* We allow SIGQUIT (quickdie) at all times */
00136     sigdelset(&BlockSig, SIGQUIT);
00137 
00138     /*
00139      * Create a resource owner to keep track of our resources (currently only
00140      * buffer pins).
00141      */
00142     CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");
00143 
00144     /*
00145      * Create a memory context that we will do all our work in.  We do this so
00146      * that we can reset the context during error recovery and thereby avoid
00147      * possible memory leaks.  Formerly this code just ran in
00148      * TopMemoryContext, but resetting that would be a really bad idea.
00149      */
00150     bgwriter_context = AllocSetContextCreate(TopMemoryContext,
00151                                              "Background Writer",
00152                                              ALLOCSET_DEFAULT_MINSIZE,
00153                                              ALLOCSET_DEFAULT_INITSIZE,
00154                                              ALLOCSET_DEFAULT_MAXSIZE);
00155     MemoryContextSwitchTo(bgwriter_context);
00156 
00157     /*
00158      * If an exception is encountered, processing resumes here.
00159      *
00160      * See notes in postgres.c about the design of this coding.
00161      */
00162     if (sigsetjmp(local_sigjmp_buf, 1) != 0)
00163     {
00164         /* Since not using PG_TRY, must reset error stack by hand */
00165         error_context_stack = NULL;
00166 
00167         /* Prevent interrupts while cleaning up */
00168         HOLD_INTERRUPTS();
00169 
00170         /* Report the error to the server log */
00171         EmitErrorReport();
00172 
00173         /*
00174          * These operations are really just a minimal subset of
00175          * AbortTransaction().  We don't have very many resources to worry
00176          * about in bgwriter, but we do have LWLocks, buffers, and temp files.
00177          */
00178         LWLockReleaseAll();
00179         AbortBufferIO();
00180         UnlockBuffers();
00181         /* buffer pins are released here: */
00182         ResourceOwnerRelease(CurrentResourceOwner,
00183                              RESOURCE_RELEASE_BEFORE_LOCKS,
00184                              false, true);
00185         /* we needn't bother with the other ResourceOwnerRelease phases */
00186         AtEOXact_Buffers(false);
00187         AtEOXact_SMgr();
00188         AtEOXact_Files();
00189         AtEOXact_HashTables(false);
00190 
00191         /*
00192          * Now return to normal top-level context and clear ErrorContext for
00193          * next time.
00194          */
00195         MemoryContextSwitchTo(bgwriter_context);
00196         FlushErrorState();
00197 
00198         /* Flush any leaked data in the top-level context */
00199         MemoryContextResetAndDeleteChildren(bgwriter_context);
00200 
00201         /* Now we can allow interrupts again */
00202         RESUME_INTERRUPTS();
00203 
00204         /*
00205          * Sleep at least 1 second after any error.  A write error is likely
00206          * to be repeated, and we don't want to be filling the error logs as
00207          * fast as we can.
00208          */
00209         pg_usleep(1000000L);
00210 
00211         /*
00212          * Close all open files after any error.  This is helpful on Windows,
00213          * where holding deleted files open causes various strange errors.
00214          * It's not clear we need it elsewhere, but shouldn't hurt.
00215          */
00216         smgrcloseall();
00217     }
00218 
00219     /* We can now handle ereport(ERROR) */
00220     PG_exception_stack = &local_sigjmp_buf;
00221 
00222     /*
00223      * Unblock signals (they were blocked when the postmaster forked us)
00224      */
00225     PG_SETMASK(&UnBlockSig);
00226 
00227     /*
00228      * Reset hibernation state after any error.
00229      */
00230     prev_hibernate = false;
00231 
00232     /*
00233      * Loop forever
00234      */
00235     for (;;)
00236     {
00237         bool        can_hibernate;
00238         int         rc;
00239 
00240         /* Clear any already-pending wakeups */
00241         ResetLatch(&MyProc->procLatch);
00242 
00243         if (got_SIGHUP)
00244         {
00245             got_SIGHUP = false;
00246             ProcessConfigFile(PGC_SIGHUP);
00247         }
00248         if (shutdown_requested)
00249         {
00250             /*
00251              * From here on, elog(ERROR) should end with exit(1), not send
00252              * control back to the sigsetjmp block above
00253              */
00254             ExitOnAnyError = true;
00255             /* Normal exit from the bgwriter is here */
00256             proc_exit(0);       /* done */
00257         }
00258 
00259         /*
00260          * Do one cycle of dirty-buffer writing.
00261          */
00262         can_hibernate = BgBufferSync();
00263 
00264         /*
00265          * Send off activity statistics to the stats collector
00266          */
00267         pgstat_send_bgwriter();
00268 
00269         if (FirstCallSinceLastCheckpoint())
00270         {
00271             /*
00272              * After any checkpoint, close all smgr files.  This is so we
00273              * won't hang onto smgr references to deleted files indefinitely.
00274              */
00275             smgrcloseall();
00276         }
00277 
00278         /*
00279          * Sleep until we are signaled or BgWriterDelay has elapsed.
00280          *
00281          * Note: the feedback control loop in BgBufferSync() expects that we
00282          * will call it every BgWriterDelay msec.  While it's not critical for
00283          * correctness that that be exact, the feedback loop might misbehave
00284          * if we stray too far from that.  Hence, avoid loading this process
00285          * down with latch events that are likely to happen frequently during
00286          * normal operation.
00287          */
00288         rc = WaitLatch(&MyProc->procLatch,
00289                        WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
00290                        BgWriterDelay /* ms */ );
00291 
00292         /*
00293          * If no latch event and BgBufferSync says nothing's happening, extend
00294          * the sleep in "hibernation" mode, where we sleep for much longer
00295          * than bgwriter_delay says.  Fewer wakeups save electricity.  When a
00296          * backend starts using buffers again, it will wake us up by setting
00297          * our latch.  Because the extra sleep will persist only as long as no
00298          * buffer allocations happen, this should not distort the behavior of
00299          * BgBufferSync's control loop too badly; essentially, it will think
00300          * that the system-wide idle interval didn't exist.
00301          *
00302          * There is a race condition here, in that a backend might allocate a
00303          * buffer between the time BgBufferSync saw the alloc count as zero
00304          * and the time we call StrategyNotifyBgWriter.  While it's not
00305          * critical that we not hibernate anyway, we try to reduce the odds of
00306          * that by only hibernating when BgBufferSync says nothing's happening
00307          * for two consecutive cycles.  Also, we mitigate any possible
00308          * consequences of a missed wakeup by not hibernating forever.
00309          */
00310         if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate)
00311         {
00312             /* Ask for notification at next buffer allocation */
00313             StrategyNotifyBgWriter(&MyProc->procLatch);
00314             /* Sleep ... */
00315             rc = WaitLatch(&MyProc->procLatch,
00316                            WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
00317                            BgWriterDelay * HIBERNATE_FACTOR);
00318             /* Reset the notification request in case we timed out */
00319             StrategyNotifyBgWriter(NULL);
00320         }
00321 
00322         /*
00323          * Emergency bailout if postmaster has died.  This is to avoid the
00324          * necessity for manual cleanup of all postmaster children.
00325          */
00326         if (rc & WL_POSTMASTER_DEATH)
00327             exit(1);
00328 
00329         prev_hibernate = can_hibernate;
00330     }
00331 }
00332 
00333 
00334 /* --------------------------------
00335  *      signal handler routines
00336  * --------------------------------
00337  */
00338 
00339 /*
00340  * bg_quickdie() occurs when signalled SIGQUIT by the postmaster.
00341  *
00342  * Some backend has bought the farm,
00343  * so we need to stop what we're doing and exit.
00344  */
00345 static void
00346 bg_quickdie(SIGNAL_ARGS)
00347 {
00348     PG_SETMASK(&BlockSig);
00349 
00350     /*
00351      * We DO NOT want to run proc_exit() callbacks -- we're here because
00352      * shared memory may be corrupted, so we don't want to try to clean up our
00353      * transaction.  Just nail the windows shut and get out of town.  Now that
00354      * there's an atexit callback to prevent third-party code from breaking
00355      * things by calling exit() directly, we have to reset the callbacks
00356      * explicitly to make this work as intended.
00357      */
00358     on_exit_reset();
00359 
00360     /*
00361      * Note we do exit(2) not exit(0).  This is to force the postmaster into a
00362      * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
00363      * backend.  This is necessary precisely because we don't clean up our
00364      * shared memory state.  (The "dead man switch" mechanism in pmsignal.c
00365      * should ensure the postmaster sees this as a crash, too, but no harm in
00366      * being doubly sure.)
00367      */
00368     exit(2);
00369 }
00370 
00371 /* SIGHUP: set flag to re-read config file at next convenient time */
00372 static void
00373 BgSigHupHandler(SIGNAL_ARGS)
00374 {
00375     int         save_errno = errno;
00376 
00377     got_SIGHUP = true;
00378     if (MyProc)
00379         SetLatch(&MyProc->procLatch);
00380 
00381     errno = save_errno;
00382 }
00383 
00384 /* SIGTERM: set flag to shutdown and exit */
00385 static void
00386 ReqShutdownHandler(SIGNAL_ARGS)
00387 {
00388     int         save_errno = errno;
00389 
00390     shutdown_requested = true;
00391     if (MyProc)
00392         SetLatch(&MyProc->procLatch);
00393 
00394     errno = save_errno;
00395 }
00396 
/*
 * bgwriter_sigusr1_handler -- SIGUSR1 handler
 *
 * SIGUSR1 serves latch wakeups here, so the signal is simply passed on to
 * the latch code.
 */
static void
bgwriter_sigusr1_handler(SIGNAL_ARGS)
{
    const int   errno_on_entry = errno;

    latch_sigusr1_handler();

    /* Leave errno as the interrupted code had it */
    errno = errno_on_entry;
}