Header And Logo

PostgreSQL
| The world's most advanced open source database.

pg_standby.c

Go to the documentation of this file.
00001 /*
00002  * contrib/pg_standby/pg_standby.c
00003  *
00004  *
00005  * pg_standby.c
00006  *
00007  * Production-ready example of how to create a Warm Standby
00008  * database server using continuous archiving as a
00009  * replication mechanism
00010  *
00011  * We separate the parameters for archive and nextWALfile
00012  * so that we can check the archive exists, even if the
00013  * WAL file doesn't (yet).
00014  *
00015  * This program will be executed once in full for each file
00016  * requested by the warm standby server.
00017  *
00018  * It is designed to cater to a variety of needs, as well
00019  * as providing a customizable section.
00020  *
00021  * Original author:     Simon Riggs  simon@2ndquadrant.com
00022  * Current maintainer:  Simon Riggs
00023  */
00024 #include "postgres_fe.h"
00025 
00026 #include <ctype.h>
00027 #include <dirent.h>
00028 #include <sys/stat.h>
00029 #include <fcntl.h>
00030 #include <signal.h>
00031 
00032 #ifdef WIN32
00033 int         getopt(int argc, char *const argv[], const char *optstring);
00034 #else
00035 #include <sys/time.h>
00036 #include <unistd.h>
00037 
00038 #ifdef HAVE_GETOPT_H
00039 #include <getopt.h>
00040 #endif
00041 #endif   /* ! WIN32 */
00042 
00043 extern char *optarg;
00044 extern int  optind;
00045 
00046 const char *progname;           /* set in main() from argv[0]; prefixes messages */
00047 
00048 /* Options and defaults */
00049 int         sleeptime = 5;      /* seconds to sleep between file checks (-s) */
00050 int         waittime = -1;      /* seconds we have been waiting so far; starts
00051                                  * at -1, meaning no wait yet */
00052 int         maxwaittime = 0;    /* max seconds to wait for a file (-w), 0 = no limit */
00053 int         keepfiles = 0;      /* number of WAL files to keep (-k), 0 keep all */
00054 int         maxretries = 3;     /* number of retries on restore command (-r) */
00055 bool        debug = false;      /* are we debugging? (-d) */
00056 bool        need_cleanup = false;       /* do we need to remove files from
00057                                          * archive? Set in main() from
00058                                          * SetWALFileNameForCleanup() */
00059 #ifndef WIN32
00060 static volatile sig_atomic_t signaled = false;
00061 #endif
00062 /* Positional command-line arguments and values derived from them */
00063 char       *archiveLocation;    /* where to find the archive? */
00064 char       *triggerPath;        /* where to find the trigger file? NULL if -t not given */
00065 char       *xlogFilePath;       /* where we are going to restore to */
00066 char       *nextWALFileName;    /* the file we need to get from archive */
00067 char       *restartWALFileName; /* the file from which we can restart restore */
00068 char       *priorWALFileName;   /* NOTE(review): not referenced elsewhere in the visible code */
00069 char        WALFilePath[MAXPGPATH];     /* the file path including archive; also reused as scratch by cleanup */
00070 char        restoreCommand[MAXPGPATH];  /* run this to restore */
00071 char        exclusiveCleanupFileName[MAXPGPATH];        /* earliest WAL file to
00072                                                          * keep; archive files sorting before it are removed */
00073 
00074 /*
00075  * Two types of failover are supported (smart and fast failover).
00076  *
00077  * The content of the trigger file determines the type of failover. If the
00078  * trigger file contains the word "smart" (or the file is empty), smart
00079  * failover is chosen: pg_standby acts as cp or ln command itself, on
00080  * successful completion all the available WAL records will be applied
00081  * resulting in zero data loss. But, it might take a long time to finish
00082  * recovery if there's a lot of unapplied WAL.
00083  *
00084  * On the other hand, if the trigger file contains the word "fast", the
00085  * recovery is finished immediately even if unapplied WAL files remain. Any
00086  * transactions in the unapplied WAL files are lost.
00087  *
00088  * An empty trigger file performs smart failover. SIGUSR1 or SIGINT triggers
00089  * fast failover. A timeout causes fast failover (smart failover would have
00090  * the same effect, since if the timeout is reached there is no unapplied WAL).
00091  */
00092 #define NoFailover      0       /* no trigger seen: keep waiting for WAL */
00093 #define SmartFailover   1       /* exit once the requested WAL file is unavailable */
00094 #define FastFailover    2       /* exit immediately, before looking for the WAL file */
00095 
00096 static int  Failover = NoFailover;  /* set by trigger file, signal or timeout */
00097 /* How the file is restored; selected with -c / -l */
00098 #define RESTORE_COMMAND_COPY 0
00099 #define RESTORE_COMMAND_LINK 1
00100 int         restoreCommandType;     /* zero-initialized, i.e. copy by default */
00101 /* Kind of file being requested, derived from nextWALFileName */
00102 #define XLOG_DATA            0
00103 #define XLOG_HISTORY         1
00104 #define XLOG_BACKUP_LABEL    2
00105 int         nextWALFileType;
00106 /* Format restoreCommand as: cmd "arg1" "arg2"; cmd must be a string literal */
00107 #define SET_RESTORE_COMMAND(cmd, arg1, arg2) \
00108     snprintf(restoreCommand, MAXPGPATH, cmd " \"%s\" \"%s\"", arg1, arg2)
00109 /* Scratch buffer shared by every stat() call in this file */
00110 struct stat stat_buf;
00111 
00112 /* =====================================================================
00113  *
00114  *        Customizable section
00115  *
00116  * =====================================================================
00117  *
00118  *  Currently, this section assumes that the Archive is a locally
00119  *  accessible directory. If you want to make other assumptions,
00120  *  such as using a vendor-specific archive and access API, these
00121  *  routines are the ones you'll need to change. You're
00122  *  encouraged to submit any changes to pgsql-hackers@postgresql.org
00123  *  or personally to the current maintainer. Those changes may be
00124  *  folded in to later versions of this program.
00125  */
00126 
00127 #define XLOG_DATA_FNAME_LEN     24      /* three 8-digit hex fields: tli, log, seg */
00128 /* Reworked from access/xlog_internal.h; writes the 24-character name plus NUL into fname */
00129 #define XLogFileName(fname, tli, log, seg)  \
00130     snprintf(fname, XLOG_DATA_FNAME_LEN + 1, "%08X%08X%08X", tli, log, seg)
00131 
00132 /*
00133  *  Initialize allows customized commands into the warm standby program.
00134  *
00135  *  As an example, and probably the common case, we use either
00136  *  cp/ln commands on *nix, or copy/move command on Windows.
00137  */
00138 static void
00139 CustomizableInitialize(void)
00140 {
00141 #ifdef WIN32
00142     snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName);
00143     switch (restoreCommandType)
00144     {
00145         case RESTORE_COMMAND_LINK:
00146             SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath);
00147             break;
00148         case RESTORE_COMMAND_COPY:
00149         default:
00150             SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath);
00151             break;
00152     }
00153 #else
00154     snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName);
00155     switch (restoreCommandType)
00156     {
00157         case RESTORE_COMMAND_LINK:
00158 #if HAVE_WORKING_LINK
00159             SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath);
00160             break;
00161 #endif
00162         case RESTORE_COMMAND_COPY:
00163         default:
00164             SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath);
00165             break;
00166     }
00167 #endif
00168 
00169     /*
00170      * This code assumes that archiveLocation is a directory You may wish to
00171      * add code to check for tape libraries, etc.. So, since it is a
00172      * directory, we use stat to test if it's accessible
00173      */
00174     if (stat(archiveLocation, &stat_buf) != 0)
00175     {
00176         fprintf(stderr, "%s: archive location \"%s\" does not exist\n", progname, archiveLocation);
00177         fflush(stderr);
00178         exit(2);
00179     }
00180 }
00181 
00182 /*
00183  * CustomizableNextWALFileReady()
00184  *
00185  *    Is the requested file ready yet?
00186  */
00187 static bool
00188 CustomizableNextWALFileReady()
00189 {
00190     if (stat(WALFilePath, &stat_buf) == 0)
00191     {
00192         /*
00193          * If it's a backup file, return immediately. If it's a regular file
00194          * return only if it's the right size already.
00195          */
00196         if (strlen(nextWALFileName) > 24 &&
00197             strspn(nextWALFileName, "0123456789ABCDEF") == 24 &&
00198         strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".backup"),
00199                ".backup") == 0)
00200         {
00201             nextWALFileType = XLOG_BACKUP_LABEL;
00202             return true;
00203         }
00204         else if (stat_buf.st_size == XLOG_SEG_SIZE)
00205         {
00206 #ifdef WIN32
00207 
00208             /*
00209              * Windows 'cp' sets the final file size before the copy is
00210              * complete, and not yet ready to be opened by pg_standby. So we
00211              * wait for sleeptime secs before attempting to restore. If that
00212              * is not enough, we will rely on the retry/holdoff mechanism.
00213              * GNUWin32's cp does not have this problem.
00214              */
00215             pg_usleep(sleeptime * 1000000L);
00216 #endif
00217             nextWALFileType = XLOG_DATA;
00218             return true;
00219         }
00220 
00221         /*
00222          * If still too small, wait until it is the correct size
00223          */
00224         if (stat_buf.st_size > XLOG_SEG_SIZE)
00225         {
00226             if (debug)
00227             {
00228                 fprintf(stderr, "file size greater than expected\n");
00229                 fflush(stderr);
00230             }
00231             exit(3);
00232         }
00233     }
00234 
00235     return false;
00236 }
00237 
00238 #define MaxSegmentsPerLogFile ( 0xFFFFFFFF / XLOG_SEG_SIZE )   /* used by SetWALFileNameForCleanup() to borrow from the log number */
00239 
00240 static void
00241 CustomizableCleanupPriorWALFiles(void)
00242 {
00243     /*
00244      * Work out name of prior file from current filename
00245      */
00246     if (nextWALFileType == XLOG_DATA)
00247     {
00248         int         rc;
00249         DIR        *xldir;
00250         struct dirent *xlde;
00251 
00252         /*
00253          * Assume it's OK to keep failing. The failure situation may change
00254          * over time, so we'd rather keep going on the main processing than
00255          * fail because we couldn't clean up yet.
00256          */
00257         if ((xldir = opendir(archiveLocation)) != NULL)
00258         {
00259             while ((xlde = readdir(xldir)) != NULL)
00260             {
00261                 /*
00262                  * We ignore the timeline part of the XLOG segment identifiers
00263                  * in deciding whether a segment is still needed.  This
00264                  * ensures that we won't prematurely remove a segment from a
00265                  * parent timeline. We could probably be a little more
00266                  * proactive about removing segments of non-parent timelines,
00267                  * but that would be a whole lot more complicated.
00268                  *
00269                  * We use the alphanumeric sorting property of the filenames
00270                  * to decide which ones are earlier than the
00271                  * exclusiveCleanupFileName file. Note that this means files
00272                  * are not removed in the order they were originally written,
00273                  * in case this worries you.
00274                  */
00275                 if (strlen(xlde->d_name) == XLOG_DATA_FNAME_LEN &&
00276                     strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_DATA_FNAME_LEN &&
00277                   strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0)
00278                 {
00279 #ifdef WIN32
00280                     snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, xlde->d_name);
00281 #else
00282                     snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, xlde->d_name);
00283 #endif
00284 
00285                     if (debug)
00286                         fprintf(stderr, "\nremoving file \"%s\"", WALFilePath);
00287 
00288                     rc = unlink(WALFilePath);
00289                     if (rc != 0)
00290                     {
00291                         fprintf(stderr, "\n%s: ERROR: could not remove file \"%s\": %s\n",
00292                                 progname, WALFilePath, strerror(errno));
00293                         break;
00294                     }
00295                 }
00296             }
00297             if (debug)
00298                 fprintf(stderr, "\n");
00299         }
00300         else
00301             fprintf(stderr, "%s: could not open archive location \"%s\": %s\n",
00302                     progname, archiveLocation, strerror(errno));
00303 
00304         closedir(xldir);
00305         fflush(stderr);
00306     }
00307 }
00308 
00309 /* =====================================================================
00310  *        End of Customizable section
00311  * =====================================================================
00312  */
00313 
00314 /*
00315  * SetWALFileNameForCleanup()
00316  *
00317  *    Set the earliest WAL filename that we want to keep on the archive
00318  *    and decide whether we need_cleanup
00319  */
00320 static bool
00321 SetWALFileNameForCleanup(void)
00322 {
00323     uint32      tli = 1,
00324                 log = 0,
00325                 seg = 0;
00326     uint32      log_diff = 0,
00327                 seg_diff = 0;
00328     bool        cleanup = false;
00329 
00330     if (restartWALFileName)
00331     {
00332         /*
00333          * Don't do cleanup if the restartWALFileName provided is later than
00334          * the xlog file requested. This is an error and we must not remove
00335          * these files from archive. This shouldn't happen, but better safe
00336          * than sorry.
00337          */
00338         if (strcmp(restartWALFileName, nextWALFileName) > 0)
00339             return false;
00340 
00341         strcpy(exclusiveCleanupFileName, restartWALFileName);
00342         return true;
00343     }
00344 
00345     if (keepfiles > 0)
00346     {
00347         sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg);
00348         if (tli > 0 && log >= 0 && seg > 0)
00349         {
00350             log_diff = keepfiles / MaxSegmentsPerLogFile;
00351             seg_diff = keepfiles % MaxSegmentsPerLogFile;
00352             if (seg_diff > seg)
00353             {
00354                 log_diff++;
00355                 seg = MaxSegmentsPerLogFile - (seg_diff - seg);
00356             }
00357             else
00358                 seg -= seg_diff;
00359 
00360             if (log >= log_diff)
00361             {
00362                 log -= log_diff;
00363                 cleanup = true;
00364             }
00365             else
00366             {
00367                 log = 0;
00368                 seg = 0;
00369             }
00370         }
00371     }
00372 
00373     XLogFileName(exclusiveCleanupFileName, tli, log, seg);
00374 
00375     return cleanup;
00376 }
00377 
00378 /*
00379  * CheckForExternalTrigger()
00380  *
00381  *    Is there a trigger file? Sets global 'Failover' variable to indicate
00382  *    what kind of a trigger file it was. A "fast" trigger file is turned
00383  *    into a "smart" file as a side-effect.
00384  */
00385 static void
00386 CheckForExternalTrigger(void)
00387 {
00388     char        buf[32];
00389     int         fd;
00390     int         len;
00391 
00392     /*
00393      * Look for a trigger file, if that option has been selected
00394      *
00395      * We use stat() here because triggerPath is always a file rather than
00396      * potentially being in an archive
00397      */
00398     if (!triggerPath || stat(triggerPath, &stat_buf) != 0)
00399         return;
00400 
00401     /*
00402      * An empty trigger file performs smart failover. There's a little race
00403      * condition here: if the writer of the trigger file has just created the
00404      * file, but not yet written anything to it, we'll treat that as smart
00405      * shutdown even if the other process was just about to write "fast" to
00406      * it. But that's fine: we'll restore one more WAL file, and when we're
00407      * invoked next time, we'll see the word "fast" and fail over immediately.
00408      */
00409     if (stat_buf.st_size == 0)
00410     {
00411         Failover = SmartFailover;
00412         fprintf(stderr, "trigger file found: smart failover\n");
00413         fflush(stderr);
00414         return;
00415     }
00416 
00417     if ((fd = open(triggerPath, O_RDWR, 0)) < 0)
00418     {
00419         fprintf(stderr, "WARNING: could not open \"%s\": %s\n",
00420                 triggerPath, strerror(errno));
00421         fflush(stderr);
00422         return;
00423     }
00424 
00425     if ((len = read(fd, buf, sizeof(buf))) < 0)
00426     {
00427         fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
00428                 triggerPath, strerror(errno));
00429         fflush(stderr);
00430         close(fd);
00431         return;
00432     }
00433     buf[len] = '\0';
00434 
00435     if (strncmp(buf, "smart", 5) == 0)
00436     {
00437         Failover = SmartFailover;
00438         fprintf(stderr, "trigger file found: smart failover\n");
00439         fflush(stderr);
00440         close(fd);
00441         return;
00442     }
00443 
00444     if (strncmp(buf, "fast", 4) == 0)
00445     {
00446         Failover = FastFailover;
00447 
00448         fprintf(stderr, "trigger file found: fast failover\n");
00449         fflush(stderr);
00450 
00451         /*
00452          * Turn it into a "smart" trigger by truncating the file. Otherwise if
00453          * the server asks us again to restore a segment that was restored
00454          * already, we would return "not found" and upset the server.
00455          */
00456         if (ftruncate(fd, 0) < 0)
00457         {
00458             fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
00459                     triggerPath, strerror(errno));
00460             fflush(stderr);
00461         }
00462         close(fd);
00463 
00464         return;
00465     }
00466     close(fd);
00467 
00468     fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath);
00469     fflush(stderr);
00470     return;
00471 }
00472 
00473 /*
00474  * RestoreWALFileForRecovery()
00475  *
00476  *    Perform the action required to restore the file from archive
00477  */
00478 static bool
00479 RestoreWALFileForRecovery(void)
00480 {
00481     int         rc = 0;
00482     int         numretries = 0;
00483 
00484     if (debug)
00485     {
00486         fprintf(stderr, "running restore:      ");
00487         fflush(stderr);
00488     }
00489 
00490     while (numretries <= maxretries)
00491     {
00492         rc = system(restoreCommand);
00493         if (rc == 0)
00494         {
00495             if (debug)
00496             {
00497                 fprintf(stderr, "OK\n");
00498                 fflush(stderr);
00499             }
00500             return true;
00501         }
00502         pg_usleep(numretries++ * sleeptime * 1000000L);
00503     }
00504 
00505     /*
00506      * Allow caller to add additional info
00507      */
00508     if (debug)
00509         fprintf(stderr, "not restored\n");
00510     return false;
00511 }
00512 
00513 static void
00514 usage(void)
00515 {
00516     printf("%s allows PostgreSQL warm standby servers to be configured.\n\n", progname);
00517     printf("Usage:\n");
00518     printf("  %s [OPTION]... ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n", progname);
00519     printf("\nOptions:\n");
00520     printf("  -c                 copy file from archive (default)\n");
00521     printf("  -d                 generate lots of debugging output (testing only)\n");
00522     printf("  -k NUMFILESTOKEEP  if RESTARTWALFILE is not used, remove files prior to limit\n"
00523            "                     (0 keeps all)\n");
00524     printf("  -l                 does nothing; use of link is now deprecated\n");
00525     printf("  -r MAXRETRIES      max number of times to retry, with progressive wait\n"
00526            "                     (default=3)\n");
00527     printf("  -s SLEEPTIME       seconds to wait between file checks (min=1, max=60,\n"
00528            "                     default=5)\n");
00529     printf("  -t TRIGGERFILE     trigger file to initiate failover (no default)\n");
00530     printf("  -V, --version      output version information, then exit\n");
00531     printf("  -w MAXWAITTIME     max seconds to wait for a file (0=no limit) (default=0)\n");
00532     printf("  -?, --help         show this help, then exit\n");
00533     printf("\n"
00534            "Main intended use as restore_command in recovery.conf:\n"
00535            "  restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n"
00536            "e.g.\n"
00537     "  restore_command = 'pg_standby /mnt/server/archiverdir %%f %%p %%r'\n");
00538     printf("\nReport bugs to <[email protected]>.\n");
00539 }
00540 
00541 #ifndef WIN32
00542 static void
00543 sighandler(int sig)
00544 {
00545     signaled = true;
00546 }
00547 
/*
 * We don't want SIGQUIT to core dump: restore the default SIGINT action
 * and send SIGINT to ourselves instead.
 */
static void
sigquit_handler(int sig)
{
    pid_t       self;

    (void) sig;
    pqsignal(SIGINT, SIG_DFL);
    self = getpid();
    kill(self, SIGINT);
}
00555 #endif
00556 
00557 /*------------ MAIN ----------------------------------------*/
00558 int
00559 main(int argc, char **argv)
00560 {
00561     int         c;
00562 
00563     progname = get_progname(argv[0]);
00564 
00565     if (argc > 1)
00566     {
00567         if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
00568         {
00569             usage();
00570             exit(0);
00571         }
00572         if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
00573         {
00574             puts("pg_standby (PostgreSQL) " PG_VERSION);
00575             exit(0);
00576         }
00577     }
00578 
00579 #ifndef WIN32
00580 
00581     /*
00582      * You can send SIGUSR1 to trigger failover.
00583      *
00584      * Postmaster uses SIGQUIT to request immediate shutdown. The default
00585      * action is to core dump, but we don't want that, so trap it and commit
00586      * suicide without core dump.
00587      *
00588      * We used to use SIGINT and SIGQUIT to trigger failover, but that turned
00589      * out to be a bad idea because postmaster uses SIGQUIT to request
00590      * immediate shutdown. We still trap SIGINT, but that may change in a
00591      * future release.
00592      *
00593      * There's no way to trigger failover via signal on Windows.
00594      */
00595     (void) pqsignal(SIGUSR1, sighandler);
00596     (void) pqsignal(SIGINT, sighandler);    /* deprecated, use SIGUSR1 */
00597     (void) pqsignal(SIGQUIT, sigquit_handler);
00598 #endif
00599 
00600     while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1)
00601     {
00602         switch (c)
00603         {
00604             case 'c':           /* Use copy */
00605                 restoreCommandType = RESTORE_COMMAND_COPY;
00606                 break;
00607             case 'd':           /* Debug mode */
00608                 debug = true;
00609                 break;
00610             case 'k':           /* keepfiles */
00611                 keepfiles = atoi(optarg);
00612                 if (keepfiles < 0)
00613                 {
00614                     fprintf(stderr, "%s: -k keepfiles must be >= 0\n", progname);
00615                     exit(2);
00616                 }
00617                 break;
00618             case 'l':           /* Use link */
00619 
00620                 /*
00621                  * Link feature disabled, possibly permanently. Linking causes
00622                  * a problem after recovery ends that is not currently
00623                  * resolved by PostgreSQL. 25 Jun 2009
00624                  */
00625 #ifdef NOT_USED
00626                 restoreCommandType = RESTORE_COMMAND_LINK;
00627 #endif
00628                 break;
00629             case 'r':           /* Retries */
00630                 maxretries = atoi(optarg);
00631                 if (maxretries < 0)
00632                 {
00633                     fprintf(stderr, "%s: -r maxretries must be >= 0\n", progname);
00634                     exit(2);
00635                 }
00636                 break;
00637             case 's':           /* Sleep time */
00638                 sleeptime = atoi(optarg);
00639                 if (sleeptime <= 0 || sleeptime > 60)
00640                 {
00641                     fprintf(stderr, "%s: -s sleeptime incorrectly set\n", progname);
00642                     exit(2);
00643                 }
00644                 break;
00645             case 't':           /* Trigger file */
00646                 triggerPath = strdup(optarg);
00647                 break;
00648             case 'w':           /* Max wait time */
00649                 maxwaittime = atoi(optarg);
00650                 if (maxwaittime < 0)
00651                 {
00652                     fprintf(stderr, "%s: -w maxwaittime incorrectly set\n", progname);
00653                     exit(2);
00654                 }
00655                 break;
00656             default:
00657                 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
00658                 exit(2);
00659                 break;
00660         }
00661     }
00662 
00663     /*
00664      * Parameter checking - after checking to see if trigger file present
00665      */
00666     if (argc == 1)
00667     {
00668         fprintf(stderr, "%s: not enough command-line arguments\n", progname);
00669         exit(2);
00670     }
00671 
00672     /*
00673      * We will go to the archiveLocation to get nextWALFileName.
00674      * nextWALFileName may not exist yet, which would not be an error, so we
00675      * separate the archiveLocation and nextWALFileName so we can check
00676      * separately whether archiveLocation exists, if not that is an error
00677      */
00678     if (optind < argc)
00679     {
00680         archiveLocation = argv[optind];
00681         optind++;
00682     }
00683     else
00684     {
00685         fprintf(stderr, "%s: must specify archive location\n", progname);
00686         fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
00687         exit(2);
00688     }
00689 
00690     if (optind < argc)
00691     {
00692         nextWALFileName = argv[optind];
00693         optind++;
00694     }
00695     else
00696     {
00697         fprintf(stderr, "%s: must specify WAL file name as second non-option argument (use \"%%f\")\n", progname);
00698         fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
00699         exit(2);
00700     }
00701 
00702     if (optind < argc)
00703     {
00704         xlogFilePath = argv[optind];
00705         optind++;
00706     }
00707     else
00708     {
00709         fprintf(stderr, "%s: must specify xlog destination as third non-option argument (use \"%%p\")\n", progname);
00710         fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
00711         exit(2);
00712     }
00713 
00714     if (optind < argc)
00715     {
00716         restartWALFileName = argv[optind];
00717         optind++;
00718     }
00719 
00720     CustomizableInitialize();
00721 
00722     need_cleanup = SetWALFileNameForCleanup();
00723 
00724     if (debug)
00725     {
00726         fprintf(stderr, "Trigger file:         %s\n", triggerPath ? triggerPath : "<not set>");
00727         fprintf(stderr, "Waiting for WAL file: %s\n", nextWALFileName);
00728         fprintf(stderr, "WAL file path:        %s\n", WALFilePath);
00729         fprintf(stderr, "Restoring to:         %s\n", xlogFilePath);
00730         fprintf(stderr, "Sleep interval:       %d second%s\n",
00731                 sleeptime, (sleeptime > 1 ? "s" : " "));
00732         fprintf(stderr, "Max wait interval:    %d %s\n",
00733                 maxwaittime, (maxwaittime > 0 ? "seconds" : "forever"));
00734         fprintf(stderr, "Command for restore:  %s\n", restoreCommand);
00735         fprintf(stderr, "Keep archive history: ");
00736         if (need_cleanup)
00737             fprintf(stderr, "%s and later\n", exclusiveCleanupFileName);
00738         else
00739             fprintf(stderr, "no cleanup required\n");
00740         fflush(stderr);
00741     }
00742 
00743     /*
00744      * Check for initial history file: always the first file to be requested
00745      * It's OK if the file isn't there - all other files need to wait
00746      */
00747     if (strlen(nextWALFileName) > 8 &&
00748         strspn(nextWALFileName, "0123456789ABCDEF") == 8 &&
00749         strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".history"),
00750                ".history") == 0)
00751     {
00752         nextWALFileType = XLOG_HISTORY;
00753         if (RestoreWALFileForRecovery())
00754             exit(0);
00755         else
00756         {
00757             if (debug)
00758             {
00759                 fprintf(stderr, "history file not found\n");
00760                 fflush(stderr);
00761             }
00762             exit(1);
00763         }
00764     }
00765 
00766     /*
00767      * Main wait loop
00768      */
00769     for (;;)
00770     {
00771         /* Check for trigger file or signal first */
00772         CheckForExternalTrigger();
00773 #ifndef WIN32
00774         if (signaled)
00775         {
00776             Failover = FastFailover;
00777             if (debug)
00778             {
00779                 fprintf(stderr, "signaled to exit: fast failover\n");
00780                 fflush(stderr);
00781             }
00782         }
00783 #endif
00784 
00785         /*
00786          * Check for fast failover immediately, before checking if the
00787          * requested WAL file is available
00788          */
00789         if (Failover == FastFailover)
00790             exit(1);
00791 
00792         if (CustomizableNextWALFileReady())
00793         {
00794             /*
00795              * Once we have restored this file successfully we can remove some
00796              * prior WAL files. If this restore fails we musn't remove any
00797              * file because some of them will be requested again immediately
00798              * after the failed restore, or when we restart recovery.
00799              */
00800             if (RestoreWALFileForRecovery())
00801             {
00802                 if (need_cleanup)
00803                     CustomizableCleanupPriorWALFiles();
00804 
00805                 exit(0);
00806             }
00807             else
00808             {
00809                 /* Something went wrong in copying the file */
00810                 exit(1);
00811             }
00812         }
00813 
00814         /* Check for smart failover if the next WAL file was not available */
00815         if (Failover == SmartFailover)
00816             exit(1);
00817 
00818         if (sleeptime <= 60)
00819             pg_usleep(sleeptime * 1000000L);
00820 
00821         waittime += sleeptime;
00822         if (waittime >= maxwaittime && maxwaittime > 0)
00823         {
00824             Failover = FastFailover;
00825             if (debug)
00826             {
00827                 fprintf(stderr, "Timed out after %d seconds: fast failover\n",
00828                         waittime);
00829                 fflush(stderr);
00830             }
00831         }
00832         if (debug)
00833         {
00834             fprintf(stderr, "WAL file not present yet.");
00835             if (triggerPath)
00836                 fprintf(stderr, " Checking for trigger file...");
00837             fprintf(stderr, "\n");
00838             fflush(stderr);
00839         }
00840     }
00841 }