00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "postgres_fe.h"
00025
00026 #include <ctype.h>
00027 #include <dirent.h>
00028 #include <sys/stat.h>
00029 #include <fcntl.h>
00030 #include <signal.h>
00031
00032 #ifdef WIN32
00033 int getopt(int argc, char *const argv[], const char *optstring);
00034 #else
00035 #include <sys/time.h>
00036 #include <unistd.h>
00037
00038 #ifdef HAVE_GETOPT_H
00039 #include <getopt.h>
00040 #endif
00041 #endif
00042
00043 extern char *optarg;
00044 extern int optind;
00045
00046 const char *progname;
00047
00048
00049 int sleeptime = 5;
00050 int waittime = -1;
00051
00052 int maxwaittime = 0;
00053 int keepfiles = 0;
00054 int maxretries = 3;
00055 bool debug = false;
00056 bool need_cleanup = false;
00057
00058
00059 #ifndef WIN32
00060 static volatile sig_atomic_t signaled = false;
00061 #endif
00062
00063 char *archiveLocation;
00064 char *triggerPath;
00065 char *xlogFilePath;
00066 char *nextWALFileName;
00067 char *restartWALFileName;
00068 char *priorWALFileName;
00069 char WALFilePath[MAXPGPATH];
00070 char restoreCommand[MAXPGPATH];
00071 char exclusiveCleanupFileName[MAXPGPATH];
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
/*
 * Failover modes, as acted upon by the wait loop in main():
 *	NoFailover		keep waiting for the next file
 *	SmartFailover	restore the file if it is ready, otherwise exit
 *	FastFailover	exit immediately
 */
enum
{
	NoFailover = 0,
	SmartFailover = 1,
	FastFailover = 2
};

static int	Failover = NoFailover;

/* How the file is brought from the archive to its destination */
enum
{
	RESTORE_COMMAND_COPY = 0,
	RESTORE_COMMAND_LINK = 1
};
int			restoreCommandType;

/* Kind of file currently being requested */
enum
{
	XLOG_DATA = 0,
	XLOG_HISTORY = 1,
	XLOG_BACKUP_LABEL = 2
};
int			nextWALFileType;

/*
 * Build the shell command "<cmd> "<arg1>" "<arg2>"" in restoreCommand.
 * cmd must be a string literal, because it is joined to the format string
 * by literal concatenation.
 */
#define SET_RESTORE_COMMAND(cmd, arg1, arg2) \
	snprintf(restoreCommand, MAXPGPATH, cmd " \"%s\" \"%s\"", (arg1), (arg2))

/* Shared result buffer for every stat() call in this file */
struct stat stat_buf;
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
/* Length of a WAL data file name: three fields of 8 hexadecimal digits */
#define XLOG_DATA_FNAME_LEN		24

/*
 * Write the 24-character file name built from the three unsigned values
 * tli, log and seg into fname, which must have room for
 * XLOG_DATA_FNAME_LEN + 1 bytes.
 */
#define XLogFileName(fname, tli, log, seg)	\
	snprintf((fname), XLOG_DATA_FNAME_LEN + 1, "%08X%08X%08X", \
			 (tli), (log), (seg))
00131
00132
00133
00134
00135
00136
00137
00138 static void
00139 CustomizableInitialize(void)
00140 {
00141 #ifdef WIN32
00142 snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName);
00143 switch (restoreCommandType)
00144 {
00145 case RESTORE_COMMAND_LINK:
00146 SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath);
00147 break;
00148 case RESTORE_COMMAND_COPY:
00149 default:
00150 SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath);
00151 break;
00152 }
00153 #else
00154 snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName);
00155 switch (restoreCommandType)
00156 {
00157 case RESTORE_COMMAND_LINK:
00158 #if HAVE_WORKING_LINK
00159 SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath);
00160 break;
00161 #endif
00162 case RESTORE_COMMAND_COPY:
00163 default:
00164 SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath);
00165 break;
00166 }
00167 #endif
00168
00169
00170
00171
00172
00173
00174 if (stat(archiveLocation, &stat_buf) != 0)
00175 {
00176 fprintf(stderr, "%s: archive location \"%s\" does not exist\n", progname, archiveLocation);
00177 fflush(stderr);
00178 exit(2);
00179 }
00180 }
00181
00182
00183
00184
00185
00186
00187 static bool
00188 CustomizableNextWALFileReady()
00189 {
00190 if (stat(WALFilePath, &stat_buf) == 0)
00191 {
00192
00193
00194
00195
00196 if (strlen(nextWALFileName) > 24 &&
00197 strspn(nextWALFileName, "0123456789ABCDEF") == 24 &&
00198 strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".backup"),
00199 ".backup") == 0)
00200 {
00201 nextWALFileType = XLOG_BACKUP_LABEL;
00202 return true;
00203 }
00204 else if (stat_buf.st_size == XLOG_SEG_SIZE)
00205 {
00206 #ifdef WIN32
00207
00208
00209
00210
00211
00212
00213
00214
00215 pg_usleep(sleeptime * 1000000L);
00216 #endif
00217 nextWALFileType = XLOG_DATA;
00218 return true;
00219 }
00220
00221
00222
00223
00224 if (stat_buf.st_size > XLOG_SEG_SIZE)
00225 {
00226 if (debug)
00227 {
00228 fprintf(stderr, "file size greater than expected\n");
00229 fflush(stderr);
00230 }
00231 exit(3);
00232 }
00233 }
00234
00235 return false;
00236 }
00237
00238 #define MaxSegmentsPerLogFile ( 0xFFFFFFFF / XLOG_SEG_SIZE )
00239
00240 static void
00241 CustomizableCleanupPriorWALFiles(void)
00242 {
00243
00244
00245
00246 if (nextWALFileType == XLOG_DATA)
00247 {
00248 int rc;
00249 DIR *xldir;
00250 struct dirent *xlde;
00251
00252
00253
00254
00255
00256
00257 if ((xldir = opendir(archiveLocation)) != NULL)
00258 {
00259 while ((xlde = readdir(xldir)) != NULL)
00260 {
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275 if (strlen(xlde->d_name) == XLOG_DATA_FNAME_LEN &&
00276 strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_DATA_FNAME_LEN &&
00277 strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0)
00278 {
00279 #ifdef WIN32
00280 snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, xlde->d_name);
00281 #else
00282 snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, xlde->d_name);
00283 #endif
00284
00285 if (debug)
00286 fprintf(stderr, "\nremoving file \"%s\"", WALFilePath);
00287
00288 rc = unlink(WALFilePath);
00289 if (rc != 0)
00290 {
00291 fprintf(stderr, "\n%s: ERROR: could not remove file \"%s\": %s\n",
00292 progname, WALFilePath, strerror(errno));
00293 break;
00294 }
00295 }
00296 }
00297 if (debug)
00298 fprintf(stderr, "\n");
00299 }
00300 else
00301 fprintf(stderr, "%s: could not open archive location \"%s\": %s\n",
00302 progname, archiveLocation, strerror(errno));
00303
00304 closedir(xldir);
00305 fflush(stderr);
00306 }
00307 }
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320 static bool
00321 SetWALFileNameForCleanup(void)
00322 {
00323 uint32 tli = 1,
00324 log = 0,
00325 seg = 0;
00326 uint32 log_diff = 0,
00327 seg_diff = 0;
00328 bool cleanup = false;
00329
00330 if (restartWALFileName)
00331 {
00332
00333
00334
00335
00336
00337
00338 if (strcmp(restartWALFileName, nextWALFileName) > 0)
00339 return false;
00340
00341 strcpy(exclusiveCleanupFileName, restartWALFileName);
00342 return true;
00343 }
00344
00345 if (keepfiles > 0)
00346 {
00347 sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg);
00348 if (tli > 0 && log >= 0 && seg > 0)
00349 {
00350 log_diff = keepfiles / MaxSegmentsPerLogFile;
00351 seg_diff = keepfiles % MaxSegmentsPerLogFile;
00352 if (seg_diff > seg)
00353 {
00354 log_diff++;
00355 seg = MaxSegmentsPerLogFile - (seg_diff - seg);
00356 }
00357 else
00358 seg -= seg_diff;
00359
00360 if (log >= log_diff)
00361 {
00362 log -= log_diff;
00363 cleanup = true;
00364 }
00365 else
00366 {
00367 log = 0;
00368 seg = 0;
00369 }
00370 }
00371 }
00372
00373 XLogFileName(exclusiveCleanupFileName, tli, log, seg);
00374
00375 return cleanup;
00376 }
00377
00378
00379
00380
00381
00382
00383
00384
00385 static void
00386 CheckForExternalTrigger(void)
00387 {
00388 char buf[32];
00389 int fd;
00390 int len;
00391
00392
00393
00394
00395
00396
00397
00398 if (!triggerPath || stat(triggerPath, &stat_buf) != 0)
00399 return;
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409 if (stat_buf.st_size == 0)
00410 {
00411 Failover = SmartFailover;
00412 fprintf(stderr, "trigger file found: smart failover\n");
00413 fflush(stderr);
00414 return;
00415 }
00416
00417 if ((fd = open(triggerPath, O_RDWR, 0)) < 0)
00418 {
00419 fprintf(stderr, "WARNING: could not open \"%s\": %s\n",
00420 triggerPath, strerror(errno));
00421 fflush(stderr);
00422 return;
00423 }
00424
00425 if ((len = read(fd, buf, sizeof(buf))) < 0)
00426 {
00427 fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
00428 triggerPath, strerror(errno));
00429 fflush(stderr);
00430 close(fd);
00431 return;
00432 }
00433 buf[len] = '\0';
00434
00435 if (strncmp(buf, "smart", 5) == 0)
00436 {
00437 Failover = SmartFailover;
00438 fprintf(stderr, "trigger file found: smart failover\n");
00439 fflush(stderr);
00440 close(fd);
00441 return;
00442 }
00443
00444 if (strncmp(buf, "fast", 4) == 0)
00445 {
00446 Failover = FastFailover;
00447
00448 fprintf(stderr, "trigger file found: fast failover\n");
00449 fflush(stderr);
00450
00451
00452
00453
00454
00455
00456 if (ftruncate(fd, 0) < 0)
00457 {
00458 fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
00459 triggerPath, strerror(errno));
00460 fflush(stderr);
00461 }
00462 close(fd);
00463
00464 return;
00465 }
00466 close(fd);
00467
00468 fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath);
00469 fflush(stderr);
00470 return;
00471 }
00472
00473
00474
00475
00476
00477
00478 static bool
00479 RestoreWALFileForRecovery(void)
00480 {
00481 int rc = 0;
00482 int numretries = 0;
00483
00484 if (debug)
00485 {
00486 fprintf(stderr, "running restore: ");
00487 fflush(stderr);
00488 }
00489
00490 while (numretries <= maxretries)
00491 {
00492 rc = system(restoreCommand);
00493 if (rc == 0)
00494 {
00495 if (debug)
00496 {
00497 fprintf(stderr, "OK\n");
00498 fflush(stderr);
00499 }
00500 return true;
00501 }
00502 pg_usleep(numretries++ * sleeptime * 1000000L);
00503 }
00504
00505
00506
00507
00508 if (debug)
00509 fprintf(stderr, "not restored\n");
00510 return false;
00511 }
00512
00513 static void
00514 usage(void)
00515 {
00516 printf("%s allows PostgreSQL warm standby servers to be configured.\n\n", progname);
00517 printf("Usage:\n");
00518 printf(" %s [OPTION]... ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n", progname);
00519 printf("\nOptions:\n");
00520 printf(" -c copy file from archive (default)\n");
00521 printf(" -d generate lots of debugging output (testing only)\n");
00522 printf(" -k NUMFILESTOKEEP if RESTARTWALFILE is not used, remove files prior to limit\n"
00523 " (0 keeps all)\n");
00524 printf(" -l does nothing; use of link is now deprecated\n");
00525 printf(" -r MAXRETRIES max number of times to retry, with progressive wait\n"
00526 " (default=3)\n");
00527 printf(" -s SLEEPTIME seconds to wait between file checks (min=1, max=60,\n"
00528 " default=5)\n");
00529 printf(" -t TRIGGERFILE trigger file to initiate failover (no default)\n");
00530 printf(" -V, --version output version information, then exit\n");
00531 printf(" -w MAXWAITTIME max seconds to wait for a file (0=no limit) (default=0)\n");
00532 printf(" -?, --help show this help, then exit\n");
00533 printf("\n"
00534 "Main intended use as restore_command in recovery.conf:\n"
00535 " restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n"
00536 "e.g.\n"
00537 " restore_command = 'pg_standby /mnt/server/archiverdir %%f %%p %%r'\n");
00538 printf("\nReport bugs to <[email protected]>.\n");
00539 }
00540
00541 #ifndef WIN32
00542 static void
00543 sighandler(int sig)
00544 {
00545 signaled = true;
00546 }
00547
00548
00549 static void
00550 sigquit_handler(int sig)
00551 {
00552 pqsignal(SIGINT, SIG_DFL);
00553 kill(getpid(), SIGINT);
00554 }
00555 #endif
00556
00557
00558 int
00559 main(int argc, char **argv)
00560 {
00561 int c;
00562
00563 progname = get_progname(argv[0]);
00564
00565 if (argc > 1)
00566 {
00567 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
00568 {
00569 usage();
00570 exit(0);
00571 }
00572 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
00573 {
00574 puts("pg_standby (PostgreSQL) " PG_VERSION);
00575 exit(0);
00576 }
00577 }
00578
00579 #ifndef WIN32
00580
00581
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595 (void) pqsignal(SIGUSR1, sighandler);
00596 (void) pqsignal(SIGINT, sighandler);
00597 (void) pqsignal(SIGQUIT, sigquit_handler);
00598 #endif
00599
00600 while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1)
00601 {
00602 switch (c)
00603 {
00604 case 'c':
00605 restoreCommandType = RESTORE_COMMAND_COPY;
00606 break;
00607 case 'd':
00608 debug = true;
00609 break;
00610 case 'k':
00611 keepfiles = atoi(optarg);
00612 if (keepfiles < 0)
00613 {
00614 fprintf(stderr, "%s: -k keepfiles must be >= 0\n", progname);
00615 exit(2);
00616 }
00617 break;
00618 case 'l':
00619
00620
00621
00622
00623
00624
00625 #ifdef NOT_USED
00626 restoreCommandType = RESTORE_COMMAND_LINK;
00627 #endif
00628 break;
00629 case 'r':
00630 maxretries = atoi(optarg);
00631 if (maxretries < 0)
00632 {
00633 fprintf(stderr, "%s: -r maxretries must be >= 0\n", progname);
00634 exit(2);
00635 }
00636 break;
00637 case 's':
00638 sleeptime = atoi(optarg);
00639 if (sleeptime <= 0 || sleeptime > 60)
00640 {
00641 fprintf(stderr, "%s: -s sleeptime incorrectly set\n", progname);
00642 exit(2);
00643 }
00644 break;
00645 case 't':
00646 triggerPath = strdup(optarg);
00647 break;
00648 case 'w':
00649 maxwaittime = atoi(optarg);
00650 if (maxwaittime < 0)
00651 {
00652 fprintf(stderr, "%s: -w maxwaittime incorrectly set\n", progname);
00653 exit(2);
00654 }
00655 break;
00656 default:
00657 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
00658 exit(2);
00659 break;
00660 }
00661 }
00662
00663
00664
00665
00666 if (argc == 1)
00667 {
00668 fprintf(stderr, "%s: not enough command-line arguments\n", progname);
00669 exit(2);
00670 }
00671
00672
00673
00674
00675
00676
00677
00678 if (optind < argc)
00679 {
00680 archiveLocation = argv[optind];
00681 optind++;
00682 }
00683 else
00684 {
00685 fprintf(stderr, "%s: must specify archive location\n", progname);
00686 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
00687 exit(2);
00688 }
00689
00690 if (optind < argc)
00691 {
00692 nextWALFileName = argv[optind];
00693 optind++;
00694 }
00695 else
00696 {
00697 fprintf(stderr, "%s: must specify WAL file name as second non-option argument (use \"%%f\")\n", progname);
00698 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
00699 exit(2);
00700 }
00701
00702 if (optind < argc)
00703 {
00704 xlogFilePath = argv[optind];
00705 optind++;
00706 }
00707 else
00708 {
00709 fprintf(stderr, "%s: must specify xlog destination as third non-option argument (use \"%%p\")\n", progname);
00710 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
00711 exit(2);
00712 }
00713
00714 if (optind < argc)
00715 {
00716 restartWALFileName = argv[optind];
00717 optind++;
00718 }
00719
00720 CustomizableInitialize();
00721
00722 need_cleanup = SetWALFileNameForCleanup();
00723
00724 if (debug)
00725 {
00726 fprintf(stderr, "Trigger file: %s\n", triggerPath ? triggerPath : "<not set>");
00727 fprintf(stderr, "Waiting for WAL file: %s\n", nextWALFileName);
00728 fprintf(stderr, "WAL file path: %s\n", WALFilePath);
00729 fprintf(stderr, "Restoring to: %s\n", xlogFilePath);
00730 fprintf(stderr, "Sleep interval: %d second%s\n",
00731 sleeptime, (sleeptime > 1 ? "s" : " "));
00732 fprintf(stderr, "Max wait interval: %d %s\n",
00733 maxwaittime, (maxwaittime > 0 ? "seconds" : "forever"));
00734 fprintf(stderr, "Command for restore: %s\n", restoreCommand);
00735 fprintf(stderr, "Keep archive history: ");
00736 if (need_cleanup)
00737 fprintf(stderr, "%s and later\n", exclusiveCleanupFileName);
00738 else
00739 fprintf(stderr, "no cleanup required\n");
00740 fflush(stderr);
00741 }
00742
00743
00744
00745
00746
00747 if (strlen(nextWALFileName) > 8 &&
00748 strspn(nextWALFileName, "0123456789ABCDEF") == 8 &&
00749 strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".history"),
00750 ".history") == 0)
00751 {
00752 nextWALFileType = XLOG_HISTORY;
00753 if (RestoreWALFileForRecovery())
00754 exit(0);
00755 else
00756 {
00757 if (debug)
00758 {
00759 fprintf(stderr, "history file not found\n");
00760 fflush(stderr);
00761 }
00762 exit(1);
00763 }
00764 }
00765
00766
00767
00768
00769 for (;;)
00770 {
00771
00772 CheckForExternalTrigger();
00773 #ifndef WIN32
00774 if (signaled)
00775 {
00776 Failover = FastFailover;
00777 if (debug)
00778 {
00779 fprintf(stderr, "signaled to exit: fast failover\n");
00780 fflush(stderr);
00781 }
00782 }
00783 #endif
00784
00785
00786
00787
00788
00789 if (Failover == FastFailover)
00790 exit(1);
00791
00792 if (CustomizableNextWALFileReady())
00793 {
00794
00795
00796
00797
00798
00799
00800 if (RestoreWALFileForRecovery())
00801 {
00802 if (need_cleanup)
00803 CustomizableCleanupPriorWALFiles();
00804
00805 exit(0);
00806 }
00807 else
00808 {
00809
00810 exit(1);
00811 }
00812 }
00813
00814
00815 if (Failover == SmartFailover)
00816 exit(1);
00817
00818 if (sleeptime <= 60)
00819 pg_usleep(sleeptime * 1000000L);
00820
00821 waittime += sleeptime;
00822 if (waittime >= maxwaittime && maxwaittime > 0)
00823 {
00824 Failover = FastFailover;
00825 if (debug)
00826 {
00827 fprintf(stderr, "Timed out after %d seconds: fast failover\n",
00828 waittime);
00829 fflush(stderr);
00830 }
00831 }
00832 if (debug)
00833 {
00834 fprintf(stderr, "WAL file not present yet.");
00835 if (triggerPath)
00836 fprintf(stderr, " Checking for trigger file...");
00837 fprintf(stderr, "\n");
00838 fflush(stderr);
00839 }
00840 }
00841 }