00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "postgres.h"
00016
00017 #include <ctype.h>
00018 #include <time.h>
00019 #include <fcntl.h>
00020 #include <sys/stat.h>
00021 #include <sys/time.h>
00022 #include <unistd.h>
00023
00024 #include "access/clog.h"
00025 #include "access/multixact.h"
00026 #include "access/subtrans.h"
00027 #include "access/timeline.h"
00028 #include "access/transam.h"
00029 #include "access/tuptoaster.h"
00030 #include "access/twophase.h"
00031 #include "access/xact.h"
00032 #include "access/xlog_internal.h"
00033 #include "access/xlogreader.h"
00034 #include "access/xlogutils.h"
00035 #include "catalog/catversion.h"
00036 #include "catalog/pg_control.h"
00037 #include "catalog/pg_database.h"
00038 #include "miscadmin.h"
00039 #include "pgstat.h"
00040 #include "postmaster/bgwriter.h"
00041 #include "postmaster/startup.h"
00042 #include "replication/walreceiver.h"
00043 #include "replication/walsender.h"
00044 #include "storage/bufmgr.h"
00045 #include "storage/fd.h"
00046 #include "storage/ipc.h"
00047 #include "storage/latch.h"
00048 #include "storage/pmsignal.h"
00049 #include "storage/predicate.h"
00050 #include "storage/proc.h"
00051 #include "storage/procarray.h"
00052 #include "storage/reinit.h"
00053 #include "storage/smgr.h"
00054 #include "storage/spin.h"
00055 #include "utils/builtins.h"
00056 #include "utils/guc.h"
00057 #include "utils/ps_status.h"
00058 #include "utils/relmapper.h"
00059 #include "utils/snapmgr.h"
00060 #include "utils/timestamp.h"
00061 #include "pg_trace.h"
00062
00063 extern uint32 bootstrap_data_checksum_version; /* declared here; its definition is not in view */
00064
00065 /* Names of the recovery control files and of the promotion signal files. */
00066 #define RECOVERY_COMMAND_FILE "recovery.conf"
00067 #define RECOVERY_COMMAND_DONE "recovery.done"
00068 #define PROMOTE_SIGNAL_FILE "promote"
00069 #define FAST_PROMOTE_SIGNAL_FILE "fast_promote"
00070
00071 /* Tunable settings with their compiled-in defaults. */
00072 /* NOTE(review): presumably set through the configuration system; the registration is not in view. */
00073 int CheckPointSegments = 3;
00074 int wal_keep_segments = 0;
00075 int XLOGbuffers = -1;
00076 int XLogArchiveTimeout = 0;
00077 bool XLogArchiveMode = false;
00078 char *XLogArchiveCommand = NULL;
00079 bool EnableHotStandby = false;
00080 bool fullPageWrites = true;
00081 bool log_checkpoints = false;
00082 int sync_method = DEFAULT_SYNC_METHOD;
00083 int wal_level = WAL_LEVEL_MINIMAL;
00084 int CommitDelay = 0;
00085 int CommitSiblings = 5;
00086 /* Debug switch, compiled in only with WAL_DEBUG; XLogInsert logs each inserted record when it is true. */
00087 #ifdef WAL_DEBUG
00088 bool XLOG_DEBUG = false;
00089 #endif
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100 /* Segment-count allowance derived from CheckPointSegments: 2 * CheckPointSegments + 1. */
00101 /* NOTE(review): its users are outside this view -- confirm exactly what it bounds. */
00102 #define XLOGfileslop (2*CheckPointSegments + 1)
00103
00104
00105
00106 /* Name/value table of the sync methods available in this build; entries other than "fsync" depend on platform macros. */
00107 /* The {NULL, 0, false} entry terminates the list. */
00108 const struct config_enum_entry sync_method_options[] = {
00109 {"fsync", SYNC_METHOD_FSYNC, false},
00110 #ifdef HAVE_FSYNC_WRITETHROUGH
00111 {"fsync_writethrough", SYNC_METHOD_FSYNC_WRITETHROUGH, false},
00112 #endif
00113 #ifdef HAVE_FDATASYNC
00114 {"fdatasync", SYNC_METHOD_FDATASYNC, false},
00115 #endif
00116 #ifdef OPEN_SYNC_FLAG
00117 {"open_sync", SYNC_METHOD_OPEN, false},
00118 #endif
00119 #ifdef OPEN_DATASYNC_FLAG
00120 {"open_datasync", SYNC_METHOD_OPEN_DSYNC, false},
00121 #endif
00122 {NULL, 0, false}
00123 };
00124
00125
00126
00127
00128
00129 /* Checkpoint statistics record. NOTE(review): it is filled in outside this view. */
00130 CheckpointStatsData CheckpointStats;
00131
00132
00133
00134 /* Timeline ID that AdvanceXLInsertBuffer stamps into each new WAL page header (xlp_tli) */
00135 /* and that XLogWrite passes to XLogFileNameP() when reporting seek/write failures. */
00136 TimeLineID ThisTimeLineID = 0;
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148 /* Recovery-related state. NOTE(review): nothing in the visible part of the file reads or writes these; confirm meanings at their users. */
00149 bool InRecovery = false;
00150
00151
00152 HotStandbyState standbyState = STANDBY_DISABLED;
00153
00154 static XLogRecPtr LastRec;
00155
00156
00157 static XLogRecPtr receivedUpto = 0;
00158 static TimeLineID receiveTLI = 0;
00159
00160
00161
00162
00163
00164
00165
00166 static bool lastFullPageWrites;
00167
00168
00169
00170
00171 /* Starts out true. NOTE(review): presumably a process-local cache of the shared recovery flag -- confirm. */
00172 static bool LocalRecoveryInProgress = true;
00173
00174
00175
00176
00177
00178 static bool LocalHotStandbyActive = false;
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189 /* Starts at -1. NOTE(review): the meaning of each value is defined where it is tested, outside this view. */
00190 static int LocalXLogInsertAllowed = -1;
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202 /* Archive-recovery request/state flags. NOTE(review): set and tested outside this view. */
00203 bool ArchiveRecoveryRequested = false;
00204 bool InArchiveRecovery = false;
00205
00206
00207 static bool restoredFromArchive = false;
00208
00209 /* Recovery options. NOTE(review): presumably loaded by readRecoveryCommandFile(), which is only declared in this view. */
00210 char *recoveryRestoreCommand = NULL;
00211 static char *recoveryEndCommand = NULL;
00212 static char *archiveCleanupCommand = NULL;
00213 static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
00214 static bool recoveryTargetInclusive = true;
00215 static bool recoveryPauseAtTarget = true;
00216 static TransactionId recoveryTargetXid;
00217 static TimestampTz recoveryTargetTime;
00218 static char *recoveryTargetName;
00219
00220 /* Standby-related options. NOTE(review): users are outside this view. */
00221 static bool StandbyModeRequested = false;
00222 static char *PrimaryConnInfo = NULL;
00223 static char *TriggerFile = NULL;
00224
00225
00226 bool StandbyMode = false;
00227
00228
00229 static bool fast_promote = false;
00230
00231 /* Details of where recovery stopped. NOTE(review): presumably filled in by recoveryStopsHere() -- confirm. */
00232 static TransactionId recoveryStopXid;
00233 static TimestampTz recoveryStopTime;
00234 static char recoveryStopName[MAXFNAMELEN];
00235 static bool recoveryStopAfter;
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260 /* Timeline bookkeeping for recovery. NOTE(review): all users are outside this view. */
00261 static TimeLineID recoveryTargetTLI;
00262 static bool recoveryTargetIsLatest = false;
00263 static List *expectedTLEs;
00264 static TimeLineID curFileTLI;
00265
00266
00267
00268
00269
00270
00271
00272 /* XLogInsert sets this to the start position of the record it inserts. */
00273 static XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr;
00274 /* XLogInsert sets this to the position just past the record it inserts. */
00275 XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr;
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285 /* Process-local copy of XLogCtl->Insert.RedoRecPtr; XLogInsert refreshes it while holding WALInsertLock. */
00286 /* Pages whose LSN is <= this value get a full-page image (see XLogCheckBuffer). */
00287 static XLogRecPtr RedoRecPtr;
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298 /* NOTE(review): not referenced in the visible part of the file. */
00299 static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr;
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338 /* A write/flush request: the WAL positions up to which writing and flushing are wanted. */
00339 typedef struct XLogwrtRqst
00340 {
00341 XLogRecPtr Write;
00342 XLogRecPtr Flush;
00343 } XLogwrtRqst;
00344 /* Progress made so far: the WAL positions up to which data has been written and flushed. */
00345 typedef struct XLogwrtResult
00346 {
00347 XLogRecPtr Write;
00348 XLogRecPtr Flush;
00349 } XLogwrtResult;
00350
00351
00352
00353 /* Insert-side state of the WAL buffers; XLogInsert reads and updates it while holding WALInsertLock. */
00354 typedef struct XLogCtlInsert
00355 {
00356 XLogRecPtr PrevRecord; /* start of the most recently inserted record; becomes the next record's xl_prev */
00357 int curridx; /* index of the buffer page currently being filled */
00358 XLogPageHeader currpage; /* header of that page */
00359 char *currpos; /* next free byte within that page */
00360 XLogRecPtr RedoRecPtr; /* copied into each process's local RedoRecPtr by XLogInsert */
00361 bool forcePageWrites; /* when set, XLogInsert considers full-page images even if fullPageWrites is off */
00362
00363
00364
00365
00366
00367
00368
00369 /* The copy of the full-page-writes setting that XLogInsert actually consults. */
00370 bool fullPageWrites;
00371
00372
00373
00374
00375
00376
00377
00378 /* Backup bookkeeping. NOTE(review): these three fields are used outside this view. */
00379 bool exclusiveBackup;
00380 int nonExclusiveBackups;
00381 XLogRecPtr lastBackupStart;
00382 } XLogCtlInsert;
00383
00384
00385
00386 /* Write-side state of the WAL buffers, used by XLogWrite. */
00387 typedef struct XLogCtlWrite
00388 {
00389 int curridx; /* index of the buffer page XLogWrite will look at next */
00390 pg_time_t lastSegSwitchTime; /* set to time(NULL) when XLogWrite finishes a segment or handles a switch */
00391 } XLogCtlWrite;
00392
00393
00394
00395 /* Central WAL control state, reached through the XLogCtl pointer. Fields without a comment are not used in the visible part of the file. */
00396 typedef struct XLogCtlData
00397 {
00398 /* Insert-side state (see XLogCtlInsert). */
00399 XLogCtlInsert Insert;
00400
00401 /* Requested write/flush positions; the visible code updates them while holding info_lck. */
00402 XLogwrtRqst LogwrtRqst;
00403 uint32 ckptXidEpoch;
00404 TransactionId ckptXid;
00405 XLogRecPtr asyncXactLSN;
00406 XLogSegNo lastRemovedSegNo;
00407
00408 /* NOTE(review): ulsn_lck presumably guards unloggedLSN -- confirm at its users. */
00409 XLogRecPtr unloggedLSN;
00410 slock_t ulsn_lck;
00411
00412 /* Write-side state (see XLogCtlWrite). */
00413 XLogCtlWrite Write;
00414
00415
00416
00417 /* Written/flushed positions. The visible code reads them under WALWriteLock or info_lck */
00418 /* and stores them under info_lck (XLogInsert's log-switch path). */
00419 XLogwrtResult LogwrtResult;
00420
00421
00422
00423 /* WAL buffer pages: page i starts at pages + i * XLOG_BLCKSZ, xlblocks[i] is the WAL position */
00424 /* at which page i ends, and XLogCacheBlck is the highest valid page index. */
00425
00426 char *pages;
00427 XLogRecPtr *xlblocks;
00428 int XLogCacheBlck;
00429
00430
00431
00432
00433
00434
00435
00436 TimeLineID ThisTimeLineID;
00437 TimeLineID PrevTimeLineID;
00438
00439
00440
00441
00442
00443 char archiveCleanupCommand[MAXPGPATH];
00444
00445
00446
00447
00448
00449 bool SharedRecoveryInProgress;
00450
00451
00452
00453
00454
00455 bool SharedHotStandbyActive;
00456
00457
00458
00459
00460
00461
00462 bool WalWriterSleeping;
00463
00464
00465
00466
00467
00468
00469 Latch recoveryWakeupLatch;
00470
00471
00472
00473
00474
00475
00476
00477 XLogRecPtr lastCheckPointRecPtr;
00478 CheckPoint lastCheckPoint;
00479
00480
00481
00482
00483
00484
00485
00486 XLogRecPtr lastReplayedEndRecPtr;
00487 TimeLineID lastReplayedTLI;
00488 XLogRecPtr replayEndRecPtr;
00489 TimeLineID replayEndTLI;
00490
00491 TimestampTz recoveryLastXTime;
00492
00493 TimeLineID RecoveryTargetTLI;
00494
00495
00496
00497
00498
00499 TimestampTz currentChunkStartTime;
00500
00501 bool recoveryPause;
00502
00503
00504
00505
00506
00507 XLogRecPtr lastFpwDisableRecPtr;
00508 /* Spinlock taken around accesses to LogwrtRqst / LogwrtResult in the visible code. */
00509 slock_t info_lck;
00510 } XLogCtlData;
00511
00512 static XLogCtlData *XLogCtl = NULL; /* the WAL control structure; NULL until set up outside this view */
00513
00514
00515
00516 /* Control-file contents; AdvanceXLInsertBuffer reads system_identifier from it for long page headers. */
00517 static ControlFileData *ControlFile = NULL;
00518
00519
00520
00521
00522
00523
00524
00525 /* Number of bytes still free on the current insert page. */
00526 #define INSERT_FREESPACE(Insert) \
00527 (XLOG_BLCKSZ - ((Insert)->currpos - (char *) (Insert)->currpage))
00528
00529 /* Set recptr to the WAL position of Insert->currpos: the end position of page curridx minus its free space. */
00530 #define INSERT_RECPTR(recptr,Insert,curridx) \
00531 (recptr) = XLogCtl->xlblocks[curridx] - INSERT_FREESPACE(Insert)
00532 /* Previous / next buffer index, wrapping around between 0 and XLogCtl->XLogCacheBlck. */
00533 #define PrevBufIdx(idx) \
00534 (((idx) == 0) ? XLogCtl->XLogCacheBlck : ((idx) - 1))
00535
00536 #define NextBufIdx(idx) \
00537 (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
00538
00539
00540
00541
00542 /* Process-local copy of XLogCtl->LogwrtResult; refreshed from the shared value at various points and possibly stale in between. */
00543 static XLogwrtResult LogwrtResult = {0, 0};
00544
00545
00546
00547
00548 /* Identifies where a WAL file is (or may be) read from. */
00549 typedef enum
00550 {
00551 XLOG_FROM_ANY = 0,
00552 XLOG_FROM_ARCHIVE,
00553 XLOG_FROM_PG_XLOG,
00554 XLOG_FROM_STREAM,
00555 } XLogSource;
00556
00557 /* Printable names, listed in the same order as the XLogSource values. */
00558 static const char *xlogSourceNames[] = { "any", "archive", "pg_xlog", "stream" };
00559
00560
00561
00562
00563
00564
00565 /* Segment file XLogWrite currently has open: descriptor (-1 when none), segment number, and current file offset. */
00566 static int openLogFile = -1;
00567 static XLogSegNo openLogSegNo = 0;
00568 static uint32 openLogOff = 0;
00569
00570
00571
00572
00573
00574
00575
00576
00577 /* Read-side file state. NOTE(review): used only outside this view. */
00578 static int readFile = -1;
00579 static XLogSegNo readSegNo = 0;
00580 static uint32 readOff = 0;
00581 static uint32 readLen = 0;
00582 static XLogSource readSource = 0;
00583
00584
00585
00586
00587
00588
00589
00590 /* Both sources start at 0, which is XLOG_FROM_ANY. NOTE(review): used only outside this view. */
00591 static XLogSource currentSource = 0;
00592 static bool lastSourceFailed = false;
00593 /* NOTE(review): presumably the private argument block handed to XLogPageRead via the reader -- confirm; not used in this view. */
00594 typedef struct XLogPageReadPrivate
00595 {
00596 int emode;
00597 bool fetching_ckpt;
00598 bool randAccess;
00599 } XLogPageReadPrivate;
00600
00601
00602
00603
00604
00605
00606
00607
00608 /* Replay bookkeeping. NOTE(review): none of these is read or written in the visible part of the file. */
00609 static TimestampTz XLogReceiptTime = 0;
00610 static XLogSource XLogReceiptSource = 0;
00611
00612
00613 static XLogRecPtr ReadRecPtr;
00614 static XLogRecPtr EndRecPtr;
00615
00616 static XLogRecPtr minRecoveryPoint;
00617
00618 static TimeLineID minRecoveryPointTLI;
00619 static bool updateMinRecoveryPoint = true;
00620
00621
00622
00623
00624
00625
00626 bool reachedConsistency = false;
00627
00628 static bool InRedo = false;
00629
00630
00631 static bool bgwriterLaunched = false;
00632
00633 /* Forward declarations of the file-local (static) functions. */
00634 static void readRecoveryCommandFile(void);
00635 static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
00636 static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
00637 static void recoveryPausesHere(void);
00638 static void SetLatestXTime(TimestampTz xtime);
00639 static void SetCurrentChunkStartTime(TimestampTz xtime);
00640 static void CheckRequiredParameterValues(void);
00641 static void XLogReportParameters(void);
00642 static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI,
00643 TimeLineID prevTLI);
00644 static void LocalSetXLogInsertAllowed(void);
00645 static void CreateEndOfRecoveryRecord(void);
00646 static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags);
00647 static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo);
00648
00649 static bool XLogCheckBuffer(XLogRecData *rdata, bool holdsExclusiveLock,
00650 XLogRecPtr *lsn, BkpBlock *bkpb);
00651 static Buffer RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb,
00652 char *blk, bool get_cleanup_lock, bool keep_buffer);
00653 static bool AdvanceXLInsertBuffer(bool new_segment);
00654 static bool XLogCheckpointNeeded(XLogSegNo new_segno);
00655 static void XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch);
00656 static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
00657 bool find_free, int *max_advance,
00658 bool use_lock);
00659 static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
00660 int source, bool notexistOk);
00661 static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source);
00662 static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
00663 int reqLen, XLogRecPtr targetRecPtr, char *readBuf,
00664 TimeLineID *readTLI);
00665 static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
00666 bool fetching_ckpt, XLogRecPtr tliRecPtr);
00667 static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
00668 static void XLogFileClose(void);
00669 static void PreallocXlogFiles(XLogRecPtr endptr);
00670 static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr);
00671 static void UpdateLastRemovedPtr(char *filename);
00672 static void ValidateXLOGDirectoryStructure(void);
00673 static void CleanupBackupHistory(void);
00674 static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
00675 static XLogRecord *ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
00676 int emode, bool fetching_ckpt);
00677 static void CheckRecoveryConsistency(void);
00678 static XLogRecord *ReadCheckpointRecord(XLogReaderState *xlogreader,
00679 XLogRecPtr RecPtr, int whichChkpti, bool report);
00680 static bool rescanLatestTimeLine(void);
00681 static void WriteControlFile(void);
00682 static void ReadControlFile(void);
00683 static char *str_time(pg_time_t tnow);
00684 static bool CheckForStandbyTrigger(void);
00685 /* xlog_outrec exists only in WAL_DEBUG builds. */
00686 #ifdef WAL_DEBUG
00687 static void xlog_outrec(StringInfo buf, XLogRecord *record);
00688 #endif
00689 static void pg_start_backup_callback(int code, Datum arg);
00690 static bool read_backup_label(XLogRecPtr *checkPointLoc,
00691 bool *backupEndRequired, bool *backupFromStandby);
00692 static void rm_redo_error_callback(void *arg);
00693 static int get_sync_bit(int method);
00694
00695
00696
00697
00698
00699
00700
00701 /* XLogInsert: append one WAL record for resource manager rmid, with flag bits info. */
00702 /* rdata is a chain of data chunks; chunks tied to a buffer may be replaced by a full-page image. */
00703 /* The chain is modified in place: chunks superseded by a page image get data = NULL and len = 0, */
00704 /* backup-block entries are linked onto its tail, and the copy loop advances data/len of split chunks. */
00705 /* Returns the WAL position just past the inserted record (after alignment padding). */
00706 /* Special returns: SizeOfXLogLongPHD in bootstrap mode for non-XLOG rmgrs (nothing is written), and */
00707 /* the segment start when XLOG_SWITCH is requested while the insert position is right after a segment's long header. */
00708 /* Raises ERROR if WAL insertion is not allowed or the header buffer cannot be allocated; */
00709 /* PANICs on reserved info bits, too many distinct buffers, or a zero-length non-switch record. */
00710
00711 XLogRecPtr
00712 XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
00713 {
00714 XLogCtlInsert *Insert = &XLogCtl->Insert;
00715 XLogRecPtr RecPtr;
00716 XLogRecPtr WriteRqst;
00717 uint32 freespace;
00718 int curridx;
00719 XLogRecData *rdt;
00720 XLogRecData *rdt_lastnormal;
00721 Buffer dtbuf[XLR_MAX_BKP_BLOCKS];
00722 bool dtbuf_bkp[XLR_MAX_BKP_BLOCKS];
00723 BkpBlock dtbuf_xlg[XLR_MAX_BKP_BLOCKS];
00724 XLogRecPtr dtbuf_lsn[XLR_MAX_BKP_BLOCKS];
00725 XLogRecData dtbuf_rdt1[XLR_MAX_BKP_BLOCKS];
00726 XLogRecData dtbuf_rdt2[XLR_MAX_BKP_BLOCKS];
00727 XLogRecData dtbuf_rdt3[XLR_MAX_BKP_BLOCKS];
00728 XLogRecData hdr_rdt;
00729 pg_crc32 rdata_crc;
00730 uint32 len,
00731 write_len;
00732 unsigned i;
00733 bool updrqst;
00734 bool doPageWrites;
00735 bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
00736 uint8 info_orig = info;
00737 static XLogRecord *rechdr;
00738 /* The record header buffer is allocated on the first call and reused by every later call. */
00739 if (rechdr == NULL)
00740 {
00741 rechdr = malloc(SizeOfXLogRecord);
00742 if (rechdr == NULL)
00743 elog(ERROR, "out of memory");
00744 MemSet(rechdr, 0, SizeOfXLogRecord);
00745 }
00746
00747 /* Refuse to insert when WAL insertion is not currently allowed. */
00748 if (!XLogInsertAllowed())
00749 elog(ERROR, "cannot make new WAL entries during recovery");
00750
00751 /* The caller's info must not use any bit covered by XLR_INFO_MASK. */
00752 if (info & XLR_INFO_MASK)
00753 elog(PANIC, "invalid xlog info mask %02X", info);
00754
00755 TRACE_POSTGRESQL_XLOG_INSERT(rmid, info);
00756
00757
00758
00759 /* In bootstrap mode only the XLOG resource manager is logged; */
00760 /* everything else gets a placeholder pointer and nothing is inserted. */
00761 if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID)
00762 {
00763 RecPtr = SizeOfXLogLongPHD;
00764 return RecPtr;
00765 }
00766
00767
00768
00769
00770
00771
00772
00773
00774
00775
00776
00777 /* Restart point: the analysis below runs without WALInsertLock and is redone from here */
00778 /* if the checks made after taking the lock show that it is stale. */
00779 /* dtbuf[] lists the distinct buffers referenced by rdata; dtbuf_bkp[i] is set when */
00780 /* buffer i is going to be logged as a full-page image. */
00781 begin:;
00782 for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
00783 {
00784 dtbuf[i] = InvalidBuffer;
00785 dtbuf_bkp[i] = false;
00786 }
00787
00788
00789
00790
00791
00792
00793 /* Sample the full-page-write settings now; they are rechecked once the lock is held. */
00794 doPageWrites = Insert->fullPageWrites || Insert->forcePageWrites;
00795 /* Walk the chain: sum the payload length and register each distinct buffer. */
00796 len = 0;
00797 for (rdt = rdata;;)
00798 {
00799 if (rdt->buffer == InvalidBuffer)
00800 {
00801 /* Chunk not tied to a buffer: always part of the record. */
00802 len += rdt->len;
00803 }
00804 else
00805 {
00806 /* Find this buffer in dtbuf[], or claim the first unused slot for it. */
00807 for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
00808 {
00809 if (rdt->buffer == dtbuf[i])
00810 {
00811 /* Buffer seen before: drop the chunk if its page is being backed up. */
00812 if (dtbuf_bkp[i])
00813 {
00814 rdt->data = NULL;
00815 rdt->len = 0;
00816 }
00817 else if (rdt->data)
00818 len += rdt->len;
00819 break;
00820 }
00821 if (dtbuf[i] == InvalidBuffer)
00822 {
00823 /* First reference to this buffer: decide whether it needs a page image. */
00824 dtbuf[i] = rdt->buffer;
00825 if (doPageWrites && XLogCheckBuffer(rdt, true,
00826 &(dtbuf_lsn[i]), &(dtbuf_xlg[i])))
00827 {
00828 dtbuf_bkp[i] = true;
00829 rdt->data = NULL;
00830 rdt->len = 0;
00831 }
00832 else if (rdt->data)
00833 len += rdt->len;
00834 break;
00835 }
00836 }
00837 if (i >= XLR_MAX_BKP_BLOCKS)
00838 elog(PANIC, "can backup at most %d blocks per xlog record",
00839 XLR_MAX_BKP_BLOCKS);
00840 }
00841 /* Stop on the last entry, leaving rdt pointing at it. */
00842 if (rdt->next == NULL)
00843 break;
00844 rdt = rdt->next;
00845 }
00846
00847
00848
00849
00850
00851
00852
00853 /* Only an XLOG_SWITCH record may end up with a zero data length. */
00854 if (len == 0 && !isLogSwitch)
00855 elog(PANIC, "invalid xlog record length %u", len);
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868 /* Remember the last caller-supplied chunk so the chain can be cut back on a restart, */
00869 /* then append a BkpBlock header plus the page contents for every backed-up buffer. */
00870 rdt_lastnormal = rdt;
00871 write_len = len;
00872 for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
00873 {
00874 BkpBlock *bkpb;
00875 char *page;
00876
00877 if (!dtbuf_bkp[i])
00878 continue;
00879 /* Flag in the record's info byte that backup block i is present. */
00880 info |= XLR_BKP_BLOCK(i);
00881
00882 bkpb = &(dtbuf_xlg[i]);
00883 page = (char *) BufferGetBlock(dtbuf[i]);
00884
00885 rdt->next = &(dtbuf_rdt1[i]);
00886 rdt = rdt->next;
00887
00888 rdt->data = (char *) bkpb;
00889 rdt->len = sizeof(BkpBlock);
00890 write_len += sizeof(BkpBlock);
00891
00892 rdt->next = &(dtbuf_rdt2[i]);
00893 rdt = rdt->next;
00894 /* No hole: log the whole page. Otherwise log the parts before and after the hole. */
00895 if (bkpb->hole_length == 0)
00896 {
00897 rdt->data = page;
00898 rdt->len = BLCKSZ;
00899 write_len += BLCKSZ;
00900 rdt->next = NULL;
00901 }
00902 else
00903 {
00904
00905 rdt->data = page;
00906 rdt->len = bkpb->hole_offset;
00907 write_len += bkpb->hole_offset;
00908
00909 rdt->next = &(dtbuf_rdt3[i]);
00910 rdt = rdt->next;
00911
00912 rdt->data = page + (bkpb->hole_offset + bkpb->hole_length);
00913 rdt->len = BLCKSZ - (bkpb->hole_offset + bkpb->hole_length);
00914 write_len += rdt->len;
00915 rdt->next = NULL;
00916 }
00917 }
00918
00919
00920
00921
00922
00923
00924
00925 /* CRC the data chunks (including backup blocks) now; the header is added to the CRC */
00926 /* further down, after xl_prev has been filled in. */
00927 INIT_CRC32(rdata_crc);
00928 for (rdt = rdata; rdt != NULL; rdt = rdt->next)
00929 COMP_CRC32(rdata_crc, rdt->data, rdt->len);
00930
00931
00932
00933
00934 /* Fill in the header fields that do not depend on the insert position. */
00935 rechdr->xl_xid = GetCurrentTransactionIdIfAny();
00936 rechdr->xl_tot_len = SizeOfXLogRecord + write_len;
00937 rechdr->xl_len = len;
00938 rechdr->xl_info = info;
00939 rechdr->xl_rmid = rmid;
00940 /* Put the header at the front of the chain that will be copied. */
00941 hdr_rdt.next = rdata;
00942 hdr_rdt.data = (char *) rechdr;
00943 hdr_rdt.len = SizeOfXLogRecord;
00944
00945 write_len += SizeOfXLogRecord;
00946
00947 START_CRIT_SECTION();
00948
00949 /* Serialize with other inserters. */
00950 LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
00951
00952
00953
00954
00955
00956
00957
00958
00959 /* Refresh the local RedoRecPtr. If it moved, a buffer that was judged not to need a */
00960 /* page image may need one now; in that case undo and start over. */
00961 if (RedoRecPtr != Insert->RedoRecPtr)
00962 {
00963 Assert(RedoRecPtr < Insert->RedoRecPtr);
00964 RedoRecPtr = Insert->RedoRecPtr;
00965
00966 if (doPageWrites)
00967 {
00968 for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
00969 {
00970 if (dtbuf[i] == InvalidBuffer)
00971 continue;
00972 if (dtbuf_bkp[i] == false &&
00973 dtbuf_lsn[i] <= RedoRecPtr)
00974 {
00975
00976
00977 /* Release the lock, cut the chain back to the caller's chunks, */
00978 /* restore info, and redo the analysis. */
00979 LWLockRelease(WALInsertLock);
00980 END_CRIT_SECTION();
00981 rdt_lastnormal->next = NULL;
00982 info = info_orig;
00983 goto begin;
00984 }
00985 }
00986 }
00987 }
00988
00989
00990
00991
00992
00993 /* Full-page writes were switched on after they were sampled above: */
00994 /* redo the analysis so that page images are considered. */
00995 if ((Insert->fullPageWrites || Insert->forcePageWrites) && !doPageWrites)
00996 {
00997
00998 LWLockRelease(WALInsertLock);
00999 END_CRIT_SECTION();
01000 rdt_lastnormal->next = NULL;
01001 info = info_orig;
01002 goto begin;
01003 }
01004
01005
01006
01007
01008 /* If the current page is exactly full, move to the next one before starting the record. */
01009 updrqst = false;
01010 freespace = INSERT_FREESPACE(Insert);
01011 if (freespace == 0)
01012 {
01013 updrqst = AdvanceXLInsertBuffer(false);
01014 freespace = INSERT_FREESPACE(Insert);
01015 }
01016
01017 /* RecPtr = WAL position at which this record starts. */
01018 curridx = Insert->curridx;
01019 INSERT_RECPTR(RecPtr, Insert, curridx);
01020
01021
01022
01023
01024
01025
01026 /* XLOG_SWITCH requested while the insert position is right after a segment's long page header: */
01027 /* insert nothing; just make sure WAL is flushed up to the segment start and return that position. */
01028 if (isLogSwitch && (RecPtr % XLogSegSize) == SizeOfXLogLongPHD)
01029 {
01030
01031 LWLockRelease(WALInsertLock);
01032
01033 RecPtr -= SizeOfXLogLongPHD;
01034
01035 LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
01036 LogwrtResult = XLogCtl->LogwrtResult;
01037 if (LogwrtResult.Flush < RecPtr)
01038 {
01039 XLogwrtRqst FlushRqst;
01040
01041 FlushRqst.Write = RecPtr;
01042 FlushRqst.Flush = RecPtr;
01043 XLogWrite(FlushRqst, false, false);
01044 }
01045 LWLockRelease(WALWriteLock);
01046
01047 END_CRIT_SECTION();
01048
01049 /* Both locks are released at this point. */
01050 WalSndWakeupProcessRequests();
01051 return RecPtr;
01052 }
01053
01054 /* Complete the header with the back-link to the previous record. */
01055 rechdr->xl_prev = Insert->PrevRecord;
01056
01057 /* Add the header fields that precede xl_crc to the CRC, finish it, and store it. */
01058 COMP_CRC32(rdata_crc, (char *) rechdr, offsetof(XLogRecord, xl_crc));
01059 FIN_CRC32(rdata_crc);
01060 rechdr->xl_crc = rdata_crc;
01061
01062 #ifdef WAL_DEBUG
01063 if (XLOG_DEBUG)
01064 {
01065 StringInfoData buf;
01066
01067 initStringInfo(&buf);
01068 appendStringInfo(&buf, "INSERT @ %X/%X: ",
01069 (uint32) (RecPtr >> 32), (uint32) RecPtr);
01070 xlog_outrec(&buf, rechdr);
01071 if (rdata->data != NULL)
01072 {
01073 appendStringInfo(&buf, " - ");
01074 RmgrTable[rechdr->xl_rmid].rm_desc(&buf, rechdr->xl_info, rdata->data);
01075 }
01076 elog(LOG, "%s", buf.data);
01077 pfree(buf.data);
01078 }
01079 #endif
01080
01081 /* Note where this record starts, both locally and as the next record's back-link. */
01082 ProcLastRecPtr = RecPtr;
01083 Insert->PrevRecord = RecPtr;
01084
01085
01086
01087 /* Copy the header and the data chunks into the WAL buffers, moving to new pages as needed. */
01088 rdata = &hdr_rdt;
01089 while (write_len)
01090 {
01091 while (rdata->data == NULL) /* skip chunks that were emptied above */
01092 rdata = rdata->next;
01093
01094 if (freespace > 0)
01095 {
01096 if (rdata->len > freespace)
01097 {
01098 memcpy(Insert->currpos, rdata->data, freespace);
01099 rdata->data += freespace;
01100 rdata->len -= freespace;
01101 write_len -= freespace;
01102 }
01103 else
01104 {
01105 memcpy(Insert->currpos, rdata->data, rdata->len);
01106 freespace -= rdata->len;
01107 write_len -= rdata->len;
01108 Insert->currpos += rdata->len;
01109 rdata = rdata->next;
01110 continue;
01111 }
01112 }
01113
01114 /* The page is full: advance to the next page and mark it as continuing this record. */
01115 updrqst = AdvanceXLInsertBuffer(false);
01116 curridx = Insert->curridx;
01117
01118 Insert->currpage->xlp_info |= XLP_FIRST_IS_CONTRECORD;
01119 Insert->currpage->xlp_rem_len = write_len;
01120 freespace = INSERT_FREESPACE(Insert);
01121 }
01122
01123 /* Round the insert position up to a MAXALIGN boundary for the next record. */
01124 Insert->currpos = (char *) Insert->currpage +
01125 MAXALIGN(Insert->currpos - (char *) Insert->currpage);
01126 freespace = INSERT_FREESPACE(Insert);
01127
01128
01129
01130
01131 /* RecPtr now becomes the end-of-record position, which is what gets returned. */
01132 INSERT_RECPTR(RecPtr, Insert, curridx);
01133
01134
01135
01136
01137
01138
01139
01140
01141 /* XLOG_SWITCH: flush the page holding the record and skip ahead to the next segment. */
01142 if (isLogSwitch)
01143 {
01144 XLogwrtRqst FlushRqst;
01145 XLogRecPtr OldSegEnd;
01146
01147 TRACE_POSTGRESQL_XLOG_SWITCH();
01148
01149 LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
01150
01151
01152
01153
01154 /* Write and flush through the end of the page containing the switch record. */
01155 WriteRqst = XLogCtl->xlblocks[curridx];
01156 FlushRqst.Write = WriteRqst;
01157 FlushRqst.Flush = WriteRqst;
01158 XLogWrite(FlushRqst, false, true);
01159
01160
01161 /* Set up the next buffer as the first page of the next segment. */
01162 (void) AdvanceXLInsertBuffer(true);
01163
01164
01165 curridx = Insert->curridx;
01166 Assert(curridx == XLogCtl->Write.curridx);
01167
01168 /* End of the new page minus one page = start of the new segment = end of the old one. */
01169 OldSegEnd = XLogCtl->xlblocks[curridx];
01170 OldSegEnd -= XLOG_BLCKSZ;
01171
01172 /* Treat the whole old segment as written and flushed. */
01173 LogwrtResult.Write = OldSegEnd;
01174 LogwrtResult.Flush = OldSegEnd;
01175
01176
01177
01178 /* Publish the new positions, advancing the shared requests if they lag behind. */
01179 {
01180
01181 volatile XLogCtlData *xlogctl = XLogCtl;
01182
01183 SpinLockAcquire(&xlogctl->info_lck);
01184 xlogctl->LogwrtResult = LogwrtResult;
01185 if (xlogctl->LogwrtRqst.Write < LogwrtResult.Write)
01186 xlogctl->LogwrtRqst.Write = LogwrtResult.Write;
01187 if (xlogctl->LogwrtRqst.Flush < LogwrtResult.Flush)
01188 xlogctl->LogwrtRqst.Flush = LogwrtResult.Flush;
01189 SpinLockRelease(&xlogctl->info_lck);
01190 }
01191
01192 LWLockRelease(WALWriteLock);
01193
01194 updrqst = false;
01195 }
01196 else
01197 {
01198
01199
01200 /* Normal record: work out how far the shared write request should reach. */
01201 if (freespace == 0)
01202 {
01203 /* The current page is exactly full, so it can be written out as well. */
01204 updrqst = true;
01205 }
01206 else
01207 {
01208 /* Otherwise request only through the end of the previous page (used only if updrqst is set). */
01209 curridx = PrevBufIdx(curridx);
01210 }
01211 WriteRqst = XLogCtl->xlblocks[curridx];
01212 }
01213
01214 LWLockRelease(WALInsertLock);
01215
01216 if (updrqst)
01217 {
01218
01219 volatile XLogCtlData *xlogctl = XLogCtl;
01220
01221 SpinLockAcquire(&xlogctl->info_lck);
01222 /* Advance the shared write request (never move it backwards) and refresh the local result copy. */
01223 if (xlogctl->LogwrtRqst.Write < WriteRqst)
01224 xlogctl->LogwrtRqst.Write = WriteRqst;
01225
01226 LogwrtResult = xlogctl->LogwrtResult;
01227 SpinLockRelease(&xlogctl->info_lck);
01228 }
01229
01230 XactLastRecEnd = RecPtr;
01231
01232 END_CRIT_SECTION();
01233
01234 /* WALInsertLock has been released by now. */
01235 WalSndWakeupProcessRequests();
01236
01237 return RecPtr;
01238 }
01239
01240
01241
01242
01243
01244
01245 static bool
01246 XLogCheckBuffer(XLogRecData *rdata, bool holdsExclusiveLock,
01247 XLogRecPtr *lsn, BkpBlock *bkpb)
01248 {
01249 Page page;
01250
01251 page = BufferGetPage(rdata->buffer);
01252
01253
01254
01255
01256
01257
01258
01259 if (holdsExclusiveLock)
01260 *lsn = PageGetLSN(page);
01261 else
01262 *lsn = BufferGetLSNAtomic(rdata->buffer);
01263
01264 if (*lsn <= RedoRecPtr)
01265 {
01266
01267
01268
01269 BufferGetTag(rdata->buffer, &bkpb->node, &bkpb->fork, &bkpb->block);
01270
01271 if (rdata->buffer_std)
01272 {
01273
01274 uint16 lower = ((PageHeader) page)->pd_lower;
01275 uint16 upper = ((PageHeader) page)->pd_upper;
01276
01277 if (lower >= SizeOfPageHeaderData &&
01278 upper > lower &&
01279 upper <= BLCKSZ)
01280 {
01281 bkpb->hole_offset = lower;
01282 bkpb->hole_length = upper - lower;
01283 }
01284 else
01285 {
01286
01287 bkpb->hole_offset = 0;
01288 bkpb->hole_length = 0;
01289 }
01290 }
01291 else
01292 {
01293
01294 bkpb->hole_offset = 0;
01295 bkpb->hole_length = 0;
01296 }
01297
01298 return true;
01299 }
01300
01301 return false;
01302 }
01303
01304
01305
01306
01307
01308
01309
01310
01311 /* AdvanceXLInsertBuffer: make the next WAL buffer page the current insert page. */
01312 /* If that slot still holds WAL that has not been written out, it is written first. */
01313 /* new_segment: start the new page at the next segment boundary (unless already on one). */
01314 /* Returns false if this call already advanced the shared LogwrtRqst.Write, */
01315 /* true if that update is still left to the caller. */
01316 /* NOTE(review): XLogInsert calls this while holding WALInsertLock; the function itself */
01317 /* does not take that lock -- confirm the requirement for any other callers. */
01318
01319 static bool
01320 AdvanceXLInsertBuffer(bool new_segment)
01321 {
01322 XLogCtlInsert *Insert = &XLogCtl->Insert;
01323 int nextidx = NextBufIdx(Insert->curridx);
01324 bool update_needed = true;
01325 XLogRecPtr OldPageRqstPtr;
01326 XLogwrtRqst WriteRqst;
01327 XLogRecPtr NewPageEndPtr;
01328 XLogRecPtr NewPageBeginPtr;
01329 XLogPageHeader NewPage;
01330
01331
01332
01333
01334 /* xlblocks[nextidx] is the end position of the WAL the slot held before; */
01335 /* the slot can be reused only once everything up to there has been written. */
01336 OldPageRqstPtr = XLogCtl->xlblocks[nextidx];
01337 if (LogwrtResult.Write < OldPageRqstPtr)
01338 {
01339
01340 XLogRecPtr FinishedPageRqstPtr;
01341
01342 FinishedPageRqstPtr = XLogCtl->xlblocks[Insert->curridx];
01343
01344 /* Under info_lck: request writing through the current page and refresh the local LogwrtResult. */
01345 {
01346
01347 volatile XLogCtlData *xlogctl = XLogCtl;
01348
01349 SpinLockAcquire(&xlogctl->info_lck);
01350 if (xlogctl->LogwrtRqst.Write < FinishedPageRqstPtr)
01351 xlogctl->LogwrtRqst.Write = FinishedPageRqstPtr;
01352 LogwrtResult = xlogctl->LogwrtResult;
01353 SpinLockRelease(&xlogctl->info_lck);
01354 }
01355 /* The shared request has been updated here, so the caller need not do it. */
01356 update_needed = false;
01357
01358
01359
01360
01361 /* Still not written according to the refreshed value: take care of it here. */
01362 if (LogwrtResult.Write < OldPageRqstPtr)
01363 {
01364
01365 LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
01366 LogwrtResult = XLogCtl->LogwrtResult;
01367 if (LogwrtResult.Write >= OldPageRqstPtr)
01368 {
01369 /* It got written while we were acquiring the lock; nothing left to do. */
01370 LWLockRelease(WALWriteLock);
01371 }
01372 else
01373 {
01374
01375
01376
01377 /* Write out through the old page; Flush = 0 asks for no flushing. */
01378 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
01379 WriteRqst.Write = OldPageRqstPtr;
01380 WriteRqst.Flush = 0;
01381 XLogWrite(WriteRqst, false, false);
01382 LWLockRelease(WALWriteLock);
01383 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
01384 }
01385 }
01386 }
01387
01388
01389
01390
01391 /* The slot is free now: the new page begins where the current page ends. */
01392 NewPageBeginPtr = XLogCtl->xlblocks[Insert->curridx];
01393
01394 if (new_segment)
01395 {
01396 /* Round up to the next segment boundary. */
01397 if (NewPageBeginPtr % XLogSegSize != 0)
01398 NewPageBeginPtr += XLogSegSize - NewPageBeginPtr % XLogSegSize;
01399 }
01400
01401 NewPageEndPtr = NewPageBeginPtr;
01402 NewPageEndPtr += XLOG_BLCKSZ;
01403 XLogCtl->xlblocks[nextidx] = NewPageEndPtr;
01404 NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
01405
01406 Insert->curridx = nextidx;
01407 Insert->currpage = NewPage;
01408 /* Insert position for a short page header; corrected below if the page gets a long header. */
01409 Insert->currpos = ((char *) NewPage) +SizeOfXLogShortPHD;
01410
01411
01412
01413
01414 /* Zero the entire page, then fill in the header fields. */
01415 MemSet((char *) NewPage, 0, XLOG_BLCKSZ);
01416
01417
01418
01419
01420 NewPage ->xlp_magic = XLOG_PAGE_MAGIC;
01421
01422
01423 NewPage ->xlp_tli = ThisTimeLineID;
01424 NewPage ->xlp_pageaddr = NewPageBeginPtr;
01425
01426
01427
01428
01429
01430
01431
01432
01433
01434
01435
01436
01437 /* XLP_BKP_REMOVABLE is set only while forcePageWrites is off. */
01438 if (!Insert->forcePageWrites)
01439 NewPage ->xlp_info |= XLP_BKP_REMOVABLE;
01440
01441
01442
01443 /* The first page of a segment gets the long header form. */
01444 if ((NewPage->xlp_pageaddr % XLogSegSize) == 0)
01445 {
01446 XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
01447
01448 NewLongPage->xlp_sysid = ControlFile->system_identifier;
01449 NewLongPage->xlp_seg_size = XLogSegSize;
01450 NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
01451 NewPage ->xlp_info |= XLP_LONG_HEADER;
01452
01453 Insert->currpos = ((char *) NewPage) +SizeOfXLogLongPHD;
01454 }
01455
01456 return update_needed;
01457 }
01458
01459
01460
01461
01462
01463
01464
01465
01466
01467
01468 static bool
01469 XLogCheckpointNeeded(XLogSegNo new_segno)
01470 {
01471 XLogSegNo old_segno;
01472
01473 XLByteToSeg(RedoRecPtr, old_segno);
01474
01475 if (new_segno >= old_segno + (uint64) (CheckPointSegments - 1))
01476 return true;
01477 return false;
01478 }
01479
01480
01481
01482
01483
01484
01485
01486
01487
01488
01489
01490
01491
01492
01493
01494
01495
01496 static void
01497 XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
01498 {
01499 XLogCtlWrite *Write = &XLogCtl->Write;
01500 bool ispartialpage;
01501 bool last_iteration;
01502 bool finishing_seg;
01503 bool use_existent;
01504 int curridx;
01505 int npages;
01506 int startidx;
01507 uint32 startoffset;
01508
01509
01510 Assert(CritSectionCount > 0);
01511
01512
01513
01514
01515 LogwrtResult = XLogCtl->LogwrtResult;
01516
01517
01518
01519
01520
01521
01522
01523
01524
01525
01526 npages = 0;
01527 startidx = 0;
01528 startoffset = 0;
01529
01530
01531
01532
01533
01534
01535
01536
01537 curridx = Write->curridx;
01538
01539 while (LogwrtResult.Write < WriteRqst.Write)
01540 {
01541
01542
01543
01544
01545
01546 if (LogwrtResult.Write >= XLogCtl->xlblocks[curridx])
01547 elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
01548 (uint32) (LogwrtResult.Write >> 32), (uint32) LogwrtResult.Write,
01549 (uint32) (XLogCtl->xlblocks[curridx] >> 32),
01550 (uint32) XLogCtl->xlblocks[curridx]);
01551
01552
01553 LogwrtResult.Write = XLogCtl->xlblocks[curridx];
01554 ispartialpage = WriteRqst.Write < LogwrtResult.Write;
01555
01556 if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo))
01557 {
01558
01559
01560
01561
01562 Assert(npages == 0);
01563 if (openLogFile >= 0)
01564 XLogFileClose();
01565 XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo);
01566
01567
01568 use_existent = true;
01569 openLogFile = XLogFileInit(openLogSegNo, &use_existent, true);
01570 openLogOff = 0;
01571 }
01572
01573
01574 if (openLogFile < 0)
01575 {
01576 XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo);
01577 openLogFile = XLogFileOpen(openLogSegNo);
01578 openLogOff = 0;
01579 }
01580
01581
01582 if (npages == 0)
01583 {
01584
01585 startidx = curridx;
01586 startoffset = (LogwrtResult.Write - XLOG_BLCKSZ) % XLogSegSize;
01587 }
01588 npages++;
01589
01590
01591
01592
01593
01594
01595
01596 last_iteration = WriteRqst.Write <= LogwrtResult.Write;
01597
01598 finishing_seg = !ispartialpage &&
01599 (startoffset + npages * XLOG_BLCKSZ) >= XLogSegSize;
01600
01601 if (last_iteration ||
01602 curridx == XLogCtl->XLogCacheBlck ||
01603 finishing_seg)
01604 {
01605 char *from;
01606 Size nbytes;
01607
01608
01609 if (openLogOff != startoffset)
01610 {
01611 if (lseek(openLogFile, (off_t) startoffset, SEEK_SET) < 0)
01612 ereport(PANIC,
01613 (errcode_for_file_access(),
01614 errmsg("could not seek in log file %s to offset %u: %m",
01615 XLogFileNameP(ThisTimeLineID, openLogSegNo),
01616 startoffset)));
01617 openLogOff = startoffset;
01618 }
01619
01620
01621 from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
01622 nbytes = npages * (Size) XLOG_BLCKSZ;
01623 errno = 0;
01624 if (write(openLogFile, from, nbytes) != nbytes)
01625 {
01626
01627 if (errno == 0)
01628 errno = ENOSPC;
01629 ereport(PANIC,
01630 (errcode_for_file_access(),
01631 errmsg("could not write to log file %s "
01632 "at offset %u, length %lu: %m",
01633 XLogFileNameP(ThisTimeLineID, openLogSegNo),
01634 openLogOff, (unsigned long) nbytes)));
01635 }
01636
01637
01638 openLogOff += nbytes;
01639 Write->curridx = ispartialpage ? curridx : NextBufIdx(curridx);
01640 npages = 0;
01641
01642
01643
01644
01645
01646
01647
01648
01649
01650
01651
01652
01653
01654
01655
01656
01657
01658 if (finishing_seg || (xlog_switch && last_iteration))
01659 {
01660 issue_xlog_fsync(openLogFile, openLogSegNo);
01661
01662
01663 WalSndWakeupRequest();
01664
01665 LogwrtResult.Flush = LogwrtResult.Write;
01666
01667 if (XLogArchivingActive())
01668 XLogArchiveNotifySeg(openLogSegNo);
01669
01670 Write->lastSegSwitchTime = (pg_time_t) time(NULL);
01671
01672
01673
01674
01675
01676
01677
01678
01679 if (IsUnderPostmaster && XLogCheckpointNeeded(openLogSegNo))
01680 {
01681 (void) GetRedoRecPtr();
01682 if (XLogCheckpointNeeded(openLogSegNo))
01683 RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
01684 }
01685 }
01686 }
01687
01688 if (ispartialpage)
01689 {
01690
01691 LogwrtResult.Write = WriteRqst.Write;
01692 break;
01693 }
01694 curridx = NextBufIdx(curridx);
01695
01696
01697 if (flexible && npages == 0)
01698 break;
01699 }
01700
01701 Assert(npages == 0);
01702 Assert(curridx == Write->curridx);
01703
01704
01705
01706
01707 if (LogwrtResult.Flush < WriteRqst.Flush &&
01708 LogwrtResult.Flush < LogwrtResult.Write)
01709
01710 {
01711
01712
01713
01714
01715
01716 if (sync_method != SYNC_METHOD_OPEN &&
01717 sync_method != SYNC_METHOD_OPEN_DSYNC)
01718 {
01719 if (openLogFile >= 0 &&
01720 !XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo))
01721 XLogFileClose();
01722 if (openLogFile < 0)
01723 {
01724 XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo);
01725 openLogFile = XLogFileOpen(openLogSegNo);
01726 openLogOff = 0;
01727 }
01728
01729 issue_xlog_fsync(openLogFile, openLogSegNo);
01730 }
01731
01732
01733 WalSndWakeupRequest();
01734
01735 LogwrtResult.Flush = LogwrtResult.Write;
01736 }
01737
01738
01739
01740
01741
01742
01743
01744
01745 {
01746
01747 volatile XLogCtlData *xlogctl = XLogCtl;
01748
01749 SpinLockAcquire(&xlogctl->info_lck);
01750 xlogctl->LogwrtResult = LogwrtResult;
01751 if (xlogctl->LogwrtRqst.Write < LogwrtResult.Write)
01752 xlogctl->LogwrtRqst.Write = LogwrtResult.Write;
01753 if (xlogctl->LogwrtRqst.Flush < LogwrtResult.Flush)
01754 xlogctl->LogwrtRqst.Flush = LogwrtResult.Flush;
01755 SpinLockRelease(&xlogctl->info_lck);
01756 }
01757 }
01758
01759
01760
01761
01762
01763
01764 void
01765 XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
01766 {
01767 XLogRecPtr WriteRqstPtr = asyncXactLSN;
01768 bool sleeping;
01769
01770
01771 volatile XLogCtlData *xlogctl = XLogCtl;
01772
01773 SpinLockAcquire(&xlogctl->info_lck);
01774 LogwrtResult = xlogctl->LogwrtResult;
01775 sleeping = xlogctl->WalWriterSleeping;
01776 if (xlogctl->asyncXactLSN < asyncXactLSN)
01777 xlogctl->asyncXactLSN = asyncXactLSN;
01778 SpinLockRelease(&xlogctl->info_lck);
01779
01780
01781
01782
01783
01784
01785 if (!sleeping)
01786 {
01787
01788 WriteRqstPtr -= WriteRqstPtr % XLOG_BLCKSZ;
01789
01790
01791 if (WriteRqstPtr <= LogwrtResult.Flush)
01792 return;
01793 }
01794
01795
01796
01797
01798
01799
01800 if (ProcGlobal->walwriterLatch)
01801 SetLatch(ProcGlobal->walwriterLatch);
01802 }
01803
01804
01805
01806
01807
01808
01809
01810
01811
01812
01813 static void
01814 UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
01815 {
01816
01817 if (!updateMinRecoveryPoint || (!force && lsn <= minRecoveryPoint))
01818 return;
01819
01820 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
01821
01822
01823 minRecoveryPoint = ControlFile->minRecoveryPoint;
01824 minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
01825
01826
01827
01828
01829
01830
01831 if (minRecoveryPoint == 0)
01832 updateMinRecoveryPoint = false;
01833 else if (force || minRecoveryPoint < lsn)
01834 {
01835
01836 volatile XLogCtlData *xlogctl = XLogCtl;
01837 XLogRecPtr newMinRecoveryPoint;
01838 TimeLineID newMinRecoveryPointTLI;
01839
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850
01851
01852
01853 SpinLockAcquire(&xlogctl->info_lck);
01854 newMinRecoveryPoint = xlogctl->replayEndRecPtr;
01855 newMinRecoveryPointTLI = xlogctl->replayEndTLI;
01856 SpinLockRelease(&xlogctl->info_lck);
01857
01858 if (!force && newMinRecoveryPoint < lsn)
01859 elog(WARNING,
01860 "xlog min recovery request %X/%X is past current point %X/%X",
01861 (uint32) (lsn >> 32) , (uint32) lsn,
01862 (uint32) (newMinRecoveryPoint >> 32),
01863 (uint32) newMinRecoveryPoint);
01864
01865
01866 if (ControlFile->minRecoveryPoint < newMinRecoveryPoint)
01867 {
01868 ControlFile->minRecoveryPoint = newMinRecoveryPoint;
01869 ControlFile->minRecoveryPointTLI = newMinRecoveryPointTLI;
01870 UpdateControlFile();
01871 minRecoveryPoint = newMinRecoveryPoint;
01872 minRecoveryPointTLI = newMinRecoveryPointTLI;
01873
01874 ereport(DEBUG2,
01875 (errmsg("updated min recovery point to %X/%X on timeline %u",
01876 (uint32) (minRecoveryPoint >> 32),
01877 (uint32) minRecoveryPoint,
01878 newMinRecoveryPointTLI)));
01879 }
01880 }
01881 LWLockRelease(ControlFileLock);
01882 }
01883
01884
01885
01886
01887
01888
01889
01890 void
01891 XLogFlush(XLogRecPtr record)
01892 {
01893 XLogRecPtr WriteRqstPtr;
01894 XLogwrtRqst WriteRqst;
01895
01896
01897
01898
01899
01900
01901
01902
01903 if (!XLogInsertAllowed())
01904 {
01905 UpdateMinRecoveryPoint(record, false);
01906 return;
01907 }
01908
01909
01910 if (record <= LogwrtResult.Flush)
01911 return;
01912
01913 #ifdef WAL_DEBUG
01914 if (XLOG_DEBUG)
01915 elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X",
01916 (uint32) (record >> 32), (uint32) record,
01917 (uint32) (LogwrtResult.Write >> 32), (uint32) LogwrtResult.Write,
01918 (uint32) (LogwrtResult.Flush >> 32), (uint32) LogwrtResult.Flush);
01919 #endif
01920
01921 START_CRIT_SECTION();
01922
01923
01924
01925
01926
01927
01928
01929
01930
01931
01932 WriteRqstPtr = record;
01933
01934
01935
01936
01937
01938 for (;;)
01939 {
01940
01941 volatile XLogCtlData *xlogctl = XLogCtl;
01942
01943
01944 SpinLockAcquire(&xlogctl->info_lck);
01945 if (WriteRqstPtr < xlogctl->LogwrtRqst.Write)
01946 WriteRqstPtr = xlogctl->LogwrtRqst.Write;
01947 LogwrtResult = xlogctl->LogwrtResult;
01948 SpinLockRelease(&xlogctl->info_lck);
01949
01950
01951 if (record <= LogwrtResult.Flush)
01952 break;
01953
01954
01955
01956
01957
01958
01959
01960
01961 if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
01962 {
01963
01964
01965
01966
01967
01968 continue;
01969 }
01970
01971
01972 LogwrtResult = XLogCtl->LogwrtResult;
01973 if (record <= LogwrtResult.Flush)
01974 {
01975 LWLockRelease(WALWriteLock);
01976 break;
01977 }
01978
01979
01980
01981
01982
01983
01984
01985
01986
01987
01988 if (CommitDelay > 0 && enableFsync &&
01989 MinimumActiveBackends(CommitSiblings))
01990 pg_usleep(CommitDelay);
01991
01992
01993 if (LWLockConditionalAcquire(WALInsertLock, LW_EXCLUSIVE))
01994 {
01995 XLogCtlInsert *Insert = &XLogCtl->Insert;
01996 uint32 freespace = INSERT_FREESPACE(Insert);
01997
01998 if (freespace == 0)
01999 WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx];
02000 else
02001 {
02002 WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx];
02003 WriteRqstPtr -= freespace;
02004 }
02005 LWLockRelease(WALInsertLock);
02006 WriteRqst.Write = WriteRqstPtr;
02007 WriteRqst.Flush = WriteRqstPtr;
02008 }
02009 else
02010 {
02011 WriteRqst.Write = WriteRqstPtr;
02012 WriteRqst.Flush = record;
02013 }
02014 XLogWrite(WriteRqst, false, false);
02015
02016 LWLockRelease(WALWriteLock);
02017
02018 break;
02019 }
02020
02021 END_CRIT_SECTION();
02022
02023
02024 WalSndWakeupProcessRequests();
02025
02026
02027
02028
02029
02030
02031
02032
02033
02034
02035
02036
02037
02038
02039
02040
02041
02042
02043
02044
02045
02046
02047 if (LogwrtResult.Flush < record)
02048 elog(ERROR,
02049 "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
02050 (uint32) (record >> 32), (uint32) record,
02051 (uint32) (LogwrtResult.Flush >> 32), (uint32) LogwrtResult.Flush);
02052 }
02053
02054
02055
02056
02057
02058
02059
02060
02061
02062
02063
02064
02065
02066
02067
02068
02069
02070
02071 bool
02072 XLogBackgroundFlush(void)
02073 {
02074 XLogRecPtr WriteRqstPtr;
02075 bool flexible = true;
02076 bool wrote_something = false;
02077
02078
02079 if (RecoveryInProgress())
02080 return false;
02081
02082
02083 {
02084
02085 volatile XLogCtlData *xlogctl = XLogCtl;
02086
02087 SpinLockAcquire(&xlogctl->info_lck);
02088 LogwrtResult = xlogctl->LogwrtResult;
02089 WriteRqstPtr = xlogctl->LogwrtRqst.Write;
02090 SpinLockRelease(&xlogctl->info_lck);
02091 }
02092
02093
02094 WriteRqstPtr -= WriteRqstPtr % XLOG_BLCKSZ;
02095
02096
02097 if (WriteRqstPtr <= LogwrtResult.Flush)
02098 {
02099
02100 volatile XLogCtlData *xlogctl = XLogCtl;
02101
02102 SpinLockAcquire(&xlogctl->info_lck);
02103 WriteRqstPtr = xlogctl->asyncXactLSN;
02104 SpinLockRelease(&xlogctl->info_lck);
02105 flexible = false;
02106 }
02107
02108
02109
02110
02111
02112
02113 if (WriteRqstPtr <= LogwrtResult.Flush)
02114 {
02115 if (openLogFile >= 0)
02116 {
02117 if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo))
02118 {
02119 XLogFileClose();
02120 }
02121 }
02122 return false;
02123 }
02124
02125 #ifdef WAL_DEBUG
02126 if (XLOG_DEBUG)
02127 elog(LOG, "xlog bg flush request %X/%X; write %X/%X; flush %X/%X",
02128 (uint32) (WriteRqstPtr >> 32), (uint32) WriteRqstPtr,
02129 (uint32) (LogwrtResult.Write >> 32), (uint32) LogwrtResult.Write,
02130 (uint32) (LogwrtResult.Flush >> 32), (uint32) LogwrtResult.Flush);
02131 #endif
02132
02133 START_CRIT_SECTION();
02134
02135
02136 LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
02137 LogwrtResult = XLogCtl->LogwrtResult;
02138 if (WriteRqstPtr > LogwrtResult.Flush)
02139 {
02140 XLogwrtRqst WriteRqst;
02141
02142 WriteRqst.Write = WriteRqstPtr;
02143 WriteRqst.Flush = WriteRqstPtr;
02144 XLogWrite(WriteRqst, flexible, false);
02145 wrote_something = true;
02146 }
02147 LWLockRelease(WALWriteLock);
02148
02149 END_CRIT_SECTION();
02150
02151
02152 WalSndWakeupProcessRequests();
02153
02154 return wrote_something;
02155 }
02156
02157
02158
02159
02160
02161
02162
02163 bool
02164 XLogNeedsFlush(XLogRecPtr record)
02165 {
02166
02167
02168
02169
02170
02171 if (RecoveryInProgress())
02172 {
02173
02174 if (record <= minRecoveryPoint || !updateMinRecoveryPoint)
02175 return false;
02176
02177
02178
02179
02180
02181 if (!LWLockConditionalAcquire(ControlFileLock, LW_SHARED))
02182 return true;
02183 minRecoveryPoint = ControlFile->minRecoveryPoint;
02184 minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
02185 LWLockRelease(ControlFileLock);
02186
02187
02188
02189
02190
02191
02192
02193 if (minRecoveryPoint == 0)
02194 updateMinRecoveryPoint = false;
02195
02196
02197 if (record <= minRecoveryPoint || !updateMinRecoveryPoint)
02198 return false;
02199 else
02200 return true;
02201 }
02202
02203
02204 if (record <= LogwrtResult.Flush)
02205 return false;
02206
02207
02208 {
02209
02210 volatile XLogCtlData *xlogctl = XLogCtl;
02211
02212 SpinLockAcquire(&xlogctl->info_lck);
02213 LogwrtResult = xlogctl->LogwrtResult;
02214 SpinLockRelease(&xlogctl->info_lck);
02215 }
02216
02217
02218 if (record <= LogwrtResult.Flush)
02219 return false;
02220
02221 return true;
02222 }
02223
02224
02225
02226
02227
02228
02229
02230
02231
02232
02233
02234
02235
02236
02237
02238
02239
02240
02241
02242
02243
02244 int
02245 XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
02246 {
02247 char path[MAXPGPATH];
02248 char tmppath[MAXPGPATH];
02249 char *zbuffer;
02250 XLogSegNo installed_segno;
02251 int max_advance;
02252 int fd;
02253 int nbytes;
02254
02255 XLogFilePath(path, ThisTimeLineID, logsegno);
02256
02257
02258
02259
02260 if (*use_existent)
02261 {
02262 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method),
02263 S_IRUSR | S_IWUSR);
02264 if (fd < 0)
02265 {
02266 if (errno != ENOENT)
02267 ereport(ERROR,
02268 (errcode_for_file_access(),
02269 errmsg("could not open file \"%s\": %m", path)));
02270 }
02271 else
02272 return fd;
02273 }
02274
02275
02276
02277
02278
02279
02280
02281 elog(DEBUG2, "creating and filling new WAL file");
02282
02283 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
02284
02285 unlink(tmppath);
02286
02287
02288
02289
02290
02291
02292
02293
02294
02295 zbuffer = (char *) palloc0(XLOG_BLCKSZ);
02296
02297
02298 fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
02299 S_IRUSR | S_IWUSR);
02300 if (fd < 0)
02301 ereport(ERROR,
02302 (errcode_for_file_access(),
02303 errmsg("could not create file \"%s\": %m", tmppath)));
02304
02305
02306
02307
02308
02309
02310
02311
02312
02313
02314 for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
02315 {
02316 errno = 0;
02317 if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
02318 {
02319 int save_errno = errno;
02320
02321
02322
02323
02324 unlink(tmppath);
02325
02326 close(fd);
02327
02328
02329 errno = save_errno ? save_errno : ENOSPC;
02330
02331 ereport(ERROR,
02332 (errcode_for_file_access(),
02333 errmsg("could not write to file \"%s\": %m", tmppath)));
02334 }
02335 }
02336 pfree(zbuffer);
02337
02338 if (pg_fsync(fd) != 0)
02339 {
02340 close(fd);
02341 ereport(ERROR,
02342 (errcode_for_file_access(),
02343 errmsg("could not fsync file \"%s\": %m", tmppath)));
02344 }
02345
02346 if (close(fd))
02347 ereport(ERROR,
02348 (errcode_for_file_access(),
02349 errmsg("could not close file \"%s\": %m", tmppath)));
02350
02351
02352
02353
02354
02355
02356
02357
02358
02359 installed_segno = logsegno;
02360 max_advance = XLOGfileslop;
02361 if (!InstallXLogFileSegment(&installed_segno, tmppath,
02362 *use_existent, &max_advance,
02363 use_lock))
02364 {
02365
02366
02367
02368
02369
02370 unlink(tmppath);
02371 }
02372
02373
02374 *use_existent = false;
02375
02376
02377 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method),
02378 S_IRUSR | S_IWUSR);
02379 if (fd < 0)
02380 ereport(ERROR,
02381 (errcode_for_file_access(),
02382 errmsg("could not open file \"%s\": %m", path)));
02383
02384 elog(DEBUG2, "done creating and filling new WAL file");
02385
02386 return fd;
02387 }
02388
02389
02390
02391
02392
02393
02394
02395
02396
02397
02398
02399
02400
02401 static void
02402 XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno)
02403 {
02404 char path[MAXPGPATH];
02405 char tmppath[MAXPGPATH];
02406 char buffer[XLOG_BLCKSZ];
02407 int srcfd;
02408 int fd;
02409 int nbytes;
02410
02411
02412
02413
02414 XLogFilePath(path, srcTLI, srcsegno);
02415 srcfd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0);
02416 if (srcfd < 0)
02417 ereport(ERROR,
02418 (errcode_for_file_access(),
02419 errmsg("could not open file \"%s\": %m", path)));
02420
02421
02422
02423
02424 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
02425
02426 unlink(tmppath);
02427
02428
02429 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
02430 S_IRUSR | S_IWUSR);
02431 if (fd < 0)
02432 ereport(ERROR,
02433 (errcode_for_file_access(),
02434 errmsg("could not create file \"%s\": %m", tmppath)));
02435
02436
02437
02438
02439 for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(buffer))
02440 {
02441 errno = 0;
02442 if ((int) read(srcfd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
02443 {
02444 if (errno != 0)
02445 ereport(ERROR,
02446 (errcode_for_file_access(),
02447 errmsg("could not read file \"%s\": %m", path)));
02448 else
02449 ereport(ERROR,
02450 (errmsg("not enough data in file \"%s\"", path)));
02451 }
02452 errno = 0;
02453 if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
02454 {
02455 int save_errno = errno;
02456
02457
02458
02459
02460 unlink(tmppath);
02461
02462 errno = save_errno ? save_errno : ENOSPC;
02463
02464 ereport(ERROR,
02465 (errcode_for_file_access(),
02466 errmsg("could not write to file \"%s\": %m", tmppath)));
02467 }
02468 }
02469
02470 if (pg_fsync(fd) != 0)
02471 ereport(ERROR,
02472 (errcode_for_file_access(),
02473 errmsg("could not fsync file \"%s\": %m", tmppath)));
02474
02475 if (CloseTransientFile(fd))
02476 ereport(ERROR,
02477 (errcode_for_file_access(),
02478 errmsg("could not close file \"%s\": %m", tmppath)));
02479
02480 CloseTransientFile(srcfd);
02481
02482
02483
02484
02485 if (!InstallXLogFileSegment(&destsegno, tmppath, false, NULL, false))
02486 elog(ERROR, "InstallXLogFileSegment should not have failed");
02487 }
02488
02489
02490
02491
02492
02493
02494
02495
02496
02497
02498
02499
02500
02501
02502
02503
02504
02505
02506
02507
02508
02509
02510
02511
02512
02513
02514
02515
02516
02517
02518 static bool
02519 InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
02520 bool find_free, int *max_advance,
02521 bool use_lock)
02522 {
02523 char path[MAXPGPATH];
02524 struct stat stat_buf;
02525
02526 XLogFilePath(path, ThisTimeLineID, *segno);
02527
02528
02529
02530
02531 if (use_lock)
02532 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
02533
02534 if (!find_free)
02535 {
02536
02537 unlink(path);
02538 }
02539 else
02540 {
02541
02542 while (stat(path, &stat_buf) == 0)
02543 {
02544 if (*max_advance <= 0)
02545 {
02546
02547 if (use_lock)
02548 LWLockRelease(ControlFileLock);
02549 return false;
02550 }
02551 (*segno)++;
02552 (*max_advance)--;
02553 XLogFilePath(path, ThisTimeLineID, *segno);
02554 }
02555 }
02556
02557
02558
02559
02560
02561
02562 #if HAVE_WORKING_LINK
02563 if (link(tmppath, path) < 0)
02564 {
02565 if (use_lock)
02566 LWLockRelease(ControlFileLock);
02567 ereport(LOG,
02568 (errcode_for_file_access(),
02569 errmsg("could not link file \"%s\" to \"%s\" (initialization of log file): %m",
02570 tmppath, path)));
02571 return false;
02572 }
02573 unlink(tmppath);
02574 #else
02575 if (rename(tmppath, path) < 0)
02576 {
02577 if (use_lock)
02578 LWLockRelease(ControlFileLock);
02579 ereport(LOG,
02580 (errcode_for_file_access(),
02581 errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file): %m",
02582 tmppath, path)));
02583 return false;
02584 }
02585 #endif
02586
02587 if (use_lock)
02588 LWLockRelease(ControlFileLock);
02589
02590 return true;
02591 }
02592
02593
02594
02595
02596 int
02597 XLogFileOpen(XLogSegNo segno)
02598 {
02599 char path[MAXPGPATH];
02600 int fd;
02601
02602 XLogFilePath(path, ThisTimeLineID, segno);
02603
02604 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method),
02605 S_IRUSR | S_IWUSR);
02606 if (fd < 0)
02607 ereport(PANIC,
02608 (errcode_for_file_access(),
02609 errmsg("could not open xlog file \"%s\": %m", path)));
02610
02611 return fd;
02612 }
02613
02614
02615
02616
02617
02618
02619
02620 static int
02621 XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
02622 int source, bool notfoundOk)
02623 {
02624 char xlogfname[MAXFNAMELEN];
02625 char activitymsg[MAXFNAMELEN + 16];
02626 char path[MAXPGPATH];
02627 int fd;
02628
02629 XLogFileName(xlogfname, tli, segno);
02630
02631 switch (source)
02632 {
02633 case XLOG_FROM_ARCHIVE:
02634
02635 snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
02636 xlogfname);
02637 set_ps_display(activitymsg, false);
02638
02639 restoredFromArchive = RestoreArchivedFile(path, xlogfname,
02640 "RECOVERYXLOG",
02641 XLogSegSize,
02642 InRedo);
02643 if (!restoredFromArchive)
02644 return -1;
02645 break;
02646
02647 case XLOG_FROM_PG_XLOG:
02648 case XLOG_FROM_STREAM:
02649 XLogFilePath(path, tli, segno);
02650 restoredFromArchive = false;
02651 break;
02652
02653 default:
02654 elog(ERROR, "invalid XLogFileRead source %d", source);
02655 }
02656
02657
02658
02659
02660
02661 if (source == XLOG_FROM_ARCHIVE)
02662 {
02663 KeepFileRestoredFromArchive(path, xlogfname);
02664
02665
02666
02667
02668 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
02669 }
02670
02671 fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
02672 if (fd >= 0)
02673 {
02674
02675 curFileTLI = tli;
02676
02677
02678 snprintf(activitymsg, sizeof(activitymsg), "recovering %s",
02679 xlogfname);
02680 set_ps_display(activitymsg, false);
02681
02682
02683 readSource = source;
02684 XLogReceiptSource = source;
02685
02686 if (source != XLOG_FROM_STREAM)
02687 XLogReceiptTime = GetCurrentTimestamp();
02688
02689 return fd;
02690 }
02691 if (errno != ENOENT || !notfoundOk)
02692 ereport(PANIC,
02693 (errcode_for_file_access(),
02694 errmsg("could not open file \"%s\": %m", path)));
02695 return -1;
02696 }
02697
02698
02699
02700
02701
02702
02703 static int
02704 XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source)
02705 {
02706 char path[MAXPGPATH];
02707 ListCell *cell;
02708 int fd;
02709 List *tles;
02710
02711
02712
02713
02714
02715
02716
02717
02718
02719
02720
02721
02722
02723
02724
02725
02726
02727
02728
02729 if (expectedTLEs)
02730 tles = expectedTLEs;
02731 else
02732 tles = readTimeLineHistory(recoveryTargetTLI);
02733
02734 foreach(cell, tles)
02735 {
02736 TimeLineID tli = ((TimeLineHistoryEntry *) lfirst(cell))->tli;
02737
02738 if (tli < curFileTLI)
02739 break;
02740
02741 if (source == XLOG_FROM_ANY || source == XLOG_FROM_ARCHIVE)
02742 {
02743 fd = XLogFileRead(segno, emode, tli,
02744 XLOG_FROM_ARCHIVE, true);
02745 if (fd != -1)
02746 {
02747 elog(DEBUG1, "got WAL segment from archive");
02748 if (!expectedTLEs)
02749 expectedTLEs = tles;
02750 return fd;
02751 }
02752 }
02753
02754 if (source == XLOG_FROM_ANY || source == XLOG_FROM_PG_XLOG)
02755 {
02756 fd = XLogFileRead(segno, emode, tli,
02757 XLOG_FROM_PG_XLOG, true);
02758 if (fd != -1)
02759 {
02760 if (!expectedTLEs)
02761 expectedTLEs = tles;
02762 return fd;
02763 }
02764 }
02765 }
02766
02767
02768 XLogFilePath(path, recoveryTargetTLI, segno);
02769 errno = ENOENT;
02770 ereport(emode,
02771 (errcode_for_file_access(),
02772 errmsg("could not open file \"%s\": %m", path)));
02773 return -1;
02774 }
02775
02776
02777
02778
02779 static void
02780 XLogFileClose(void)
02781 {
02782 Assert(openLogFile >= 0);
02783
02784
02785
02786
02787
02788
02789
02790 #if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
02791 if (!XLogIsNeeded())
02792 (void) posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
02793 #endif
02794
02795 if (close(openLogFile))
02796 ereport(PANIC,
02797 (errcode_for_file_access(),
02798 errmsg("could not close log file %s: %m",
02799 XLogFileNameP(ThisTimeLineID, openLogSegNo))));
02800 openLogFile = -1;
02801 }
02802
02803
02804
02805
02806
02807
02808
02809
02810
02811
02812
02813 static void
02814 PreallocXlogFiles(XLogRecPtr endptr)
02815 {
02816 XLogSegNo _logSegNo;
02817 int lf;
02818 bool use_existent;
02819
02820 XLByteToPrevSeg(endptr, _logSegNo);
02821 if ((endptr - 1) % XLogSegSize >= (uint32) (0.75 * XLogSegSize))
02822 {
02823 _logSegNo++;
02824 use_existent = true;
02825 lf = XLogFileInit(_logSegNo, &use_existent, true);
02826 close(lf);
02827 if (!use_existent)
02828 CheckpointStats.ckpt_segs_added++;
02829 }
02830 }
02831
02832
02833
02834
02835
02836
02837
02838
02839 void
02840 CheckXLogRemoved(XLogSegNo segno, TimeLineID tli)
02841 {
02842
02843 volatile XLogCtlData *xlogctl = XLogCtl;
02844 XLogSegNo lastRemovedSegNo;
02845
02846 SpinLockAcquire(&xlogctl->info_lck);
02847 lastRemovedSegNo = xlogctl->lastRemovedSegNo;
02848 SpinLockRelease(&xlogctl->info_lck);
02849
02850 if (segno <= lastRemovedSegNo)
02851 {
02852 char filename[MAXFNAMELEN];
02853
02854 XLogFileName(filename, tli, segno);
02855 ereport(ERROR,
02856 (errcode_for_file_access(),
02857 errmsg("requested WAL segment %s has already been removed",
02858 filename)));
02859 }
02860 }
02861
02862
02863
02864
02865
02866 static void
02867 UpdateLastRemovedPtr(char *filename)
02868 {
02869
02870 volatile XLogCtlData *xlogctl = XLogCtl;
02871 uint32 tli;
02872 XLogSegNo segno;
02873
02874 XLogFromFileName(filename, &tli, &segno);
02875
02876 SpinLockAcquire(&xlogctl->info_lck);
02877 if (segno > xlogctl->lastRemovedSegNo)
02878 xlogctl->lastRemovedSegNo = segno;
02879 SpinLockRelease(&xlogctl->info_lck);
02880 }
02881
02882
02883
02884
02885
02886
02887
02888 static void
02889 RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr)
02890 {
02891 XLogSegNo endlogSegNo;
02892 int max_advance;
02893 DIR *xldir;
02894 struct dirent *xlde;
02895 char lastoff[MAXFNAMELEN];
02896 char path[MAXPGPATH];
02897
02898 #ifdef WIN32
02899 char newpath[MAXPGPATH];
02900 #endif
02901 struct stat statbuf;
02902
02903
02904
02905
02906
02907 XLByteToPrevSeg(endptr, endlogSegNo);
02908 max_advance = XLOGfileslop;
02909
02910 xldir = AllocateDir(XLOGDIR);
02911 if (xldir == NULL)
02912 ereport(ERROR,
02913 (errcode_for_file_access(),
02914 errmsg("could not open transaction log directory \"%s\": %m",
02915 XLOGDIR)));
02916
02917
02918
02919
02920
02921
02922 XLogFileName(lastoff, 0, segno);
02923
02924 elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
02925 lastoff);
02926
02927 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
02928 {
02929
02930
02931
02932
02933
02934
02935
02936
02937
02938
02939
02940 if (strlen(xlde->d_name) == 24 &&
02941 strspn(xlde->d_name, "0123456789ABCDEF") == 24 &&
02942 strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
02943 {
02944 if (XLogArchiveCheckDone(xlde->d_name))
02945 {
02946 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
02947
02948
02949 UpdateLastRemovedPtr(xlde->d_name);
02950
02951
02952
02953
02954
02955
02956
02957 if (lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
02958 InstallXLogFileSegment(&endlogSegNo, path,
02959 true, &max_advance, true))
02960 {
02961 ereport(DEBUG2,
02962 (errmsg("recycled transaction log file \"%s\"",
02963 xlde->d_name)));
02964 CheckpointStats.ckpt_segs_recycled++;
02965
02966 if (max_advance > 0)
02967 {
02968 endlogSegNo++;
02969 max_advance--;
02970 }
02971 }
02972 else
02973 {
02974
02975 int rc;
02976
02977 ereport(DEBUG2,
02978 (errmsg("removing transaction log file \"%s\"",
02979 xlde->d_name)));
02980
02981 #ifdef WIN32
02982
02983
02984
02985
02986
02987
02988
02989
02990
02991
02992
02993
02994
02995
02996 snprintf(newpath, MAXPGPATH, "%s.deleted", path);
02997 if (rename(path, newpath) != 0)
02998 {
02999 ereport(LOG,
03000 (errcode_for_file_access(),
03001 errmsg("could not rename old transaction log file \"%s\": %m",
03002 path)));
03003 continue;
03004 }
03005 rc = unlink(newpath);
03006 #else
03007 rc = unlink(path);
03008 #endif
03009 if (rc != 0)
03010 {
03011 ereport(LOG,
03012 (errcode_for_file_access(),
03013 errmsg("could not remove old transaction log file \"%s\": %m",
03014 path)));
03015 continue;
03016 }
03017 CheckpointStats.ckpt_segs_removed++;
03018 }
03019
03020 XLogArchiveCleanup(xlde->d_name);
03021 }
03022 }
03023 }
03024
03025 FreeDir(xldir);
03026 }
03027
03028
03029
03030
03031
03032
03033
03034
03035
03036
03037
03038
03039
03040
03041 static void
03042 ValidateXLOGDirectoryStructure(void)
03043 {
03044 char path[MAXPGPATH];
03045 struct stat stat_buf;
03046
03047
03048 if (stat(XLOGDIR, &stat_buf) != 0 ||
03049 !S_ISDIR(stat_buf.st_mode))
03050 ereport(FATAL,
03051 (errmsg("required WAL directory \"%s\" does not exist",
03052 XLOGDIR)));
03053
03054
03055 snprintf(path, MAXPGPATH, XLOGDIR "/archive_status");
03056 if (stat(path, &stat_buf) == 0)
03057 {
03058
03059 if (!S_ISDIR(stat_buf.st_mode))
03060 ereport(FATAL,
03061 (errmsg("required WAL directory \"%s\" does not exist",
03062 path)));
03063 }
03064 else
03065 {
03066 ereport(LOG,
03067 (errmsg("creating missing WAL directory \"%s\"", path)));
03068 if (mkdir(path, S_IRWXU) < 0)
03069 ereport(FATAL,
03070 (errmsg("could not create missing directory \"%s\": %m",
03071 path)));
03072 }
03073 }
03074
03075
03076
03077
03078
03079
03080 static void
03081 CleanupBackupHistory(void)
03082 {
03083 DIR *xldir;
03084 struct dirent *xlde;
03085 char path[MAXPGPATH];
03086
03087 xldir = AllocateDir(XLOGDIR);
03088 if (xldir == NULL)
03089 ereport(ERROR,
03090 (errcode_for_file_access(),
03091 errmsg("could not open transaction log directory \"%s\": %m",
03092 XLOGDIR)));
03093
03094 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
03095 {
03096 if (strlen(xlde->d_name) > 24 &&
03097 strspn(xlde->d_name, "0123456789ABCDEF") == 24 &&
03098 strcmp(xlde->d_name + strlen(xlde->d_name) - strlen(".backup"),
03099 ".backup") == 0)
03100 {
03101 if (XLogArchiveCheckDone(xlde->d_name))
03102 {
03103 ereport(DEBUG2,
03104 (errmsg("removing transaction log backup history file \"%s\"",
03105 xlde->d_name)));
03106 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
03107 unlink(path);
03108 XLogArchiveCleanup(xlde->d_name);
03109 }
03110 }
03111 }
03112
03113 FreeDir(xldir);
03114 }
03115
03116
03117
03118
03119
03120
03121
03122
03123
03124
03125
03126
03127
03128
03129
03130
03131
03132
03133
03134
03135
03136
03137
03138
03139
03140
03141
03142
03143
03144
03145
03146
03147
03148 Buffer
03149 RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index,
03150 bool get_cleanup_lock, bool keep_buffer)
03151 {
03152 BkpBlock bkpb;
03153 char *blk;
03154 int i;
03155
03156
03157 blk = (char *) XLogRecGetData(record) + record->xl_len;
03158 for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
03159 {
03160 if (!(record->xl_info & XLR_BKP_BLOCK(i)))
03161 continue;
03162
03163 memcpy(&bkpb, blk, sizeof(BkpBlock));
03164 blk += sizeof(BkpBlock);
03165
03166 if (i == block_index)
03167 {
03168
03169 return RestoreBackupBlockContents(lsn, bkpb, blk, get_cleanup_lock,
03170 keep_buffer);
03171 }
03172
03173 blk += BLCKSZ - bkpb.hole_length;
03174 }
03175
03176
03177 elog(ERROR, "failed to restore block_index %d", block_index);
03178 return InvalidBuffer;
03179 }
03180
03181
03182
03183
03184
03185
03186 static Buffer
03187 RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk,
03188 bool get_cleanup_lock, bool keep_buffer)
03189 {
03190 Buffer buffer;
03191 Page page;
03192
03193 buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
03194 RBM_ZERO);
03195 Assert(BufferIsValid(buffer));
03196 if (get_cleanup_lock)
03197 LockBufferForCleanup(buffer);
03198 else
03199 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
03200
03201 page = (Page) BufferGetPage(buffer);
03202
03203 if (bkpb.hole_length == 0)
03204 {
03205 memcpy((char *) page, blk, BLCKSZ);
03206 }
03207 else
03208 {
03209 memcpy((char *) page, blk, bkpb.hole_offset);
03210
03211 MemSet((char *) page + bkpb.hole_offset, 0, bkpb.hole_length);
03212 memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
03213 blk + bkpb.hole_offset,
03214 BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
03215 }
03216
03217
03218
03219
03220
03221
03222 PageSetLSN(page, lsn);
03223 MarkBufferDirty(buffer);
03224
03225 if (!keep_buffer)
03226 UnlockReleaseBuffer(buffer);
03227
03228 return buffer;
03229 }
03230
03231
03232
03233
03234
03235
03236
03237
03238
03239
03240
03241
03242
03243
03244 static XLogRecord *
03245 ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode,
03246 bool fetching_ckpt)
03247 {
03248 XLogRecord *record;
03249 XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
03250
03251
03252 private->fetching_ckpt = fetching_ckpt;
03253 private->emode = emode;
03254 private->randAccess = (RecPtr != InvalidXLogRecPtr);
03255
03256
03257 lastSourceFailed = false;
03258
03259 for (;;)
03260 {
03261 char *errormsg;
03262
03263 record = XLogReadRecord(xlogreader, RecPtr, &errormsg);
03264 ReadRecPtr = xlogreader->ReadRecPtr;
03265 EndRecPtr = xlogreader->EndRecPtr;
03266 if (record == NULL)
03267 {
03268 if (readFile >= 0)
03269 {
03270 close(readFile);
03271 readFile = -1;
03272 }
03273
03274
03275
03276
03277
03278
03279
03280 if (errormsg)
03281 ereport(emode_for_corrupt_record(emode,
03282 RecPtr ? RecPtr : EndRecPtr),
03283 (errmsg_internal("%s", errormsg) ));
03284 }
03285
03286
03287
03288 else if (!tliInHistory(xlogreader->latestPageTLI, expectedTLEs))
03289 {
03290 char fname[MAXFNAMELEN];
03291 XLogSegNo segno;
03292 int32 offset;
03293
03294 XLByteToSeg(xlogreader->latestPagePtr, segno);
03295 offset = xlogreader->latestPagePtr % XLogSegSize;
03296 XLogFileName(fname, xlogreader->readPageTLI, segno);
03297 ereport(emode_for_corrupt_record(emode,
03298 RecPtr ? RecPtr : EndRecPtr),
03299 (errmsg("unexpected timeline ID %u in log segment %s, offset %u",
03300 xlogreader->latestPageTLI,
03301 fname,
03302 offset)));
03303 record = NULL;
03304 }
03305
03306 if (record)
03307 {
03308
03309 return record;
03310 }
03311 else
03312 {
03313
03314 lastSourceFailed = true;
03315
03316
03317
03318
03319
03320
03321
03322
03323
03324
03325
03326
03327
03328 if (!InArchiveRecovery && ArchiveRecoveryRequested &&
03329 !fetching_ckpt)
03330 {
03331 ereport(DEBUG1,
03332 (errmsg_internal("reached end of WAL in pg_xlog, entering archive recovery")));
03333 InArchiveRecovery = true;
03334 if (StandbyModeRequested)
03335 StandbyMode = true;
03336
03337
03338 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
03339 ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
03340 if (ControlFile->minRecoveryPoint < EndRecPtr)
03341 {
03342 ControlFile->minRecoveryPoint = EndRecPtr;
03343 ControlFile->minRecoveryPointTLI = ThisTimeLineID;
03344 }
03345
03346 minRecoveryPoint = ControlFile->minRecoveryPoint;
03347 minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
03348
03349 UpdateControlFile();
03350 LWLockRelease(ControlFileLock);
03351
03352 CheckRecoveryConsistency();
03353
03354
03355
03356
03357
03358 lastSourceFailed = false;
03359 currentSource = 0;
03360
03361 continue;
03362 }
03363
03364
03365 if (StandbyMode && !CheckForStandbyTrigger())
03366 continue;
03367 else
03368 return NULL;
03369 }
03370 }
03371 }
03372
03373
03374
03375
03376
03377
03378
03379
03380 static bool
03381 rescanLatestTimeLine(void)
03382 {
03383 List *newExpectedTLEs;
03384 bool found;
03385 ListCell *cell;
03386 TimeLineID newtarget;
03387 TimeLineID oldtarget = recoveryTargetTLI;
03388 TimeLineHistoryEntry *currentTle = NULL;
03389
03390 newtarget = findNewestTimeLine(recoveryTargetTLI);
03391 if (newtarget == recoveryTargetTLI)
03392 {
03393
03394 return false;
03395 }
03396
03397
03398
03399
03400
03401 newExpectedTLEs = readTimeLineHistory(newtarget);
03402
03403
03404
03405
03406
03407 found = false;
03408 foreach (cell, newExpectedTLEs)
03409 {
03410 currentTle = (TimeLineHistoryEntry *) lfirst(cell);
03411
03412 if (currentTle->tli == recoveryTargetTLI)
03413 {
03414 found = true;
03415 break;
03416 }
03417 }
03418 if (!found)
03419 {
03420 ereport(LOG,
03421 (errmsg("new timeline %u is not a child of database system timeline %u",
03422 newtarget,
03423 ThisTimeLineID)));
03424 return false;
03425 }
03426
03427
03428
03429
03430
03431
03432 if (currentTle->end < EndRecPtr)
03433 {
03434 ereport(LOG,
03435 (errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%X",
03436 newtarget,
03437 ThisTimeLineID,
03438 (uint32) (EndRecPtr >> 32), (uint32) EndRecPtr)));
03439 return false;
03440 }
03441
03442
03443 recoveryTargetTLI = newtarget;
03444 list_free_deep(expectedTLEs);
03445 expectedTLEs = newExpectedTLEs;
03446
03447
03448
03449
03450
03451 restoreTimeLineHistoryFiles(oldtarget + 1, newtarget);
03452
03453 ereport(LOG,
03454 (errmsg("new target timeline is %u",
03455 recoveryTargetTLI)));
03456
03457 return true;
03458 }
03459
03460
03461
03462
03463
03464
03465
03466
03467
03468
03469
03470
03471
03472
03473
03474 static void
03475 WriteControlFile(void)
03476 {
03477 int fd;
03478 char buffer[PG_CONTROL_SIZE];
03479
03480
03481
03482
03483 ControlFile->pg_control_version = PG_CONTROL_VERSION;
03484 ControlFile->catalog_version_no = CATALOG_VERSION_NO;
03485
03486 ControlFile->maxAlign = MAXIMUM_ALIGNOF;
03487 ControlFile->floatFormat = FLOATFORMAT_VALUE;
03488
03489 ControlFile->blcksz = BLCKSZ;
03490 ControlFile->relseg_size = RELSEG_SIZE;
03491 ControlFile->xlog_blcksz = XLOG_BLCKSZ;
03492 ControlFile->xlog_seg_size = XLOG_SEG_SIZE;
03493
03494 ControlFile->nameDataLen = NAMEDATALEN;
03495 ControlFile->indexMaxKeys = INDEX_MAX_KEYS;
03496
03497 ControlFile->toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE;
03498
03499 #ifdef HAVE_INT64_TIMESTAMP
03500 ControlFile->enableIntTimes = true;
03501 #else
03502 ControlFile->enableIntTimes = false;
03503 #endif
03504 ControlFile->float4ByVal = FLOAT4PASSBYVAL;
03505 ControlFile->float8ByVal = FLOAT8PASSBYVAL;
03506
03507
03508 INIT_CRC32(ControlFile->crc);
03509 COMP_CRC32(ControlFile->crc,
03510 (char *) ControlFile,
03511 offsetof(ControlFileData, crc));
03512 FIN_CRC32(ControlFile->crc);
03513
03514
03515
03516
03517
03518
03519
03520
03521 if (sizeof(ControlFileData) > PG_CONTROL_SIZE)
03522 elog(PANIC, "sizeof(ControlFileData) is larger than PG_CONTROL_SIZE; fix either one");
03523
03524 memset(buffer, 0, PG_CONTROL_SIZE);
03525 memcpy(buffer, ControlFile, sizeof(ControlFileData));
03526
03527 fd = BasicOpenFile(XLOG_CONTROL_FILE,
03528 O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
03529 S_IRUSR | S_IWUSR);
03530 if (fd < 0)
03531 ereport(PANIC,
03532 (errcode_for_file_access(),
03533 errmsg("could not create control file \"%s\": %m",
03534 XLOG_CONTROL_FILE)));
03535
03536 errno = 0;
03537 if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE)
03538 {
03539
03540 if (errno == 0)
03541 errno = ENOSPC;
03542 ereport(PANIC,
03543 (errcode_for_file_access(),
03544 errmsg("could not write to control file: %m")));
03545 }
03546
03547 if (pg_fsync(fd) != 0)
03548 ereport(PANIC,
03549 (errcode_for_file_access(),
03550 errmsg("could not fsync control file: %m")));
03551
03552 if (close(fd))
03553 ereport(PANIC,
03554 (errcode_for_file_access(),
03555 errmsg("could not close control file: %m")));
03556 }
03557
03558 static void
03559 ReadControlFile(void)
03560 {
03561 pg_crc32 crc;
03562 int fd;
03563
03564
03565
03566
03567 fd = BasicOpenFile(XLOG_CONTROL_FILE,
03568 O_RDWR | PG_BINARY,
03569 S_IRUSR | S_IWUSR);
03570 if (fd < 0)
03571 ereport(PANIC,
03572 (errcode_for_file_access(),
03573 errmsg("could not open control file \"%s\": %m",
03574 XLOG_CONTROL_FILE)));
03575
03576 if (read(fd, ControlFile, sizeof(ControlFileData)) != sizeof(ControlFileData))
03577 ereport(PANIC,
03578 (errcode_for_file_access(),
03579 errmsg("could not read from control file: %m")));
03580
03581 close(fd);
03582
03583
03584
03585
03586
03587
03588
03589
03590 if (ControlFile->pg_control_version != PG_CONTROL_VERSION && ControlFile->pg_control_version % 65536 == 0 && ControlFile->pg_control_version / 65536 != 0)
03591 ereport(FATAL,
03592 (errmsg("database files are incompatible with server"),
03593 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
03594 " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
03595 ControlFile->pg_control_version, ControlFile->pg_control_version,
03596 PG_CONTROL_VERSION, PG_CONTROL_VERSION),
03597 errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
03598
03599 if (ControlFile->pg_control_version != PG_CONTROL_VERSION)
03600 ereport(FATAL,
03601 (errmsg("database files are incompatible with server"),
03602 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d,"
03603 " but the server was compiled with PG_CONTROL_VERSION %d.",
03604 ControlFile->pg_control_version, PG_CONTROL_VERSION),
03605 errhint("It looks like you need to initdb.")));
03606
03607
03608 INIT_CRC32(crc);
03609 COMP_CRC32(crc,
03610 (char *) ControlFile,
03611 offsetof(ControlFileData, crc));
03612 FIN_CRC32(crc);
03613
03614 if (!EQ_CRC32(crc, ControlFile->crc))
03615 ereport(FATAL,
03616 (errmsg("incorrect checksum in control file")));
03617
03618
03619
03620
03621
03622
03623 if (ControlFile->catalog_version_no != CATALOG_VERSION_NO)
03624 ereport(FATAL,
03625 (errmsg("database files are incompatible with server"),
03626 errdetail("The database cluster was initialized with CATALOG_VERSION_NO %d,"
03627 " but the server was compiled with CATALOG_VERSION_NO %d.",
03628 ControlFile->catalog_version_no, CATALOG_VERSION_NO),
03629 errhint("It looks like you need to initdb.")));
03630 if (ControlFile->maxAlign != MAXIMUM_ALIGNOF)
03631 ereport(FATAL,
03632 (errmsg("database files are incompatible with server"),
03633 errdetail("The database cluster was initialized with MAXALIGN %d,"
03634 " but the server was compiled with MAXALIGN %d.",
03635 ControlFile->maxAlign, MAXIMUM_ALIGNOF),
03636 errhint("It looks like you need to initdb.")));
03637 if (ControlFile->floatFormat != FLOATFORMAT_VALUE)
03638 ereport(FATAL,
03639 (errmsg("database files are incompatible with server"),
03640 errdetail("The database cluster appears to use a different floating-point number format than the server executable."),
03641 errhint("It looks like you need to initdb.")));
03642 if (ControlFile->blcksz != BLCKSZ)
03643 ereport(FATAL,
03644 (errmsg("database files are incompatible with server"),
03645 errdetail("The database cluster was initialized with BLCKSZ %d,"
03646 " but the server was compiled with BLCKSZ %d.",
03647 ControlFile->blcksz, BLCKSZ),
03648 errhint("It looks like you need to recompile or initdb.")));
03649 if (ControlFile->relseg_size != RELSEG_SIZE)
03650 ereport(FATAL,
03651 (errmsg("database files are incompatible with server"),
03652 errdetail("The database cluster was initialized with RELSEG_SIZE %d,"
03653 " but the server was compiled with RELSEG_SIZE %d.",
03654 ControlFile->relseg_size, RELSEG_SIZE),
03655 errhint("It looks like you need to recompile or initdb.")));
03656 if (ControlFile->xlog_blcksz != XLOG_BLCKSZ)
03657 ereport(FATAL,
03658 (errmsg("database files are incompatible with server"),
03659 errdetail("The database cluster was initialized with XLOG_BLCKSZ %d,"
03660 " but the server was compiled with XLOG_BLCKSZ %d.",
03661 ControlFile->xlog_blcksz, XLOG_BLCKSZ),
03662 errhint("It looks like you need to recompile or initdb.")));
03663 if (ControlFile->xlog_seg_size != XLOG_SEG_SIZE)
03664 ereport(FATAL,
03665 (errmsg("database files are incompatible with server"),
03666 errdetail("The database cluster was initialized with XLOG_SEG_SIZE %d,"
03667 " but the server was compiled with XLOG_SEG_SIZE %d.",
03668 ControlFile->xlog_seg_size, XLOG_SEG_SIZE),
03669 errhint("It looks like you need to recompile or initdb.")));
03670 if (ControlFile->nameDataLen != NAMEDATALEN)
03671 ereport(FATAL,
03672 (errmsg("database files are incompatible with server"),
03673 errdetail("The database cluster was initialized with NAMEDATALEN %d,"
03674 " but the server was compiled with NAMEDATALEN %d.",
03675 ControlFile->nameDataLen, NAMEDATALEN),
03676 errhint("It looks like you need to recompile or initdb.")));
03677 if (ControlFile->indexMaxKeys != INDEX_MAX_KEYS)
03678 ereport(FATAL,
03679 (errmsg("database files are incompatible with server"),
03680 errdetail("The database cluster was initialized with INDEX_MAX_KEYS %d,"
03681 " but the server was compiled with INDEX_MAX_KEYS %d.",
03682 ControlFile->indexMaxKeys, INDEX_MAX_KEYS),
03683 errhint("It looks like you need to recompile or initdb.")));
03684 if (ControlFile->toast_max_chunk_size != TOAST_MAX_CHUNK_SIZE)
03685 ereport(FATAL,
03686 (errmsg("database files are incompatible with server"),
03687 errdetail("The database cluster was initialized with TOAST_MAX_CHUNK_SIZE %d,"
03688 " but the server was compiled with TOAST_MAX_CHUNK_SIZE %d.",
03689 ControlFile->toast_max_chunk_size, (int) TOAST_MAX_CHUNK_SIZE),
03690 errhint("It looks like you need to recompile or initdb.")));
03691
03692 #ifdef HAVE_INT64_TIMESTAMP
03693 if (ControlFile->enableIntTimes != true)
03694 ereport(FATAL,
03695 (errmsg("database files are incompatible with server"),
03696 errdetail("The database cluster was initialized without HAVE_INT64_TIMESTAMP"
03697 " but the server was compiled with HAVE_INT64_TIMESTAMP."),
03698 errhint("It looks like you need to recompile or initdb.")));
03699 #else
03700 if (ControlFile->enableIntTimes != false)
03701 ereport(FATAL,
03702 (errmsg("database files are incompatible with server"),
03703 errdetail("The database cluster was initialized with HAVE_INT64_TIMESTAMP"
03704 " but the server was compiled without HAVE_INT64_TIMESTAMP."),
03705 errhint("It looks like you need to recompile or initdb.")));
03706 #endif
03707
03708 #ifdef USE_FLOAT4_BYVAL
03709 if (ControlFile->float4ByVal != true)
03710 ereport(FATAL,
03711 (errmsg("database files are incompatible with server"),
03712 errdetail("The database cluster was initialized without USE_FLOAT4_BYVAL"
03713 " but the server was compiled with USE_FLOAT4_BYVAL."),
03714 errhint("It looks like you need to recompile or initdb.")));
03715 #else
03716 if (ControlFile->float4ByVal != false)
03717 ereport(FATAL,
03718 (errmsg("database files are incompatible with server"),
03719 errdetail("The database cluster was initialized with USE_FLOAT4_BYVAL"
03720 " but the server was compiled without USE_FLOAT4_BYVAL."),
03721 errhint("It looks like you need to recompile or initdb.")));
03722 #endif
03723
03724 #ifdef USE_FLOAT8_BYVAL
03725 if (ControlFile->float8ByVal != true)
03726 ereport(FATAL,
03727 (errmsg("database files are incompatible with server"),
03728 errdetail("The database cluster was initialized without USE_FLOAT8_BYVAL"
03729 " but the server was compiled with USE_FLOAT8_BYVAL."),
03730 errhint("It looks like you need to recompile or initdb.")));
03731 #else
03732 if (ControlFile->float8ByVal != false)
03733 ereport(FATAL,
03734 (errmsg("database files are incompatible with server"),
03735 errdetail("The database cluster was initialized with USE_FLOAT8_BYVAL"
03736 " but the server was compiled without USE_FLOAT8_BYVAL."),
03737 errhint("It looks like you need to recompile or initdb.")));
03738 #endif
03739 }
03740
03741 void
03742 UpdateControlFile(void)
03743 {
03744 int fd;
03745
03746 INIT_CRC32(ControlFile->crc);
03747 COMP_CRC32(ControlFile->crc,
03748 (char *) ControlFile,
03749 offsetof(ControlFileData, crc));
03750 FIN_CRC32(ControlFile->crc);
03751
03752 fd = BasicOpenFile(XLOG_CONTROL_FILE,
03753 O_RDWR | PG_BINARY,
03754 S_IRUSR | S_IWUSR);
03755 if (fd < 0)
03756 ereport(PANIC,
03757 (errcode_for_file_access(),
03758 errmsg("could not open control file \"%s\": %m",
03759 XLOG_CONTROL_FILE)));
03760
03761 errno = 0;
03762 if (write(fd, ControlFile, sizeof(ControlFileData)) != sizeof(ControlFileData))
03763 {
03764
03765 if (errno == 0)
03766 errno = ENOSPC;
03767 ereport(PANIC,
03768 (errcode_for_file_access(),
03769 errmsg("could not write to control file: %m")));
03770 }
03771
03772 if (pg_fsync(fd) != 0)
03773 ereport(PANIC,
03774 (errcode_for_file_access(),
03775 errmsg("could not fsync control file: %m")));
03776
03777 if (close(fd))
03778 ereport(PANIC,
03779 (errcode_for_file_access(),
03780 errmsg("could not close control file: %m")));
03781 }
03782
03783
03784
03785
03786 uint64
03787 GetSystemIdentifier(void)
03788 {
03789 Assert(ControlFile != NULL);
03790 return ControlFile->system_identifier;
03791 }
03792
03793
03794
03795
03796 bool
03797 DataChecksumsEnabled(void)
03798 {
03799 Assert(ControlFile != NULL);
03800 return (ControlFile->data_checksum_version > 0);
03801 }
03802
03803
03804
03805
03806
03807
03808
03809
03810
03811
03812 XLogRecPtr
03813 GetFakeLSNForUnloggedRel(void)
03814 {
03815 XLogRecPtr nextUnloggedLSN;
03816
03817
03818 volatile XLogCtlData *xlogctl = XLogCtl;
03819
03820
03821 SpinLockAcquire(&xlogctl->ulsn_lck);
03822 nextUnloggedLSN = xlogctl->unloggedLSN++;
03823 SpinLockRelease(&xlogctl->ulsn_lck);
03824
03825 return nextUnloggedLSN;
03826 }
03827
03828
03829
03830
03831
03832
03833
03834
03835
03836
03837
03838
03839 static int
03840 XLOGChooseNumBuffers(void)
03841 {
03842 int xbuffers;
03843
03844 xbuffers = NBuffers / 32;
03845 if (xbuffers > XLOG_SEG_SIZE / XLOG_BLCKSZ)
03846 xbuffers = XLOG_SEG_SIZE / XLOG_BLCKSZ;
03847 if (xbuffers < 8)
03848 xbuffers = 8;
03849 return xbuffers;
03850 }
03851
03852
03853
03854
03855 bool
03856 check_wal_buffers(int *newval, void **extra, GucSource source)
03857 {
03858
03859
03860
03861 if (*newval == -1)
03862 {
03863
03864
03865
03866
03867 if (XLOGbuffers == -1)
03868 return true;
03869
03870
03871 *newval = XLOGChooseNumBuffers();
03872 }
03873
03874
03875
03876
03877
03878
03879
03880
03881 if (*newval < 4)
03882 *newval = 4;
03883
03884 return true;
03885 }
03886
03887
03888
03889
03890 Size
03891 XLOGShmemSize(void)
03892 {
03893 Size size;
03894
03895
03896
03897
03898
03899
03900
03901 if (XLOGbuffers == -1)
03902 {
03903 char buf[32];
03904
03905 snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
03906 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER, PGC_S_OVERRIDE);
03907 }
03908 Assert(XLOGbuffers > 0);
03909
03910
03911 size = sizeof(XLogCtlData);
03912
03913 size = add_size(size, mul_size(sizeof(XLogRecPtr), XLOGbuffers));
03914
03915 size = add_size(size, ALIGNOF_XLOG_BUFFER);
03916
03917 size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
03918
03919
03920
03921
03922
03923
03924
03925 return size;
03926 }
03927
03928 void
03929 XLOGShmemInit(void)
03930 {
03931 bool foundCFile,
03932 foundXLog;
03933 char *allocptr;
03934
03935 ControlFile = (ControlFileData *)
03936 ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
03937 XLogCtl = (XLogCtlData *)
03938 ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
03939
03940 if (foundCFile || foundXLog)
03941 {
03942
03943 Assert(foundCFile && foundXLog);
03944 return;
03945 }
03946
03947 memset(XLogCtl, 0, sizeof(XLogCtlData));
03948
03949
03950
03951
03952
03953
03954 allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
03955 XLogCtl->xlblocks = (XLogRecPtr *) allocptr;
03956 memset(XLogCtl->xlblocks, 0, sizeof(XLogRecPtr) * XLOGbuffers);
03957 allocptr += sizeof(XLogRecPtr) * XLOGbuffers;
03958
03959
03960
03961
03962 allocptr = (char *) TYPEALIGN(ALIGNOF_XLOG_BUFFER, allocptr);
03963 XLogCtl->pages = allocptr;
03964 memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
03965
03966
03967
03968
03969
03970 XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
03971 XLogCtl->SharedRecoveryInProgress = true;
03972 XLogCtl->SharedHotStandbyActive = false;
03973 XLogCtl->WalWriterSleeping = false;
03974 XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
03975 SpinLockInit(&XLogCtl->info_lck);
03976 SpinLockInit(&XLogCtl->ulsn_lck);
03977 InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
03978
03979
03980
03981
03982
03983
03984 if (!IsBootstrapProcessingMode())
03985 ReadControlFile();
03986 }
03987
03988
03989
03990
03991
03992 void
03993 BootStrapXLOG(void)
03994 {
03995 CheckPoint checkPoint;
03996 char *buffer;
03997 XLogPageHeader page;
03998 XLogLongPageHeader longpage;
03999 XLogRecord *record;
04000 bool use_existent;
04001 uint64 sysidentifier;
04002 struct timeval tv;
04003 pg_crc32 crc;
04004
04005
04006
04007
04008
04009
04010
04011
04012
04013
04014
04015
04016
04017 gettimeofday(&tv, NULL);
04018 sysidentifier = ((uint64) tv.tv_sec) << 32;
04019 sysidentifier |= (uint32) (tv.tv_sec | tv.tv_usec);
04020
04021
04022 ThisTimeLineID = 1;
04023
04024
04025 buffer = (char *) palloc(XLOG_BLCKSZ + ALIGNOF_XLOG_BUFFER);
04026 page = (XLogPageHeader) TYPEALIGN(ALIGNOF_XLOG_BUFFER, buffer);
04027 memset(page, 0, XLOG_BLCKSZ);
04028
04029
04030
04031
04032
04033
04034
04035
04036 checkPoint.redo = XLogSegSize + SizeOfXLogLongPHD;
04037 checkPoint.ThisTimeLineID = ThisTimeLineID;
04038 checkPoint.PrevTimeLineID = ThisTimeLineID;
04039 checkPoint.fullPageWrites = fullPageWrites;
04040 checkPoint.nextXidEpoch = 0;
04041 checkPoint.nextXid = FirstNormalTransactionId;
04042 checkPoint.nextOid = FirstBootstrapObjectId;
04043 checkPoint.nextMulti = FirstMultiXactId;
04044 checkPoint.nextMultiOffset = 0;
04045 checkPoint.oldestXid = FirstNormalTransactionId;
04046 checkPoint.oldestXidDB = TemplateDbOid;
04047 checkPoint.oldestMulti = FirstMultiXactId;
04048 checkPoint.oldestMultiDB = TemplateDbOid;
04049 checkPoint.time = (pg_time_t) time(NULL);
04050 checkPoint.oldestActiveXid = InvalidTransactionId;
04051
04052 ShmemVariableCache->nextXid = checkPoint.nextXid;
04053 ShmemVariableCache->nextOid = checkPoint.nextOid;
04054 ShmemVariableCache->oidCount = 0;
04055 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
04056 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
04057 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
04058
04059
04060 page->xlp_magic = XLOG_PAGE_MAGIC;
04061 page->xlp_info = XLP_LONG_HEADER;
04062 page->xlp_tli = ThisTimeLineID;
04063 page->xlp_pageaddr = XLogSegSize;
04064 longpage = (XLogLongPageHeader) page;
04065 longpage->xlp_sysid = sysidentifier;
04066 longpage->xlp_seg_size = XLogSegSize;
04067 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
04068
04069
04070 record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
04071 record->xl_prev = 0;
04072 record->xl_xid = InvalidTransactionId;
04073 record->xl_tot_len = SizeOfXLogRecord + sizeof(checkPoint);
04074 record->xl_len = sizeof(checkPoint);
04075 record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
04076 record->xl_rmid = RM_XLOG_ID;
04077 memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint));
04078
04079 INIT_CRC32(crc);
04080 COMP_CRC32(crc, &checkPoint, sizeof(checkPoint));
04081 COMP_CRC32(crc, (char *) record, offsetof(XLogRecord, xl_crc));
04082 FIN_CRC32(crc);
04083 record->xl_crc = crc;
04084
04085
04086 use_existent = false;
04087 openLogFile = XLogFileInit(1, &use_existent, false);
04088
04089
04090 errno = 0;
04091 if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
04092 {
04093
04094 if (errno == 0)
04095 errno = ENOSPC;
04096 ereport(PANIC,
04097 (errcode_for_file_access(),
04098 errmsg("could not write bootstrap transaction log file: %m")));
04099 }
04100
04101 if (pg_fsync(openLogFile) != 0)
04102 ereport(PANIC,
04103 (errcode_for_file_access(),
04104 errmsg("could not fsync bootstrap transaction log file: %m")));
04105
04106 if (close(openLogFile))
04107 ereport(PANIC,
04108 (errcode_for_file_access(),
04109 errmsg("could not close bootstrap transaction log file: %m")));
04110
04111 openLogFile = -1;
04112
04113
04114
04115 memset(ControlFile, 0, sizeof(ControlFileData));
04116
04117 ControlFile->system_identifier = sysidentifier;
04118 ControlFile->state = DB_SHUTDOWNED;
04119 ControlFile->time = checkPoint.time;
04120 ControlFile->checkPoint = checkPoint.redo;
04121 ControlFile->checkPointCopy = checkPoint;
04122 ControlFile->unloggedLSN = 1;
04123
04124
04125 ControlFile->MaxConnections = MaxConnections;
04126 ControlFile->max_prepared_xacts = max_prepared_xacts;
04127 ControlFile->max_locks_per_xact = max_locks_per_xact;
04128 ControlFile->wal_level = wal_level;
04129 ControlFile->data_checksum_version = bootstrap_data_checksum_version;
04130
04131
04132
04133 WriteControlFile();
04134
04135
04136 BootStrapCLOG();
04137 BootStrapSUBTRANS();
04138 BootStrapMultiXact();
04139
04140 pfree(buffer);
04141 }
04142
04143 static char *
04144 str_time(pg_time_t tnow)
04145 {
04146 static char buf[128];
04147
04148 pg_strftime(buf, sizeof(buf),
04149 "%Y-%m-%d %H:%M:%S %Z",
04150 pg_localtime(&tnow, log_timezone));
04151
04152 return buf;
04153 }
04154
04155
04156
04157
04158
04159
04160
04161 static void
04162 readRecoveryCommandFile(void)
04163 {
04164 FILE *fd;
04165 TimeLineID rtli = 0;
04166 bool rtliGiven = false;
04167 ConfigVariable *item,
04168 *head = NULL,
04169 *tail = NULL;
04170
04171 fd = AllocateFile(RECOVERY_COMMAND_FILE, "r");
04172 if (fd == NULL)
04173 {
04174 if (errno == ENOENT)
04175 return;
04176 ereport(FATAL,
04177 (errcode_for_file_access(),
04178 errmsg("could not open recovery command file \"%s\": %m",
04179 RECOVERY_COMMAND_FILE)));
04180 }
04181
04182
04183
04184
04185
04186 (void) ParseConfigFp(fd, RECOVERY_COMMAND_FILE, 0, FATAL, &head, &tail);
04187
04188 FreeFile(fd);
04189
04190 for (item = head; item; item = item->next)
04191 {
04192 if (strcmp(item->name, "restore_command") == 0)
04193 {
04194 recoveryRestoreCommand = pstrdup(item->value);
04195 ereport(DEBUG2,
04196 (errmsg_internal("restore_command = '%s'",
04197 recoveryRestoreCommand)));
04198 }
04199 else if (strcmp(item->name, "recovery_end_command") == 0)
04200 {
04201 recoveryEndCommand = pstrdup(item->value);
04202 ereport(DEBUG2,
04203 (errmsg_internal("recovery_end_command = '%s'",
04204 recoveryEndCommand)));
04205 }
04206 else if (strcmp(item->name, "archive_cleanup_command") == 0)
04207 {
04208 archiveCleanupCommand = pstrdup(item->value);
04209 ereport(DEBUG2,
04210 (errmsg_internal("archive_cleanup_command = '%s'",
04211 archiveCleanupCommand)));
04212 }
04213 else if (strcmp(item->name, "pause_at_recovery_target") == 0)
04214 {
04215 if (!parse_bool(item->value, &recoveryPauseAtTarget))
04216 ereport(ERROR,
04217 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
04218 errmsg("parameter \"%s\" requires a Boolean value", "pause_at_recovery_target")));
04219 ereport(DEBUG2,
04220 (errmsg_internal("pause_at_recovery_target = '%s'",
04221 item->value)));
04222 }
04223 else if (strcmp(item->name, "recovery_target_timeline") == 0)
04224 {
04225 rtliGiven = true;
04226 if (strcmp(item->value, "latest") == 0)
04227 rtli = 0;
04228 else
04229 {
04230 errno = 0;
04231 rtli = (TimeLineID) strtoul(item->value, NULL, 0);
04232 if (errno == EINVAL || errno == ERANGE)
04233 ereport(FATAL,
04234 (errmsg("recovery_target_timeline is not a valid number: \"%s\"",
04235 item->value)));
04236 }
04237 if (rtli)
04238 ereport(DEBUG2,
04239 (errmsg_internal("recovery_target_timeline = %u", rtli)));
04240 else
04241 ereport(DEBUG2,
04242 (errmsg_internal("recovery_target_timeline = latest")));
04243 }
04244 else if (strcmp(item->name, "recovery_target_xid") == 0)
04245 {
04246 errno = 0;
04247 recoveryTargetXid = (TransactionId) strtoul(item->value, NULL, 0);
04248 if (errno == EINVAL || errno == ERANGE)
04249 ereport(FATAL,
04250 (errmsg("recovery_target_xid is not a valid number: \"%s\"",
04251 item->value)));
04252 ereport(DEBUG2,
04253 (errmsg_internal("recovery_target_xid = %u",
04254 recoveryTargetXid)));
04255 recoveryTarget = RECOVERY_TARGET_XID;
04256 }
04257 else if (strcmp(item->name, "recovery_target_time") == 0)
04258 {
04259
04260
04261
04262
04263 if (recoveryTarget == RECOVERY_TARGET_XID ||
04264 recoveryTarget == RECOVERY_TARGET_NAME)
04265 continue;
04266 recoveryTarget = RECOVERY_TARGET_TIME;
04267
04268
04269
04270
04271 recoveryTargetTime =
04272 DatumGetTimestampTz(DirectFunctionCall3(timestamptz_in,
04273 CStringGetDatum(item->value),
04274 ObjectIdGetDatum(InvalidOid),
04275 Int32GetDatum(-1)));
04276 ereport(DEBUG2,
04277 (errmsg_internal("recovery_target_time = '%s'",
04278 timestamptz_to_str(recoveryTargetTime))));
04279 }
04280 else if (strcmp(item->name, "recovery_target_name") == 0)
04281 {
04282
04283
04284
04285
04286 if (recoveryTarget == RECOVERY_TARGET_XID)
04287 continue;
04288 recoveryTarget = RECOVERY_TARGET_NAME;
04289
04290 recoveryTargetName = pstrdup(item->value);
04291 if (strlen(recoveryTargetName) >= MAXFNAMELEN)
04292 ereport(FATAL,
04293 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
04294 errmsg("recovery_target_name is too long (maximum %d characters)",
04295 MAXFNAMELEN - 1)));
04296
04297 ereport(DEBUG2,
04298 (errmsg_internal("recovery_target_name = '%s'",
04299 recoveryTargetName)));
04300 }
04301 else if (strcmp(item->name, "recovery_target_inclusive") == 0)
04302 {
04303
04304
04305
04306 if (!parse_bool(item->value, &recoveryTargetInclusive))
04307 ereport(ERROR,
04308 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
04309 errmsg("parameter \"%s\" requires a Boolean value",
04310 "recovery_target_inclusive")));
04311 ereport(DEBUG2,
04312 (errmsg_internal("recovery_target_inclusive = %s",
04313 item->value)));
04314 }
04315 else if (strcmp(item->name, "standby_mode") == 0)
04316 {
04317 if (!parse_bool(item->value, &StandbyModeRequested))
04318 ereport(ERROR,
04319 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
04320 errmsg("parameter \"%s\" requires a Boolean value",
04321 "standby_mode")));
04322 ereport(DEBUG2,
04323 (errmsg_internal("standby_mode = '%s'", item->value)));
04324 }
04325 else if (strcmp(item->name, "primary_conninfo") == 0)
04326 {
04327 PrimaryConnInfo = pstrdup(item->value);
04328 ereport(DEBUG2,
04329 (errmsg_internal("primary_conninfo = '%s'",
04330 PrimaryConnInfo)));
04331 }
04332 else if (strcmp(item->name, "trigger_file") == 0)
04333 {
04334 TriggerFile = pstrdup(item->value);
04335 ereport(DEBUG2,
04336 (errmsg_internal("trigger_file = '%s'",
04337 TriggerFile)));
04338 }
04339 else
04340 ereport(FATAL,
04341 (errmsg("unrecognized recovery parameter \"%s\"",
04342 item->name)));
04343 }
04344
04345
04346
04347
04348 if (StandbyModeRequested)
04349 {
04350 if (PrimaryConnInfo == NULL && recoveryRestoreCommand == NULL)
04351 ereport(WARNING,
04352 (errmsg("recovery command file \"%s\" specified neither primary_conninfo nor restore_command",
04353 RECOVERY_COMMAND_FILE),
04354 errhint("The database server will regularly poll the pg_xlog subdirectory to check for files placed there.")));
04355 }
04356 else
04357 {
04358 if (recoveryRestoreCommand == NULL)
04359 ereport(FATAL,
04360 (errmsg("recovery command file \"%s\" must specify restore_command when standby mode is not enabled",
04361 RECOVERY_COMMAND_FILE)));
04362 }
04363
04364
04365 ArchiveRecoveryRequested = true;
04366
04367
04368
04369
04370
04371
04372
04373 if (rtliGiven)
04374 {
04375 if (rtli)
04376 {
04377
04378 if (rtli != 1 && !existsTimeLineHistory(rtli))
04379 ereport(FATAL,
04380 (errmsg("recovery target timeline %u does not exist",
04381 rtli)));
04382 recoveryTargetTLI = rtli;
04383 recoveryTargetIsLatest = false;
04384 }
04385 else
04386 {
04387
04388 recoveryTargetTLI = findNewestTimeLine(recoveryTargetTLI);
04389 recoveryTargetIsLatest = true;
04390 }
04391 }
04392
04393 FreeConfigVariables(head);
04394 }
04395
04396
04397
04398
04399 static void
04400 exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo)
04401 {
04402 char recoveryPath[MAXPGPATH];
04403 char xlogpath[MAXPGPATH];
04404
04405
04406
04407
04408 InArchiveRecovery = false;
04409
04410
04411
04412
04413 UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
04414
04415
04416
04417
04418
04419 if (readFile >= 0)
04420 {
04421 close(readFile);
04422 readFile = -1;
04423 }
04424
04425
04426
04427
04428
04429
04430
04431
04432
04433
04434 if (endTLI != ThisTimeLineID)
04435 {
04436 XLogFileCopy(endLogSegNo, endTLI, endLogSegNo);
04437
04438 if (XLogArchivingActive())
04439 {
04440 XLogFileName(xlogpath, endTLI, endLogSegNo);
04441 XLogArchiveNotify(xlogpath);
04442 }
04443 }
04444
04445
04446
04447
04448
04449 XLogFileName(xlogpath, ThisTimeLineID, endLogSegNo);
04450 XLogArchiveCleanup(xlogpath);
04451
04452
04453
04454
04455
04456 snprintf(recoveryPath, MAXPGPATH, XLOGDIR "/RECOVERYXLOG");
04457 unlink(recoveryPath);
04458
04459
04460 snprintf(recoveryPath, MAXPGPATH, XLOGDIR "/RECOVERYHISTORY");
04461 unlink(recoveryPath);
04462
04463
04464
04465
04466
04467 unlink(RECOVERY_COMMAND_DONE);
04468 if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0)
04469 ereport(FATAL,
04470 (errcode_for_file_access(),
04471 errmsg("could not rename file \"%s\" to \"%s\": %m",
04472 RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
04473
04474 ereport(LOG,
04475 (errmsg("archive recovery complete")));
04476 }
04477
04478
04479
04480
04481
04482
04483
04484
04485
04486
04487
04488
04489
04490 static bool
04491 recoveryStopsHere(XLogRecord *record, bool *includeThis)
04492 {
04493 bool stopsHere;
04494 uint8 record_info;
04495 TimestampTz recordXtime;
04496 char recordRPName[MAXFNAMELEN];
04497
04498
04499 if (record->xl_rmid != RM_XACT_ID && record->xl_rmid != RM_XLOG_ID)
04500 return false;
04501 record_info = record->xl_info & ~XLR_INFO_MASK;
04502 if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_COMPACT)
04503 {
04504 xl_xact_commit_compact *recordXactCommitData;
04505
04506 recordXactCommitData = (xl_xact_commit_compact *) XLogRecGetData(record);
04507 recordXtime = recordXactCommitData->xact_time;
04508 }
04509 else if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT)
04510 {
04511 xl_xact_commit *recordXactCommitData;
04512
04513 recordXactCommitData = (xl_xact_commit *) XLogRecGetData(record);
04514 recordXtime = recordXactCommitData->xact_time;
04515 }
04516 else if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT)
04517 {
04518 xl_xact_abort *recordXactAbortData;
04519
04520 recordXactAbortData = (xl_xact_abort *) XLogRecGetData(record);
04521 recordXtime = recordXactAbortData->xact_time;
04522 }
04523 else if (record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
04524 {
04525 xl_restore_point *recordRestorePointData;
04526
04527 recordRestorePointData = (xl_restore_point *) XLogRecGetData(record);
04528 recordXtime = recordRestorePointData->rp_time;
04529 strncpy(recordRPName, recordRestorePointData->rp_name, MAXFNAMELEN);
04530 }
04531 else
04532 return false;
04533
04534
04535 if (recoveryTarget == RECOVERY_TARGET_UNSET)
04536 {
04537
04538
04539
04540
04541 if (record->xl_rmid == RM_XACT_ID)
04542 SetLatestXTime(recordXtime);
04543 return false;
04544 }
04545
04546 if (recoveryTarget == RECOVERY_TARGET_XID)
04547 {
04548
04549
04550
04551
04552
04553
04554
04555
04556
04557 stopsHere = (record->xl_xid == recoveryTargetXid);
04558 if (stopsHere)
04559 *includeThis = recoveryTargetInclusive;
04560 }
04561 else if (recoveryTarget == RECOVERY_TARGET_NAME)
04562 {
04563
04564
04565
04566
04567 stopsHere = (strcmp(recordRPName, recoveryTargetName) == 0);
04568
04569
04570
04571
04572
04573 *includeThis = false;
04574 }
04575 else
04576 {
04577
04578
04579
04580
04581
04582 if (recoveryTargetInclusive)
04583 stopsHere = (recordXtime > recoveryTargetTime);
04584 else
04585 stopsHere = (recordXtime >= recoveryTargetTime);
04586 if (stopsHere)
04587 *includeThis = false;
04588 }
04589
04590 if (stopsHere)
04591 {
04592 recoveryStopXid = record->xl_xid;
04593 recoveryStopTime = recordXtime;
04594 recoveryStopAfter = *includeThis;
04595
04596 if (record_info == XLOG_XACT_COMMIT_COMPACT || record_info == XLOG_XACT_COMMIT)
04597 {
04598 if (recoveryStopAfter)
04599 ereport(LOG,
04600 (errmsg("recovery stopping after commit of transaction %u, time %s",
04601 recoveryStopXid,
04602 timestamptz_to_str(recoveryStopTime))));
04603 else
04604 ereport(LOG,
04605 (errmsg("recovery stopping before commit of transaction %u, time %s",
04606 recoveryStopXid,
04607 timestamptz_to_str(recoveryStopTime))));
04608 }
04609 else if (record_info == XLOG_XACT_ABORT)
04610 {
04611 if (recoveryStopAfter)
04612 ereport(LOG,
04613 (errmsg("recovery stopping after abort of transaction %u, time %s",
04614 recoveryStopXid,
04615 timestamptz_to_str(recoveryStopTime))));
04616 else
04617 ereport(LOG,
04618 (errmsg("recovery stopping before abort of transaction %u, time %s",
04619 recoveryStopXid,
04620 timestamptz_to_str(recoveryStopTime))));
04621 }
04622 else
04623 {
04624 strncpy(recoveryStopName, recordRPName, MAXFNAMELEN);
04625
04626 ereport(LOG,
04627 (errmsg("recovery stopping at restore point \"%s\", time %s",
04628 recoveryStopName,
04629 timestamptz_to_str(recoveryStopTime))));
04630 }
04631
04632
04633
04634
04635
04636
04637 if (record->xl_rmid == RM_XACT_ID && recoveryStopAfter)
04638 SetLatestXTime(recordXtime);
04639 }
04640 else if (record->xl_rmid == RM_XACT_ID)
04641 SetLatestXTime(recordXtime);
04642
04643 return stopsHere;
04644 }
04645
04646
04647
04648
04649
04650
04651
04652
04653 static void
04654 recoveryPausesHere(void)
04655 {
04656
04657 if (!LocalHotStandbyActive)
04658 return;
04659
04660 ereport(LOG,
04661 (errmsg("recovery has paused"),
04662 errhint("Execute pg_xlog_replay_resume() to continue.")));
04663
04664 while (RecoveryIsPaused())
04665 {
04666 pg_usleep(1000000L);
04667 HandleStartupProcInterrupts();
04668 }
04669 }
04670
04671 bool
04672 RecoveryIsPaused(void)
04673 {
04674
04675 volatile XLogCtlData *xlogctl = XLogCtl;
04676 bool recoveryPause;
04677
04678 SpinLockAcquire(&xlogctl->info_lck);
04679 recoveryPause = xlogctl->recoveryPause;
04680 SpinLockRelease(&xlogctl->info_lck);
04681
04682 return recoveryPause;
04683 }
04684
04685 void
04686 SetRecoveryPause(bool recoveryPause)
04687 {
04688
04689 volatile XLogCtlData *xlogctl = XLogCtl;
04690
04691 SpinLockAcquire(&xlogctl->info_lck);
04692 xlogctl->recoveryPause = recoveryPause;
04693 SpinLockRelease(&xlogctl->info_lck);
04694 }
04695
04696
04697
04698
04699
04700
04701
04702
04703 static void
04704 SetLatestXTime(TimestampTz xtime)
04705 {
04706
04707 volatile XLogCtlData *xlogctl = XLogCtl;
04708
04709 SpinLockAcquire(&xlogctl->info_lck);
04710 xlogctl->recoveryLastXTime = xtime;
04711 SpinLockRelease(&xlogctl->info_lck);
04712 }
04713
04714
04715
04716
04717 TimestampTz
04718 GetLatestXTime(void)
04719 {
04720
04721 volatile XLogCtlData *xlogctl = XLogCtl;
04722 TimestampTz xtime;
04723
04724 SpinLockAcquire(&xlogctl->info_lck);
04725 xtime = xlogctl->recoveryLastXTime;
04726 SpinLockRelease(&xlogctl->info_lck);
04727
04728 return xtime;
04729 }
04730
04731
04732
04733
04734
04735
04736
04737 static void
04738 SetCurrentChunkStartTime(TimestampTz xtime)
04739 {
04740
04741 volatile XLogCtlData *xlogctl = XLogCtl;
04742
04743 SpinLockAcquire(&xlogctl->info_lck);
04744 xlogctl->currentChunkStartTime = xtime;
04745 SpinLockRelease(&xlogctl->info_lck);
04746 }
04747
04748
04749
04750
04751
04752 TimestampTz
04753 GetCurrentChunkReplayStartTime(void)
04754 {
04755
04756 volatile XLogCtlData *xlogctl = XLogCtl;
04757 TimestampTz xtime;
04758
04759 SpinLockAcquire(&xlogctl->info_lck);
04760 xtime = xlogctl->currentChunkStartTime;
04761 SpinLockRelease(&xlogctl->info_lck);
04762
04763 return xtime;
04764 }
04765
04766
04767
04768
04769
04770 void
04771 GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream)
04772 {
04773
04774
04775
04776
04777 Assert(InRecovery);
04778
04779 *rtime = XLogReceiptTime;
04780 *fromStream = (XLogReceiptSource == XLOG_FROM_STREAM);
04781 }
04782
04783
04784
04785
04786
/*
 * RecoveryRequiresIntParameter
 *		Raise an ERROR if an integer setting is lower than the required
 *		minimum.
 *
 * param_name: setting name shown in the message (formatted with %s).
 * currValue:  the local value (formatted with %d).
 * minValue:   the minimum it must reach (formatted with %d).
 *
 * The arguments may be evaluated more than once, so pass expressions
 * without side effects.  Wrapped in do { } while (0) so it behaves as a
 * single statement.
 */
#define RecoveryRequiresIntParameter(param_name, currValue, minValue) \
do { \
	if ((currValue) < (minValue)) \
		ereport(ERROR, \
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
				 errmsg("hot standby is not possible because " \
						"%s = %d is a lower setting than on the master server " \
						"(its value was %d)", \
						(param_name), \
						(currValue), \
						(minValue)))); \
} while (0)
04799
04800
04801
04802
04803
04804 static void
04805 CheckRequiredParameterValues(void)
04806 {
04807
04808
04809
04810
04811 if (InArchiveRecovery && ControlFile->wal_level == WAL_LEVEL_MINIMAL)
04812 {
04813 ereport(WARNING,
04814 (errmsg("WAL was generated with wal_level=minimal, data may be missing"),
04815 errhint("This happens if you temporarily set wal_level=minimal without taking a new base backup.")));
04816 }
04817
04818
04819
04820
04821
04822 if (InArchiveRecovery && EnableHotStandby)
04823 {
04824 if (ControlFile->wal_level < WAL_LEVEL_HOT_STANDBY)
04825 ereport(ERROR,
04826 (errmsg("hot standby is not possible because wal_level was not set to \"hot_standby\" on the master server"),
04827 errhint("Either set wal_level to \"hot_standby\" on the master, or turn off hot_standby here.")));
04828
04829
04830 RecoveryRequiresIntParameter("max_connections",
04831 MaxConnections,
04832 ControlFile->MaxConnections);
04833 RecoveryRequiresIntParameter("max_prepared_transactions",
04834 max_prepared_xacts,
04835 ControlFile->max_prepared_xacts);
04836 RecoveryRequiresIntParameter("max_locks_per_transaction",
04837 max_locks_per_xact,
04838 ControlFile->max_locks_per_xact);
04839 }
04840 }
04841
04842
04843
04844
04845 void
04846 StartupXLOG(void)
04847 {
04848 XLogCtlInsert *Insert;
04849 CheckPoint checkPoint;
04850 bool wasShutdown;
04851 bool reachedStopPoint = false;
04852 bool haveBackupLabel = false;
04853 XLogRecPtr RecPtr,
04854 checkPointLoc,
04855 EndOfLog;
04856 XLogSegNo endLogSegNo;
04857 TimeLineID PrevTimeLineID;
04858 XLogRecord *record;
04859 uint32 freespace;
04860 TransactionId oldestActiveXID;
04861 bool backupEndRequired = false;
04862 bool backupFromStandby = false;
04863 DBState dbstate_at_startup;
04864 XLogReaderState *xlogreader;
04865 XLogPageReadPrivate private;
04866 bool fast_promoted = false;
04867
04868
04869
04870
04871
04872
04873
04874 ReadControlFile();
04875
04876 if (ControlFile->state < DB_SHUTDOWNED ||
04877 ControlFile->state > DB_IN_PRODUCTION ||
04878 !XRecOffIsValid(ControlFile->checkPoint))
04879 ereport(FATAL,
04880 (errmsg("control file contains invalid data")));
04881
04882 if (ControlFile->state == DB_SHUTDOWNED)
04883 ereport(LOG,
04884 (errmsg("database system was shut down at %s",
04885 str_time(ControlFile->time))));
04886 else if (ControlFile->state == DB_SHUTDOWNED_IN_RECOVERY)
04887 ereport(LOG,
04888 (errmsg("database system was shut down in recovery at %s",
04889 str_time(ControlFile->time))));
04890 else if (ControlFile->state == DB_SHUTDOWNING)
04891 ereport(LOG,
04892 (errmsg("database system shutdown was interrupted; last known up at %s",
04893 str_time(ControlFile->time))));
04894 else if (ControlFile->state == DB_IN_CRASH_RECOVERY)
04895 ereport(LOG,
04896 (errmsg("database system was interrupted while in recovery at %s",
04897 str_time(ControlFile->time)),
04898 errhint("This probably means that some data is corrupted and"
04899 " you will have to use the last backup for recovery.")));
04900 else if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY)
04901 ereport(LOG,
04902 (errmsg("database system was interrupted while in recovery at log time %s",
04903 str_time(ControlFile->checkPointCopy.time)),
04904 errhint("If this has occurred more than once some data might be corrupted"
04905 " and you might need to choose an earlier recovery target.")));
04906 else if (ControlFile->state == DB_IN_PRODUCTION)
04907 ereport(LOG,
04908 (errmsg("database system was interrupted; last known up at %s",
04909 str_time(ControlFile->time))));
04910
04911
04912 #ifdef XLOG_REPLAY_DELAY
04913 if (ControlFile->state != DB_SHUTDOWNED)
04914 pg_usleep(60000000L);
04915 #endif
04916
04917
04918
04919
04920
04921
04922 ValidateXLOGDirectoryStructure();
04923
04924
04925
04926
04927
04928
04929
04930
04931
04932 RelationCacheInitFileRemove();
04933
04934
04935
04936
04937
04938 recoveryTargetTLI = ControlFile->checkPointCopy.ThisTimeLineID;
04939
04940
04941
04942
04943
04944 readRecoveryCommandFile();
04945
04946
04947
04948
04949
04950 strncpy(XLogCtl->archiveCleanupCommand,
04951 archiveCleanupCommand ? archiveCleanupCommand : "",
04952 sizeof(XLogCtl->archiveCleanupCommand));
04953
04954 if (ArchiveRecoveryRequested)
04955 {
04956 if (StandbyModeRequested)
04957 ereport(LOG,
04958 (errmsg("entering standby mode")));
04959 else if (recoveryTarget == RECOVERY_TARGET_XID)
04960 ereport(LOG,
04961 (errmsg("starting point-in-time recovery to XID %u",
04962 recoveryTargetXid)));
04963 else if (recoveryTarget == RECOVERY_TARGET_TIME)
04964 ereport(LOG,
04965 (errmsg("starting point-in-time recovery to %s",
04966 timestamptz_to_str(recoveryTargetTime))));
04967 else if (recoveryTarget == RECOVERY_TARGET_NAME)
04968 ereport(LOG,
04969 (errmsg("starting point-in-time recovery to \"%s\"",
04970 recoveryTargetName)));
04971 else
04972 ereport(LOG,
04973 (errmsg("starting archive recovery")));
04974 }
04975 else if (ControlFile->minRecoveryPointTLI > 0)
04976 {
04977
04978
04979
04980
04981
04982
04983
04984
04985
04986
04987 Assert(ControlFile->minRecoveryPointTLI != 1);
04988 recoveryTargetTLI = ControlFile->minRecoveryPointTLI;
04989 recoveryTargetIsLatest = false;
04990 }
04991
04992
04993
04994
04995
04996 if (StandbyModeRequested)
04997 OwnLatch(&XLogCtl->recoveryWakeupLatch);
04998
04999
05000 MemSet(&private, 0, sizeof(XLogPageReadPrivate));
05001 xlogreader = XLogReaderAllocate(&XLogPageRead, &private);
05002 if (!xlogreader)
05003 ereport(ERROR,
05004 (errcode(ERRCODE_OUT_OF_MEMORY),
05005 errmsg("out of memory"),
05006 errdetail("Failed while allocating an XLog reading processor")));
05007 xlogreader->system_identifier = ControlFile->system_identifier;
05008
05009 if (read_backup_label(&checkPointLoc, &backupEndRequired,
05010 &backupFromStandby))
05011 {
05012
05013
05014
05015
05016
05017 InArchiveRecovery = true;
05018 if (StandbyModeRequested)
05019 StandbyMode = true;
05020
05021
05022
05023
05024
05025 record = ReadCheckpointRecord(xlogreader, checkPointLoc, 0, true);
05026 if (record != NULL)
05027 {
05028 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
05029 wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
05030 ereport(DEBUG1,
05031 (errmsg("checkpoint record is at %X/%X",
05032 (uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
05033 InRecovery = true;
05034
05035
05036
05037
05038
05039
05040
05041 if (checkPoint.redo < checkPointLoc)
05042 {
05043 if (!ReadRecord(xlogreader, checkPoint.redo, LOG, false))
05044 ereport(FATAL,
05045 (errmsg("could not find redo location referenced by checkpoint record"),
05046 errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
05047 }
05048 }
05049 else
05050 {
05051 ereport(FATAL,
05052 (errmsg("could not locate required checkpoint record"),
05053 errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
05054 wasShutdown = false;
05055 }
05056
05057 haveBackupLabel = true;
05058 }
05059 else
05060 {
05061
05062
05063
05064
05065
05066
05067
05068
05069
05070
05071
05072
05073
05074
05075
05076
05077 if (ArchiveRecoveryRequested &&
05078 (ControlFile->minRecoveryPoint != InvalidXLogRecPtr ||
05079 ControlFile->backupEndRequired ||
05080 ControlFile->backupEndPoint != InvalidXLogRecPtr ||
05081 ControlFile->state == DB_SHUTDOWNED))
05082 {
05083 InArchiveRecovery = true;
05084 if (StandbyModeRequested)
05085 StandbyMode = true;
05086 }
05087
05088
05089
05090
05091
05092 checkPointLoc = ControlFile->checkPoint;
05093 RedoStartLSN = ControlFile->checkPointCopy.redo;
05094 record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, true);
05095 if (record != NULL)
05096 {
05097 ereport(DEBUG1,
05098 (errmsg("checkpoint record is at %X/%X",
05099 (uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
05100 }
05101 else if (StandbyMode)
05102 {
05103
05104
05105
05106
05107 ereport(PANIC,
05108 (errmsg("could not locate a valid checkpoint record")));
05109 }
05110 else
05111 {
05112 checkPointLoc = ControlFile->prevCheckPoint;
05113 record = ReadCheckpointRecord(xlogreader, checkPointLoc, 2, true);
05114 if (record != NULL)
05115 {
05116 ereport(LOG,
05117 (errmsg("using previous checkpoint record at %X/%X",
05118 (uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
05119 InRecovery = true;
05120 }
05121 else
05122 ereport(PANIC,
05123 (errmsg("could not locate a valid checkpoint record")));
05124 }
05125 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
05126 wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
05127 }
05128
05129
05130
05131
05132
05133
05134 Assert(expectedTLEs);
05135 if (tliOfPointInHistory(checkPointLoc, expectedTLEs) !=
05136 checkPoint.ThisTimeLineID)
05137 {
05138 XLogRecPtr switchpoint;
05139
05140
05141
05142
05143
05144 switchpoint = tliSwitchPoint(ControlFile->checkPointCopy.ThisTimeLineID, expectedTLEs, NULL);
05145 ereport(FATAL,
05146 (errmsg("requested timeline %u is not a child of this server's history",
05147 recoveryTargetTLI),
05148 errdetail("Latest checkpoint is at %X/%X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%X",
05149 (uint32) (ControlFile->checkPoint >> 32),
05150 (uint32) ControlFile->checkPoint,
05151 ControlFile->checkPointCopy.ThisTimeLineID,
05152 (uint32) (switchpoint >> 32),
05153 (uint32) switchpoint)));
05154 }
05155
05156
05157
05158
05159
05160 if (!XLogRecPtrIsInvalid(ControlFile->minRecoveryPoint) &&
05161 tliOfPointInHistory(ControlFile->minRecoveryPoint - 1, expectedTLEs) !=
05162 ControlFile->minRecoveryPointTLI)
05163 ereport(FATAL,
05164 (errmsg("requested timeline %u does not contain minimum recovery point %X/%X on timeline %u",
05165 recoveryTargetTLI,
05166 (uint32) (ControlFile->minRecoveryPoint >> 32),
05167 (uint32) ControlFile->minRecoveryPoint,
05168 ControlFile->minRecoveryPointTLI)));
05169
05170 LastRec = RecPtr = checkPointLoc;
05171
05172 ereport(DEBUG1,
05173 (errmsg("redo record is at %X/%X; shutdown %s",
05174 (uint32) (checkPoint.redo >> 32), (uint32) checkPoint.redo,
05175 wasShutdown ? "TRUE" : "FALSE")));
05176 ereport(DEBUG1,
05177 (errmsg("next transaction ID: %u/%u; next OID: %u",
05178 checkPoint.nextXidEpoch, checkPoint.nextXid,
05179 checkPoint.nextOid)));
05180 ereport(DEBUG1,
05181 (errmsg("next MultiXactId: %u; next MultiXactOffset: %u",
05182 checkPoint.nextMulti, checkPoint.nextMultiOffset)));
05183 ereport(DEBUG1,
05184 (errmsg("oldest unfrozen transaction ID: %u, in database %u",
05185 checkPoint.oldestXid, checkPoint.oldestXidDB)));
05186 ereport(DEBUG1,
05187 (errmsg("oldest MultiXactId: %u, in database %u",
05188 checkPoint.oldestMulti, checkPoint.oldestMultiDB)));
05189 if (!TransactionIdIsNormal(checkPoint.nextXid))
05190 ereport(PANIC,
05191 (errmsg("invalid next transaction ID")));
05192
05193
05194 ShmemVariableCache->nextXid = checkPoint.nextXid;
05195 ShmemVariableCache->nextOid = checkPoint.nextOid;
05196 ShmemVariableCache->oidCount = 0;
05197 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
05198 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
05199 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
05200 XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
05201 XLogCtl->ckptXid = checkPoint.nextXid;
05202
05203
05204
05205
05206
05207
05208 if (ControlFile->state == DB_SHUTDOWNED)
05209 XLogCtl->unloggedLSN = ControlFile->unloggedLSN;
05210 else
05211 XLogCtl->unloggedLSN = 1;
05212
05213
05214
05215
05216
05217
05218 ThisTimeLineID = checkPoint.ThisTimeLineID;
05219
05220
05221
05222
05223
05224
05225
05226
05227
05228
05229
05230
05231
05232 restoreTimeLineHistoryFiles(ThisTimeLineID, recoveryTargetTLI);
05233
05234 lastFullPageWrites = checkPoint.fullPageWrites;
05235
05236 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
05237
05238 if (RecPtr < checkPoint.redo)
05239 ereport(PANIC,
05240 (errmsg("invalid redo in checkpoint record")));
05241
05242
05243
05244
05245
05246
05247 if (checkPoint.redo < RecPtr)
05248 {
05249 if (wasShutdown)
05250 ereport(PANIC,
05251 (errmsg("invalid redo record in shutdown checkpoint")));
05252 InRecovery = true;
05253 }
05254 else if (ControlFile->state != DB_SHUTDOWNED)
05255 InRecovery = true;
05256 else if (ArchiveRecoveryRequested)
05257 {
05258
05259 InRecovery = true;
05260 }
05261
05262
05263 if (InRecovery)
05264 {
05265 int rmid;
05266
05267
05268 volatile XLogCtlData *xlogctl = XLogCtl;
05269
05270
05271
05272
05273
05274
05275
05276 dbstate_at_startup = ControlFile->state;
05277 if (InArchiveRecovery)
05278 ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
05279 else
05280 {
05281 ereport(LOG,
05282 (errmsg("database system was not properly shut down; "
05283 "automatic recovery in progress")));
05284 if (recoveryTargetTLI > 0)
05285 ereport(LOG,
05286 (errmsg("crash recovery starts in timeline %u "
05287 "and has target timeline %u",
05288 ControlFile->checkPointCopy.ThisTimeLineID,
05289 recoveryTargetTLI)));
05290 ControlFile->state = DB_IN_CRASH_RECOVERY;
05291 }
05292 ControlFile->prevCheckPoint = ControlFile->checkPoint;
05293 ControlFile->checkPoint = checkPointLoc;
05294 ControlFile->checkPointCopy = checkPoint;
05295 if (InArchiveRecovery)
05296 {
05297
05298 if (ControlFile->minRecoveryPoint < checkPoint.redo)
05299 {
05300 ControlFile->minRecoveryPoint = checkPoint.redo;
05301 ControlFile->minRecoveryPointTLI = checkPoint.ThisTimeLineID;
05302 }
05303 }
05304
05305
05306
05307
05308
05309
05310
05311
05312
05313
05314 if (haveBackupLabel)
05315 {
05316 ControlFile->backupStartPoint = checkPoint.redo;
05317 ControlFile->backupEndRequired = backupEndRequired;
05318
05319 if (backupFromStandby)
05320 {
05321 if (dbstate_at_startup != DB_IN_ARCHIVE_RECOVERY)
05322 ereport(FATAL,
05323 (errmsg("backup_label contains data inconsistent with control file"),
05324 errhint("This means that the backup is corrupted and you will "
05325 "have to use another backup for recovery.")));
05326 ControlFile->backupEndPoint = ControlFile->minRecoveryPoint;
05327 }
05328 }
05329 ControlFile->time = (pg_time_t) time(NULL);
05330
05331 UpdateControlFile();
05332
05333
05334 minRecoveryPoint = ControlFile->minRecoveryPoint;
05335 minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
05336
05337
05338
05339
05340 pgstat_reset_all();
05341
05342
05343
05344
05345
05346
05347
05348
05349
05350 if (haveBackupLabel)
05351 {
05352 unlink(BACKUP_LABEL_OLD);
05353 if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) != 0)
05354 ereport(FATAL,
05355 (errcode_for_file_access(),
05356 errmsg("could not rename file \"%s\" to \"%s\": %m",
05357 BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
05358 }
05359
05360
05361 CheckRequiredParameterValues();
05362
05363
05364
05365
05366
05367
05368
05369 ResetUnloggedRelations(UNLOGGED_RELATION_CLEANUP);
05370
05371
05372
05373
05374
05375 DeleteAllExportedSnapshotFiles();
05376
05377
05378
05379
05380
05381
05382
05383 if (ArchiveRecoveryRequested && EnableHotStandby)
05384 {
05385 TransactionId *xids;
05386 int nxids;
05387
05388 ereport(DEBUG1,
05389 (errmsg("initializing for hot standby")));
05390
05391 InitRecoveryTransactionEnvironment();
05392
05393 if (wasShutdown)
05394 oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
05395 else
05396 oldestActiveXID = checkPoint.oldestActiveXid;
05397 Assert(TransactionIdIsValid(oldestActiveXID));
05398
05399
05400
05401
05402
05403 StartupCLOG();
05404 StartupSUBTRANS(oldestActiveXID);
05405
05406
05407
05408
05409
05410
05411
05412 if (wasShutdown)
05413 {
05414 RunningTransactionsData running;
05415 TransactionId latestCompletedXid;
05416
05417
05418
05419
05420
05421
05422
05423 running.xcnt = nxids;
05424 running.subxcnt = 0;
05425 running.subxid_overflow = false;
05426 running.nextXid = checkPoint.nextXid;
05427 running.oldestRunningXid = oldestActiveXID;
05428 latestCompletedXid = checkPoint.nextXid;
05429 TransactionIdRetreat(latestCompletedXid);
05430 Assert(TransactionIdIsNormal(latestCompletedXid));
05431 running.latestCompletedXid = latestCompletedXid;
05432 running.xids = xids;
05433
05434 ProcArrayApplyRecoveryInfo(&running);
05435
05436 StandbyRecoverPreparedTransactions(false);
05437 }
05438 }
05439
05440
05441 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
05442 {
05443 if (RmgrTable[rmid].rm_startup != NULL)
05444 RmgrTable[rmid].rm_startup();
05445 }
05446
05447
05448
05449
05450
05451
05452
05453
05454
05455
05456
05457
05458
05459 SpinLockAcquire(&xlogctl->info_lck);
05460 xlogctl->replayEndRecPtr = ReadRecPtr;
05461 xlogctl->replayEndTLI = ThisTimeLineID;
05462 xlogctl->lastReplayedEndRecPtr = EndRecPtr;
05463 xlogctl->lastReplayedTLI = ThisTimeLineID;
05464 xlogctl->recoveryLastXTime = 0;
05465 xlogctl->currentChunkStartTime = 0;
05466 xlogctl->recoveryPause = false;
05467 SpinLockRelease(&xlogctl->info_lck);
05468
05469
05470 XLogReceiptTime = GetCurrentTimestamp();
05471
05472
05473
05474
05475
05476
05477
05478
05479
05480
05481
05482
05483
05484 if (ArchiveRecoveryRequested && IsUnderPostmaster)
05485 {
05486 PublishStartupProcessInformation();
05487 SetForwardFsyncRequests();
05488 SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
05489 bgwriterLaunched = true;
05490 }
05491
05492
05493
05494
05495
05496 CheckRecoveryConsistency();
05497
05498
05499
05500
05501
05502 if (checkPoint.redo < RecPtr)
05503 {
05504
05505 record = ReadRecord(xlogreader, checkPoint.redo, PANIC, false);
05506 }
05507 else
05508 {
05509
05510 record = ReadRecord(xlogreader, InvalidXLogRecPtr, LOG, false);
05511 }
05512
05513 if (record != NULL)
05514 {
05515 bool recoveryContinue = true;
05516 bool recoveryApply = true;
05517 ErrorContextCallback errcallback;
05518 TimestampTz xtime;
05519
05520 InRedo = true;
05521
05522 ereport(LOG,
05523 (errmsg("redo starts at %X/%X",
05524 (uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr)));
05525
05526
05527
05528
05529 do
05530 {
05531 bool switchedTLI = false;
05532 #ifdef WAL_DEBUG
05533 if (XLOG_DEBUG ||
05534 (rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) ||
05535 (rmid != RM_XACT_ID && trace_recovery_messages <= DEBUG3))
05536 {
05537 StringInfoData buf;
05538
05539 initStringInfo(&buf);
05540 appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
05541 (uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr,
05542 (uint32) (EndRecPtr >> 32), (uint32) EndRecPtr);
05543 xlog_outrec(&buf, record);
05544 appendStringInfo(&buf, " - ");
05545 RmgrTable[record->xl_rmid].rm_desc(&buf,
05546 record->xl_info,
05547 XLogRecGetData(record));
05548 elog(LOG, "%s", buf.data);
05549 pfree(buf.data);
05550 }
05551 #endif
05552
05553
05554 HandleStartupProcInterrupts();
05555
05556
05557
05558
05559
05560
05561
05562
05563
05564
05565
05566
05567
05568
05569 if (xlogctl->recoveryPause)
05570 recoveryPausesHere();
05571
05572
05573
05574
05575 if (recoveryStopsHere(record, &recoveryApply))
05576 {
05577 if (recoveryPauseAtTarget)
05578 {
05579 SetRecoveryPause(true);
05580 recoveryPausesHere();
05581 }
05582 reachedStopPoint = true;
05583 recoveryContinue = false;
05584
05585
05586 if (!recoveryApply)
05587 break;
05588 }
05589
05590
05591 errcallback.callback = rm_redo_error_callback;
05592 errcallback.arg = (void *) record;
05593 errcallback.previous = error_context_stack;
05594 error_context_stack = &errcallback;
05595
05596
05597
05598
05599
05600
05601
05602
05603 if (TransactionIdFollowsOrEquals(record->xl_xid,
05604 ShmemVariableCache->nextXid))
05605 {
05606 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
05607 ShmemVariableCache->nextXid = record->xl_xid;
05608 TransactionIdAdvance(ShmemVariableCache->nextXid);
05609 LWLockRelease(XidGenLock);
05610 }
05611
05612
05613
05614
05615
05616
05617
05618
05619
05620
05621 if (record->xl_rmid == RM_XLOG_ID)
05622 {
05623 TimeLineID newTLI = ThisTimeLineID;
05624 TimeLineID prevTLI = ThisTimeLineID;
05625 uint8 info = record->xl_info & ~XLR_INFO_MASK;
05626
05627 if (info == XLOG_CHECKPOINT_SHUTDOWN)
05628 {
05629 CheckPoint checkPoint;
05630
05631 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
05632 newTLI = checkPoint.ThisTimeLineID;
05633 prevTLI = checkPoint.PrevTimeLineID;
05634 }
05635 else if (info == XLOG_END_OF_RECOVERY)
05636 {
05637 xl_end_of_recovery xlrec;
05638
05639 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
05640 newTLI = xlrec.ThisTimeLineID;
05641 prevTLI = xlrec.PrevTimeLineID;
05642 }
05643
05644 if (newTLI != ThisTimeLineID)
05645 {
05646
05647 checkTimeLineSwitch(EndRecPtr, newTLI, prevTLI);
05648
05649
05650 ThisTimeLineID = newTLI;
05651 switchedTLI = true;
05652 }
05653 }
05654
05655
05656
05657
05658
05659 SpinLockAcquire(&xlogctl->info_lck);
05660 xlogctl->replayEndRecPtr = EndRecPtr;
05661 xlogctl->replayEndTLI = ThisTimeLineID;
05662 SpinLockRelease(&xlogctl->info_lck);
05663
05664
05665
05666
05667
05668 if (standbyState >= STANDBY_INITIALIZED &&
05669 TransactionIdIsValid(record->xl_xid))
05670 RecordKnownAssignedTransactionIds(record->xl_xid);
05671
05672
05673 RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
05674
05675
05676 error_context_stack = errcallback.previous;
05677
05678
05679
05680
05681
05682 SpinLockAcquire(&xlogctl->info_lck);
05683 xlogctl->lastReplayedEndRecPtr = EndRecPtr;
05684 xlogctl->lastReplayedTLI = ThisTimeLineID;
05685 SpinLockRelease(&xlogctl->info_lck);
05686
05687
05688 LastRec = ReadRecPtr;
05689
05690
05691 CheckRecoveryConsistency();
05692
05693
05694
05695
05696
05697 if (switchedTLI && AllowCascadeReplication())
05698 WalSndWakeup();
05699
05700
05701 if (!recoveryContinue)
05702 break;
05703
05704
05705 record = ReadRecord(xlogreader, InvalidXLogRecPtr, LOG, false);
05706 } while (record != NULL);
05707
05708
05709
05710
05711
05712 ereport(LOG,
05713 (errmsg("redo done at %X/%X",
05714 (uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr)));
05715 xtime = GetLatestXTime();
05716 if (xtime)
05717 ereport(LOG,
05718 (errmsg("last completed transaction was at log time %s",
05719 timestamptz_to_str(xtime))));
05720 InRedo = false;
05721 }
05722 else
05723 {
05724
05725 ereport(LOG,
05726 (errmsg("redo is not required")));
05727 }
05728 }
05729
05730
05731
05732
05733
05734
05735 ShutdownWalRcv();
05736
05737
05738
05739
05740
05741 if (StandbyModeRequested)
05742 DisownLatch(&XLogCtl->recoveryWakeupLatch);
05743
05744
05745
05746
05747
05748
05749 StandbyMode = false;
05750
05751
05752
05753
05754
05755 record = ReadRecord(xlogreader, LastRec, PANIC, false);
05756 EndOfLog = EndRecPtr;
05757 XLByteToPrevSeg(EndOfLog, endLogSegNo);
05758
05759
05760
05761
05762
05763
05764
05765
05766 if (InRecovery &&
05767 (EndOfLog < minRecoveryPoint ||
05768 !XLogRecPtrIsInvalid(ControlFile->backupStartPoint)))
05769 {
05770 if (reachedStopPoint)
05771 {
05772
05773 ereport(FATAL,
05774 (errmsg("requested recovery stop point is before consistent recovery point")));
05775 }
05776
05777
05778
05779
05780
05781
05782
05783
05784
05785
05786 if (ArchiveRecoveryRequested || ControlFile->backupEndRequired)
05787 {
05788 if (ControlFile->backupEndRequired)
05789 ereport(FATAL,
05790 (errmsg("WAL ends before end of online backup"),
05791 errhint("All WAL generated while online backup was taken must be available at recovery.")));
05792 else if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
05793 ereport(FATAL,
05794 (errmsg("WAL ends before end of online backup"),
05795 errhint("Online backup started with pg_start_backup() must be ended with pg_stop_backup(), and all WAL up to that point must be available at recovery.")));
05796 else
05797 ereport(FATAL,
05798 (errmsg("WAL ends before consistent recovery point")));
05799 }
05800 }
05801
05802
05803
05804
05805
05806
05807
05808
05809
05810
05811
05812
05813
05814
05815
05816 PrevTimeLineID = ThisTimeLineID;
05817 if (ArchiveRecoveryRequested)
05818 {
05819 char reason[200];
05820
05821 Assert(InArchiveRecovery);
05822
05823 ThisTimeLineID = findNewestTimeLine(recoveryTargetTLI) + 1;
05824 ereport(LOG,
05825 (errmsg("selected new timeline ID: %u", ThisTimeLineID)));
05826
05827
05828
05829
05830
05831 if (recoveryTarget == RECOVERY_TARGET_XID)
05832 snprintf(reason, sizeof(reason),
05833 "%s transaction %u",
05834 recoveryStopAfter ? "after" : "before",
05835 recoveryStopXid);
05836 else if (recoveryTarget == RECOVERY_TARGET_TIME)
05837 snprintf(reason, sizeof(reason),
05838 "%s %s\n",
05839 recoveryStopAfter ? "after" : "before",
05840 timestamptz_to_str(recoveryStopTime));
05841 else if (recoveryTarget == RECOVERY_TARGET_NAME)
05842 snprintf(reason, sizeof(reason),
05843 "at restore point \"%s\"",
05844 recoveryStopName);
05845 else
05846 snprintf(reason, sizeof(reason), "no recovery target specified");
05847
05848 writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI,
05849 EndRecPtr, reason);
05850 }
05851
05852
05853 XLogCtl->ThisTimeLineID = ThisTimeLineID;
05854 XLogCtl->PrevTimeLineID = PrevTimeLineID;
05855
05856
05857
05858
05859
05860
05861
05862 if (ArchiveRecoveryRequested)
05863 exitArchiveRecovery(xlogreader->readPageTLI, endLogSegNo);
05864
05865
05866
05867
05868
05869
05870 openLogSegNo = endLogSegNo;
05871 openLogFile = XLogFileOpen(openLogSegNo);
05872 openLogOff = 0;
05873 Insert = &XLogCtl->Insert;
05874 Insert->PrevRecord = LastRec;
05875 XLogCtl->xlblocks[0] = ((EndOfLog - 1) / XLOG_BLCKSZ + 1) * XLOG_BLCKSZ;
05876
05877
05878
05879
05880
05881
05882 if (EndOfLog % XLOG_BLCKSZ == 0)
05883 {
05884 memset(Insert->currpage, 0, XLOG_BLCKSZ);
05885 }
05886 else
05887 {
05888 Assert(readOff == (XLogCtl->xlblocks[0] - XLOG_BLCKSZ) % XLogSegSize);
05889 memcpy((char *) Insert->currpage, xlogreader->readBuf, XLOG_BLCKSZ);
05890 }
05891 Insert->currpos = (char *) Insert->currpage +
05892 (EndOfLog + XLOG_BLCKSZ - XLogCtl->xlblocks[0]);
05893
05894 LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
05895
05896 XLogCtl->LogwrtResult = LogwrtResult;
05897
05898 XLogCtl->LogwrtRqst.Write = EndOfLog;
05899 XLogCtl->LogwrtRqst.Flush = EndOfLog;
05900
05901 freespace = INSERT_FREESPACE(Insert);
05902 if (freespace > 0)
05903 {
05904
05905 MemSet(Insert->currpos, 0, freespace);
05906 XLogCtl->Write.curridx = 0;
05907 }
05908 else
05909 {
05910
05911
05912
05913
05914
05915
05916
05917
05918 XLogCtl->Write.curridx = NextBufIdx(0);
05919 }
05920
05921
05922 oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
05923
05924
05925
05926
05927
05928
05929 Insert->fullPageWrites = lastFullPageWrites;
05930 LocalSetXLogInsertAllowed();
05931 UpdateFullPageWrites();
05932 LocalXLogInsertAllowed = -1;
05933
05934 if (InRecovery)
05935 {
05936 int rmid;
05937
05938
05939
05940
05941
05942
05943 LocalSetXLogInsertAllowed();
05944
05945
05946
05947
05948 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
05949 {
05950 if (RmgrTable[rmid].rm_cleanup != NULL)
05951 RmgrTable[rmid].rm_cleanup();
05952 }
05953
05954
05955 LocalXLogInsertAllowed = -1;
05956
05957
05958
05959
05960
05961
05962
05963
05964
05965
05966
05967
05968
05969
05970 if (bgwriterLaunched)
05971 {
05972 if (fast_promote)
05973 {
05974 checkPointLoc = ControlFile->prevCheckPoint;
05975
05976
05977
05978
05979
05980
05981 record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, false);
05982 if (record != NULL)
05983 {
05984 fast_promoted = true;
05985 CreateEndOfRecoveryRecord();
05986 }
05987 }
05988
05989 if (!fast_promoted)
05990 RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
05991 CHECKPOINT_IMMEDIATE |
05992 CHECKPOINT_WAIT);
05993 }
05994 else
05995 CreateCheckPoint(CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_IMMEDIATE);
05996
05997
05998
05999
06000 if (recoveryEndCommand)
06001 ExecuteRecoveryCommand(recoveryEndCommand,
06002 "recovery_end_command",
06003 true);
06004 }
06005
06006
06007
06008
06009 PreallocXlogFiles(EndOfLog);
06010
06011
06012
06013
06014
06015
06016 if (InRecovery)
06017 ResetUnloggedRelations(UNLOGGED_RELATION_INIT);
06018
06019
06020
06021
06022 InRecovery = false;
06023
06024 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
06025 ControlFile->state = DB_IN_PRODUCTION;
06026 ControlFile->time = (pg_time_t) time(NULL);
06027 UpdateControlFile();
06028 LWLockRelease(ControlFileLock);
06029
06030
06031 XLogCtl->Write.lastSegSwitchTime = (pg_time_t) time(NULL);
06032
06033
06034 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
06035 ShmemVariableCache->latestCompletedXid = ShmemVariableCache->nextXid;
06036 TransactionIdRetreat(ShmemVariableCache->latestCompletedXid);
06037 LWLockRelease(ProcArrayLock);
06038
06039
06040
06041
06042
06043 if (standbyState == STANDBY_DISABLED)
06044 {
06045 StartupCLOG();
06046 StartupSUBTRANS(oldestActiveXID);
06047 }
06048
06049
06050
06051
06052 StartupMultiXact();
06053 TrimCLOG();
06054
06055
06056 RecoverPreparedTransactions();
06057
06058
06059
06060
06061
06062 if (standbyState != STANDBY_DISABLED)
06063 ShutdownRecoveryTransactionEnvironment();
06064
06065
06066 if (readFile >= 0)
06067 {
06068 close(readFile);
06069 readFile = -1;
06070 }
06071 XLogReaderFree(xlogreader);
06072
06073
06074
06075
06076
06077 LocalSetXLogInsertAllowed();
06078 XLogReportParameters();
06079
06080
06081
06082
06083
06084
06085
06086 {
06087
06088 volatile XLogCtlData *xlogctl = XLogCtl;
06089
06090 SpinLockAcquire(&xlogctl->info_lck);
06091 xlogctl->SharedRecoveryInProgress = false;
06092 SpinLockRelease(&xlogctl->info_lck);
06093 }
06094
06095
06096
06097
06098
06099 WalSndWakeup();
06100
06101
06102
06103
06104
06105
06106
06107 if (fast_promoted)
06108 RequestCheckpoint(0);
06109 }
06110
06111
06112
06113
06114
06115
06116 static void
06117 CheckRecoveryConsistency(void)
06118 {
06119
06120
06121
06122
06123 if (XLogRecPtrIsInvalid(minRecoveryPoint))
06124 return;
06125
06126
06127
06128
06129 if (!XLogRecPtrIsInvalid(ControlFile->backupEndPoint) &&
06130 ControlFile->backupEndPoint <= EndRecPtr)
06131 {
06132
06133
06134
06135
06136
06137
06138
06139 elog(DEBUG1, "end of backup reached");
06140
06141 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
06142
06143 if (ControlFile->minRecoveryPoint < EndRecPtr)
06144 ControlFile->minRecoveryPoint = EndRecPtr;
06145
06146 ControlFile->backupStartPoint = InvalidXLogRecPtr;
06147 ControlFile->backupEndPoint = InvalidXLogRecPtr;
06148 ControlFile->backupEndRequired = false;
06149 UpdateControlFile();
06150
06151 LWLockRelease(ControlFileLock);
06152 }
06153
06154
06155
06156
06157
06158
06159
06160
06161 if (!reachedConsistency && !ControlFile->backupEndRequired &&
06162 minRecoveryPoint <= XLogCtl->lastReplayedEndRecPtr &&
06163 XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
06164 {
06165
06166
06167
06168
06169 XLogCheckInvalidPages();
06170
06171 reachedConsistency = true;
06172 ereport(LOG,
06173 (errmsg("consistent recovery state reached at %X/%X",
06174 (uint32) (XLogCtl->lastReplayedEndRecPtr >> 32),
06175 (uint32) XLogCtl->lastReplayedEndRecPtr)));
06176 }
06177
06178
06179
06180
06181
06182
06183 if (standbyState == STANDBY_SNAPSHOT_READY &&
06184 !LocalHotStandbyActive &&
06185 reachedConsistency &&
06186 IsUnderPostmaster)
06187 {
06188
06189 volatile XLogCtlData *xlogctl = XLogCtl;
06190
06191 SpinLockAcquire(&xlogctl->info_lck);
06192 xlogctl->SharedHotStandbyActive = true;
06193 SpinLockRelease(&xlogctl->info_lck);
06194
06195 LocalHotStandbyActive = true;
06196
06197 SendPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY);
06198 }
06199 }
06200
06201
06202
06203
06204
06205
06206
06207
06208
06209
06210 bool
06211 RecoveryInProgress(void)
06212 {
06213
06214
06215
06216
06217
06218 if (!LocalRecoveryInProgress)
06219 return false;
06220 else
06221 {
06222
06223 volatile XLogCtlData *xlogctl = XLogCtl;
06224
06225
06226 SpinLockAcquire(&xlogctl->info_lck);
06227 LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress;
06228 SpinLockRelease(&xlogctl->info_lck);
06229
06230
06231
06232
06233
06234
06235
06236 if (!LocalRecoveryInProgress)
06237 InitXLOGAccess();
06238
06239 return LocalRecoveryInProgress;
06240 }
06241 }
06242
06243
06244
06245
06246
06247
06248
06249
06250
06251 bool
06252 HotStandbyActive(void)
06253 {
06254
06255
06256
06257
06258
06259 if (LocalHotStandbyActive)
06260 return true;
06261 else
06262 {
06263
06264 volatile XLogCtlData *xlogctl = XLogCtl;
06265
06266
06267 SpinLockAcquire(&xlogctl->info_lck);
06268 LocalHotStandbyActive = xlogctl->SharedHotStandbyActive;
06269 SpinLockRelease(&xlogctl->info_lck);
06270
06271 return LocalHotStandbyActive;
06272 }
06273 }
06274
06275
06276
06277
06278
06279
06280
06281
06282 bool
06283 XLogInsertAllowed(void)
06284 {
06285
06286
06287
06288
06289
06290 if (LocalXLogInsertAllowed >= 0)
06291 return (bool) LocalXLogInsertAllowed;
06292
06293
06294
06295
06296 if (RecoveryInProgress())
06297 return false;
06298
06299
06300
06301
06302
06303 LocalXLogInsertAllowed = 1;
06304 return true;
06305 }
06306
06307
06308
06309
06310
06311
06312
06313 static void
06314 LocalSetXLogInsertAllowed(void)
06315 {
06316 Assert(LocalXLogInsertAllowed == -1);
06317 LocalXLogInsertAllowed = 1;
06318
06319
06320 InitXLOGAccess();
06321 }
06322
06323
06324
06325
06326
06327
06328
06329 static XLogRecord *
06330 ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
06331 int whichChkpt, bool report)
06332 {
06333 XLogRecord *record;
06334
06335 if (!XRecOffIsValid(RecPtr))
06336 {
06337 if (!report)
06338 return NULL;
06339
06340 switch (whichChkpt)
06341 {
06342 case 1:
06343 ereport(LOG,
06344 (errmsg("invalid primary checkpoint link in control file")));
06345 break;
06346 case 2:
06347 ereport(LOG,
06348 (errmsg("invalid secondary checkpoint link in control file")));
06349 break;
06350 default:
06351 ereport(LOG,
06352 (errmsg("invalid checkpoint link in backup_label file")));
06353 break;
06354 }
06355 return NULL;
06356 }
06357
06358 record = ReadRecord(xlogreader, RecPtr, LOG, true);
06359
06360 if (record == NULL)
06361 {
06362 if (!report)
06363 return NULL;
06364
06365 switch (whichChkpt)
06366 {
06367 case 1:
06368 ereport(LOG,
06369 (errmsg("invalid primary checkpoint record")));
06370 break;
06371 case 2:
06372 ereport(LOG,
06373 (errmsg("invalid secondary checkpoint record")));
06374 break;
06375 default:
06376 ereport(LOG,
06377 (errmsg("invalid checkpoint record")));
06378 break;
06379 }
06380 return NULL;
06381 }
06382 if (record->xl_rmid != RM_XLOG_ID)
06383 {
06384 switch (whichChkpt)
06385 {
06386 case 1:
06387 ereport(LOG,
06388 (errmsg("invalid resource manager ID in primary checkpoint record")));
06389 break;
06390 case 2:
06391 ereport(LOG,
06392 (errmsg("invalid resource manager ID in secondary checkpoint record")));
06393 break;
06394 default:
06395 ereport(LOG,
06396 (errmsg("invalid resource manager ID in checkpoint record")));
06397 break;
06398 }
06399 return NULL;
06400 }
06401 if (record->xl_info != XLOG_CHECKPOINT_SHUTDOWN &&
06402 record->xl_info != XLOG_CHECKPOINT_ONLINE)
06403 {
06404 switch (whichChkpt)
06405 {
06406 case 1:
06407 ereport(LOG,
06408 (errmsg("invalid xl_info in primary checkpoint record")));
06409 break;
06410 case 2:
06411 ereport(LOG,
06412 (errmsg("invalid xl_info in secondary checkpoint record")));
06413 break;
06414 default:
06415 ereport(LOG,
06416 (errmsg("invalid xl_info in checkpoint record")));
06417 break;
06418 }
06419 return NULL;
06420 }
06421 if (record->xl_len != sizeof(CheckPoint) ||
06422 record->xl_tot_len != SizeOfXLogRecord + sizeof(CheckPoint))
06423 {
06424 switch (whichChkpt)
06425 {
06426 case 1:
06427 ereport(LOG,
06428 (errmsg("invalid length of primary checkpoint record")));
06429 break;
06430 case 2:
06431 ereport(LOG,
06432 (errmsg("invalid length of secondary checkpoint record")));
06433 break;
06434 default:
06435 ereport(LOG,
06436 (errmsg("invalid length of checkpoint record")));
06437 break;
06438 }
06439 return NULL;
06440 }
06441 return record;
06442 }
06443
06444
06445
06446
06447
06448
06449
06450
06451
06452
06453
06454 void
06455 InitXLOGAccess(void)
06456 {
06457
06458 ThisTimeLineID = XLogCtl->ThisTimeLineID;
06459 Assert(ThisTimeLineID != 0 || IsBootstrapProcessingMode());
06460
06461
06462 (void) GetRedoRecPtr();
06463 }
06464
06465
06466
06467
06468
06469
06470 XLogRecPtr
06471 GetRedoRecPtr(void)
06472 {
06473
06474 volatile XLogCtlData *xlogctl = XLogCtl;
06475
06476 SpinLockAcquire(&xlogctl->info_lck);
06477 Assert(RedoRecPtr <= xlogctl->Insert.RedoRecPtr);
06478 RedoRecPtr = xlogctl->Insert.RedoRecPtr;
06479 SpinLockRelease(&xlogctl->info_lck);
06480
06481 return RedoRecPtr;
06482 }
06483
06484
06485
06486
06487
06488
06489
06490
06491
06492
06493 XLogRecPtr
06494 GetInsertRecPtr(void)
06495 {
06496
06497 volatile XLogCtlData *xlogctl = XLogCtl;
06498 XLogRecPtr recptr;
06499
06500 SpinLockAcquire(&xlogctl->info_lck);
06501 recptr = xlogctl->LogwrtRqst.Write;
06502 SpinLockRelease(&xlogctl->info_lck);
06503
06504 return recptr;
06505 }
06506
06507
06508
06509
06510
06511 XLogRecPtr
06512 GetFlushRecPtr(void)
06513 {
06514
06515 volatile XLogCtlData *xlogctl = XLogCtl;
06516 XLogRecPtr recptr;
06517
06518 SpinLockAcquire(&xlogctl->info_lck);
06519 recptr = xlogctl->LogwrtResult.Flush;
06520 SpinLockRelease(&xlogctl->info_lck);
06521
06522 return recptr;
06523 }
06524
06525
06526
06527
06528 pg_time_t
06529 GetLastSegSwitchTime(void)
06530 {
06531 pg_time_t result;
06532
06533
06534 LWLockAcquire(WALWriteLock, LW_SHARED);
06535 result = XLogCtl->Write.lastSegSwitchTime;
06536 LWLockRelease(WALWriteLock);
06537
06538 return result;
06539 }
06540
06541
06542
06543
06544
06545
06546
06547
06548
06549 void
06550 GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch)
06551 {
06552 uint32 ckptXidEpoch;
06553 TransactionId ckptXid;
06554 TransactionId nextXid;
06555
06556
06557 {
06558
06559 volatile XLogCtlData *xlogctl = XLogCtl;
06560
06561 SpinLockAcquire(&xlogctl->info_lck);
06562 ckptXidEpoch = xlogctl->ckptXidEpoch;
06563 ckptXid = xlogctl->ckptXid;
06564 SpinLockRelease(&xlogctl->info_lck);
06565 }
06566
06567
06568 nextXid = ReadNewTransactionId();
06569
06570
06571
06572
06573
06574 if (nextXid < ckptXid)
06575 ckptXidEpoch++;
06576
06577 *xid = nextXid;
06578 *epoch = ckptXidEpoch;
06579 }
06580
06581
06582
06583
06584 void
06585 ShutdownXLOG(int code, Datum arg)
06586 {
06587 ereport(LOG,
06588 (errmsg("shutting down")));
06589
06590 if (RecoveryInProgress())
06591 CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
06592 else
06593 {
06594
06595
06596
06597
06598
06599
06600 if (XLogArchivingActive() && XLogArchiveCommandSet())
06601 RequestXLogSwitch();
06602
06603 CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
06604 }
06605 ShutdownCLOG();
06606 ShutdownSUBTRANS();
06607 ShutdownMultiXact();
06608
06609 ereport(LOG,
06610 (errmsg("database system is shut down")));
06611 }
06612
06613
06614
06615
06616 static void
06617 LogCheckpointStart(int flags, bool restartpoint)
06618 {
06619 const char *msg;
06620
06621
06622
06623
06624
06625 if (restartpoint)
06626 msg = "restartpoint starting:%s%s%s%s%s%s%s";
06627 else
06628 msg = "checkpoint starting:%s%s%s%s%s%s%s";
06629
06630 elog(LOG, msg,
06631 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
06632 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
06633 (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
06634 (flags & CHECKPOINT_FORCE) ? " force" : "",
06635 (flags & CHECKPOINT_WAIT) ? " wait" : "",
06636 (flags & CHECKPOINT_CAUSE_XLOG) ? " xlog" : "",
06637 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "");
06638 }
06639
06640
06641
06642
06643 static void
06644 LogCheckpointEnd(bool restartpoint)
06645 {
06646 long write_secs,
06647 sync_secs,
06648 total_secs,
06649 longest_secs,
06650 average_secs;
06651 int write_usecs,
06652 sync_usecs,
06653 total_usecs,
06654 longest_usecs,
06655 average_usecs;
06656 uint64 average_sync_time;
06657
06658 CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
06659
06660 TimestampDifference(CheckpointStats.ckpt_write_t,
06661 CheckpointStats.ckpt_sync_t,
06662 &write_secs, &write_usecs);
06663
06664 TimestampDifference(CheckpointStats.ckpt_sync_t,
06665 CheckpointStats.ckpt_sync_end_t,
06666 &sync_secs, &sync_usecs);
06667
06668
06669 BgWriterStats.m_checkpoint_write_time +=
06670 write_secs * 1000 + write_usecs / 1000;
06671 BgWriterStats.m_checkpoint_sync_time +=
06672 sync_secs * 1000 + sync_usecs / 1000;
06673
06674
06675
06676
06677
06678 if (!log_checkpoints)
06679 return;
06680
06681 TimestampDifference(CheckpointStats.ckpt_start_t,
06682 CheckpointStats.ckpt_end_t,
06683 &total_secs, &total_usecs);
06684
06685
06686
06687
06688
06689
06690 longest_secs = (long) (CheckpointStats.ckpt_longest_sync / 1000000);
06691 longest_usecs = CheckpointStats.ckpt_longest_sync -
06692 (uint64) longest_secs *1000000;
06693
06694 average_sync_time = 0;
06695 if (CheckpointStats.ckpt_sync_rels > 0)
06696 average_sync_time = CheckpointStats.ckpt_agg_sync_time /
06697 CheckpointStats.ckpt_sync_rels;
06698 average_secs = (long) (average_sync_time / 1000000);
06699 average_usecs = average_sync_time - (uint64) average_secs *1000000;
06700
06701 if (restartpoint)
06702 elog(LOG, "restartpoint complete: wrote %d buffers (%.1f%%); "
06703 "%d transaction log file(s) added, %d removed, %d recycled; "
06704 "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
06705 "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s",
06706 CheckpointStats.ckpt_bufs_written,
06707 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
06708 CheckpointStats.ckpt_segs_added,
06709 CheckpointStats.ckpt_segs_removed,
06710 CheckpointStats.ckpt_segs_recycled,
06711 write_secs, write_usecs / 1000,
06712 sync_secs, sync_usecs / 1000,
06713 total_secs, total_usecs / 1000,
06714 CheckpointStats.ckpt_sync_rels,
06715 longest_secs, longest_usecs / 1000,
06716 average_secs, average_usecs / 1000);
06717 else
06718 elog(LOG, "checkpoint complete: wrote %d buffers (%.1f%%); "
06719 "%d transaction log file(s) added, %d removed, %d recycled; "
06720 "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
06721 "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s",
06722 CheckpointStats.ckpt_bufs_written,
06723 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
06724 CheckpointStats.ckpt_segs_added,
06725 CheckpointStats.ckpt_segs_removed,
06726 CheckpointStats.ckpt_segs_recycled,
06727 write_secs, write_usecs / 1000,
06728 sync_secs, sync_usecs / 1000,
06729 total_secs, total_usecs / 1000,
06730 CheckpointStats.ckpt_sync_rels,
06731 longest_secs, longest_usecs / 1000,
06732 average_secs, average_usecs / 1000);
06733 }
06734
06735
06736
06737
06738
06739
06740
06741
06742
06743
06744
06745
06746
06747
06748
06749
06750
06751
06752
06753
06754
06755
06756
06757
06758
06759
06760
06761
06762
06763 void
06764 CreateCheckPoint(int flags)
06765 {
06766 bool shutdown;
06767 CheckPoint checkPoint;
06768 XLogRecPtr recptr;
06769 XLogCtlInsert *Insert = &XLogCtl->Insert;
06770 XLogRecData rdata;
06771 uint32 freespace;
06772 XLogSegNo _logSegNo;
06773 VirtualTransactionId *vxids;
06774 int nvxids;
06775
06776
06777
06778
06779
06780 if (flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY))
06781 shutdown = true;
06782 else
06783 shutdown = false;
06784
06785
06786 if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
06787 elog(ERROR, "can't create a checkpoint during recovery");
06788
06789
06790
06791
06792
06793
06794
06795 LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
06796
06797
06798
06799
06800
06801
06802
06803
06804 MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
06805 CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
06806
06807
06808
06809
06810 START_CRIT_SECTION();
06811
06812 if (shutdown)
06813 {
06814 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
06815 ControlFile->state = DB_SHUTDOWNING;
06816 ControlFile->time = (pg_time_t) time(NULL);
06817 UpdateControlFile();
06818 LWLockRelease(ControlFileLock);
06819 }
06820
06821
06822
06823
06824
06825
06826 smgrpreckpt();
06827
06828
06829 MemSet(&checkPoint, 0, sizeof(checkPoint));
06830 checkPoint.time = (pg_time_t) time(NULL);
06831
06832
06833
06834
06835
06836
06837 if (!shutdown && XLogStandbyInfoActive())
06838 checkPoint.oldestActiveXid = GetOldestActiveTransactionId();
06839 else
06840 checkPoint.oldestActiveXid = InvalidTransactionId;
06841
06842
06843
06844
06845
06846 LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
06847
06848
06849
06850
06851
06852
06853
06854
06855
06856
06857
06858
06859
06860
06861
06862
06863 if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
06864 CHECKPOINT_FORCE)) == 0)
06865 {
06866 XLogRecPtr curInsert;
06867
06868 INSERT_RECPTR(curInsert, Insert, Insert->curridx);
06869 if (curInsert == ControlFile->checkPoint +
06870 MAXALIGN(SizeOfXLogRecord + sizeof(CheckPoint)) &&
06871 ControlFile->checkPoint == ControlFile->checkPointCopy.redo)
06872 {
06873 LWLockRelease(WALInsertLock);
06874 LWLockRelease(CheckpointLock);
06875 END_CRIT_SECTION();
06876 return;
06877 }
06878 }
06879
06880
06881
06882
06883
06884
06885
06886 if (flags & CHECKPOINT_END_OF_RECOVERY)
06887 LocalSetXLogInsertAllowed();
06888
06889 checkPoint.ThisTimeLineID = ThisTimeLineID;
06890 if (flags & CHECKPOINT_END_OF_RECOVERY)
06891 checkPoint.PrevTimeLineID = XLogCtl->PrevTimeLineID;
06892 else
06893 checkPoint.PrevTimeLineID = ThisTimeLineID;
06894
06895 checkPoint.fullPageWrites = Insert->fullPageWrites;
06896
06897
06898
06899
06900
06901
06902
06903
06904
06905 freespace = INSERT_FREESPACE(Insert);
06906 if (freespace == 0)
06907 {
06908 (void) AdvanceXLInsertBuffer(false);
06909
06910 freespace = INSERT_FREESPACE(Insert);
06911 }
06912 INSERT_RECPTR(checkPoint.redo, Insert, Insert->curridx);
06913
06914
06915
06916
06917
06918
06919
06920
06921
06922
06923
06924
06925 {
06926
06927 volatile XLogCtlData *xlogctl = XLogCtl;
06928
06929 SpinLockAcquire(&xlogctl->info_lck);
06930 RedoRecPtr = xlogctl->Insert.RedoRecPtr = checkPoint.redo;
06931 SpinLockRelease(&xlogctl->info_lck);
06932 }
06933
06934
06935
06936
06937
06938 LWLockRelease(WALInsertLock);
06939
06940
06941
06942
06943
06944 if (log_checkpoints)
06945 LogCheckpointStart(flags, false);
06946
06947 TRACE_POSTGRESQL_CHECKPOINT_START(flags);
06948
06949
06950
06951
06952
06953
06954
06955
06956
06957
06958
06959
06960
06961
06962
06963
06964
06965
06966
06967
06968
06969
06970
06971
06972
06973
06974
06975
06976
06977
06978 vxids = GetVirtualXIDsDelayingChkpt(&nvxids);
06979 if (nvxids > 0)
06980 {
06981 uint32 nwaits = 0;
06982
06983 do
06984 {
06985 pg_usleep(10000L);
06986 nwaits++;
06987 } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids));
06988 }
06989 pfree(vxids);
06990
06991
06992
06993
06994 LWLockAcquire(XidGenLock, LW_SHARED);
06995 checkPoint.nextXid = ShmemVariableCache->nextXid;
06996 checkPoint.oldestXid = ShmemVariableCache->oldestXid;
06997 checkPoint.oldestXidDB = ShmemVariableCache->oldestXidDB;
06998 LWLockRelease(XidGenLock);
06999
07000
07001 checkPoint.nextXidEpoch = ControlFile->checkPointCopy.nextXidEpoch;
07002 if (checkPoint.nextXid < ControlFile->checkPointCopy.nextXid)
07003 checkPoint.nextXidEpoch++;
07004
07005 LWLockAcquire(OidGenLock, LW_SHARED);
07006 checkPoint.nextOid = ShmemVariableCache->nextOid;
07007 if (!shutdown)
07008 checkPoint.nextOid += ShmemVariableCache->oidCount;
07009 LWLockRelease(OidGenLock);
07010
07011 MultiXactGetCheckptMulti(shutdown,
07012 &checkPoint.nextMulti,
07013 &checkPoint.nextMultiOffset,
07014 &checkPoint.oldestMulti,
07015 &checkPoint.oldestMultiDB);
07016
07017
07018
07019
07020
07021
07022
07023
07024
07025 END_CRIT_SECTION();
07026
07027 CheckPointGuts(checkPoint.redo, flags);
07028
07029
07030
07031
07032
07033
07034
07035
07036
07037 if (!shutdown && XLogStandbyInfoActive())
07038 LogStandbySnapshot();
07039
07040 START_CRIT_SECTION();
07041
07042
07043
07044
07045 rdata.data = (char *) (&checkPoint);
07046 rdata.len = sizeof(checkPoint);
07047 rdata.buffer = InvalidBuffer;
07048 rdata.next = NULL;
07049
07050 recptr = XLogInsert(RM_XLOG_ID,
07051 shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
07052 XLOG_CHECKPOINT_ONLINE,
07053 &rdata);
07054
07055 XLogFlush(recptr);
07056
07057
07058
07059
07060
07061
07062
07063
07064 if (shutdown)
07065 {
07066 if (flags & CHECKPOINT_END_OF_RECOVERY)
07067 LocalXLogInsertAllowed = -1;
07068 else
07069 LocalXLogInsertAllowed = 0;
07070 }
07071
07072
07073
07074
07075
07076 if (shutdown && checkPoint.redo != ProcLastRecPtr)
07077 ereport(PANIC,
07078 (errmsg("concurrent transaction log activity while database system is shutting down")));
07079
07080
07081
07082
07083
07084 XLByteToSeg(ControlFile->checkPointCopy.redo, _logSegNo);
07085
07086
07087
07088
07089 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
07090 if (shutdown)
07091 ControlFile->state = DB_SHUTDOWNED;
07092 ControlFile->prevCheckPoint = ControlFile->checkPoint;
07093 ControlFile->checkPoint = ProcLastRecPtr;
07094 ControlFile->checkPointCopy = checkPoint;
07095 ControlFile->time = (pg_time_t) time(NULL);
07096
07097 ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
07098 ControlFile->minRecoveryPointTLI = 0;
07099
07100
07101
07102
07103
07104
07105 SpinLockAcquire(&XLogCtl->ulsn_lck);
07106 ControlFile->unloggedLSN = XLogCtl->unloggedLSN;
07107 SpinLockRelease(&XLogCtl->ulsn_lck);
07108
07109 UpdateControlFile();
07110 LWLockRelease(ControlFileLock);
07111
07112
07113 {
07114
07115 volatile XLogCtlData *xlogctl = XLogCtl;
07116
07117 SpinLockAcquire(&xlogctl->info_lck);
07118 xlogctl->ckptXidEpoch = checkPoint.nextXidEpoch;
07119 xlogctl->ckptXid = checkPoint.nextXid;
07120 SpinLockRelease(&xlogctl->info_lck);
07121 }
07122
07123
07124
07125
07126
07127 END_CRIT_SECTION();
07128
07129
07130
07131
07132 smgrpostckpt();
07133
07134
07135
07136
07137
07138 if (_logSegNo)
07139 {
07140 KeepLogSeg(recptr, &_logSegNo);
07141 _logSegNo--;
07142 RemoveOldXlogFiles(_logSegNo, recptr);
07143 }
07144
07145
07146
07147
07148
07149 if (!shutdown)
07150 PreallocXlogFiles(recptr);
07151
07152
07153
07154
07155
07156
07157
07158
07159 if (!RecoveryInProgress())
07160 TruncateSUBTRANS(GetOldestXmin(true, false));
07161
07162
07163 LogCheckpointEnd(false);
07164
07165 TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
07166 NBuffers,
07167 CheckpointStats.ckpt_segs_added,
07168 CheckpointStats.ckpt_segs_removed,
07169 CheckpointStats.ckpt_segs_recycled);
07170
07171 LWLockRelease(CheckpointLock);
07172 }
07173
07174
07175
07176
07177
07178
07179
07180
07181
07182
07183 void
07184 CreateEndOfRecoveryRecord(void)
07185 {
07186 xl_end_of_recovery xlrec;
07187 XLogRecData rdata;
07188 XLogRecPtr recptr;
07189
07190
07191 if (!RecoveryInProgress())
07192 elog(ERROR, "can only be used to end recovery");
07193
07194 xlrec.end_time = time(NULL);
07195
07196 LWLockAcquire(WALInsertLock, LW_SHARED);
07197 xlrec.ThisTimeLineID = ThisTimeLineID;
07198 xlrec.PrevTimeLineID = XLogCtl->PrevTimeLineID;
07199 LWLockRelease(WALInsertLock);
07200
07201 LocalSetXLogInsertAllowed();
07202
07203 START_CRIT_SECTION();
07204
07205 rdata.data = (char *) &xlrec;
07206 rdata.len = sizeof(xl_end_of_recovery);
07207 rdata.buffer = InvalidBuffer;
07208 rdata.next = NULL;
07209
07210 recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata);
07211
07212 XLogFlush(recptr);
07213
07214
07215
07216
07217
07218 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
07219 ControlFile->time = (pg_time_t) xlrec.end_time;
07220 ControlFile->minRecoveryPoint = recptr;
07221 ControlFile->minRecoveryPointTLI = ThisTimeLineID;
07222 UpdateControlFile();
07223 LWLockRelease(ControlFileLock);
07224
07225 END_CRIT_SECTION();
07226
07227 LocalXLogInsertAllowed = -1;
07228 }
07229
07230
07231
07232
07233
07234
07235
07236 static void
07237 CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
07238 {
07239 CheckPointCLOG();
07240 CheckPointSUBTRANS();
07241 CheckPointMultiXact();
07242 CheckPointPredicate();
07243 CheckPointRelationMap();
07244 CheckPointBuffers(flags);
07245
07246 CheckPointTwoPhase(checkPointRedo);
07247 }
07248
07249
07250
07251
07252
07253
07254
07255
07256
07257
07258
07259 static void
07260 RecoveryRestartPoint(const CheckPoint *checkPoint)
07261 {
07262 int rmid;
07263
07264
07265 volatile XLogCtlData *xlogctl = XLogCtl;
07266
07267
07268
07269
07270
07271
07272
07273 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
07274 {
07275 if (RmgrTable[rmid].rm_safe_restartpoint != NULL)
07276 if (!(RmgrTable[rmid].rm_safe_restartpoint()))
07277 {
07278 elog(trace_recovery(DEBUG2),
07279 "RM %d not safe to record restart point at %X/%X",
07280 rmid,
07281 (uint32) (checkPoint->redo >> 32),
07282 (uint32) checkPoint->redo);
07283 return;
07284 }
07285 }
07286
07287
07288
07289
07290
07291
07292
07293
07294 if (XLogHaveInvalidPages())
07295 {
07296 elog(trace_recovery(DEBUG2),
07297 "could not record restart point at %X/%X because there "
07298 "are unresolved references to invalid pages",
07299 (uint32) (checkPoint->redo >> 32),
07300 (uint32) checkPoint->redo);
07301 return;
07302 }
07303
07304
07305
07306
07307
07308 SpinLockAcquire(&xlogctl->info_lck);
07309 xlogctl->lastCheckPointRecPtr = ReadRecPtr;
07310 xlogctl->lastCheckPoint = *checkPoint;
07311 SpinLockRelease(&xlogctl->info_lck);
07312 }
07313
07314
07315
07316
07317
07318
07319
07320
07321
07322
07323
07324
07325 bool
07326 CreateRestartPoint(int flags)
07327 {
07328 XLogRecPtr lastCheckPointRecPtr;
07329 CheckPoint lastCheckPoint;
07330 XLogSegNo _logSegNo;
07331 TimestampTz xtime;
07332
07333
07334 volatile XLogCtlData *xlogctl = XLogCtl;
07335
07336
07337
07338
07339
07340 LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
07341
07342
07343 SpinLockAcquire(&xlogctl->info_lck);
07344 lastCheckPointRecPtr = xlogctl->lastCheckPointRecPtr;
07345 lastCheckPoint = xlogctl->lastCheckPoint;
07346 SpinLockRelease(&xlogctl->info_lck);
07347
07348
07349
07350
07351
07352 if (!RecoveryInProgress())
07353 {
07354 ereport(DEBUG2,
07355 (errmsg("skipping restartpoint, recovery has already ended")));
07356 LWLockRelease(CheckpointLock);
07357 return false;
07358 }
07359
07360
07361
07362
07363
07364
07365
07366
07367
07368
07369
07370
07371
07372
07373
07374 if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
07375 lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
07376 {
07377 ereport(DEBUG2,
07378 (errmsg("skipping restartpoint, already performed at %X/%X",
07379 (uint32) (lastCheckPoint.redo >> 32), (uint32) lastCheckPoint.redo)));
07380
07381 UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
07382 if (flags & CHECKPOINT_IS_SHUTDOWN)
07383 {
07384 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
07385 ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
07386 ControlFile->time = (pg_time_t) time(NULL);
07387 UpdateControlFile();
07388 LWLockRelease(ControlFileLock);
07389 }
07390 LWLockRelease(CheckpointLock);
07391 return false;
07392 }
07393
07394
07395
07396
07397
07398
07399
07400
07401
07402
07403 LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
07404 SpinLockAcquire(&xlogctl->info_lck);
07405 xlogctl->Insert.RedoRecPtr = lastCheckPoint.redo;
07406 SpinLockRelease(&xlogctl->info_lck);
07407 LWLockRelease(WALInsertLock);
07408
07409
07410
07411
07412
07413
07414
07415
07416 MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
07417 CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
07418
07419 if (log_checkpoints)
07420 LogCheckpointStart(flags, true);
07421
07422 CheckPointGuts(lastCheckPoint.redo, flags);
07423
07424
07425
07426
07427
07428 XLByteToSeg(ControlFile->checkPointCopy.redo, _logSegNo);
07429
07430
07431
07432
07433
07434
07435
07436 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
07437 if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY &&
07438 ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
07439 {
07440 ControlFile->prevCheckPoint = ControlFile->checkPoint;
07441 ControlFile->checkPoint = lastCheckPointRecPtr;
07442 ControlFile->checkPointCopy = lastCheckPoint;
07443 ControlFile->time = (pg_time_t) time(NULL);
07444 if (flags & CHECKPOINT_IS_SHUTDOWN)
07445 ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
07446 UpdateControlFile();
07447 }
07448 LWLockRelease(ControlFileLock);
07449
07450
07451
07452
07453
07454
07455 if (_logSegNo)
07456 {
07457 XLogRecPtr receivePtr;
07458 XLogRecPtr replayPtr;
07459 XLogRecPtr endptr;
07460
07461
07462
07463
07464 receivePtr = GetWalRcvWriteRecPtr(NULL, NULL);
07465 replayPtr = GetXLogReplayRecPtr(NULL);
07466 endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
07467
07468 KeepLogSeg(endptr, &_logSegNo);
07469 _logSegNo--;
07470
07471
07472
07473
07474
07475
07476
07477
07478
07479
07480
07481 (void) GetXLogReplayRecPtr(&ThisTimeLineID);
07482
07483 RemoveOldXlogFiles(_logSegNo, endptr);
07484
07485
07486
07487
07488
07489 PreallocXlogFiles(endptr);
07490 }
07491
07492
07493
07494
07495
07496
07497
07498
07499 if (EnableHotStandby)
07500 TruncateSUBTRANS(GetOldestXmin(true, false));
07501
07502
07503 LogCheckpointEnd(true);
07504
07505 xtime = GetLatestXTime();
07506 ereport((log_checkpoints ? LOG : DEBUG2),
07507 (errmsg("recovery restart point at %X/%X",
07508 (uint32) (lastCheckPoint.redo >> 32), (uint32) lastCheckPoint.redo),
07509 xtime ? errdetail("last completed transaction was at log time %s",
07510 timestamptz_to_str(xtime)) : 0));
07511
07512 LWLockRelease(CheckpointLock);
07513
07514
07515
07516
07517 if (XLogCtl->archiveCleanupCommand[0])
07518 ExecuteRecoveryCommand(XLogCtl->archiveCleanupCommand,
07519 "archive_cleanup_command",
07520 false);
07521
07522 return true;
07523 }
07524
07525
07526
07527
07528
07529
07530 static void
07531 KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
07532 {
07533 XLogSegNo segno;
07534
07535 if (wal_keep_segments == 0)
07536 return;
07537
07538 XLByteToSeg(recptr, segno);
07539
07540
07541 if (segno <= wal_keep_segments)
07542 segno = 1;
07543 else
07544 segno = segno - wal_keep_segments;
07545
07546
07547 if (segno < *logSegNo)
07548 *logSegNo = segno;
07549 }
07550
07551
07552
07553
07554 void
07555 XLogPutNextOid(Oid nextOid)
07556 {
07557 XLogRecData rdata;
07558
07559 rdata.data = (char *) (&nextOid);
07560 rdata.len = sizeof(Oid);
07561 rdata.buffer = InvalidBuffer;
07562 rdata.next = NULL;
07563 (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, &rdata);
07564
07565
07566
07567
07568
07569
07570
07571
07572
07573
07574
07575
07576
07577
07578
07579
07580
07581
07582
07583 }
07584
07585
07586
07587
07588
07589
07590
07591
07592
07593
07594
07595 XLogRecPtr
07596 RequestXLogSwitch(void)
07597 {
07598 XLogRecPtr RecPtr;
07599 XLogRecData rdata;
07600
07601
07602 rdata.buffer = InvalidBuffer;
07603 rdata.data = NULL;
07604 rdata.len = 0;
07605 rdata.next = NULL;
07606
07607 RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH, &rdata);
07608
07609 return RecPtr;
07610 }
07611
07612
07613
07614
07615 XLogRecPtr
07616 XLogRestorePoint(const char *rpName)
07617 {
07618 XLogRecPtr RecPtr;
07619 XLogRecData rdata;
07620 xl_restore_point xlrec;
07621
07622 xlrec.rp_time = GetCurrentTimestamp();
07623 strncpy(xlrec.rp_name, rpName, MAXFNAMELEN);
07624
07625 rdata.buffer = InvalidBuffer;
07626 rdata.data = (char *) &xlrec;
07627 rdata.len = sizeof(xl_restore_point);
07628 rdata.next = NULL;
07629
07630 RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT, &rdata);
07631
07632 ereport(LOG,
07633 (errmsg("restore point \"%s\" created at %X/%X",
07634 rpName, (uint32) (RecPtr >> 32), (uint32) RecPtr)));
07635
07636 return RecPtr;
07637 }
07638
07639
07640
07641
07642
07643
07644
07645
07646
07647
07648
07649
07650
07651
07652
07653
07654
07655
07656
07657
07658
07659
07660
07661
07662
07663 XLogRecPtr
07664 XLogSaveBufferForHint(Buffer buffer)
07665 {
07666 XLogRecPtr recptr = InvalidXLogRecPtr;
07667 XLogRecPtr lsn;
07668 XLogRecData rdata[2];
07669 BkpBlock bkpb;
07670
07671
07672
07673
07674 Assert(MyPgXact->delayChkpt);
07675
07676
07677
07678
07679 GetRedoRecPtr();
07680
07681
07682
07683
07684
07685 rdata[0].buffer = buffer;
07686 rdata[0].buffer_std = true;
07687
07688
07689
07690
07691 if (XLogCheckBuffer(rdata, false, &lsn, &bkpb))
07692 {
07693 char copied_buffer[BLCKSZ];
07694 char *origdata = (char *) BufferGetBlock(buffer);
07695
07696
07697
07698
07699
07700
07701 memcpy(copied_buffer, origdata, bkpb.hole_offset);
07702 memcpy(copied_buffer + bkpb.hole_offset,
07703 origdata + bkpb.hole_offset + bkpb.hole_length,
07704 BLCKSZ - bkpb.hole_offset - bkpb.hole_length);
07705
07706
07707
07708
07709 rdata[0].data = (char *) &bkpb;
07710 rdata[0].len = sizeof(BkpBlock);
07711 rdata[0].buffer = InvalidBuffer;
07712 rdata[0].next = &(rdata[1]);
07713
07714
07715
07716
07717 rdata[1].data = copied_buffer;
07718 rdata[1].len = BLCKSZ - bkpb.hole_length;
07719 rdata[1].buffer = InvalidBuffer;
07720 rdata[1].next = NULL;
07721
07722 recptr = XLogInsert(RM_XLOG_ID, XLOG_HINT, rdata);
07723 }
07724
07725 return recptr;
07726 }
07727
07728
07729
07730
07731
07732 static void
07733 XLogReportParameters(void)
07734 {
07735 if (wal_level != ControlFile->wal_level ||
07736 MaxConnections != ControlFile->MaxConnections ||
07737 max_prepared_xacts != ControlFile->max_prepared_xacts ||
07738 max_locks_per_xact != ControlFile->max_locks_per_xact)
07739 {
07740
07741
07742
07743
07744
07745
07746
07747 if (wal_level != ControlFile->wal_level || XLogIsNeeded())
07748 {
07749 XLogRecData rdata;
07750 xl_parameter_change xlrec;
07751
07752 xlrec.MaxConnections = MaxConnections;
07753 xlrec.max_prepared_xacts = max_prepared_xacts;
07754 xlrec.max_locks_per_xact = max_locks_per_xact;
07755 xlrec.wal_level = wal_level;
07756
07757 rdata.buffer = InvalidBuffer;
07758 rdata.data = (char *) &xlrec;
07759 rdata.len = sizeof(xlrec);
07760 rdata.next = NULL;
07761
07762 XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE, &rdata);
07763 }
07764
07765 ControlFile->MaxConnections = MaxConnections;
07766 ControlFile->max_prepared_xacts = max_prepared_xacts;
07767 ControlFile->max_locks_per_xact = max_locks_per_xact;
07768 ControlFile->wal_level = wal_level;
07769 UpdateControlFile();
07770 }
07771 }
07772
07773
07774
07775
07776
07777
07778
07779
07780 void
07781 UpdateFullPageWrites(void)
07782 {
07783 XLogCtlInsert *Insert = &XLogCtl->Insert;
07784
07785
07786
07787
07788
07789
07790
07791
07792 if (fullPageWrites == Insert->fullPageWrites)
07793 return;
07794
07795 START_CRIT_SECTION();
07796
07797
07798
07799
07800
07801
07802
07803
07804 if (fullPageWrites)
07805 {
07806 LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
07807 Insert->fullPageWrites = true;
07808 LWLockRelease(WALInsertLock);
07809 }
07810
07811
07812
07813
07814
07815 if (XLogStandbyInfoActive() && !RecoveryInProgress())
07816 {
07817 XLogRecData rdata;
07818
07819 rdata.data = (char *) (&fullPageWrites);
07820 rdata.len = sizeof(bool);
07821 rdata.buffer = InvalidBuffer;
07822 rdata.next = NULL;
07823
07824 XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, &rdata);
07825 }
07826
07827 if (!fullPageWrites)
07828 {
07829 LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
07830 Insert->fullPageWrites = false;
07831 LWLockRelease(WALInsertLock);
07832 }
07833 END_CRIT_SECTION();
07834 }
07835
07836
07837
07838
07839
07840
07841
07842 static void
07843 checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI)
07844 {
07845
07846 if (prevTLI != ThisTimeLineID)
07847 ereport(PANIC,
07848 (errmsg("unexpected prev timeline ID %u (current timeline ID %u) in checkpoint record",
07849 prevTLI, ThisTimeLineID)));
07850
07851
07852
07853
07854
07855 if (newTLI < ThisTimeLineID || !tliInHistory(newTLI, expectedTLEs))
07856 ereport(PANIC,
07857 (errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
07858 newTLI, ThisTimeLineID)));
07859
07860
07861
07862
07863
07864
07865
07866
07867
07868
07869
07870 if (!XLogRecPtrIsInvalid(minRecoveryPoint) &&
07871 lsn < minRecoveryPoint &&
07872 newTLI > minRecoveryPointTLI)
07873 ereport(PANIC,
07874 (errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%X on timeline %u",
07875 newTLI,
07876 (uint32) (minRecoveryPoint >> 32),
07877 (uint32) minRecoveryPoint,
07878 minRecoveryPointTLI)));
07879
07880
07881 }
07882
07883
07884
07885
07886
07887
07888
07889 void
07890 xlog_redo(XLogRecPtr lsn, XLogRecord *record)
07891 {
07892 uint8 info = record->xl_info & ~XLR_INFO_MASK;
07893
07894
07895 Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
07896
07897 if (info == XLOG_NEXTOID)
07898 {
07899 Oid nextOid;
07900
07901
07902
07903
07904
07905
07906
07907
07908 memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
07909 LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
07910 ShmemVariableCache->nextOid = nextOid;
07911 ShmemVariableCache->oidCount = 0;
07912 LWLockRelease(OidGenLock);
07913 }
07914 else if (info == XLOG_CHECKPOINT_SHUTDOWN)
07915 {
07916 CheckPoint checkPoint;
07917
07918 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
07919
07920 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
07921 ShmemVariableCache->nextXid = checkPoint.nextXid;
07922 LWLockRelease(XidGenLock);
07923 LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
07924 ShmemVariableCache->nextOid = checkPoint.nextOid;
07925 ShmemVariableCache->oidCount = 0;
07926 LWLockRelease(OidGenLock);
07927 MultiXactSetNextMXact(checkPoint.nextMulti,
07928 checkPoint.nextMultiOffset);
07929 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
07930 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
07931
07932
07933
07934
07935
07936
07937 if (ArchiveRecoveryRequested &&
07938 !XLogRecPtrIsInvalid(ControlFile->backupStartPoint) &&
07939 XLogRecPtrIsInvalid(ControlFile->backupEndPoint))
07940 ereport(PANIC,
07941 (errmsg("online backup was canceled, recovery cannot continue")));
07942
07943
07944
07945
07946
07947
07948
07949 if (standbyState >= STANDBY_INITIALIZED)
07950 {
07951 TransactionId *xids;
07952 int nxids;
07953 TransactionId oldestActiveXID;
07954 TransactionId latestCompletedXid;
07955 RunningTransactionsData running;
07956
07957 oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
07958
07959
07960
07961
07962
07963
07964
07965 running.xcnt = nxids;
07966 running.subxcnt = 0;
07967 running.subxid_overflow = false;
07968 running.nextXid = checkPoint.nextXid;
07969 running.oldestRunningXid = oldestActiveXID;
07970 latestCompletedXid = checkPoint.nextXid;
07971 TransactionIdRetreat(latestCompletedXid);
07972 Assert(TransactionIdIsNormal(latestCompletedXid));
07973 running.latestCompletedXid = latestCompletedXid;
07974 running.xids = xids;
07975
07976 ProcArrayApplyRecoveryInfo(&running);
07977
07978 StandbyRecoverPreparedTransactions(true);
07979 }
07980
07981
07982 ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch;
07983 ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
07984
07985
07986 {
07987
07988 volatile XLogCtlData *xlogctl = XLogCtl;
07989
07990 SpinLockAcquire(&xlogctl->info_lck);
07991 xlogctl->ckptXidEpoch = checkPoint.nextXidEpoch;
07992 xlogctl->ckptXid = checkPoint.nextXid;
07993 SpinLockRelease(&xlogctl->info_lck);
07994 }
07995
07996
07997
07998
07999
08000 if (checkPoint.ThisTimeLineID != ThisTimeLineID)
08001 ereport(PANIC,
08002 (errmsg("unexpected timeline ID %u (should be %u) in checkpoint record",
08003 checkPoint.ThisTimeLineID, ThisTimeLineID)));
08004
08005 RecoveryRestartPoint(&checkPoint);
08006 }
08007 else if (info == XLOG_CHECKPOINT_ONLINE)
08008 {
08009 CheckPoint checkPoint;
08010
08011 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
08012
08013 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
08014 if (TransactionIdPrecedes(ShmemVariableCache->nextXid,
08015 checkPoint.nextXid))
08016 ShmemVariableCache->nextXid = checkPoint.nextXid;
08017 LWLockRelease(XidGenLock);
08018
08019 LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
08020 ShmemVariableCache->nextOid = checkPoint.nextOid;
08021 ShmemVariableCache->oidCount = 0;
08022 LWLockRelease(OidGenLock);
08023 MultiXactAdvanceNextMXact(checkPoint.nextMulti,
08024 checkPoint.nextMultiOffset);
08025 if (TransactionIdPrecedes(ShmemVariableCache->oldestXid,
08026 checkPoint.oldestXid))
08027 SetTransactionIdLimit(checkPoint.oldestXid,
08028 checkPoint.oldestXidDB);
08029 MultiXactAdvanceOldest(checkPoint.oldestMulti,
08030 checkPoint.oldestMultiDB);
08031
08032
08033 ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch;
08034 ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
08035
08036
08037 {
08038
08039 volatile XLogCtlData *xlogctl = XLogCtl;
08040
08041 SpinLockAcquire(&xlogctl->info_lck);
08042 xlogctl->ckptXidEpoch = checkPoint.nextXidEpoch;
08043 xlogctl->ckptXid = checkPoint.nextXid;
08044 SpinLockRelease(&xlogctl->info_lck);
08045 }
08046
08047
08048 if (checkPoint.ThisTimeLineID != ThisTimeLineID)
08049 ereport(PANIC,
08050 (errmsg("unexpected timeline ID %u (should be %u) in checkpoint record",
08051 checkPoint.ThisTimeLineID, ThisTimeLineID)));
08052
08053 RecoveryRestartPoint(&checkPoint);
08054 }
08055 else if (info == XLOG_END_OF_RECOVERY)
08056 {
08057 xl_end_of_recovery xlrec;
08058
08059 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
08060
08061
08062
08063
08064
08065
08066
08067
08068
08069
08070
08071 if (xlrec.ThisTimeLineID != ThisTimeLineID)
08072 ereport(PANIC,
08073 (errmsg("unexpected timeline ID %u (should be %u) in checkpoint record",
08074 xlrec.ThisTimeLineID, ThisTimeLineID)));
08075 }
08076 else if (info == XLOG_NOOP)
08077 {
08078
08079 }
08080 else if (info == XLOG_SWITCH)
08081 {
08082
08083 }
08084 else if (info == XLOG_RESTORE_POINT)
08085 {
08086
08087 }
08088 else if (info == XLOG_HINT)
08089 {
08090 char *data;
08091 BkpBlock bkpb;
08092
08093
08094
08095
08096
08097
08098
08099
08100
08101
08102
08103
08104
08105
08106 data = XLogRecGetData(record);
08107 memcpy(&bkpb, data, sizeof(BkpBlock));
08108 data += sizeof(BkpBlock);
08109
08110 RestoreBackupBlockContents(lsn, bkpb, data, false, false);
08111 }
08112 else if (info == XLOG_BACKUP_END)
08113 {
08114 XLogRecPtr startpoint;
08115
08116 memcpy(&startpoint, XLogRecGetData(record), sizeof(startpoint));
08117
08118 if (ControlFile->backupStartPoint == startpoint)
08119 {
08120
08121
08122
08123
08124
08125
08126
08127 elog(DEBUG1, "end of backup reached");
08128
08129 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
08130
08131 if (ControlFile->minRecoveryPoint < lsn)
08132 {
08133 ControlFile->minRecoveryPoint = lsn;
08134 ControlFile->minRecoveryPointTLI = ThisTimeLineID;
08135 }
08136 ControlFile->backupStartPoint = InvalidXLogRecPtr;
08137 ControlFile->backupEndRequired = false;
08138 UpdateControlFile();
08139
08140 LWLockRelease(ControlFileLock);
08141 }
08142 }
08143 else if (info == XLOG_PARAMETER_CHANGE)
08144 {
08145 xl_parameter_change xlrec;
08146
08147
08148 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
08149
08150 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
08151 ControlFile->MaxConnections = xlrec.MaxConnections;
08152 ControlFile->max_prepared_xacts = xlrec.max_prepared_xacts;
08153 ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact;
08154 ControlFile->wal_level = xlrec.wal_level;
08155
08156
08157
08158
08159
08160
08161
08162
08163
08164 minRecoveryPoint = ControlFile->minRecoveryPoint;
08165 minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
08166 if (minRecoveryPoint != 0 && minRecoveryPoint < lsn)
08167 {
08168 ControlFile->minRecoveryPoint = lsn;
08169 ControlFile->minRecoveryPointTLI = ThisTimeLineID;
08170 }
08171
08172 UpdateControlFile();
08173 LWLockRelease(ControlFileLock);
08174
08175
08176 CheckRequiredParameterValues();
08177 }
08178 else if (info == XLOG_FPW_CHANGE)
08179 {
08180
08181 volatile XLogCtlData *xlogctl = XLogCtl;
08182 bool fpw;
08183
08184 memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
08185
08186
08187
08188
08189
08190
08191 if (!fpw)
08192 {
08193 SpinLockAcquire(&xlogctl->info_lck);
08194 if (xlogctl->lastFpwDisableRecPtr < ReadRecPtr)
08195 xlogctl->lastFpwDisableRecPtr = ReadRecPtr;
08196 SpinLockRelease(&xlogctl->info_lck);
08197 }
08198
08199
08200 lastFullPageWrites = fpw;
08201 }
08202 }
08203
08204 #ifdef WAL_DEBUG
08205
08206 static void
08207 xlog_outrec(StringInfo buf, XLogRecord *record)
08208 {
08209 int i;
08210
08211 appendStringInfo(buf, "prev %X/%X; xid %u",
08212 (uint32) (record->xl_prev >> 32),
08213 (uint32) record->xl_prev,
08214 record->xl_xid);
08215
08216 appendStringInfo(buf, "; len %u",
08217 record->xl_len);
08218
08219 for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
08220 {
08221 if (record->xl_info & XLR_BKP_BLOCK(i))
08222 appendStringInfo(buf, "; bkpb%d", i);
08223 }
08224
08225 appendStringInfo(buf, ": %s", RmgrTable[record->xl_rmid].rm_name);
08226 }
08227 #endif
08228
08229
08230
08231
08232
08233
08234 static int
08235 get_sync_bit(int method)
08236 {
08237 int o_direct_flag = 0;
08238
08239
08240 if (!enableFsync)
08241 return 0;
08242
08243
08244
08245
08246
08247
08248
08249
08250
08251
08252
08253
08254
08255
08256
08257 if (!XLogIsNeeded() && !AmWalReceiverProcess())
08258 o_direct_flag = PG_O_DIRECT;
08259
08260 switch (method)
08261 {
08262
08263
08264
08265
08266
08267
08268 case SYNC_METHOD_FSYNC:
08269 case SYNC_METHOD_FSYNC_WRITETHROUGH:
08270 case SYNC_METHOD_FDATASYNC:
08271 return 0;
08272 #ifdef OPEN_SYNC_FLAG
08273 case SYNC_METHOD_OPEN:
08274 return OPEN_SYNC_FLAG | o_direct_flag;
08275 #endif
08276 #ifdef OPEN_DATASYNC_FLAG
08277 case SYNC_METHOD_OPEN_DSYNC:
08278 return OPEN_DATASYNC_FLAG | o_direct_flag;
08279 #endif
08280 default:
08281
08282 elog(ERROR, "unrecognized wal_sync_method: %d", method);
08283 return 0;
08284 }
08285 }
08286
08287
08288
08289
08290 void
08291 assign_xlog_sync_method(int new_sync_method, void *extra)
08292 {
08293 if (sync_method != new_sync_method)
08294 {
08295
08296
08297
08298
08299
08300
08301 if (openLogFile >= 0)
08302 {
08303 if (pg_fsync(openLogFile) != 0)
08304 ereport(PANIC,
08305 (errcode_for_file_access(),
08306 errmsg("could not fsync log segment %s: %m",
08307 XLogFileNameP(ThisTimeLineID, openLogSegNo))));
08308 if (get_sync_bit(sync_method) != get_sync_bit(new_sync_method))
08309 XLogFileClose();
08310 }
08311 }
08312 }
08313
08314
08315
08316
08317
08318
08319
08320
08321 void
08322 issue_xlog_fsync(int fd, XLogSegNo segno)
08323 {
08324 switch (sync_method)
08325 {
08326 case SYNC_METHOD_FSYNC:
08327 if (pg_fsync_no_writethrough(fd) != 0)
08328 ereport(PANIC,
08329 (errcode_for_file_access(),
08330 errmsg("could not fsync log file %s: %m",
08331 XLogFileNameP(ThisTimeLineID, segno))));
08332 break;
08333 #ifdef HAVE_FSYNC_WRITETHROUGH
08334 case SYNC_METHOD_FSYNC_WRITETHROUGH:
08335 if (pg_fsync_writethrough(fd) != 0)
08336 ereport(PANIC,
08337 (errcode_for_file_access(),
08338 errmsg("could not fsync write-through log file %s: %m",
08339 XLogFileNameP(ThisTimeLineID, segno))));
08340 break;
08341 #endif
08342 #ifdef HAVE_FDATASYNC
08343 case SYNC_METHOD_FDATASYNC:
08344 if (pg_fdatasync(fd) != 0)
08345 ereport(PANIC,
08346 (errcode_for_file_access(),
08347 errmsg("could not fdatasync log file %s: %m",
08348 XLogFileNameP(ThisTimeLineID, segno))));
08349 break;
08350 #endif
08351 case SYNC_METHOD_OPEN:
08352 case SYNC_METHOD_OPEN_DSYNC:
08353
08354 break;
08355 default:
08356 elog(PANIC, "unrecognized wal_sync_method: %d", sync_method);
08357 break;
08358 }
08359 }
08360
08361
08362
08363
08364 char *
08365 XLogFileNameP(TimeLineID tli, XLogSegNo segno)
08366 {
08367 char *result = palloc(MAXFNAMELEN);
08368 XLogFileName(result, tli, segno);
08369 return result;
08370 }
08371
08372
08373
08374
08375
08376
08377
08378
08379
08380
08381
08382
08383
08384
08385
08386
08387
08388
08389
08390
08391
08392
08393
08394
08395
08396 XLogRecPtr
08397 do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
08398 char **labelfile)
08399 {
08400 bool exclusive = (labelfile == NULL);
08401 bool backup_started_in_recovery = false;
08402 XLogRecPtr checkpointloc;
08403 XLogRecPtr startpoint;
08404 TimeLineID starttli;
08405 pg_time_t stamp_time;
08406 char strfbuf[128];
08407 char xlogfilename[MAXFNAMELEN];
08408 XLogSegNo _logSegNo;
08409 struct stat stat_buf;
08410 FILE *fp;
08411 StringInfoData labelfbuf;
08412
08413 backup_started_in_recovery = RecoveryInProgress();
08414
08415 if (!superuser() && !has_rolreplication(GetUserId()))
08416 ereport(ERROR,
08417 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
08418 errmsg("must be superuser or replication role to run a backup")));
08419
08420
08421
08422
08423 if (backup_started_in_recovery && exclusive)
08424 ereport(ERROR,
08425 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
08426 errmsg("recovery is in progress"),
08427 errhint("WAL control functions cannot be executed during recovery.")));
08428
08429
08430
08431
08432
08433 if (!backup_started_in_recovery && !XLogIsNeeded())
08434 ereport(ERROR,
08435 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
08436 errmsg("WAL level not sufficient for making an online backup"),
08437 errhint("wal_level must be set to \"archive\" or \"hot_standby\" at server start.")));
08438
08439 if (strlen(backupidstr) > MAXPGPATH)
08440 ereport(ERROR,
08441 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
08442 errmsg("backup label too long (max %d bytes)",
08443 MAXPGPATH)));
08444
08445
08446
08447
08448
08449
08450
08451
08452
08453
08454
08455
08456
08457
08458
08459
08460
08461
08462
08463
08464
08465 LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
08466 if (exclusive)
08467 {
08468 if (XLogCtl->Insert.exclusiveBackup)
08469 {
08470 LWLockRelease(WALInsertLock);
08471 ereport(ERROR,
08472 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
08473 errmsg("a backup is already in progress"),
08474 errhint("Run pg_stop_backup() and try again.")));
08475 }
08476 XLogCtl->Insert.exclusiveBackup = true;
08477 }
08478 else
08479 XLogCtl->Insert.nonExclusiveBackups++;
08480 XLogCtl->Insert.forcePageWrites = true;
08481 LWLockRelease(WALInsertLock);
08482
08483
08484 PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
08485 {
08486 bool gotUniqueStartpoint = false;
08487
08488
08489
08490
08491
08492
08493
08494
08495
08496
08497
08498
08499
08500
08501
08502
08503
08504
08505
08506
08507
08508
08509 if (!backup_started_in_recovery)
08510 RequestXLogSwitch();
08511
08512 do
08513 {
08514 bool checkpointfpw;
08515
08516
08517
08518
08519
08520
08521
08522
08523
08524
08525
08526
08527
08528
08529
08530
08531
08532
08533
08534 RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT |
08535 (fast ? CHECKPOINT_IMMEDIATE : 0));
08536
08537
08538
08539
08540
08541
08542
08543 LWLockAcquire(ControlFileLock, LW_SHARED);
08544 checkpointloc = ControlFile->checkPoint;
08545 startpoint = ControlFile->checkPointCopy.redo;
08546 starttli = ControlFile->checkPointCopy.ThisTimeLineID;
08547 checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
08548 LWLockRelease(ControlFileLock);
08549
08550 if (backup_started_in_recovery)
08551 {
08552
08553 volatile XLogCtlData *xlogctl = XLogCtl;
08554 XLogRecPtr recptr;
08555
08556
08557
08558
08559
08560
08561 SpinLockAcquire(&xlogctl->info_lck);
08562 recptr = xlogctl->lastFpwDisableRecPtr;
08563 SpinLockRelease(&xlogctl->info_lck);
08564
08565 if (!checkpointfpw || startpoint <= recptr)
08566 ereport(ERROR,
08567 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
08568 errmsg("WAL generated with full_page_writes=off was replayed "
08569 "since last restartpoint"),
08570 errhint("This means that the backup being taken on the standby "
08571 "is corrupt and should not be used. "
08572 "Enable full_page_writes and run CHECKPOINT on the master, "
08573 "and then try an online backup again.")));
08574
08575
08576
08577
08578
08579
08580
08581
08582 gotUniqueStartpoint = true;
08583 }
08584
08585
08586
08587
08588
08589
08590
08591
08592
08593
08594
08595
08596 LWLockAcquire(WALInsertLock, LW_SHARED);
08597 if (XLogCtl->Insert.lastBackupStart < startpoint)
08598 {
08599 XLogCtl->Insert.lastBackupStart = startpoint;
08600 gotUniqueStartpoint = true;
08601 }
08602 LWLockRelease(WALInsertLock);
08603 } while (!gotUniqueStartpoint);
08604
08605 XLByteToSeg(startpoint, _logSegNo);
08606 XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo);
08607
08608
08609
08610
08611 initStringInfo(&labelfbuf);
08612
08613
08614 stamp_time = (pg_time_t) time(NULL);
08615 pg_strftime(strfbuf, sizeof(strfbuf),
08616 "%Y-%m-%d %H:%M:%S %Z",
08617 pg_localtime(&stamp_time, log_timezone));
08618 appendStringInfo(&labelfbuf, "START WAL LOCATION: %X/%X (file %s)\n",
08619 (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename);
08620 appendStringInfo(&labelfbuf, "CHECKPOINT LOCATION: %X/%X\n",
08621 (uint32) (checkpointloc >> 32), (uint32) checkpointloc);
08622 appendStringInfo(&labelfbuf, "BACKUP METHOD: %s\n",
08623 exclusive ? "pg_start_backup" : "streamed");
08624 appendStringInfo(&labelfbuf, "BACKUP FROM: %s\n",
08625 backup_started_in_recovery ? "standby" : "master");
08626 appendStringInfo(&labelfbuf, "START TIME: %s\n", strfbuf);
08627 appendStringInfo(&labelfbuf, "LABEL: %s\n", backupidstr);
08628
08629
08630
08631
08632 if (exclusive)
08633 {
08634
08635
08636
08637
08638
08639 if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0)
08640 {
08641 if (errno != ENOENT)
08642 ereport(ERROR,
08643 (errcode_for_file_access(),
08644 errmsg("could not stat file \"%s\": %m",
08645 BACKUP_LABEL_FILE)));
08646 }
08647 else
08648 ereport(ERROR,
08649 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
08650 errmsg("a backup is already in progress"),
08651 errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
08652 BACKUP_LABEL_FILE)));
08653
08654 fp = AllocateFile(BACKUP_LABEL_FILE, "w");
08655
08656 if (!fp)
08657 ereport(ERROR,
08658 (errcode_for_file_access(),
08659 errmsg("could not create file \"%s\": %m",
08660 BACKUP_LABEL_FILE)));
08661 if (fwrite(labelfbuf.data, labelfbuf.len, 1, fp) != 1 ||
08662 fflush(fp) != 0 ||
08663 pg_fsync(fileno(fp)) != 0 ||
08664 ferror(fp) ||
08665 FreeFile(fp))
08666 ereport(ERROR,
08667 (errcode_for_file_access(),
08668 errmsg("could not write file \"%s\": %m",
08669 BACKUP_LABEL_FILE)));
08670 pfree(labelfbuf.data);
08671 }
08672 else
08673 *labelfile = labelfbuf.data;
08674 }
08675 PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
08676
08677
08678
08679
08680 if (starttli_p)
08681 *starttli_p = starttli;
08682 return startpoint;
08683 }
08684
08685
08686 static void
08687 pg_start_backup_callback(int code, Datum arg)
08688 {
08689 bool exclusive = DatumGetBool(arg);
08690
08691
08692 LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
08693 if (exclusive)
08694 {
08695 Assert(XLogCtl->Insert.exclusiveBackup);
08696 XLogCtl->Insert.exclusiveBackup = false;
08697 }
08698 else
08699 {
08700 Assert(XLogCtl->Insert.nonExclusiveBackups > 0);
08701 XLogCtl->Insert.nonExclusiveBackups--;
08702 }
08703
08704 if (!XLogCtl->Insert.exclusiveBackup &&
08705 XLogCtl->Insert.nonExclusiveBackups == 0)
08706 {
08707 XLogCtl->Insert.forcePageWrites = false;
08708 }
08709 LWLockRelease(WALInsertLock);
08710 }
08711
08712
08713
08714
08715
08716
08717
08718
08719
08720
08721
08722 XLogRecPtr
08723 do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
08724 {
08725 bool exclusive = (labelfile == NULL);
08726 bool backup_started_in_recovery = false;
08727 XLogRecPtr startpoint;
08728 XLogRecPtr stoppoint;
08729 TimeLineID stoptli;
08730 XLogRecData rdata;
08731 pg_time_t stamp_time;
08732 char strfbuf[128];
08733 char histfilepath[MAXPGPATH];
08734 char startxlogfilename[MAXFNAMELEN];
08735 char stopxlogfilename[MAXFNAMELEN];
08736 char lastxlogfilename[MAXFNAMELEN];
08737 char histfilename[MAXFNAMELEN];
08738 char backupfrom[20];
08739 XLogSegNo _logSegNo;
08740 FILE *lfp;
08741 FILE *fp;
08742 char ch;
08743 int seconds_before_warning;
08744 int waits = 0;
08745 bool reported_waiting = false;
08746 char *remaining;
08747 char *ptr;
08748 uint32 hi,
08749 lo;
08750
08751 backup_started_in_recovery = RecoveryInProgress();
08752
08753 if (!superuser() && !has_rolreplication(GetUserId()))
08754 ereport(ERROR,
08755 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
08756 (errmsg("must be superuser or replication role to run a backup"))));
08757
08758
08759
08760
08761 if (backup_started_in_recovery && exclusive)
08762 ereport(ERROR,
08763 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
08764 errmsg("recovery is in progress"),
08765 errhint("WAL control functions cannot be executed during recovery.")));
08766
08767
08768
08769
08770
08771 if (!backup_started_in_recovery && !XLogIsNeeded())
08772 ereport(ERROR,
08773 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
08774 errmsg("WAL level not sufficient for making an online backup"),
08775 errhint("wal_level must be set to \"archive\" or \"hot_standby\" at server start.")));
08776
08777
08778
08779
08780 LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
08781 if (exclusive)
08782 XLogCtl->Insert.exclusiveBackup = false;
08783 else
08784 {
08785
08786
08787
08788
08789
08790
08791 Assert(XLogCtl->Insert.nonExclusiveBackups > 0);
08792 XLogCtl->Insert.nonExclusiveBackups--;
08793 }
08794
08795 if (!XLogCtl->Insert.exclusiveBackup &&
08796 XLogCtl->Insert.nonExclusiveBackups == 0)
08797 {
08798 XLogCtl->Insert.forcePageWrites = false;
08799 }
08800 LWLockRelease(WALInsertLock);
08801
08802 if (exclusive)
08803 {
08804
08805
08806
08807 struct stat statbuf;
08808 int r;
08809
08810 if (stat(BACKUP_LABEL_FILE, &statbuf))
08811 {
08812 if (errno != ENOENT)
08813 ereport(ERROR,
08814 (errcode_for_file_access(),
08815 errmsg("could not stat file \"%s\": %m",
08816 BACKUP_LABEL_FILE)));
08817 ereport(ERROR,
08818 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
08819 errmsg("a backup is not in progress")));
08820 }
08821
08822 lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
08823 if (!lfp)
08824 {
08825 ereport(ERROR,
08826 (errcode_for_file_access(),
08827 errmsg("could not read file \"%s\": %m",
08828 BACKUP_LABEL_FILE)));
08829 }
08830 labelfile = palloc(statbuf.st_size + 1);
08831 r = fread(labelfile, statbuf.st_size, 1, lfp);
08832 labelfile[statbuf.st_size] = '\0';
08833
08834
08835
08836
08837 if (r != 1 || ferror(lfp) || FreeFile(lfp))
08838 ereport(ERROR,
08839 (errcode_for_file_access(),
08840 errmsg("could not read file \"%s\": %m",
08841 BACKUP_LABEL_FILE)));
08842 if (unlink(BACKUP_LABEL_FILE) != 0)
08843 ereport(ERROR,
08844 (errcode_for_file_access(),
08845 errmsg("could not remove file \"%s\": %m",
08846 BACKUP_LABEL_FILE)));
08847 }
08848
08849
08850
08851
08852
08853 if (sscanf(labelfile, "START WAL LOCATION: %X/%X (file %24s)%c",
08854 &hi, &lo, startxlogfilename,
08855 &ch) != 4 || ch != '\n')
08856 ereport(ERROR,
08857 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
08858 errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
08859 startpoint = ((uint64) hi) << 32 | lo;
08860 remaining = strchr(labelfile, '\n') + 1;
08861
08862
08863
08864
08865
08866
08867 ptr = strstr(remaining, "BACKUP FROM:");
08868 if (!ptr || sscanf(ptr, "BACKUP FROM: %19s\n", backupfrom) != 1)
08869 ereport(ERROR,
08870 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
08871 errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
08872 if (strcmp(backupfrom, "standby") == 0 && !backup_started_in_recovery)
08873 ereport(ERROR,
08874 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
08875 errmsg("the standby was promoted during online backup"),
08876 errhint("This means that the backup being taken is corrupt "
08877 "and should not be used. "
08878 "Try taking another online backup.")));
08879
08880
08881
08882
08883
08884
08885
08886
08887
08888
08889
08890
08891
08892
08893
08894
08895
08896
08897
08898
08899
08900
08901
08902
08903
08904
08905
08906 if (backup_started_in_recovery)
08907 {
08908
08909 volatile XLogCtlData *xlogctl = XLogCtl;
08910 XLogRecPtr recptr;
08911
08912
08913
08914
08915
08916 SpinLockAcquire(&xlogctl->info_lck);
08917 recptr = xlogctl->lastFpwDisableRecPtr;
08918 SpinLockRelease(&xlogctl->info_lck);
08919
08920 if (startpoint <= recptr)
08921 ereport(ERROR,
08922 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
08923 errmsg("WAL generated with full_page_writes=off was replayed "
08924 "during online backup"),
08925 errhint("This means that the backup being taken on the standby "
08926 "is corrupt and should not be used. "
08927 "Enable full_page_writes and run CHECKPOINT on the master, "
08928 "and then try an online backup again.")));
08929
08930
08931 LWLockAcquire(ControlFileLock, LW_SHARED);
08932 stoppoint = ControlFile->minRecoveryPoint;
08933 stoptli = ControlFile->minRecoveryPointTLI;
08934 LWLockRelease(ControlFileLock);
08935
08936 if (stoptli_p)
08937 *stoptli_p = stoptli;
08938 return stoppoint;
08939 }
08940
08941
08942
08943
08944 rdata.data = (char *) (&startpoint);
08945 rdata.len = sizeof(startpoint);
08946 rdata.buffer = InvalidBuffer;
08947 rdata.next = NULL;
08948 stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END, &rdata);
08949 stoptli = ThisTimeLineID;
08950
08951
08952
08953
08954
08955 RequestXLogSwitch();
08956
08957 XLByteToPrevSeg(stoppoint, _logSegNo);
08958 XLogFileName(stopxlogfilename, ThisTimeLineID, _logSegNo);
08959
08960
08961 stamp_time = (pg_time_t) time(NULL);
08962 pg_strftime(strfbuf, sizeof(strfbuf),
08963 "%Y-%m-%d %H:%M:%S %Z",
08964 pg_localtime(&stamp_time, log_timezone));
08965
08966
08967
08968
08969 XLByteToSeg(startpoint, _logSegNo);
08970 BackupHistoryFilePath(histfilepath, ThisTimeLineID, _logSegNo,
08971 (uint32) (startpoint % XLogSegSize));
08972 fp = AllocateFile(histfilepath, "w");
08973 if (!fp)
08974 ereport(ERROR,
08975 (errcode_for_file_access(),
08976 errmsg("could not create file \"%s\": %m",
08977 histfilepath)));
08978 fprintf(fp, "START WAL LOCATION: %X/%X (file %s)\n",
08979 (uint32) (startpoint >> 32), (uint32) startpoint, startxlogfilename);
08980 fprintf(fp, "STOP WAL LOCATION: %X/%X (file %s)\n",
08981 (uint32) (stoppoint >> 32), (uint32) stoppoint, stopxlogfilename);
08982
08983 fprintf(fp, "%s", remaining);
08984 fprintf(fp, "STOP TIME: %s\n", strfbuf);
08985 if (fflush(fp) || ferror(fp) || FreeFile(fp))
08986 ereport(ERROR,
08987 (errcode_for_file_access(),
08988 errmsg("could not write file \"%s\": %m",
08989 histfilepath)));
08990
08991
08992
08993
08994
08995
08996 CleanupBackupHistory();
08997
08998
08999
09000
09001
09002
09003
09004
09005
09006
09007
09008
09009
09010
09011
09012
09013
09014
09015
09016
09017 if (waitforarchive && XLogArchivingActive())
09018 {
09019 XLByteToPrevSeg(stoppoint, _logSegNo);
09020 XLogFileName(lastxlogfilename, ThisTimeLineID, _logSegNo);
09021
09022 XLByteToSeg(startpoint, _logSegNo);
09023 BackupHistoryFileName(histfilename, ThisTimeLineID, _logSegNo,
09024 (uint32) (startpoint % XLogSegSize));
09025
09026 seconds_before_warning = 60;
09027 waits = 0;
09028
09029 while (XLogArchiveIsBusy(lastxlogfilename) ||
09030 XLogArchiveIsBusy(histfilename))
09031 {
09032 CHECK_FOR_INTERRUPTS();
09033
09034 if (!reported_waiting && waits > 5)
09035 {
09036 ereport(NOTICE,
09037 (errmsg("pg_stop_backup cleanup done, waiting for required WAL segments to be archived")));
09038 reported_waiting = true;
09039 }
09040
09041 pg_usleep(1000000L);
09042
09043 if (++waits >= seconds_before_warning)
09044 {
09045 seconds_before_warning *= 2;
09046 ereport(WARNING,
09047 (errmsg("pg_stop_backup still waiting for all required WAL segments to be archived (%d seconds elapsed)",
09048 waits),
09049 errhint("Check that your archive_command is executing properly. "
09050 "pg_stop_backup can be canceled safely, "
09051 "but the database backup will not be usable without all the WAL segments.")));
09052 }
09053 }
09054
09055 ereport(NOTICE,
09056 (errmsg("pg_stop_backup complete, all required WAL segments have been archived")));
09057 }
09058 else if (waitforarchive)
09059 ereport(NOTICE,
09060 (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
09061
09062
09063
09064
09065 if (stoptli_p)
09066 *stoptli_p = stoptli;
09067 return stoppoint;
09068 }
09069
09070
09071
09072
09073
09074
09075
09076
09077
09078
09079
09080
09081
09082 void
09083 do_pg_abort_backup(void)
09084 {
09085 LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
09086 Assert(XLogCtl->Insert.nonExclusiveBackups > 0);
09087 XLogCtl->Insert.nonExclusiveBackups--;
09088
09089 if (!XLogCtl->Insert.exclusiveBackup &&
09090 XLogCtl->Insert.nonExclusiveBackups == 0)
09091 {
09092 XLogCtl->Insert.forcePageWrites = false;
09093 }
09094 LWLockRelease(WALInsertLock);
09095 }
09096
09097
09098
09099
09100
09101
09102 XLogRecPtr
09103 GetXLogReplayRecPtr(TimeLineID *replayTLI)
09104 {
09105
09106 volatile XLogCtlData *xlogctl = XLogCtl;
09107 XLogRecPtr recptr;
09108 TimeLineID tli;
09109
09110 SpinLockAcquire(&xlogctl->info_lck);
09111 recptr = xlogctl->lastReplayedEndRecPtr;
09112 tli = xlogctl->lastReplayedTLI;
09113 SpinLockRelease(&xlogctl->info_lck);
09114
09115 if (replayTLI)
09116 *replayTLI = tli;
09117 return recptr;
09118 }
09119
09120
09121
09122
09123 XLogRecPtr
09124 GetXLogInsertRecPtr(void)
09125 {
09126 XLogCtlInsert *Insert = &XLogCtl->Insert;
09127 XLogRecPtr current_recptr;
09128
09129 LWLockAcquire(WALInsertLock, LW_SHARED);
09130 INSERT_RECPTR(current_recptr, Insert, Insert->curridx);
09131 LWLockRelease(WALInsertLock);
09132
09133 return current_recptr;
09134 }
09135
09136
09137
09138
09139 XLogRecPtr
09140 GetXLogWriteRecPtr(void)
09141 {
09142 {
09143
09144 volatile XLogCtlData *xlogctl = XLogCtl;
09145
09146 SpinLockAcquire(&xlogctl->info_lck);
09147 LogwrtResult = xlogctl->LogwrtResult;
09148 SpinLockRelease(&xlogctl->info_lck);
09149 }
09150
09151 return LogwrtResult.Write;
09152 }
09153
09154
09155
09156
09157
09158 void
09159 GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli)
09160 {
09161 LWLockAcquire(ControlFileLock, LW_SHARED);
09162 *oldrecptr = ControlFile->checkPointCopy.redo;
09163 *oldtli = ControlFile->checkPointCopy.ThisTimeLineID;
09164 LWLockRelease(ControlFileLock);
09165 }
09166
09167
09168
09169
09170
09171
09172
09173
09174
09175
09176
09177
09178
09179
09180
09181
09182
09183 static bool
09184 read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
09185 bool *backupFromStandby)
09186 {
09187 char startxlogfilename[MAXFNAMELEN];
09188 TimeLineID tli;
09189 FILE *lfp;
09190 char ch;
09191 char backuptype[20];
09192 char backupfrom[20];
09193 uint32 hi,
09194 lo;
09195
09196 *backupEndRequired = false;
09197 *backupFromStandby = false;
09198
09199
09200
09201
09202 lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
09203 if (!lfp)
09204 {
09205 if (errno != ENOENT)
09206 ereport(FATAL,
09207 (errcode_for_file_access(),
09208 errmsg("could not read file \"%s\": %m",
09209 BACKUP_LABEL_FILE)));
09210 return false;
09211 }
09212
09213
09214
09215
09216
09217
09218 if (fscanf(lfp, "START WAL LOCATION: %X/%X (file %08X%16s)%c",
09219 &hi, &lo, &tli, startxlogfilename, &ch) != 5 || ch != '\n')
09220 ereport(FATAL,
09221 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
09222 errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
09223 RedoStartLSN = ((uint64) hi) << 32 | lo;
09224 if (fscanf(lfp, "CHECKPOINT LOCATION: %X/%X%c",
09225 &hi, &lo, &ch) != 3 || ch != '\n')
09226 ereport(FATAL,
09227 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
09228 errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
09229 *checkPointLoc = ((uint64) hi) << 32 | lo;
09230
09231
09232
09233
09234
09235
09236 if (fscanf(lfp, "BACKUP METHOD: %19s\n", backuptype) == 1)
09237 {
09238 if (strcmp(backuptype, "streamed") == 0)
09239 *backupEndRequired = true;
09240 }
09241
09242 if (fscanf(lfp, "BACKUP FROM: %19s\n", backupfrom) == 1)
09243 {
09244 if (strcmp(backupfrom, "standby") == 0)
09245 *backupFromStandby = true;
09246 }
09247
09248 if (ferror(lfp) || FreeFile(lfp))
09249 ereport(FATAL,
09250 (errcode_for_file_access(),
09251 errmsg("could not read file \"%s\": %m",
09252 BACKUP_LABEL_FILE)));
09253
09254 return true;
09255 }
09256
09257
09258
09259
09260 static void
09261 rm_redo_error_callback(void *arg)
09262 {
09263 XLogRecord *record = (XLogRecord *) arg;
09264 StringInfoData buf;
09265
09266 initStringInfo(&buf);
09267 RmgrTable[record->xl_rmid].rm_desc(&buf,
09268 record->xl_info,
09269 XLogRecGetData(record));
09270
09271
09272 if (buf.len > 0)
09273 errcontext("xlog redo %s", buf.data);
09274
09275 pfree(buf.data);
09276 }
09277
09278
09279
09280
09281
09282
09283 bool
09284 BackupInProgress(void)
09285 {
09286 struct stat stat_buf;
09287
09288 return (stat(BACKUP_LABEL_FILE, &stat_buf) == 0);
09289 }
09290
09291
09292
09293
09294
09295
09296
09297
09298 void
09299 CancelBackup(void)
09300 {
09301 struct stat stat_buf;
09302
09303
09304 if (stat(BACKUP_LABEL_FILE, &stat_buf) < 0)
09305 return;
09306
09307
09308 unlink(BACKUP_LABEL_OLD);
09309
09310 if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) == 0)
09311 {
09312 ereport(LOG,
09313 (errmsg("online backup mode canceled"),
09314 errdetail("\"%s\" was renamed to \"%s\".",
09315 BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
09316 }
09317 else
09318 {
09319 ereport(WARNING,
09320 (errcode_for_file_access(),
09321 errmsg("online backup mode was not canceled"),
09322 errdetail("Could not rename \"%s\" to \"%s\": %m.",
09323 BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
09324 }
09325 }
09326
09327
09328
09329
09330
09331
09332
09333
09334
09335
09336
09337
09338
09339
09340
09341
09342
09343
09344
09345
09346
09347
09348
09349
09350 static int
09351 XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
09352 XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *readTLI)
09353 {
09354 XLogPageReadPrivate *private =
09355 (XLogPageReadPrivate *) xlogreader->private_data;
09356 int emode = private->emode;
09357 uint32 targetPageOff;
09358 XLogSegNo targetSegNo PG_USED_FOR_ASSERTS_ONLY;
09359
09360 XLByteToSeg(targetPagePtr, targetSegNo);
09361 targetPageOff = targetPagePtr % XLogSegSize;
09362
09363
09364
09365
09366
09367 if (readFile >= 0 && !XLByteInSeg(targetPagePtr, readSegNo))
09368 {
09369
09370
09371
09372
09373 if (StandbyModeRequested && bgwriterLaunched)
09374 {
09375 if (XLogCheckpointNeeded(readSegNo))
09376 {
09377 (void) GetRedoRecPtr();
09378 if (XLogCheckpointNeeded(readSegNo))
09379 RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
09380 }
09381 }
09382
09383 close(readFile);
09384 readFile = -1;
09385 readSource = 0;
09386 }
09387
09388 XLByteToSeg(targetPagePtr, readSegNo);
09389
09390 retry:
09391
09392 if (readFile < 0 ||
09393 (readSource == XLOG_FROM_STREAM &&
09394 receivedUpto < targetPagePtr + reqLen))
09395 {
09396 if (!WaitForWALToBecomeAvailable(targetPagePtr + reqLen,
09397 private->randAccess,
09398 private->fetching_ckpt,
09399 targetRecPtr))
09400 {
09401 if (readFile >= 0)
09402 close(readFile);
09403 readFile = -1;
09404 readLen = 0;
09405 readSource = 0;
09406
09407 return -1;
09408 }
09409 }
09410
09411
09412
09413
09414
09415 Assert(readFile != -1);
09416
09417
09418
09419
09420
09421
09422
09423 if (readSource == XLOG_FROM_STREAM)
09424 {
09425 if (((targetPagePtr) / XLOG_BLCKSZ) != (receivedUpto / XLOG_BLCKSZ))
09426 readLen = XLOG_BLCKSZ;
09427 else
09428 readLen = receivedUpto % XLogSegSize - targetPageOff;
09429 }
09430 else
09431 readLen = XLOG_BLCKSZ;
09432
09433
09434 readOff = targetPageOff;
09435 if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
09436 {
09437 char fname[MAXFNAMELEN];
09438
09439 XLogFileName(fname, curFileTLI, readSegNo);
09440 ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
09441 (errcode_for_file_access(),
09442 errmsg("could not seek in log segment %s to offset %u: %m",
09443 fname, readOff)));
09444 goto next_record_is_invalid;
09445 }
09446
09447 if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
09448 {
09449 char fname[MAXFNAMELEN];
09450
09451 XLogFileName(fname, curFileTLI, readSegNo);
09452 ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
09453 (errcode_for_file_access(),
09454 errmsg("could not read from log segment %s, offset %u: %m",
09455 fname, readOff)));
09456 goto next_record_is_invalid;
09457 }
09458
09459 Assert(targetSegNo == readSegNo);
09460 Assert(targetPageOff == readOff);
09461 Assert(reqLen <= readLen);
09462
09463 *readTLI = curFileTLI;
09464 return readLen;
09465
09466 next_record_is_invalid:
09467 lastSourceFailed = true;
09468
09469 if (readFile >= 0)
09470 close(readFile);
09471 readFile = -1;
09472 readLen = 0;
09473 readSource = 0;
09474
09475
09476 if (StandbyMode)
09477 goto retry;
09478 else
09479 return -1;
09480 }
09481
09482
09483
09484
09485
09486
09487
09488
09489
09490
09491
09492
09493
09494
09495
09496
09497
09498
09499
09500
09501
09502
09503
09504
09505
09506
09507
09508 static bool
09509 WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
09510 bool fetching_ckpt, XLogRecPtr tliRecPtr)
09511 {
09512 static pg_time_t last_fail_time = 0;
09513 pg_time_t now;
09514
09515
09516
09517
09518
09519
09520
09521
09522
09523
09524
09525
09526
09527
09528
09529
09530
09531
09532
09533
09534
09535
09536 if (!InArchiveRecovery)
09537 currentSource = XLOG_FROM_PG_XLOG;
09538 else if (currentSource == 0)
09539 currentSource = XLOG_FROM_ARCHIVE;
09540
09541 for (;;)
09542 {
09543 int oldSource = currentSource;
09544
09545
09546
09547
09548
09549
09550
09551 if (lastSourceFailed)
09552 {
09553 switch (currentSource)
09554 {
09555 case XLOG_FROM_ARCHIVE:
09556 currentSource = XLOG_FROM_PG_XLOG;
09557 break;
09558
09559 case XLOG_FROM_PG_XLOG:
09560
09561
09562
09563
09564
09565
09566 if (StandbyMode && CheckForStandbyTrigger())
09567 {
09568 ShutdownWalRcv();
09569 return false;
09570 }
09571
09572
09573
09574
09575
09576 if (!StandbyMode)
09577 return false;
09578
09579
09580
09581
09582
09583
09584
09585
09586
09587
09588
09589 if (PrimaryConnInfo)
09590 {
09591 XLogRecPtr ptr;
09592 TimeLineID tli;
09593
09594 if (fetching_ckpt)
09595 {
09596 ptr = RedoStartLSN;
09597 tli = ControlFile->checkPointCopy.ThisTimeLineID;
09598 }
09599 else
09600 {
09601 ptr = RecPtr;
09602 tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
09603
09604 if (curFileTLI > 0 && tli < curFileTLI)
09605 elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
09606 (uint32) (ptr >> 32), (uint32) ptr,
09607 tli, curFileTLI);
09608 }
09609 curFileTLI = tli;
09610 RequestXLogStreaming(curFileTLI, ptr, PrimaryConnInfo);
09611 }
09612
09613
09614
09615
09616
09617 currentSource = XLOG_FROM_STREAM;
09618 break;
09619
09620 case XLOG_FROM_STREAM:
09621
09622
09623
09624
09625
09626
09627
09628
09629
09630
09631
09632
09633
09634
09635
09636
09637
09638
09639 if (WalRcvStreaming())
09640 ShutdownWalRcv();
09641
09642
09643
09644
09645
09646 if (recoveryTargetIsLatest)
09647 {
09648 if (rescanLatestTimeLine())
09649 {
09650 currentSource = XLOG_FROM_ARCHIVE;
09651 break;
09652 }
09653 }
09654
09655
09656
09657
09658
09659
09660
09661
09662 now = (pg_time_t) time(NULL);
09663 if ((now - last_fail_time) < 5)
09664 {
09665 pg_usleep(1000000L * (5 - (now - last_fail_time)));
09666 now = (pg_time_t) time(NULL);
09667 }
09668 last_fail_time = now;
09669 currentSource = XLOG_FROM_ARCHIVE;
09670 break;
09671
09672 default:
09673 elog(ERROR, "unexpected WAL source %d", currentSource);
09674 }
09675 }
09676 else if (currentSource == XLOG_FROM_PG_XLOG)
09677 {
09678
09679
09680
09681
09682
09683 if (InArchiveRecovery)
09684 currentSource = XLOG_FROM_ARCHIVE;
09685 }
09686
09687 if (currentSource != oldSource)
09688 elog(DEBUG2, "switched WAL source from %s to %s after %s",
09689 xlogSourceNames[oldSource], xlogSourceNames[currentSource],
09690 lastSourceFailed ? "failure" : "success");
09691
09692
09693
09694
09695
09696 lastSourceFailed = false;
09697
09698 switch (currentSource)
09699 {
09700 case XLOG_FROM_ARCHIVE:
09701 case XLOG_FROM_PG_XLOG:
09702
09703 if (readFile >= 0)
09704 {
09705 close(readFile);
09706 readFile = -1;
09707 }
09708
09709 if (randAccess)
09710 curFileTLI = 0;
09711
09712
09713
09714
09715
09716 readFile = XLogFileReadAnyTLI(readSegNo, DEBUG2, currentSource);
09717 if (readFile >= 0)
09718 return true;
09719
09720
09721
09722
09723 lastSourceFailed = true;
09724 break;
09725
09726 case XLOG_FROM_STREAM:
09727 {
09728 bool havedata;
09729
09730
09731
09732
09733 if (!WalRcvStreaming())
09734 {
09735 lastSourceFailed = true;
09736 break;
09737 }
09738
09739
09740
09741
09742
09743
09744
09745
09746
09747
09748
09749
09750 if (RecPtr < receivedUpto)
09751 havedata = true;
09752 else
09753 {
09754 XLogRecPtr latestChunkStart;
09755
09756 receivedUpto = GetWalRcvWriteRecPtr(&latestChunkStart, &receiveTLI);
09757 if (RecPtr < receivedUpto && receiveTLI == curFileTLI)
09758 {
09759 havedata = true;
09760 if (latestChunkStart <= RecPtr)
09761 {
09762 XLogReceiptTime = GetCurrentTimestamp();
09763 SetCurrentChunkStartTime(XLogReceiptTime);
09764 }
09765 }
09766 else
09767 havedata = false;
09768 }
09769 if (havedata)
09770 {
09771
09772
09773
09774
09775
09776
09777
09778
09779 if (readFile < 0)
09780 {
09781 if (!expectedTLEs)
09782 expectedTLEs = readTimeLineHistory(receiveTLI);
09783 readFile = XLogFileRead(readSegNo, PANIC,
09784 receiveTLI,
09785 XLOG_FROM_STREAM, false);
09786 Assert(readFile >= 0);
09787 }
09788 else
09789 {
09790
09791 readSource = XLOG_FROM_STREAM;
09792 XLogReceiptSource = XLOG_FROM_STREAM;
09793 return true;
09794 }
09795 break;
09796 }
09797
09798
09799
09800
09801
09802 if (CheckForStandbyTrigger())
09803 {
09804
09805
09806
09807
09808
09809
09810
09811
09812 lastSourceFailed = true;
09813 break;
09814 }
09815
09816
09817
09818
09819
09820
09821 WaitLatch(&XLogCtl->recoveryWakeupLatch,
09822 WL_LATCH_SET | WL_TIMEOUT,
09823 5000L);
09824 ResetLatch(&XLogCtl->recoveryWakeupLatch);
09825 break;
09826 }
09827
09828 default:
09829 elog(ERROR, "unexpected WAL source %d", currentSource);
09830 }
09831
09832
09833
09834
09835
09836 HandleStartupProcInterrupts();
09837 } while (StandbyMode);
09838
09839 return false;
09840 }
09841
09842
09843
09844
09845
09846
09847
09848
09849
09850
09851
09852
09853
09854
09855
09856
09857
09858
09859
09860 static int
09861 emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
09862 {
09863 static XLogRecPtr lastComplaint = 0;
09864
09865 if (readSource == XLOG_FROM_PG_XLOG && emode == LOG)
09866 {
09867 if (RecPtr == lastComplaint)
09868 emode = DEBUG1;
09869 else
09870 lastComplaint = RecPtr;
09871 }
09872 return emode;
09873 }
09874
09875
09876
09877
09878
09879 static bool
09880 CheckForStandbyTrigger(void)
09881 {
09882 struct stat stat_buf;
09883 static bool triggered = false;
09884
09885 if (triggered)
09886 return true;
09887
09888 if (IsPromoteTriggered())
09889 {
09890
09891
09892
09893
09894
09895
09896
09897 if (stat(FAST_PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
09898 {
09899 unlink(FAST_PROMOTE_SIGNAL_FILE);
09900 unlink(PROMOTE_SIGNAL_FILE);
09901 fast_promote = true;
09902 }
09903 else if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
09904 {
09905 unlink(PROMOTE_SIGNAL_FILE);
09906 fast_promote = false;
09907 }
09908
09909 ereport(LOG, (errmsg("received promote request")));
09910
09911 ResetPromoteTriggered();
09912 triggered = true;
09913 return true;
09914 }
09915
09916 if (TriggerFile == NULL)
09917 return false;
09918
09919 if (stat(TriggerFile, &stat_buf) == 0)
09920 {
09921 ereport(LOG,
09922 (errmsg("trigger file found: %s", TriggerFile)));
09923 unlink(TriggerFile);
09924 triggered = true;
09925 fast_promote = true;
09926 return true;
09927 }
09928 return false;
09929 }
09930
09931
09932
09933
09934
09935 bool
09936 CheckPromoteSignal(void)
09937 {
09938 struct stat stat_buf;
09939
09940 if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0 ||
09941 stat(FAST_PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
09942 return true;
09943
09944 return false;
09945 }
09946
09947
09948
09949
09950
09951 void
09952 WakeupRecovery(void)
09953 {
09954 SetLatch(&XLogCtl->recoveryWakeupLatch);
09955 }
09956
09957
09958
09959
09960 void
09961 SetWalWriterSleeping(bool sleeping)
09962 {
09963
09964 volatile XLogCtlData *xlogctl = XLogCtl;
09965
09966 SpinLockAcquire(&xlogctl->info_lck);
09967 xlogctl->WalWriterSleeping = sleeping;
09968 SpinLockRelease(&xlogctl->info_lck);
09969 }