Header And Logo

PostgreSQL
| The world's most advanced open source database.

procarray.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * procarray.c
00004  *    POSTGRES process array code.
00005  *
00006  *
00007  * This module maintains arrays of the PGPROC and PGXACT structures for all
00008  * active backends.  Although there are several uses for this, the principal
00009  * one is as a means of determining the set of currently running transactions.
00010  *
00011  * Because of various subtle race conditions it is critical that a backend
00012  * hold the correct locks while setting or clearing its MyPgXact->xid field.
00013  * See notes in src/backend/access/transam/README.
00014  *
00015  * The process arrays now also include structures representing prepared
00016  * transactions.  The xid and subxids fields of these are valid, as are the
00017  * myProcLocks lists.  They can be distinguished from regular backend PGPROCs
00018  * at need by checking for pid == 0.
00019  *
00020  * During hot standby, we also keep a list of XIDs representing transactions
00021  * that are known to be running in the master (or more precisely, were running
00022  * as of the current point in the WAL stream).  This list is kept in the
00023  * KnownAssignedXids array, and is updated by watching the sequence of
00024  * arriving XIDs.  This is necessary because if we leave those XIDs out of
00025  * snapshots taken for standby queries, then they will appear to be already
00026  * complete, leading to MVCC failures.  Note that in hot standby, the PGPROC
00027  * array represents standby processes, which by definition are not running
00028  * transactions that have XIDs.
00029  *
00030  * It is perhaps possible for a backend on the master to terminate without
00031  * writing an abort record for its transaction.  While that shouldn't really
00032  * happen, it would tie up KnownAssignedXids indefinitely, so we protect
00033  * ourselves by pruning the array when a valid list of running XIDs arrives.
00034  *
00035  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00036  * Portions Copyright (c) 1994, Regents of the University of California
00037  *
00038  *
00039  * IDENTIFICATION
00040  *    src/backend/storage/ipc/procarray.c
00041  *
00042  *-------------------------------------------------------------------------
00043  */
00044 #include "postgres.h"
00045 
00046 #include <signal.h>
00047 
00048 #include "access/clog.h"
00049 #include "access/subtrans.h"
00050 #include "access/transam.h"
00051 #include "access/xact.h"
00052 #include "access/twophase.h"
00053 #include "miscadmin.h"
00054 #include "storage/proc.h"
00055 #include "storage/procarray.h"
00056 #include "storage/spin.h"
00057 #include "utils/builtins.h"
00058 #include "utils/snapmgr.h"
00059 
00060 
00061 /* Our shared memory area */
00062 typedef struct ProcArrayStruct
00063 {
00064     int         numProcs;       /* number of valid procs entries */
00065     int         maxProcs;       /* allocated size of procs array */
00066 
00067     /*
00068      * Known assigned XIDs handling
00069      */
00070     int         maxKnownAssignedXids;   /* allocated size of array */
00071     int         numKnownAssignedXids;   /* currrent # of valid entries */
00072     int         tailKnownAssignedXids;  /* index of oldest valid element */
00073     int         headKnownAssignedXids;  /* index of newest element, + 1 */
00074     slock_t     known_assigned_xids_lck;        /* protects head/tail pointers */
00075 
00076     /*
00077      * Highest subxid that has been removed from KnownAssignedXids array to
00078      * prevent overflow; or InvalidTransactionId if none.  We track this for
00079      * similar reasons to tracking overflowing cached subxids in PGXACT
00080      * entries.  Must hold exclusive ProcArrayLock to change this, and shared
00081      * lock to read it.
00082      */
00083     TransactionId lastOverflowedXid;
00084 
00085     /*
00086      * We declare pgprocnos[] as 1 entry because C wants a fixed-size array,
00087      * but actually it is maxProcs entries long.
00088      */
00089     int         pgprocnos[1];   /* VARIABLE LENGTH ARRAY */
00090 } ProcArrayStruct;
00091 
00092 static ProcArrayStruct *procArray;
00093 
00094 static PGPROC *allProcs;
00095 static PGXACT *allPgXact;
00096 
00097 /*
00098  * Bookkeeping for tracking emulated transactions in recovery
00099  */
00100 static TransactionId *KnownAssignedXids;
00101 static bool *KnownAssignedXidsValid;
00102 static TransactionId latestObservedXid = InvalidTransactionId;
00103 
00104 /*
00105  * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
00106  * the highest xid that might still be running that we don't have in
00107  * KnownAssignedXids.
00108  */
00109 static TransactionId standbySnapshotPendingXmin;
00110 
00111 #ifdef XIDCACHE_DEBUG
00112 
00113 /* counters for XidCache measurement */
00114 static long xc_by_recent_xmin = 0;
00115 static long xc_by_known_xact = 0;
00116 static long xc_by_my_xact = 0;
00117 static long xc_by_latest_xid = 0;
00118 static long xc_by_main_xid = 0;
00119 static long xc_by_child_xid = 0;
00120 static long xc_by_known_assigned = 0;
00121 static long xc_no_overflow = 0;
00122 static long xc_slow_answer = 0;
00123 
00124 #define xc_by_recent_xmin_inc()     (xc_by_recent_xmin++)
00125 #define xc_by_known_xact_inc()      (xc_by_known_xact++)
00126 #define xc_by_my_xact_inc()         (xc_by_my_xact++)
00127 #define xc_by_latest_xid_inc()      (xc_by_latest_xid++)
00128 #define xc_by_main_xid_inc()        (xc_by_main_xid++)
00129 #define xc_by_child_xid_inc()       (xc_by_child_xid++)
00130 #define xc_by_known_assigned_inc()  (xc_by_known_assigned++)
00131 #define xc_no_overflow_inc()        (xc_no_overflow++)
00132 #define xc_slow_answer_inc()        (xc_slow_answer++)
00133 
00134 static void DisplayXidCache(void);
00135 #else                           /* !XIDCACHE_DEBUG */
00136 
00137 #define xc_by_recent_xmin_inc()     ((void) 0)
00138 #define xc_by_known_xact_inc()      ((void) 0)
00139 #define xc_by_my_xact_inc()         ((void) 0)
00140 #define xc_by_latest_xid_inc()      ((void) 0)
00141 #define xc_by_main_xid_inc()        ((void) 0)
00142 #define xc_by_child_xid_inc()       ((void) 0)
00143 #define xc_by_known_assigned_inc()  ((void) 0)
00144 #define xc_no_overflow_inc()        ((void) 0)
00145 #define xc_slow_answer_inc()        ((void) 0)
00146 #endif   /* XIDCACHE_DEBUG */
00147 
00148 /* Primitives for KnownAssignedXids array handling for standby */
00149 static void KnownAssignedXidsCompress(bool force);
00150 static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
00151                      bool exclusive_lock);
00152 static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
00153 static bool KnownAssignedXidExists(TransactionId xid);
00154 static void KnownAssignedXidsRemove(TransactionId xid);
00155 static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
00156                             TransactionId *subxids);
00157 static void KnownAssignedXidsRemovePreceding(TransactionId xid);
00158 static int  KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
00159 static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray,
00160                                TransactionId *xmin,
00161                                TransactionId xmax);
00162 static TransactionId KnownAssignedXidsGetOldestXmin(void);
00163 static void KnownAssignedXidsDisplay(int trace_level);
00164 static void KnownAssignedXidsReset(void);
00165 
00166 /*
00167  * Report shared-memory space needed by CreateSharedProcArray.
00168  */
00169 Size
00170 ProcArrayShmemSize(void)
00171 {
00172     Size        size;
00173 
00174     /* Size of the ProcArray structure itself */
00175 #define PROCARRAY_MAXPROCS  (MaxBackends + max_prepared_xacts)
00176 
00177     size = offsetof(ProcArrayStruct, pgprocnos);
00178     size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS));
00179 
00180     /*
00181      * During Hot Standby processing we have a data structure called
00182      * KnownAssignedXids, created in shared memory. Local data structures are
00183      * also created in various backends during GetSnapshotData(),
00184      * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the
00185      * main structures created in those functions must be identically sized,
00186      * since we may at times copy the whole of the data structures around. We
00187      * refer to this size as TOTAL_MAX_CACHED_SUBXIDS.
00188      *
00189      * Ideally we'd only create this structure if we were actually doing hot
00190      * standby in the current run, but we don't know that yet at the time
00191      * shared memory is being set up.
00192      */
00193 #define TOTAL_MAX_CACHED_SUBXIDS \
00194     ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
00195 
00196     if (EnableHotStandby)
00197     {
00198         size = add_size(size,
00199                         mul_size(sizeof(TransactionId),
00200                                  TOTAL_MAX_CACHED_SUBXIDS));
00201         size = add_size(size,
00202                         mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
00203     }
00204 
00205     return size;
00206 }
00207 
00208 /*
00209  * Initialize the shared PGPROC array during postmaster startup.
00210  */
00211 void
00212 CreateSharedProcArray(void)
00213 {
00214     bool        found;
00215 
00216     /* Create or attach to the ProcArray shared structure */
00217     procArray = (ProcArrayStruct *)
00218         ShmemInitStruct("Proc Array",
00219                         add_size(offsetof(ProcArrayStruct, pgprocnos),
00220                                  mul_size(sizeof(int),
00221                                           PROCARRAY_MAXPROCS)),
00222                         &found);
00223 
00224     if (!found)
00225     {
00226         /*
00227          * We're the first - initialize.
00228          */
00229         procArray->numProcs = 0;
00230         procArray->maxProcs = PROCARRAY_MAXPROCS;
00231         procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS;
00232         procArray->numKnownAssignedXids = 0;
00233         procArray->tailKnownAssignedXids = 0;
00234         procArray->headKnownAssignedXids = 0;
00235         SpinLockInit(&procArray->known_assigned_xids_lck);
00236         procArray->lastOverflowedXid = InvalidTransactionId;
00237     }
00238 
00239     allProcs = ProcGlobal->allProcs;
00240     allPgXact = ProcGlobal->allPgXact;
00241 
00242     /* Create or attach to the KnownAssignedXids arrays too, if needed */
00243     if (EnableHotStandby)
00244     {
00245         KnownAssignedXids = (TransactionId *)
00246             ShmemInitStruct("KnownAssignedXids",
00247                             mul_size(sizeof(TransactionId),
00248                                      TOTAL_MAX_CACHED_SUBXIDS),
00249                             &found);
00250         KnownAssignedXidsValid = (bool *)
00251             ShmemInitStruct("KnownAssignedXidsValid",
00252                             mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
00253                             &found);
00254     }
00255 }
00256 
00257 /*
00258  * Add the specified PGPROC to the shared array.
00259  */
00260 void
00261 ProcArrayAdd(PGPROC *proc)
00262 {
00263     ProcArrayStruct *arrayP = procArray;
00264     int         index;
00265 
00266     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
00267 
00268     if (arrayP->numProcs >= arrayP->maxProcs)
00269     {
00270         /*
00271          * Ooops, no room.  (This really shouldn't happen, since there is a
00272          * fixed supply of PGPROC structs too, and so we should have failed
00273          * earlier.)
00274          */
00275         LWLockRelease(ProcArrayLock);
00276         ereport(FATAL,
00277                 (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
00278                  errmsg("sorry, too many clients already")));
00279     }
00280 
00281     /*
00282      * Keep the procs array sorted by (PGPROC *) so that we can utilize
00283      * locality of references much better. This is useful while traversing the
00284      * ProcArray because there is a increased likelihood of finding the next
00285      * PGPROC structure in the cache.
00286      *
00287      * Since the occurrence of adding/removing a proc is much lower than the
00288      * access to the ProcArray itself, the overhead should be marginal
00289      */
00290     for (index = 0; index < arrayP->numProcs; index++)
00291     {
00292         /*
00293          * If we are the first PGPROC or if we have found our right position
00294          * in the array, break
00295          */
00296         if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno))
00297             break;
00298     }
00299 
00300     memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index],
00301             (arrayP->numProcs - index) * sizeof(int));
00302     arrayP->pgprocnos[index] = proc->pgprocno;
00303     arrayP->numProcs++;
00304 
00305     LWLockRelease(ProcArrayLock);
00306 }
00307 
00308 /*
00309  * Remove the specified PGPROC from the shared array.
00310  *
00311  * When latestXid is a valid XID, we are removing a live 2PC gxact from the
00312  * array, and thus causing it to appear as "not running" anymore.  In this
00313  * case we must advance latestCompletedXid.  (This is essentially the same
00314  * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take
00315  * the ProcArrayLock only once, and don't damage the content of the PGPROC;
00316  * twophase.c depends on the latter.)
00317  */
00318 void
00319 ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
00320 {
00321     ProcArrayStruct *arrayP = procArray;
00322     int         index;
00323 
00324 #ifdef XIDCACHE_DEBUG
00325     /* dump stats at backend shutdown, but not prepared-xact end */
00326     if (proc->pid != 0)
00327         DisplayXidCache();
00328 #endif
00329 
00330     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
00331 
00332     if (TransactionIdIsValid(latestXid))
00333     {
00334         Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
00335 
00336         /* Advance global latestCompletedXid while holding the lock */
00337         if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
00338                                   latestXid))
00339             ShmemVariableCache->latestCompletedXid = latestXid;
00340     }
00341     else
00342     {
00343         /* Shouldn't be trying to remove a live transaction here */
00344         Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
00345     }
00346 
00347     for (index = 0; index < arrayP->numProcs; index++)
00348     {
00349         if (arrayP->pgprocnos[index] == proc->pgprocno)
00350         {
00351             /* Keep the PGPROC array sorted. See notes above */
00352             memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1],
00353                     (arrayP->numProcs - index - 1) * sizeof(int));
00354             arrayP->pgprocnos[arrayP->numProcs - 1] = -1;       /* for debugging */
00355             arrayP->numProcs--;
00356             LWLockRelease(ProcArrayLock);
00357             return;
00358         }
00359     }
00360 
00361     /* Ooops */
00362     LWLockRelease(ProcArrayLock);
00363 
00364     elog(LOG, "failed to find proc %p in ProcArray", proc);
00365 }
00366 
00367 
00368 /*
00369  * ProcArrayEndTransaction -- mark a transaction as no longer running
00370  *
00371  * This is used interchangeably for commit and abort cases.  The transaction
00372  * commit/abort must already be reported to WAL and pg_clog.
00373  *
00374  * proc is currently always MyProc, but we pass it explicitly for flexibility.
00375  * latestXid is the latest Xid among the transaction's main XID and
00376  * subtransactions, or InvalidTransactionId if it has no XID.  (We must ask
00377  * the caller to pass latestXid, instead of computing it from the PGPROC's
00378  * contents, because the subxid information in the PGPROC might be
00379  * incomplete.)
00380  */
00381 void
00382 ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
00383 {
00384     PGXACT     *pgxact = &allPgXact[proc->pgprocno];
00385 
00386     if (TransactionIdIsValid(latestXid))
00387     {
00388         /*
00389          * We must lock ProcArrayLock while clearing our advertised XID, so
00390          * that we do not exit the set of "running" transactions while someone
00391          * else is taking a snapshot.  See discussion in
00392          * src/backend/access/transam/README.
00393          */
00394         Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
00395 
00396         LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
00397 
00398         pgxact->xid = InvalidTransactionId;
00399         proc->lxid = InvalidLocalTransactionId;
00400         pgxact->xmin = InvalidTransactionId;
00401         /* must be cleared with xid/xmin: */
00402         pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
00403         pgxact->delayChkpt = false; /* be sure this is cleared in abort */
00404         proc->recoveryConflictPending = false;
00405 
00406         /* Clear the subtransaction-XID cache too while holding the lock */
00407         pgxact->nxids = 0;
00408         pgxact->overflowed = false;
00409 
00410         /* Also advance global latestCompletedXid while holding the lock */
00411         if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
00412                                   latestXid))
00413             ShmemVariableCache->latestCompletedXid = latestXid;
00414 
00415         LWLockRelease(ProcArrayLock);
00416     }
00417     else
00418     {
00419         /*
00420          * If we have no XID, we don't need to lock, since we won't affect
00421          * anyone else's calculation of a snapshot.  We might change their
00422          * estimate of global xmin, but that's OK.
00423          */
00424         Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
00425 
00426         proc->lxid = InvalidLocalTransactionId;
00427         pgxact->xmin = InvalidTransactionId;
00428         /* must be cleared with xid/xmin: */
00429         pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
00430         pgxact->delayChkpt = false; /* be sure this is cleared in abort */
00431         proc->recoveryConflictPending = false;
00432 
00433         Assert(pgxact->nxids == 0);
00434         Assert(pgxact->overflowed == false);
00435     }
00436 }
00437 
00438 
00439 /*
00440  * ProcArrayClearTransaction -- clear the transaction fields
00441  *
00442  * This is used after successfully preparing a 2-phase transaction.  We are
00443  * not actually reporting the transaction's XID as no longer running --- it
00444  * will still appear as running because the 2PC's gxact is in the ProcArray
00445  * too.  We just have to clear out our own PGXACT.
00446  */
00447 void
00448 ProcArrayClearTransaction(PGPROC *proc)
00449 {
00450     PGXACT     *pgxact = &allPgXact[proc->pgprocno];
00451 
00452     /*
00453      * We can skip locking ProcArrayLock here, because this action does not
00454      * actually change anyone's view of the set of running XIDs: our entry is
00455      * duplicate with the gxact that has already been inserted into the
00456      * ProcArray.
00457      */
00458     pgxact->xid = InvalidTransactionId;
00459     proc->lxid = InvalidLocalTransactionId;
00460     pgxact->xmin = InvalidTransactionId;
00461     proc->recoveryConflictPending = false;
00462 
00463     /* redundant, but just in case */
00464     pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
00465     pgxact->delayChkpt = false;
00466 
00467     /* Clear the subtransaction-XID cache too */
00468     pgxact->nxids = 0;
00469     pgxact->overflowed = false;
00470 }
00471 
00472 /*
00473  * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
00474  *
00475  * Takes us through 3 states: Initialized, Pending and Ready.
00476  * Normal case is to go all the way to Ready straight away, though there
00477  * are atypical cases where we need to take it in steps.
00478  *
00479  * Use the data about running transactions on master to create the initial
00480  * state of KnownAssignedXids. We also use these records to regularly prune
00481  * KnownAssignedXids because we know it is possible that some transactions
00482  * with FATAL errors fail to write abort records, which could cause eventual
00483  * overflow.
00484  *
00485  * See comments for LogStandbySnapshot().
00486  */
00487 void
00488 ProcArrayApplyRecoveryInfo(RunningTransactions running)
00489 {
00490     TransactionId *xids;
00491     int         nxids;
00492     TransactionId nextXid;
00493     int         i;
00494 
00495     Assert(standbyState >= STANDBY_INITIALIZED);
00496     Assert(TransactionIdIsValid(running->nextXid));
00497     Assert(TransactionIdIsValid(running->oldestRunningXid));
00498     Assert(TransactionIdIsNormal(running->latestCompletedXid));
00499 
00500     /*
00501      * Remove stale transactions, if any.
00502      */
00503     ExpireOldKnownAssignedTransactionIds(running->oldestRunningXid);
00504 
00505     /*
00506      * Remove stale locks, if any.
00507      *
00508      * Locks are always assigned to the toplevel xid so we don't need to care
00509      * about subxcnt/subxids (and by extension not about ->suboverflowed).
00510      */
00511     StandbyReleaseOldLocks(running->xcnt, running->xids);
00512 
00513     /*
00514      * If our snapshot is already valid, nothing else to do...
00515      */
00516     if (standbyState == STANDBY_SNAPSHOT_READY)
00517         return;
00518 
00519     /*
00520      * If our initial RunningTransactionsData had an overflowed snapshot then
00521      * we knew we were missing some subxids from our snapshot. If we continue
00522      * to see overflowed snapshots then we might never be able to start up, so
00523      * we make another test to see if our snapshot is now valid. We know that
00524      * the missing subxids are equal to or earlier than nextXid. After we
00525      * initialise we continue to apply changes during recovery, so once the
00526      * oldestRunningXid is later than the nextXid from the initial snapshot we
00527      * know that we no longer have missing information and can mark the
00528      * snapshot as valid.
00529      */
00530     if (standbyState == STANDBY_SNAPSHOT_PENDING)
00531     {
00532         /*
00533          * If the snapshot isn't overflowed or if its empty we can reset our
00534          * pending state and use this snapshot instead.
00535          */
00536         if (!running->subxid_overflow || running->xcnt == 0)
00537         {
00538             /*
00539              * If we have already collected known assigned xids, we need to
00540              * throw them away before we apply the recovery snapshot.
00541              */
00542             KnownAssignedXidsReset();
00543             standbyState = STANDBY_INITIALIZED;
00544         }
00545         else
00546         {
00547             if (TransactionIdPrecedes(standbySnapshotPendingXmin,
00548                                       running->oldestRunningXid))
00549             {
00550                 standbyState = STANDBY_SNAPSHOT_READY;
00551                 elog(trace_recovery(DEBUG1),
00552                      "recovery snapshots are now enabled");
00553             }
00554             else
00555                 elog(trace_recovery(DEBUG1),
00556                   "recovery snapshot waiting for non-overflowed snapshot or "
00557                 "until oldest active xid on standby is at least %u (now %u)",
00558                      standbySnapshotPendingXmin,
00559                      running->oldestRunningXid);
00560             return;
00561         }
00562     }
00563 
00564     Assert(standbyState == STANDBY_INITIALIZED);
00565 
00566     /*
00567      * OK, we need to initialise from the RunningTransactionsData record
00568      */
00569 
00570     /*
00571      * Nobody else is running yet, but take locks anyhow
00572      */
00573     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
00574 
00575     /*
00576      * KnownAssignedXids is sorted so we cannot just add the xids, we have to
00577      * sort them first.
00578      *
00579      * Some of the new xids are top-level xids and some are subtransactions.
00580      * We don't call SubtransSetParent because it doesn't matter yet. If we
00581      * aren't overflowed then all xids will fit in snapshot and so we don't
00582      * need subtrans. If we later overflow, an xid assignment record will add
00583      * xids to subtrans. If RunningXacts is overflowed then we don't have
00584      * enough information to correctly update subtrans anyway.
00585      */
00586 
00587     /*
00588      * Allocate a temporary array to avoid modifying the array passed as
00589      * argument.
00590      */
00591     xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt));
00592 
00593     /*
00594      * Add to the temp array any xids which have not already completed.
00595      */
00596     nxids = 0;
00597     for (i = 0; i < running->xcnt + running->subxcnt; i++)
00598     {
00599         TransactionId xid = running->xids[i];
00600 
00601         /*
00602          * The running-xacts snapshot can contain xids that were still visible
00603          * in the procarray when the snapshot was taken, but were already
00604          * WAL-logged as completed. They're not running anymore, so ignore
00605          * them.
00606          */
00607         if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
00608             continue;
00609 
00610         xids[nxids++] = xid;
00611     }
00612 
00613     if (nxids > 0)
00614     {
00615         if (procArray->numKnownAssignedXids != 0)
00616         {
00617             LWLockRelease(ProcArrayLock);
00618             elog(ERROR, "KnownAssignedXids is not empty");
00619         }
00620 
00621         /*
00622          * Sort the array so that we can add them safely into
00623          * KnownAssignedXids.
00624          */
00625         qsort(xids, nxids, sizeof(TransactionId), xidComparator);
00626 
00627         /*
00628          * Add the sorted snapshot into KnownAssignedXids
00629          */
00630         for (i = 0; i < nxids; i++)
00631             KnownAssignedXidsAdd(xids[i], xids[i], true);
00632 
00633         KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
00634     }
00635 
00636     pfree(xids);
00637 
00638     /*
00639      * Now we've got the running xids we need to set the global values that
00640      * are used to track snapshots as they evolve further.
00641      *
00642      * - latestCompletedXid which will be the xmax for snapshots -
00643      * lastOverflowedXid which shows whether snapshots overflow - nextXid
00644      *
00645      * If the snapshot overflowed, then we still initialise with what we know,
00646      * but the recovery snapshot isn't fully valid yet because we know there
00647      * are some subxids missing. We don't know the specific subxids that are
00648      * missing, so conservatively assume the last one is latestObservedXid.
00649      */
00650     latestObservedXid = running->nextXid;
00651     TransactionIdRetreat(latestObservedXid);
00652 
00653     if (running->subxid_overflow)
00654     {
00655         standbyState = STANDBY_SNAPSHOT_PENDING;
00656 
00657         standbySnapshotPendingXmin = latestObservedXid;
00658         procArray->lastOverflowedXid = latestObservedXid;
00659     }
00660     else
00661     {
00662         standbyState = STANDBY_SNAPSHOT_READY;
00663 
00664         standbySnapshotPendingXmin = InvalidTransactionId;
00665     }
00666 
00667     /*
00668      * If a transaction wrote a commit record in the gap between taking and
00669      * logging the snapshot then latestCompletedXid may already be higher than
00670      * the value from the snapshot, so check before we use the incoming value.
00671      */
00672     if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
00673                               running->latestCompletedXid))
00674         ShmemVariableCache->latestCompletedXid = running->latestCompletedXid;
00675 
00676     Assert(TransactionIdIsNormal(ShmemVariableCache->latestCompletedXid));
00677 
00678     LWLockRelease(ProcArrayLock);
00679 
00680     /*
00681      * ShmemVariableCache->nextXid must be beyond any observed xid.
00682      *
00683      * We don't expect anyone else to modify nextXid, hence we don't need to
00684      * hold a lock while examining it.  We still acquire the lock to modify
00685      * it, though.
00686      */
00687     nextXid = latestObservedXid;
00688     TransactionIdAdvance(nextXid);
00689     if (TransactionIdFollows(nextXid, ShmemVariableCache->nextXid))
00690     {
00691         LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
00692         ShmemVariableCache->nextXid = nextXid;
00693         LWLockRelease(XidGenLock);
00694     }
00695 
00696     Assert(TransactionIdIsValid(ShmemVariableCache->nextXid));
00697 
00698     KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
00699     if (standbyState == STANDBY_SNAPSHOT_READY)
00700         elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled");
00701     else
00702         elog(trace_recovery(DEBUG1),
00703              "recovery snapshot waiting for non-overflowed snapshot or "
00704              "until oldest active xid on standby is at least %u (now %u)",
00705              standbySnapshotPendingXmin,
00706              running->oldestRunningXid);
00707 }
00708 
00709 /*
00710  * ProcArrayApplyXidAssignment
00711  *      Process an XLOG_XACT_ASSIGNMENT WAL record
00712  */
00713 void
00714 ProcArrayApplyXidAssignment(TransactionId topxid,
00715                             int nsubxids, TransactionId *subxids)
00716 {
00717     TransactionId max_xid;
00718     int         i;
00719 
00720     Assert(standbyState >= STANDBY_INITIALIZED);
00721 
00722     max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
00723 
00724     /*
00725      * Mark all the subtransactions as observed.
00726      *
00727      * NOTE: This will fail if the subxid contains too many previously
00728      * unobserved xids to fit into known-assigned-xids. That shouldn't happen
00729      * as the code stands, because xid-assignment records should never contain
00730      * more than PGPROC_MAX_CACHED_SUBXIDS entries.
00731      */
00732     RecordKnownAssignedTransactionIds(max_xid);
00733 
00734     /*
00735      * Notice that we update pg_subtrans with the top-level xid, rather than
00736      * the parent xid. This is a difference between normal processing and
00737      * recovery, yet is still correct in all cases. The reason is that
00738      * subtransaction commit is not marked in clog until commit processing, so
00739      * all aborted subtransactions have already been clearly marked in clog.
00740      * As a result we are able to refer directly to the top-level
00741      * transaction's state rather than skipping through all the intermediate
00742      * states in the subtransaction tree. This should be the first time we
00743      * have attempted to SubTransSetParent().
00744      */
00745     for (i = 0; i < nsubxids; i++)
00746         SubTransSetParent(subxids[i], topxid, false);
00747 
00748     /*
00749      * Uses same locking as transaction commit
00750      */
00751     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
00752 
00753     /*
00754      * Remove subxids from known-assigned-xacts.
00755      */
00756     KnownAssignedXidsRemoveTree(InvalidTransactionId, nsubxids, subxids);
00757 
00758     /*
00759      * Advance lastOverflowedXid to be at least the last of these subxids.
00760      */
00761     if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid))
00762         procArray->lastOverflowedXid = max_xid;
00763 
00764     LWLockRelease(ProcArrayLock);
00765 }
00766 
00767 /*
00768  * TransactionIdIsInProgress -- is given transaction running in some backend
00769  *
00770  * Aside from some shortcuts such as checking RecentXmin and our own Xid,
00771  * there are four possibilities for finding a running transaction:
00772  *
00773  * 1. The given Xid is a main transaction Id.  We will find this out cheaply
00774  * by looking at the PGXACT struct for each backend.
00775  *
00776  * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
00777  * We can find this out cheaply too.
00778  *
00779  * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
00780  * if the Xid is running on the master.
00781  *
00782  * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
00783  * if that is running according to PGXACT or KnownAssignedXids.  This is the
00784  * slowest way, but sadly it has to be done always if the others failed,
00785  * unless we see that the cached subxact sets are complete (none have
00786  * overflowed).
00787  *
00788  * ProcArrayLock has to be held while we do 1, 2, 3.  If we save the top Xids
00789  * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
00790  * This buys back some concurrency (and we can't retrieve the main Xids from
00791  * PGXACT again anyway; see GetNewTransactionId).
00792  */
00793 bool
00794 TransactionIdIsInProgress(TransactionId xid)
00795 {
00796     static TransactionId *xids = NULL;
00797     int         nxids = 0;
00798     ProcArrayStruct *arrayP = procArray;
00799     TransactionId topxid;
00800     int         i,
00801                 j;
00802 
00803     /*
00804      * Don't bother checking a transaction older than RecentXmin; it could not
00805      * possibly still be running.  (Note: in particular, this guarantees that
00806      * we reject InvalidTransactionId, FrozenTransactionId, etc as not
00807      * running.)
00808      */
00809     if (TransactionIdPrecedes(xid, RecentXmin))
00810     {
00811         xc_by_recent_xmin_inc();
00812         return false;
00813     }
00814 
00815     /*
00816      * We may have just checked the status of this transaction, so if it is
00817      * already known to be completed, we can fall out without any access to
00818      * shared memory.
00819      */
00820     if (TransactionIdIsKnownCompleted(xid))
00821     {
00822         xc_by_known_xact_inc();
00823         return false;
00824     }
00825 
00826     /*
00827      * Also, we can handle our own transaction (and subtransactions) without
00828      * any access to shared memory.
00829      */
00830     if (TransactionIdIsCurrentTransactionId(xid))
00831     {
00832         xc_by_my_xact_inc();
00833         return true;
00834     }
00835 
00836     /*
00837      * If first time through, get workspace to remember main XIDs in. We
00838      * malloc it permanently to avoid repeated palloc/pfree overhead.
00839      */
00840     if (xids == NULL)
00841     {
00842         /*
00843          * In hot standby mode, reserve enough space to hold all xids in the
00844          * known-assigned list. If we later finish recovery, we no longer need
00845          * the bigger array, but we don't bother to shrink it.
00846          */
00847         int         maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs;
00848 
00849         xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId));
00850         if (xids == NULL)
00851             ereport(ERROR,
00852                     (errcode(ERRCODE_OUT_OF_MEMORY),
00853                      errmsg("out of memory")));
00854     }
00855 
00856     LWLockAcquire(ProcArrayLock, LW_SHARED);
00857 
00858     /*
00859      * Now that we have the lock, we can check latestCompletedXid; if the
00860      * target Xid is after that, it's surely still running.
00861      */
00862     if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, xid))
00863     {
00864         LWLockRelease(ProcArrayLock);
00865         xc_by_latest_xid_inc();
00866         return true;
00867     }
00868 
00869     /* No shortcuts, gotta grovel through the array */
00870     for (i = 0; i < arrayP->numProcs; i++)
00871     {
00872         int         pgprocno = arrayP->pgprocnos[i];
00873         volatile PGPROC *proc = &allProcs[pgprocno];
00874         volatile PGXACT *pgxact = &allPgXact[pgprocno];
00875         TransactionId pxid;
00876 
00877         /* Ignore my own proc --- dealt with it above */
00878         if (proc == MyProc)
00879             continue;
00880 
00881         /* Fetch xid just once - see GetNewTransactionId */
00882         pxid = pgxact->xid;
00883 
00884         if (!TransactionIdIsValid(pxid))
00885             continue;
00886 
00887         /*
00888          * Step 1: check the main Xid
00889          */
00890         if (TransactionIdEquals(pxid, xid))
00891         {
00892             LWLockRelease(ProcArrayLock);
00893             xc_by_main_xid_inc();
00894             return true;
00895         }
00896 
00897         /*
00898          * We can ignore main Xids that are younger than the target Xid, since
00899          * the target could not possibly be their child.
00900          */
00901         if (TransactionIdPrecedes(xid, pxid))
00902             continue;
00903 
00904         /*
00905          * Step 2: check the cached child-Xids arrays
00906          */
00907         for (j = pgxact->nxids - 1; j >= 0; j--)
00908         {
00909             /* Fetch xid just once - see GetNewTransactionId */
00910             TransactionId cxid = proc->subxids.xids[j];
00911 
00912             if (TransactionIdEquals(cxid, xid))
00913             {
00914                 LWLockRelease(ProcArrayLock);
00915                 xc_by_child_xid_inc();
00916                 return true;
00917             }
00918         }
00919 
00920         /*
00921          * Save the main Xid for step 4.  We only need to remember main Xids
00922          * that have uncached children.  (Note: there is no race condition
00923          * here because the overflowed flag cannot be cleared, only set, while
00924          * we hold ProcArrayLock.  So we can't miss an Xid that we need to
00925          * worry about.)
00926          */
00927         if (pgxact->overflowed)
00928             xids[nxids++] = pxid;
00929     }
00930 
00931     /*
00932      * Step 3: in hot standby mode, check the known-assigned-xids list.  XIDs
00933      * in the list must be treated as running.
00934      */
00935     if (RecoveryInProgress())
00936     {
00937         /* none of the PGXACT entries should have XIDs in hot standby mode */
00938         Assert(nxids == 0);
00939 
00940         if (KnownAssignedXidExists(xid))
00941         {
00942             LWLockRelease(ProcArrayLock);
00943             xc_by_known_assigned_inc();
00944             return true;
00945         }
00946 
00947         /*
00948          * If the KnownAssignedXids overflowed, we have to check pg_subtrans
00949          * too.  Fetch all xids from KnownAssignedXids that are lower than
00950          * xid, since if xid is a subtransaction its parent will always have a
00951          * lower value.  Note we will collect both main and subXIDs here, but
00952          * there's no help for it.
00953          */
00954         if (TransactionIdPrecedesOrEquals(xid, procArray->lastOverflowedXid))
00955             nxids = KnownAssignedXidsGet(xids, xid);
00956     }
00957 
00958     LWLockRelease(ProcArrayLock);
00959 
00960     /*
00961      * If none of the relevant caches overflowed, we know the Xid is not
00962      * running without even looking at pg_subtrans.
00963      */
00964     if (nxids == 0)
00965     {
00966         xc_no_overflow_inc();
00967         return false;
00968     }
00969 
00970     /*
00971      * Step 4: have to check pg_subtrans.
00972      *
00973      * At this point, we know it's either a subtransaction of one of the Xids
00974      * in xids[], or it's not running.  If it's an already-failed
00975      * subtransaction, we want to say "not running" even though its parent may
00976      * still be running.  So first, check pg_clog to see if it's been aborted.
00977      */
00978     xc_slow_answer_inc();
00979 
00980     if (TransactionIdDidAbort(xid))
00981         return false;
00982 
00983     /*
00984      * It isn't aborted, so check whether the transaction tree it belongs to
00985      * is still running (or, more precisely, whether it was running when we
00986      * held ProcArrayLock).
00987      */
00988     topxid = SubTransGetTopmostTransaction(xid);
00989     Assert(TransactionIdIsValid(topxid));
00990     if (!TransactionIdEquals(topxid, xid))
00991     {
00992         for (i = 0; i < nxids; i++)
00993         {
00994             if (TransactionIdEquals(xids[i], topxid))
00995                 return true;
00996         }
00997     }
00998 
00999     return false;
01000 }
01001 
01002 /*
01003  * TransactionIdIsActive -- is xid the top-level XID of an active backend?
01004  *
01005  * This differs from TransactionIdIsInProgress in that it ignores prepared
01006  * transactions, as well as transactions running on the master if we're in
01007  * hot standby.  Also, we ignore subtransactions since that's not needed
01008  * for current uses.
01009  */
01010 bool
01011 TransactionIdIsActive(TransactionId xid)
01012 {
01013     bool        result = false;
01014     ProcArrayStruct *arrayP = procArray;
01015     int         i;
01016 
01017     /*
01018      * Don't bother checking a transaction older than RecentXmin; it could not
01019      * possibly still be running.
01020      */
01021     if (TransactionIdPrecedes(xid, RecentXmin))
01022         return false;
01023 
01024     LWLockAcquire(ProcArrayLock, LW_SHARED);
01025 
01026     for (i = 0; i < arrayP->numProcs; i++)
01027     {
01028         int         pgprocno = arrayP->pgprocnos[i];
01029         volatile PGPROC *proc = &allProcs[pgprocno];
01030         volatile PGXACT *pgxact = &allPgXact[pgprocno];
01031         TransactionId pxid;
01032 
01033         /* Fetch xid just once - see GetNewTransactionId */
01034         pxid = pgxact->xid;
01035 
01036         if (!TransactionIdIsValid(pxid))
01037             continue;
01038 
01039         if (proc->pid == 0)
01040             continue;           /* ignore prepared transactions */
01041 
01042         if (TransactionIdEquals(pxid, xid))
01043         {
01044             result = true;
01045             break;
01046         }
01047     }
01048 
01049     LWLockRelease(ProcArrayLock);
01050 
01051     return result;
01052 }
01053 
01054 
01055 /*
01056  * GetOldestXmin -- returns oldest transaction that was running
01057  *                  when any current transaction was started.
01058  *
01059  * If allDbs is TRUE then all backends are considered; if allDbs is FALSE
01060  * then only backends running in my own database are considered.
01061  *
01062  * If ignoreVacuum is TRUE then backends with the PROC_IN_VACUUM flag set are
01063  * ignored.
01064  *
01065  * This is used by VACUUM to decide which deleted tuples must be preserved
01066  * in a table.  allDbs = TRUE is needed for shared relations, but allDbs =
01067  * FALSE is sufficient for non-shared relations, since only backends in my
01068  * own database could ever see the tuples in them.  Also, we can ignore
01069  * concurrently running lazy VACUUMs because (a) they must be working on other
01070  * tables, and (b) they don't need to do snapshot-based lookups.
01071  *
01072  * This is also used to determine where to truncate pg_subtrans.  allDbs
01073  * must be TRUE for that case, and ignoreVacuum FALSE.
01074  *
01075  * Note: we include all currently running xids in the set of considered xids.
01076  * This ensures that if a just-started xact has not yet set its snapshot,
01077  * when it does set the snapshot it cannot set xmin less than what we compute.
01078  * See notes in src/backend/access/transam/README.
01079  *
01080  * Note: despite the above, it's possible for the calculated value to move
01081  * backwards on repeated calls. The calculated value is conservative, so that
01082  * anything older is definitely not considered as running by anyone anymore,
01083  * but the exact value calculated depends on a number of things. For example,
01084  * if allDbs is FALSE and there are no transactions running in the current
01085  * database, GetOldestXmin() returns latestCompletedXid. If a transaction
01086  * begins after that, its xmin will include in-progress transactions in other
01087  * databases that started earlier, so another call will return a lower value.
01088  * Nonetheless it is safe to vacuum a table in the current database with the
01089  * first result.  There are also replication-related effects: a walsender
01090  * process can set its xmin based on transactions that are no longer running
01091  * in the master but are still being replayed on the standby, thus possibly
01092  * making the GetOldestXmin reading go backwards.  In this case there is a
01093  * possibility that we lose data that the standby would like to have, but
01094  * there is little we can do about that --- data is only protected if the
01095  * walsender runs continuously while queries are executed on the standby.
01096  * (The Hot Standby code deals with such cases by failing standby queries
01097  * that needed to access already-removed data, so there's no integrity bug.)
01098  * The return value is also adjusted with vacuum_defer_cleanup_age, so
01099  * increasing that setting on the fly is another easy way to make
01100  * GetOldestXmin() move backwards, with no consequences for data integrity.
01101  */
01102 TransactionId
01103 GetOldestXmin(bool allDbs, bool ignoreVacuum)
01104 {
01105     ProcArrayStruct *arrayP = procArray;
01106     TransactionId result;
01107     int         index;
01108 
01109     /* Cannot look for individual databases during recovery */
01110     Assert(allDbs || !RecoveryInProgress());
01111 
01112     LWLockAcquire(ProcArrayLock, LW_SHARED);
01113 
01114     /*
01115      * We initialize the MIN() calculation with latestCompletedXid + 1. This
01116      * is a lower bound for the XIDs that might appear in the ProcArray later,
01117      * and so protects us against overestimating the result due to future
01118      * additions.
01119      */
01120     result = ShmemVariableCache->latestCompletedXid;
01121     Assert(TransactionIdIsNormal(result));
01122     TransactionIdAdvance(result);
01123 
01124     for (index = 0; index < arrayP->numProcs; index++)
01125     {
01126         int         pgprocno = arrayP->pgprocnos[index];
01127         volatile PGPROC *proc = &allProcs[pgprocno];
01128         volatile PGXACT *pgxact = &allPgXact[pgprocno];
01129 
01130         if (ignoreVacuum && (pgxact->vacuumFlags & PROC_IN_VACUUM))
01131             continue;
01132 
01133         if (allDbs ||
01134             proc->databaseId == MyDatabaseId ||
01135             proc->databaseId == 0)      /* always include WalSender */
01136         {
01137             /* Fetch xid just once - see GetNewTransactionId */
01138             TransactionId xid = pgxact->xid;
01139 
01140             /* First consider the transaction's own Xid, if any */
01141             if (TransactionIdIsNormal(xid) &&
01142                 TransactionIdPrecedes(xid, result))
01143                 result = xid;
01144 
01145             /*
01146              * Also consider the transaction's Xmin, if set.
01147              *
01148              * We must check both Xid and Xmin because a transaction might
01149              * have an Xmin but not (yet) an Xid; conversely, if it has an
01150              * Xid, that could determine some not-yet-set Xmin.
01151              */
01152             xid = pgxact->xmin; /* Fetch just once */
01153             if (TransactionIdIsNormal(xid) &&
01154                 TransactionIdPrecedes(xid, result))
01155                 result = xid;
01156         }
01157     }
01158 
01159     if (RecoveryInProgress())
01160     {
01161         /*
01162          * Check to see whether KnownAssignedXids contains an xid value older
01163          * than the main procarray.
01164          */
01165         TransactionId kaxmin = KnownAssignedXidsGetOldestXmin();
01166 
01167         LWLockRelease(ProcArrayLock);
01168 
01169         if (TransactionIdIsNormal(kaxmin) &&
01170             TransactionIdPrecedes(kaxmin, result))
01171             result = kaxmin;
01172     }
01173     else
01174     {
01175         /*
01176          * No other information needed, so release the lock immediately.
01177          */
01178         LWLockRelease(ProcArrayLock);
01179 
01180         /*
01181          * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age,
01182          * being careful not to generate a "permanent" XID.
01183          *
01184          * vacuum_defer_cleanup_age provides some additional "slop" for the
01185          * benefit of hot standby queries on slave servers.  This is quick and
01186          * dirty, and perhaps not all that useful unless the master has a
01187          * predictable transaction rate, but it offers some protection when
01188          * there's no walsender connection.  Note that we are assuming
01189          * vacuum_defer_cleanup_age isn't large enough to cause wraparound ---
01190          * so guc.c should limit it to no more than the xidStopLimit threshold
01191          * in varsup.c.  Also note that we intentionally don't apply
01192          * vacuum_defer_cleanup_age on standby servers.
01193          */
01194         result -= vacuum_defer_cleanup_age;
01195         if (!TransactionIdIsNormal(result))
01196             result = FirstNormalTransactionId;
01197     }
01198 
01199     return result;
01200 }
01201 
01202 /*
01203  * GetMaxSnapshotXidCount -- get max size for snapshot XID array
01204  *
01205  * We have to export this for use by snapmgr.c.
01206  */
01207 int
01208 GetMaxSnapshotXidCount(void)
01209 {
01210     return procArray->maxProcs;
01211 }
01212 
01213 /*
01214  * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array
01215  *
01216  * We have to export this for use by snapmgr.c.
01217  */
01218 int
01219 GetMaxSnapshotSubxidCount(void)
01220 {
01221     return TOTAL_MAX_CACHED_SUBXIDS;
01222 }
01223 
01224 /*
01225  * GetSnapshotData -- returns information about running transactions.
01226  *
01227  * The returned snapshot includes xmin (lowest still-running xact ID),
01228  * xmax (highest completed xact ID + 1), and a list of running xact IDs
01229  * in the range xmin <= xid < xmax.  It is used as follows:
01230  *      All xact IDs < xmin are considered finished.
01231  *      All xact IDs >= xmax are considered still running.
01232  *      For an xact ID xmin <= xid < xmax, consult list to see whether
01233  *      it is considered running or not.
01234  * This ensures that the set of transactions seen as "running" by the
01235  * current xact will not change after it takes the snapshot.
01236  *
01237  * All running top-level XIDs are included in the snapshot, except for lazy
01238  * VACUUM processes.  We also try to include running subtransaction XIDs,
01239  * but since PGPROC has only a limited cache area for subxact XIDs, full
01240  * information may not be available.  If we find any overflowed subxid arrays,
01241  * we have to mark the snapshot's subxid data as overflowed, and extra work
01242  * *may* need to be done to determine what's running (see XidInMVCCSnapshot()
01243  * in tqual.c).
01244  *
01245  * We also update the following backend-global variables:
01246  *      TransactionXmin: the oldest xmin of any snapshot in use in the
01247  *          current transaction (this is the same as MyPgXact->xmin).
01248  *      RecentXmin: the xmin computed for the most recent snapshot.  XIDs
01249  *          older than this are known not running any more.
01250  *      RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
01251  *          running transactions, except those running LAZY VACUUM).  This is
01252  *          the same computation done by GetOldestXmin(true, true).
01253  *
01254  * Note: this function should probably not be called with an argument that's
01255  * not statically allocated (see xip allocation below).
01256  */
01257 Snapshot
01258 GetSnapshotData(Snapshot snapshot)
01259 {
01260     ProcArrayStruct *arrayP = procArray;
01261     TransactionId xmin;
01262     TransactionId xmax;
01263     TransactionId globalxmin;
01264     int         index;
01265     int         count = 0;
01266     int         subcount = 0;
01267     bool        suboverflowed = false;
01268 
01269     Assert(snapshot != NULL);
01270 
01271     /*
01272      * Allocating space for maxProcs xids is usually overkill; numProcs would
01273      * be sufficient.  But it seems better to do the malloc while not holding
01274      * the lock, so we can't look at numProcs.  Likewise, we allocate much
01275      * more subxip storage than is probably needed.
01276      *
01277      * This does open a possibility for avoiding repeated malloc/free: since
01278      * maxProcs does not change at runtime, we can simply reuse the previous
01279      * xip arrays if any.  (This relies on the fact that all callers pass
01280      * static SnapshotData structs.)
01281      */
01282     if (snapshot->xip == NULL)
01283     {
01284         /*
01285          * First call for this snapshot. Snapshot is same size whether or not
01286          * we are in recovery, see later comments.
01287          */
01288         snapshot->xip = (TransactionId *)
01289             malloc(GetMaxSnapshotXidCount() * sizeof(TransactionId));
01290         if (snapshot->xip == NULL)
01291             ereport(ERROR,
01292                     (errcode(ERRCODE_OUT_OF_MEMORY),
01293                      errmsg("out of memory")));
01294         Assert(snapshot->subxip == NULL);
01295         snapshot->subxip = (TransactionId *)
01296             malloc(GetMaxSnapshotSubxidCount() * sizeof(TransactionId));
01297         if (snapshot->subxip == NULL)
01298             ereport(ERROR,
01299                     (errcode(ERRCODE_OUT_OF_MEMORY),
01300                      errmsg("out of memory")));
01301     }
01302 
01303     /*
01304      * It is sufficient to get shared lock on ProcArrayLock, even if we are
01305      * going to set MyPgXact->xmin.
01306      */
01307     LWLockAcquire(ProcArrayLock, LW_SHARED);
01308 
01309     /* xmax is always latestCompletedXid + 1 */
01310     xmax = ShmemVariableCache->latestCompletedXid;
01311     Assert(TransactionIdIsNormal(xmax));
01312     TransactionIdAdvance(xmax);
01313 
01314     /* initialize xmin calculation with xmax */
01315     globalxmin = xmin = xmax;
01316 
01317     snapshot->takenDuringRecovery = RecoveryInProgress();
01318 
01319     if (!snapshot->takenDuringRecovery)
01320     {
01321         int        *pgprocnos = arrayP->pgprocnos;
01322         int         numProcs;
01323 
01324         /*
01325          * Spin over procArray checking xid, xmin, and subxids.  The goal is
01326          * to gather all active xids, find the lowest xmin, and try to record
01327          * subxids.
01328          */
01329         numProcs = arrayP->numProcs;
01330         for (index = 0; index < numProcs; index++)
01331         {
01332             int         pgprocno = pgprocnos[index];
01333             volatile PGXACT *pgxact = &allPgXact[pgprocno];
01334             TransactionId xid;
01335 
01336             /* Ignore procs running LAZY VACUUM */
01337             if (pgxact->vacuumFlags & PROC_IN_VACUUM)
01338                 continue;
01339 
01340             /* Update globalxmin to be the smallest valid xmin */
01341             xid = pgxact->xmin; /* fetch just once */
01342             if (TransactionIdIsNormal(xid) &&
01343                 NormalTransactionIdPrecedes(xid, globalxmin))
01344                 globalxmin = xid;
01345 
01346             /* Fetch xid just once - see GetNewTransactionId */
01347             xid = pgxact->xid;
01348 
01349             /*
01350              * If the transaction has no XID assigned, we can skip it; it
01351              * won't have sub-XIDs either.  If the XID is >= xmax, we can also
01352              * skip it; such transactions will be treated as running anyway
01353              * (and any sub-XIDs will also be >= xmax).
01354              */
01355             if (!TransactionIdIsNormal(xid)
01356                 || !NormalTransactionIdPrecedes(xid, xmax))
01357                 continue;
01358 
01359             /*
01360              * We don't include our own XIDs (if any) in the snapshot, but we
01361              * must include them in xmin.
01362              */
01363             if (NormalTransactionIdPrecedes(xid, xmin))
01364                 xmin = xid;
01365             if (pgxact == MyPgXact)
01366                 continue;
01367 
01368             /* Add XID to snapshot. */
01369             snapshot->xip[count++] = xid;
01370 
01371             /*
01372              * Save subtransaction XIDs if possible (if we've already
01373              * overflowed, there's no point).  Note that the subxact XIDs must
01374              * be later than their parent, so no need to check them against
01375              * xmin.  We could filter against xmax, but it seems better not to
01376              * do that much work while holding the ProcArrayLock.
01377              *
01378              * The other backend can add more subxids concurrently, but cannot
01379              * remove any.  Hence it's important to fetch nxids just once.
01380              * Should be safe to use memcpy, though.  (We needn't worry about
01381              * missing any xids added concurrently, because they must postdate
01382              * xmax.)
01383              *
01384              * Again, our own XIDs are not included in the snapshot.
01385              */
01386             if (!suboverflowed)
01387             {
01388                 if (pgxact->overflowed)
01389                     suboverflowed = true;
01390                 else
01391                 {
01392                     int         nxids = pgxact->nxids;
01393 
01394                     if (nxids > 0)
01395                     {
01396                         volatile PGPROC *proc = &allProcs[pgprocno];
01397 
01398                         memcpy(snapshot->subxip + subcount,
01399                                (void *) proc->subxids.xids,
01400                                nxids * sizeof(TransactionId));
01401                         subcount += nxids;
01402                     }
01403                 }
01404             }
01405         }
01406     }
01407     else
01408     {
01409         /*
01410          * We're in hot standby, so get XIDs from KnownAssignedXids.
01411          *
01412          * We store all xids directly into subxip[]. Here's why:
01413          *
01414          * In recovery we don't know which xids are top-level and which are
01415          * subxacts, a design choice that greatly simplifies xid processing.
01416          *
01417          * It seems like we would want to try to put xids into xip[] only, but
01418          * that is fairly small. We would either need to make that bigger or
01419          * to increase the rate at which we WAL-log xid assignment; neither is
01420          * an appealing choice.
01421          *
01422          * We could try to store xids into xip[] first and then into subxip[]
01423          * if there are too many xids. That only works if the snapshot doesn't
01424          * overflow because we do not search subxip[] in that case. A simpler
01425          * way is to just store all xids in the subxact array because this is
01426          * by far the bigger array. We just leave the xip array empty.
01427          *
01428          * Either way we need to change the way XidInMVCCSnapshot() works
01429          * depending upon when the snapshot was taken, or change normal
01430          * snapshot processing so it matches.
01431          *
01432          * Note: It is possible for recovery to end before we finish taking the
01433          * snapshot, and for newly assigned transaction ids to be added to the
01434          * ProcArray.  xmax cannot change while we hold ProcArrayLock, so those
01435          * newly added transaction ids would be filtered away, so we need not
01436          * be concerned about them.
01437          */
01438         subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin,
01439                                                   xmax);
01440 
01441         if (TransactionIdPrecedesOrEquals(xmin, procArray->lastOverflowedXid))
01442             suboverflowed = true;
01443     }
01444 
01445     if (!TransactionIdIsValid(MyPgXact->xmin))
01446         MyPgXact->xmin = TransactionXmin = xmin;
01447     LWLockRelease(ProcArrayLock);
01448 
01449     /*
01450      * Update globalxmin to include actual process xids.  This is a slightly
01451      * different way of computing it than GetOldestXmin uses, but should give
01452      * the same result.
01453      */
01454     if (TransactionIdPrecedes(xmin, globalxmin))
01455         globalxmin = xmin;
01456 
01457     /* Update global variables too */
01458     RecentGlobalXmin = globalxmin - vacuum_defer_cleanup_age;
01459     if (!TransactionIdIsNormal(RecentGlobalXmin))
01460         RecentGlobalXmin = FirstNormalTransactionId;
01461     RecentXmin = xmin;
01462 
01463     snapshot->xmin = xmin;
01464     snapshot->xmax = xmax;
01465     snapshot->xcnt = count;
01466     snapshot->subxcnt = subcount;
01467     snapshot->suboverflowed = suboverflowed;
01468 
01469     snapshot->curcid = GetCurrentCommandId(false);
01470 
01471     /*
01472      * This is a new snapshot, so set both refcounts are zero, and mark it as
01473      * not copied in persistent memory.
01474      */
01475     snapshot->active_count = 0;
01476     snapshot->regd_count = 0;
01477     snapshot->copied = false;
01478 
01479     return snapshot;
01480 }
01481 
01482 /*
01483  * ProcArrayInstallImportedXmin -- install imported xmin into MyPgXact->xmin
01484  *
01485  * This is called when installing a snapshot imported from another
01486  * transaction.  To ensure that OldestXmin doesn't go backwards, we must
01487  * check that the source transaction is still running, and we'd better do
01488  * that atomically with installing the new xmin.
01489  *
01490  * Returns TRUE if successful, FALSE if source xact is no longer running.
01491  */
01492 bool
01493 ProcArrayInstallImportedXmin(TransactionId xmin, TransactionId sourcexid)
01494 {
01495     bool        result = false;
01496     ProcArrayStruct *arrayP = procArray;
01497     int         index;
01498 
01499     Assert(TransactionIdIsNormal(xmin));
01500     if (!TransactionIdIsNormal(sourcexid))
01501         return false;
01502 
01503     /* Get lock so source xact can't end while we're doing this */
01504     LWLockAcquire(ProcArrayLock, LW_SHARED);
01505 
01506     for (index = 0; index < arrayP->numProcs; index++)
01507     {
01508         int         pgprocno = arrayP->pgprocnos[index];
01509         volatile PGPROC *proc = &allProcs[pgprocno];
01510         volatile PGXACT *pgxact = &allPgXact[pgprocno];
01511         TransactionId xid;
01512 
01513         /* Ignore procs running LAZY VACUUM */
01514         if (pgxact->vacuumFlags & PROC_IN_VACUUM)
01515             continue;
01516 
01517         xid = pgxact->xid;      /* fetch just once */
01518         if (xid != sourcexid)
01519             continue;
01520 
01521         /*
01522          * We check the transaction's database ID for paranoia's sake: if it's
01523          * in another DB then its xmin does not cover us.  Caller should have
01524          * detected this already, so we just treat any funny cases as
01525          * "transaction not found".
01526          */
01527         if (proc->databaseId != MyDatabaseId)
01528             continue;
01529 
01530         /*
01531          * Likewise, let's just make real sure its xmin does cover us.
01532          */
01533         xid = pgxact->xmin;     /* fetch just once */
01534         if (!TransactionIdIsNormal(xid) ||
01535             !TransactionIdPrecedesOrEquals(xid, xmin))
01536             continue;
01537 
01538         /*
01539          * We're good.  Install the new xmin.  As in GetSnapshotData, set
01540          * TransactionXmin too.  (Note that because snapmgr.c called
01541          * GetSnapshotData first, we'll be overwriting a valid xmin here, so
01542          * we don't check that.)
01543          */
01544         MyPgXact->xmin = TransactionXmin = xmin;
01545 
01546         result = true;
01547         break;
01548     }
01549 
01550     LWLockRelease(ProcArrayLock);
01551 
01552     return result;
01553 }
01554 
01555 /*
01556  * GetRunningTransactionData -- returns information about running transactions.
01557  *
01558  * Similar to GetSnapshotData but returns more information. We include
01559  * all PGXACTs with an assigned TransactionId, even VACUUM processes.
01560  *
01561  * We acquire XidGenLock, but the caller is responsible for releasing it.
01562  * This ensures that no new XIDs enter the proc array until the caller has
01563  * WAL-logged this snapshot, and releases the lock.
01564  *
01565  * The returned data structure is statically allocated; caller should not
01566  * modify it, and must not assume it is valid past the next call.
01567  *
01568  * This is never executed during recovery so there is no need to look at
01569  * KnownAssignedXids.
01570  *
01571  * We don't worry about updating other counters, we want to keep this as
01572  * simple as possible and leave GetSnapshotData() as the primary code for
01573  * that bookkeeping.
01574  *
01575  * Note that if any transaction has overflowed its cached subtransactions
01576  * then there is no real need include any subtransactions. That isn't a
01577  * common enough case to worry about optimising the size of the WAL record,
01578  * and we may wish to see that data for diagnostic purposes anyway.
01579  */
01580 RunningTransactions
01581 GetRunningTransactionData(void)
01582 {
01583     /* result workspace */
01584     static RunningTransactionsData CurrentRunningXactsData;
01585 
01586     ProcArrayStruct *arrayP = procArray;
01587     RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
01588     TransactionId latestCompletedXid;
01589     TransactionId oldestRunningXid;
01590     TransactionId *xids;
01591     int         index;
01592     int         count;
01593     int         subcount;
01594     bool        suboverflowed;
01595 
01596     Assert(!RecoveryInProgress());
01597 
01598     /*
01599      * Allocating space for maxProcs xids is usually overkill; numProcs would
01600      * be sufficient.  But it seems better to do the malloc while not holding
01601      * the lock, so we can't look at numProcs.  Likewise, we allocate much
01602      * more subxip storage than is probably needed.
01603      *
01604      * Should only be allocated in bgwriter, since only ever executed during
01605      * checkpoints.
01606      */
01607     if (CurrentRunningXacts->xids == NULL)
01608     {
01609         /*
01610          * First call
01611          */
01612         CurrentRunningXacts->xids = (TransactionId *)
01613             malloc(TOTAL_MAX_CACHED_SUBXIDS * sizeof(TransactionId));
01614         if (CurrentRunningXacts->xids == NULL)
01615             ereport(ERROR,
01616                     (errcode(ERRCODE_OUT_OF_MEMORY),
01617                      errmsg("out of memory")));
01618     }
01619 
01620     xids = CurrentRunningXacts->xids;
01621 
01622     count = subcount = 0;
01623     suboverflowed = false;
01624 
01625     /*
01626      * Ensure that no xids enter or leave the procarray while we obtain
01627      * snapshot.
01628      */
01629     LWLockAcquire(ProcArrayLock, LW_SHARED);
01630     LWLockAcquire(XidGenLock, LW_SHARED);
01631 
01632     latestCompletedXid = ShmemVariableCache->latestCompletedXid;
01633 
01634     oldestRunningXid = ShmemVariableCache->nextXid;
01635 
01636     /*
01637      * Spin over procArray collecting all xids
01638      */
01639     for (index = 0; index < arrayP->numProcs; index++)
01640     {
01641         int         pgprocno = arrayP->pgprocnos[index];
01642         volatile PGXACT *pgxact = &allPgXact[pgprocno];
01643         TransactionId xid;
01644 
01645         /* Fetch xid just once - see GetNewTransactionId */
01646         xid = pgxact->xid;
01647 
01648         /*
01649          * We don't need to store transactions that don't have a TransactionId
01650          * yet because they will not show as running on a standby server.
01651          */
01652         if (!TransactionIdIsValid(xid))
01653             continue;
01654 
01655         xids[count++] = xid;
01656 
01657         if (TransactionIdPrecedes(xid, oldestRunningXid))
01658             oldestRunningXid = xid;
01659 
01660         if (pgxact->overflowed)
01661             suboverflowed = true;
01662     }
01663 
01664     /*
01665      * Spin over procArray collecting all subxids, but only if there hasn't
01666      * been a suboverflow.
01667      */
01668     if (!suboverflowed)
01669     {
01670         for (index = 0; index < arrayP->numProcs; index++)
01671         {
01672             int         pgprocno = arrayP->pgprocnos[index];
01673             volatile PGPROC *proc = &allProcs[pgprocno];
01674             volatile PGXACT *pgxact = &allPgXact[pgprocno];
01675             int         nxids;
01676 
01677             /*
01678              * Save subtransaction XIDs. Other backends can't add or remove
01679              * entries while we're holding XidGenLock.
01680              */
01681             nxids = pgxact->nxids;
01682             if (nxids > 0)
01683             {
01684                 memcpy(&xids[count], (void *) proc->subxids.xids,
01685                        nxids * sizeof(TransactionId));
01686                 count += nxids;
01687                 subcount += nxids;
01688 
01689                 /*
01690                  * Top-level XID of a transaction is always less than any of
01691                  * its subxids, so we don't need to check if any of the subxids
01692                  * are smaller than oldestRunningXid
01693                  */
01694             }
01695         }
01696     }
01697 
01698     CurrentRunningXacts->xcnt = count - subcount;
01699     CurrentRunningXacts->subxcnt = subcount;
01700     CurrentRunningXacts->subxid_overflow = suboverflowed;
01701     CurrentRunningXacts->nextXid = ShmemVariableCache->nextXid;
01702     CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
01703     CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
01704 
01705     /* We don't release XidGenLock here, the caller is responsible for that */
01706     LWLockRelease(ProcArrayLock);
01707 
01708     Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
01709     Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
01710     Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
01711 
01712     return CurrentRunningXacts;
01713 }
01714 
01715 /*
01716  * GetOldestActiveTransactionId()
01717  *
01718  * Similar to GetSnapshotData but returns just oldestActiveXid. We include
01719  * all PGXACTs with an assigned TransactionId, even VACUUM processes.
01720  * We look at all databases, though there is no need to include WALSender
01721  * since this has no effect on hot standby conflicts.
01722  *
01723  * This is never executed during recovery so there is no need to look at
01724  * KnownAssignedXids.
01725  *
01726  * We don't worry about updating other counters, we want to keep this as
01727  * simple as possible and leave GetSnapshotData() as the primary code for
01728  * that bookkeeping.
01729  */
01730 TransactionId
01731 GetOldestActiveTransactionId(void)
01732 {
01733     ProcArrayStruct *arrayP = procArray;
01734     TransactionId oldestRunningXid;
01735     int         index;
01736 
01737     Assert(!RecoveryInProgress());
01738 
01739     LWLockAcquire(ProcArrayLock, LW_SHARED);
01740 
01741     /*
01742      * It's okay to read nextXid without acquiring XidGenLock because (1) we
01743      * assume TransactionIds can be read atomically and (2) we don't care if
01744      * we get a slightly stale value.  It can't be very stale anyway, because
01745      * the LWLockAcquire above will have done any necessary memory
01746      * interlocking.
01747      */
01748     oldestRunningXid = ShmemVariableCache->nextXid;
01749 
01750     /*
01751      * Spin over procArray collecting all xids and subxids.
01752      */
01753     for (index = 0; index < arrayP->numProcs; index++)
01754     {
01755         int         pgprocno = arrayP->pgprocnos[index];
01756         volatile PGXACT *pgxact = &allPgXact[pgprocno];
01757         TransactionId xid;
01758 
01759         /* Fetch xid just once - see GetNewTransactionId */
01760         xid = pgxact->xid;
01761 
01762         if (!TransactionIdIsNormal(xid))
01763             continue;
01764 
01765         if (TransactionIdPrecedes(xid, oldestRunningXid))
01766             oldestRunningXid = xid;
01767 
01768         /*
01769          * Top-level XID of a transaction is always less than any of its
01770          * subxids, so we don't need to check if any of the subxids are
01771          * smaller than oldestRunningXid
01772          */
01773     }
01774 
01775     LWLockRelease(ProcArrayLock);
01776 
01777     return oldestRunningXid;
01778 }
01779 
01780 /*
01781  * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
01782  * delaying checkpoint because they have critical actions in progress.
01783  *
01784  * Constructs an array of VXIDs of transactions that are currently in commit
01785  * critical sections, as shown by having delayChkpt set in their PGXACT.
01786  *
01787  * Returns a palloc'd array that should be freed by the caller.
01788  * *nvxids is the number of valid entries.
01789  *
01790  * Note that because backends set or clear delayChkpt without holding any lock,
01791  * the result is somewhat indeterminate, but we don't really care.  Even in
01792  * a multiprocessor with delayed writes to shared memory, it should be certain
01793  * that setting of delayChkpt will propagate to shared memory when the backend
01794  * takes a lock, so we cannot fail to see an virtual xact as delayChkpt if
01795  * it's already inserted its commit record.  Whether it takes a little while
01796  * for clearing of delayChkpt to propagate is unimportant for correctness.
01797  */
01798 VirtualTransactionId *
01799 GetVirtualXIDsDelayingChkpt(int *nvxids)
01800 {
01801     VirtualTransactionId *vxids;
01802     ProcArrayStruct *arrayP = procArray;
01803     int         count = 0;
01804     int         index;
01805 
01806     /* allocate what's certainly enough result space */
01807     vxids = (VirtualTransactionId *)
01808         palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
01809 
01810     LWLockAcquire(ProcArrayLock, LW_SHARED);
01811 
01812     for (index = 0; index < arrayP->numProcs; index++)
01813     {
01814         int     pgprocno = arrayP->pgprocnos[index];
01815         volatile PGPROC    *proc = &allProcs[pgprocno];
01816         volatile PGXACT    *pgxact = &allPgXact[pgprocno];
01817 
01818         if (pgxact->delayChkpt)
01819         {
01820             VirtualTransactionId vxid;
01821 
01822             GET_VXID_FROM_PGPROC(vxid, *proc);
01823             if (VirtualTransactionIdIsValid(vxid))
01824                 vxids[count++] = vxid;
01825         }
01826     }
01827 
01828     LWLockRelease(ProcArrayLock);
01829 
01830     *nvxids = count;
01831     return vxids;
01832 }
01833 
01834 /*
01835  * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
01836  *
01837  * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any
01838  * of the specified VXIDs are still in critical sections of code.
01839  *
01840  * Note: this is O(N^2) in the number of vxacts that are/were delaying, but
01841  * those numbers should be small enough for it not to be a problem.
01842  */
01843 bool
01844 HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
01845 {
01846     bool        result = false;
01847     ProcArrayStruct *arrayP = procArray;
01848     int         index;
01849 
01850     LWLockAcquire(ProcArrayLock, LW_SHARED);
01851 
01852     while (VirtualTransactionIdIsValid(*vxids))
01853     {
01854         for (index = 0; index < arrayP->numProcs; index++)
01855         {
01856             int     pgprocno = arrayP->pgprocnos[index];
01857             volatile PGPROC    *proc = &allProcs[pgprocno];
01858             volatile PGXACT    *pgxact = &allPgXact[pgprocno];
01859             VirtualTransactionId vxid;
01860 
01861             GET_VXID_FROM_PGPROC(vxid, *proc);
01862             if (VirtualTransactionIdIsValid(vxid))
01863             {
01864                 if (VirtualTransactionIdEquals(vxid, *vxids) &&
01865                     pgxact->delayChkpt)
01866                 {
01867                     result = true;
01868                     break;
01869                 }
01870             }
01871         }
01872 
01873         if (result)
01874             break;
01875 
01876         /* The virtual transaction is gone now, wait for the next one */
01877         vxids++;
01878     }
01879 
01880     LWLockRelease(ProcArrayLock);
01881 
01882     return result;
01883 }
01884 
01885 /*
01886  * BackendPidGetProc -- get a backend's PGPROC given its PID
01887  *
01888  * Returns NULL if not found.  Note that it is up to the caller to be
01889  * sure that the question remains meaningful for long enough for the
01890  * answer to be used ...
01891  */
01892 PGPROC *
01893 BackendPidGetProc(int pid)
01894 {
01895     PGPROC     *result = NULL;
01896     ProcArrayStruct *arrayP = procArray;
01897     int         index;
01898 
01899     if (pid == 0)               /* never match dummy PGPROCs */
01900         return NULL;
01901 
01902     LWLockAcquire(ProcArrayLock, LW_SHARED);
01903 
01904     for (index = 0; index < arrayP->numProcs; index++)
01905     {
01906         PGPROC     *proc = &allProcs[arrayP->pgprocnos[index]];
01907 
01908         if (proc->pid == pid)
01909         {
01910             result = proc;
01911             break;
01912         }
01913     }
01914 
01915     LWLockRelease(ProcArrayLock);
01916 
01917     return result;
01918 }
01919 
01920 /*
01921  * BackendXidGetPid -- get a backend's pid given its XID
01922  *
01923  * Returns 0 if not found or it's a prepared transaction.  Note that
01924  * it is up to the caller to be sure that the question remains
01925  * meaningful for long enough for the answer to be used ...
01926  *
01927  * Only main transaction Ids are considered.  This function is mainly
01928  * useful for determining what backend owns a lock.
01929  *
01930  * Beware that not every xact has an XID assigned.  However, as long as you
01931  * only call this using an XID found on disk, you're safe.
01932  */
01933 int
01934 BackendXidGetPid(TransactionId xid)
01935 {
01936     int         result = 0;
01937     ProcArrayStruct *arrayP = procArray;
01938     int         index;
01939 
01940     if (xid == InvalidTransactionId)    /* never match invalid xid */
01941         return 0;
01942 
01943     LWLockAcquire(ProcArrayLock, LW_SHARED);
01944 
01945     for (index = 0; index < arrayP->numProcs; index++)
01946     {
01947         int         pgprocno = arrayP->pgprocnos[index];
01948         volatile PGPROC *proc = &allProcs[pgprocno];
01949         volatile PGXACT *pgxact = &allPgXact[pgprocno];
01950 
01951         if (pgxact->xid == xid)
01952         {
01953             result = proc->pid;
01954             break;
01955         }
01956     }
01957 
01958     LWLockRelease(ProcArrayLock);
01959 
01960     return result;
01961 }
01962 
01963 /*
01964  * IsBackendPid -- is a given pid a running backend
01965  *
01966  * This is not called by the backend, but is called by external modules.
01967  */
01968 bool
01969 IsBackendPid(int pid)
01970 {
01971     return (BackendPidGetProc(pid) != NULL);
01972 }
01973 
01974 
01975 /*
01976  * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
01977  *
01978  * The array is palloc'd. The number of valid entries is returned into *nvxids.
01979  *
01980  * The arguments allow filtering the set of VXIDs returned.  Our own process
01981  * is always skipped.  In addition:
01982  *  If limitXmin is not InvalidTransactionId, skip processes with
01983  *      xmin > limitXmin.
01984  *  If excludeXmin0 is true, skip processes with xmin = 0.
01985  *  If allDbs is false, skip processes attached to other databases.
01986  *  If excludeVacuum isn't zero, skip processes for which
01987  *      (vacuumFlags & excludeVacuum) is not zero.
01988  *
01989  * Note: the purpose of the limitXmin and excludeXmin0 parameters is to
01990  * allow skipping backends whose oldest live snapshot is no older than
01991  * some snapshot we have.  Since we examine the procarray with only shared
01992  * lock, there are race conditions: a backend could set its xmin just after
01993  * we look.  Indeed, on multiprocessors with weak memory ordering, the
01994  * other backend could have set its xmin *before* we look.  We know however
01995  * that such a backend must have held shared ProcArrayLock overlapping our
01996  * own hold of ProcArrayLock, else we would see its xmin update.  Therefore,
01997  * any snapshot the other backend is taking concurrently with our scan cannot
01998  * consider any transactions as still running that we think are committed
01999  * (since backends must hold ProcArrayLock exclusive to commit).
02000  */
02001 VirtualTransactionId *
02002 GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
02003                       bool allDbs, int excludeVacuum,
02004                       int *nvxids)
02005 {
02006     VirtualTransactionId *vxids;
02007     ProcArrayStruct *arrayP = procArray;
02008     int         count = 0;
02009     int         index;
02010 
02011     /* allocate what's certainly enough result space */
02012     vxids = (VirtualTransactionId *)
02013         palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
02014 
02015     LWLockAcquire(ProcArrayLock, LW_SHARED);
02016 
02017     for (index = 0; index < arrayP->numProcs; index++)
02018     {
02019         int         pgprocno = arrayP->pgprocnos[index];
02020         volatile PGPROC *proc = &allProcs[pgprocno];
02021         volatile PGXACT *pgxact = &allPgXact[pgprocno];
02022 
02023         if (proc == MyProc)
02024             continue;
02025 
02026         if (excludeVacuum & pgxact->vacuumFlags)
02027             continue;
02028 
02029         if (allDbs || proc->databaseId == MyDatabaseId)
02030         {
02031             /* Fetch xmin just once - might change on us */
02032             TransactionId pxmin = pgxact->xmin;
02033 
02034             if (excludeXmin0 && !TransactionIdIsValid(pxmin))
02035                 continue;
02036 
02037             /*
02038              * InvalidTransactionId precedes all other XIDs, so a proc that
02039              * hasn't set xmin yet will not be rejected by this test.
02040              */
02041             if (!TransactionIdIsValid(limitXmin) ||
02042                 TransactionIdPrecedesOrEquals(pxmin, limitXmin))
02043             {
02044                 VirtualTransactionId vxid;
02045 
02046                 GET_VXID_FROM_PGPROC(vxid, *proc);
02047                 if (VirtualTransactionIdIsValid(vxid))
02048                     vxids[count++] = vxid;
02049             }
02050         }
02051     }
02052 
02053     LWLockRelease(ProcArrayLock);
02054 
02055     *nvxids = count;
02056     return vxids;
02057 }
02058 
02059 /*
02060  * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs.
02061  *
02062  * Usage is limited to conflict resolution during recovery on standby servers.
02063  * limitXmin is supplied as either latestRemovedXid, or InvalidTransactionId
02064  * in cases where we cannot accurately determine a value for latestRemovedXid.
02065  *
02066  * If limitXmin is InvalidTransactionId then we want to kill everybody,
02067  * so we're not worried if they have a snapshot or not, nor does it really
02068  * matter what type of lock we hold.
02069  *
02070  * All callers that are checking xmins always now supply a valid and useful
02071  * value for limitXmin. The limitXmin is always lower than the lowest
02072  * numbered KnownAssignedXid that is not already a FATAL error. This is
02073  * because we only care about cleanup records that are cleaning up tuple
02074  * versions from committed transactions. In that case they will only occur
02075  * at the point where the record is less than the lowest running xid. That
02076  * allows us to say that if any backend takes a snapshot concurrently with
02077  * us then the conflict assessment made here would never include the snapshot
02078  * that is being derived. So we take LW_SHARED on the ProcArray and allow
02079  * concurrent snapshots when limitXmin is valid. We might think about adding
02080  *   Assert(limitXmin < lowest(KnownAssignedXids))
02081  * but that would not be true in the case of FATAL errors lagging in array,
02082  * but we already know those are bogus anyway, so we skip that test.
02083  *
02084  * If dbOid is valid we skip backends attached to other databases.
02085  *
02086  * Be careful to *not* pfree the result from this function. We reuse
02087  * this array sufficiently often that we use malloc for the result.
02088  */
02089 VirtualTransactionId *
02090 GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
02091 {
02092     static VirtualTransactionId *vxids;
02093     ProcArrayStruct *arrayP = procArray;
02094     int         count = 0;
02095     int         index;
02096 
02097     /*
02098      * If first time through, get workspace to remember main XIDs in. We
02099      * malloc it permanently to avoid repeated palloc/pfree overhead. Allow
02100      * result space, remembering room for a terminator.
02101      */
02102     if (vxids == NULL)
02103     {
02104         vxids = (VirtualTransactionId *)
02105             malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
02106         if (vxids == NULL)
02107             ereport(ERROR,
02108                     (errcode(ERRCODE_OUT_OF_MEMORY),
02109                      errmsg("out of memory")));
02110     }
02111 
02112     LWLockAcquire(ProcArrayLock, LW_SHARED);
02113 
02114     for (index = 0; index < arrayP->numProcs; index++)
02115     {
02116         int         pgprocno = arrayP->pgprocnos[index];
02117         volatile PGPROC *proc = &allProcs[pgprocno];
02118         volatile PGXACT *pgxact = &allPgXact[pgprocno];
02119 
02120         /* Exclude prepared transactions */
02121         if (proc->pid == 0)
02122             continue;
02123 
02124         if (!OidIsValid(dbOid) ||
02125             proc->databaseId == dbOid)
02126         {
02127             /* Fetch xmin just once - can't change on us, but good coding */
02128             TransactionId pxmin = pgxact->xmin;
02129 
02130             /*
02131              * We ignore an invalid pxmin because this means that backend has
02132              * no snapshot and cannot get another one while we hold exclusive
02133              * lock.
02134              */
02135             if (!TransactionIdIsValid(limitXmin) ||
02136                 (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin)))
02137             {
02138                 VirtualTransactionId vxid;
02139 
02140                 GET_VXID_FROM_PGPROC(vxid, *proc);
02141                 if (VirtualTransactionIdIsValid(vxid))
02142                     vxids[count++] = vxid;
02143             }
02144         }
02145     }
02146 
02147     LWLockRelease(ProcArrayLock);
02148 
02149     /* add the terminator */
02150     vxids[count].backendId = InvalidBackendId;
02151     vxids[count].localTransactionId = InvalidLocalTransactionId;
02152 
02153     return vxids;
02154 }
02155 
02156 /*
02157  * CancelVirtualTransaction - used in recovery conflict processing
02158  *
02159  * Returns pid of the process signaled, or 0 if not found.
02160  */
02161 pid_t
02162 CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
02163 {
02164     ProcArrayStruct *arrayP = procArray;
02165     int         index;
02166     pid_t       pid = 0;
02167 
02168     LWLockAcquire(ProcArrayLock, LW_SHARED);
02169 
02170     for (index = 0; index < arrayP->numProcs; index++)
02171     {
02172         int         pgprocno = arrayP->pgprocnos[index];
02173         volatile PGPROC *proc = &allProcs[pgprocno];
02174         VirtualTransactionId procvxid;
02175 
02176         GET_VXID_FROM_PGPROC(procvxid, *proc);
02177 
02178         if (procvxid.backendId == vxid.backendId &&
02179             procvxid.localTransactionId == vxid.localTransactionId)
02180         {
02181             proc->recoveryConflictPending = true;
02182             pid = proc->pid;
02183             if (pid != 0)
02184             {
02185                 /*
02186                  * Kill the pid if it's still here. If not, that's what we
02187                  * wanted so ignore any errors.
02188                  */
02189                 (void) SendProcSignal(pid, sigmode, vxid.backendId);
02190             }
02191             break;
02192         }
02193     }
02194 
02195     LWLockRelease(ProcArrayLock);
02196 
02197     return pid;
02198 }
02199 
02200 /*
02201  * MinimumActiveBackends --- count backends (other than myself) that are
02202  *      in active transactions.  Return true if the count exceeds the
02203  *      minimum threshold passed.  This is used as a heuristic to decide if
02204  *      a pre-XLOG-flush delay is worthwhile during commit.
02205  *
02206  * Do not count backends that are blocked waiting for locks, since they are
02207  * not going to get to run until someone else commits.
02208  */
02209 bool
02210 MinimumActiveBackends(int min)
02211 {
02212     ProcArrayStruct *arrayP = procArray;
02213     int         count = 0;
02214     int         index;
02215 
02216     /* Quick short-circuit if no minimum is specified */
02217     if (min == 0)
02218         return true;
02219 
02220     /*
02221      * Note: for speed, we don't acquire ProcArrayLock.  This is a little bit
02222      * bogus, but since we are only testing fields for zero or nonzero, it
02223      * should be OK.  The result is only used for heuristic purposes anyway...
02224      */
02225     for (index = 0; index < arrayP->numProcs; index++)
02226     {
02227         int         pgprocno = arrayP->pgprocnos[index];
02228         volatile PGPROC *proc = &allProcs[pgprocno];
02229         volatile PGXACT *pgxact = &allPgXact[pgprocno];
02230 
02231         /*
02232          * Since we're not holding a lock, need to check that the pointer is
02233          * valid. Someone holding the lock could have incremented numProcs
02234          * already, but not yet inserted a valid pointer to the array.
02235          *
02236          * If someone just decremented numProcs, 'proc' could also point to a
02237          * PGPROC entry that's no longer in the array. It still points to a
02238          * PGPROC struct, though, because freed PGPROC entries just go to the
02239          * free list and are recycled. Its contents are nonsense in that case,
02240          * but that's acceptable for this function.
02241          */
02242         if (proc == NULL)
02243             continue;
02244 
02245         if (proc == MyProc)
02246             continue;           /* do not count myself */
02247         if (pgxact->xid == InvalidTransactionId)
02248             continue;           /* do not count if no XID assigned */
02249         if (proc->pid == 0)
02250             continue;           /* do not count prepared xacts */
02251         if (proc->waitLock != NULL)
02252             continue;           /* do not count if blocked on a lock */
02253         count++;
02254         if (count >= min)
02255             break;
02256     }
02257 
02258     return count >= min;
02259 }
02260 
02261 /*
02262  * CountDBBackends --- count backends that are using specified database
02263  */
02264 int
02265 CountDBBackends(Oid databaseid)
02266 {
02267     ProcArrayStruct *arrayP = procArray;
02268     int         count = 0;
02269     int         index;
02270 
02271     LWLockAcquire(ProcArrayLock, LW_SHARED);
02272 
02273     for (index = 0; index < arrayP->numProcs; index++)
02274     {
02275         int         pgprocno = arrayP->pgprocnos[index];
02276         volatile PGPROC *proc = &allProcs[pgprocno];
02277 
02278         if (proc->pid == 0)
02279             continue;           /* do not count prepared xacts */
02280         if (!OidIsValid(databaseid) ||
02281             proc->databaseId == databaseid)
02282             count++;
02283     }
02284 
02285     LWLockRelease(ProcArrayLock);
02286 
02287     return count;
02288 }
02289 
02290 /*
02291  * CancelDBBackends --- cancel backends that are using specified database
02292  */
02293 void
02294 CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
02295 {
02296     ProcArrayStruct *arrayP = procArray;
02297     int         index;
02298     pid_t       pid = 0;
02299 
02300     /* tell all backends to die */
02301     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
02302 
02303     for (index = 0; index < arrayP->numProcs; index++)
02304     {
02305         int         pgprocno = arrayP->pgprocnos[index];
02306         volatile PGPROC *proc = &allProcs[pgprocno];
02307 
02308         if (databaseid == InvalidOid || proc->databaseId == databaseid)
02309         {
02310             VirtualTransactionId procvxid;
02311 
02312             GET_VXID_FROM_PGPROC(procvxid, *proc);
02313 
02314             proc->recoveryConflictPending = conflictPending;
02315             pid = proc->pid;
02316             if (pid != 0)
02317             {
02318                 /*
02319                  * Kill the pid if it's still here. If not, that's what we
02320                  * wanted so ignore any errors.
02321                  */
02322                 (void) SendProcSignal(pid, sigmode, procvxid.backendId);
02323             }
02324         }
02325     }
02326 
02327     LWLockRelease(ProcArrayLock);
02328 }
02329 
02330 /*
02331  * CountUserBackends --- count backends that are used by specified user
02332  */
02333 int
02334 CountUserBackends(Oid roleid)
02335 {
02336     ProcArrayStruct *arrayP = procArray;
02337     int         count = 0;
02338     int         index;
02339 
02340     LWLockAcquire(ProcArrayLock, LW_SHARED);
02341 
02342     for (index = 0; index < arrayP->numProcs; index++)
02343     {
02344         int         pgprocno = arrayP->pgprocnos[index];
02345         volatile PGPROC *proc = &allProcs[pgprocno];
02346 
02347         if (proc->pid == 0)
02348             continue;           /* do not count prepared xacts */
02349         if (proc->roleId == roleid)
02350             count++;
02351     }
02352 
02353     LWLockRelease(ProcArrayLock);
02354 
02355     return count;
02356 }
02357 
02358 /*
02359  * CountOtherDBBackends -- check for other backends running in the given DB
02360  *
02361  * If there are other backends in the DB, we will wait a maximum of 5 seconds
02362  * for them to exit.  Autovacuum backends are encouraged to exit early by
02363  * sending them SIGTERM, but normal user backends are just waited for.
02364  *
02365  * The current backend is always ignored; it is caller's responsibility to
02366  * check whether the current backend uses the given DB, if it's important.
02367  *
02368  * Returns TRUE if there are (still) other backends in the DB, FALSE if not.
02369  * Also, *nbackends and *nprepared are set to the number of other backends
02370  * and prepared transactions in the DB, respectively.
02371  *
02372  * This function is used to interlock DROP DATABASE and related commands
02373  * against there being any active backends in the target DB --- dropping the
02374  * DB while active backends remain would be a Bad Thing.  Note that we cannot
02375  * detect here the possibility of a newly-started backend that is trying to
02376  * connect to the doomed database, so additional interlocking is needed during
02377  * backend startup.  The caller should normally hold an exclusive lock on the
02378  * target DB before calling this, which is one reason we mustn't wait
02379  * indefinitely.
02380  */
02381 bool
02382 CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
02383 {
02384     ProcArrayStruct *arrayP = procArray;
02385 
02386 #define MAXAUTOVACPIDS  10      /* max autovacs to SIGTERM per iteration */
02387     int         autovac_pids[MAXAUTOVACPIDS];
02388     int         tries;
02389 
02390     /* 50 tries with 100ms sleep between tries makes 5 sec total wait */
02391     for (tries = 0; tries < 50; tries++)
02392     {
02393         int         nautovacs = 0;
02394         bool        found = false;
02395         int         index;
02396 
02397         CHECK_FOR_INTERRUPTS();
02398 
02399         *nbackends = *nprepared = 0;
02400 
02401         LWLockAcquire(ProcArrayLock, LW_SHARED);
02402 
02403         for (index = 0; index < arrayP->numProcs; index++)
02404         {
02405             int         pgprocno = arrayP->pgprocnos[index];
02406             volatile PGPROC *proc = &allProcs[pgprocno];
02407             volatile PGXACT *pgxact = &allPgXact[pgprocno];
02408 
02409             if (proc->databaseId != databaseId)
02410                 continue;
02411             if (proc == MyProc)
02412                 continue;
02413 
02414             found = true;
02415 
02416             if (proc->pid == 0)
02417                 (*nprepared)++;
02418             else
02419             {
02420                 (*nbackends)++;
02421                 if ((pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) &&
02422                     nautovacs < MAXAUTOVACPIDS)
02423                     autovac_pids[nautovacs++] = proc->pid;
02424             }
02425         }
02426 
02427         LWLockRelease(ProcArrayLock);
02428 
02429         if (!found)
02430             return false;       /* no conflicting backends, so done */
02431 
02432         /*
02433          * Send SIGTERM to any conflicting autovacuums before sleeping. We
02434          * postpone this step until after the loop because we don't want to
02435          * hold ProcArrayLock while issuing kill(). We have no idea what might
02436          * block kill() inside the kernel...
02437          */
02438         for (index = 0; index < nautovacs; index++)
02439             (void) kill(autovac_pids[index], SIGTERM);  /* ignore any error */
02440 
02441         /* sleep, then try again */
02442         pg_usleep(100 * 1000L); /* 100ms */
02443     }
02444 
02445     return true;                /* timed out, still conflicts */
02446 }
02447 
02448 
02449 #define XidCacheRemove(i) \
02450     do { \
02451         MyProc->subxids.xids[i] = MyProc->subxids.xids[MyPgXact->nxids - 1]; \
02452         MyPgXact->nxids--; \
02453     } while (0)
02454 
02455 /*
02456  * XidCacheRemoveRunningXids
02457  *
02458  * Remove a bunch of TransactionIds from the list of known-running
02459  * subtransactions for my backend.  Both the specified xid and those in
02460  * the xids[] array (of length nxids) are removed from the subxids cache.
02461  * latestXid must be the latest XID among the group.
02462  */
02463 void
02464 XidCacheRemoveRunningXids(TransactionId xid,
02465                           int nxids, const TransactionId *xids,
02466                           TransactionId latestXid)
02467 {
02468     int         i,
02469                 j;
02470 
02471     Assert(TransactionIdIsValid(xid));
02472 
02473     /*
02474      * We must hold ProcArrayLock exclusively in order to remove transactions
02475      * from the PGPROC array.  (See src/backend/access/transam/README.)  It's
02476      * possible this could be relaxed since we know this routine is only used
02477      * to abort subtransactions, but pending closer analysis we'd best be
02478      * conservative.
02479      */
02480     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
02481 
02482     /*
02483      * Under normal circumstances xid and xids[] will be in increasing order,
02484      * as will be the entries in subxids.  Scan backwards to avoid O(N^2)
02485      * behavior when removing a lot of xids.
02486      */
02487     for (i = nxids - 1; i >= 0; i--)
02488     {
02489         TransactionId anxid = xids[i];
02490 
02491         for (j = MyPgXact->nxids - 1; j >= 0; j--)
02492         {
02493             if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
02494             {
02495                 XidCacheRemove(j);
02496                 break;
02497             }
02498         }
02499 
02500         /*
02501          * Ordinarily we should have found it, unless the cache has
02502          * overflowed. However it's also possible for this routine to be
02503          * invoked multiple times for the same subtransaction, in case of an
02504          * error during AbortSubTransaction.  So instead of Assert, emit a
02505          * debug warning.
02506          */
02507         if (j < 0 && !MyPgXact->overflowed)
02508             elog(WARNING, "did not find subXID %u in MyProc", anxid);
02509     }
02510 
02511     for (j = MyPgXact->nxids - 1; j >= 0; j--)
02512     {
02513         if (TransactionIdEquals(MyProc->subxids.xids[j], xid))
02514         {
02515             XidCacheRemove(j);
02516             break;
02517         }
02518     }
02519     /* Ordinarily we should have found it, unless the cache has overflowed */
02520     if (j < 0 && !MyPgXact->overflowed)
02521         elog(WARNING, "did not find subXID %u in MyProc", xid);
02522 
02523     /* Also advance global latestCompletedXid while holding the lock */
02524     if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
02525                               latestXid))
02526         ShmemVariableCache->latestCompletedXid = latestXid;
02527 
02528     LWLockRelease(ProcArrayLock);
02529 }
02530 
02531 #ifdef XIDCACHE_DEBUG
02532 
02533 /*
02534  * Print stats about effectiveness of XID cache
02535  */
02536 static void
02537 DisplayXidCache(void)
02538 {
02539     fprintf(stderr,
02540             "XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n",
02541             xc_by_recent_xmin,
02542             xc_by_known_xact,
02543             xc_by_my_xact,
02544             xc_by_latest_xid,
02545             xc_by_main_xid,
02546             xc_by_child_xid,
02547             xc_by_known_assigned,
02548             xc_no_overflow,
02549             xc_slow_answer);
02550 }
02551 #endif   /* XIDCACHE_DEBUG */
02552 
02553 
02554 /* ----------------------------------------------
02555  *      KnownAssignedTransactions sub-module
02556  * ----------------------------------------------
02557  */
02558 
02559 /*
02560  * In Hot Standby mode, we maintain a list of transactions that are (or were)
02561  * running in the master at the current point in WAL.  These XIDs must be
02562  * treated as running by standby transactions, even though they are not in
02563  * the standby server's PGXACT array.
02564  *
02565  * We record all XIDs that we know have been assigned.  That includes all the
02566  * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have
02567  * been assigned.  We can deduce the existence of unobserved XIDs because we
02568  * know XIDs are assigned in sequence, with no gaps.  The KnownAssignedXids
02569  * list expands as new XIDs are observed or inferred, and contracts when
02570  * transaction completion records arrive.
02571  *
02572  * During hot standby we do not fret too much about the distinction between
02573  * top-level XIDs and subtransaction XIDs. We store both together in the
02574  * KnownAssignedXids list.  In backends, this is copied into snapshots in
02575  * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot()
02576  * doesn't care about the distinction either.  Subtransaction XIDs are
02577  * effectively treated as top-level XIDs and in the typical case pg_subtrans
02578  * links are *not* maintained (which does not affect visibility).
02579  *
02580  * We have room in KnownAssignedXids and in snapshots to hold maxProcs *
02581  * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every master transaction must
02582  * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at
02583  * least every PGPROC_MAX_CACHED_SUBXIDS.  When we receive one of these
02584  * records, we mark the subXIDs as children of the top XID in pg_subtrans,
02585  * and then remove them from KnownAssignedXids.  This prevents overflow of
02586  * KnownAssignedXids and snapshots, at the cost that status checks for these
02587  * subXIDs will take a slower path through TransactionIdIsInProgress().
02588  * This means that KnownAssignedXids is not necessarily complete for subXIDs,
02589  * though it should be complete for top-level XIDs; this is the same situation
02590  * that holds with respect to the PGPROC entries in normal running.
02591  *
02592  * When we throw away subXIDs from KnownAssignedXids, we need to keep track of
02593  * that, similarly to tracking overflow of a PGPROC's subxids array.  We do
02594  * that by remembering the lastOverflowedXID, ie the last thrown-away subXID.
02595  * As long as that is within the range of interesting XIDs, we have to assume
02596  * that subXIDs are missing from snapshots.  (Note that subXID overflow occurs
02597  * on primary when 65th subXID arrives, whereas on standby it occurs when 64th
02598  * subXID arrives - that is not an error.)
02599  *
02600  * Should a backend on primary somehow disappear before it can write an abort
02601  * record, then we just leave those XIDs in KnownAssignedXids. They actually
02602  * aborted but we think they were running; the distinction is irrelevant
02603  * because either way any changes done by the transaction are not visible to
02604  * backends in the standby.  We prune KnownAssignedXids when
02605  * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the
02606  * array due to such dead XIDs.
02607  */
02608 
02609 /*
02610  * RecordKnownAssignedTransactionIds
02611  *      Record the given XID in KnownAssignedXids, as well as any preceding
02612  *      unobserved XIDs.
02613  *
02614  * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
02615  * associated with a transaction. Must be called for each record after we
02616  * have executed StartupCLOG() et al, since we must ExtendCLOG() etc..
02617  *
02618  * Called during recovery in analogy with and in place of GetNewTransactionId()
02619  */
02620 void
02621 RecordKnownAssignedTransactionIds(TransactionId xid)
02622 {
02623     Assert(standbyState >= STANDBY_INITIALIZED);
02624     Assert(TransactionIdIsValid(xid));
02625 
02626     elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
02627          xid, latestObservedXid);
02628 
02629     /*
02630      * If the KnownAssignedXids machinery isn't up yet, do nothing.
02631      */
02632     if (standbyState <= STANDBY_INITIALIZED)
02633         return;
02634 
02635     Assert(TransactionIdIsValid(latestObservedXid));
02636 
02637     /*
02638      * When a newly observed xid arrives, it is frequently the case that it is
02639      * *not* the next xid in sequence. When this occurs, we must treat the
02640      * intervening xids as running also.
02641      */
02642     if (TransactionIdFollows(xid, latestObservedXid))
02643     {
02644         TransactionId next_expected_xid;
02645 
02646         /*
02647          * Extend clog and subtrans like we do in GetNewTransactionId() during
02648          * normal operation using individual extend steps. Typical case
02649          * requires almost no activity.
02650          */
02651         next_expected_xid = latestObservedXid;
02652         TransactionIdAdvance(next_expected_xid);
02653         while (TransactionIdPrecedesOrEquals(next_expected_xid, xid))
02654         {
02655             ExtendCLOG(next_expected_xid);
02656             ExtendSUBTRANS(next_expected_xid);
02657 
02658             TransactionIdAdvance(next_expected_xid);
02659         }
02660 
02661         /*
02662          * Add the new xids onto the KnownAssignedXids array.
02663          */
02664         next_expected_xid = latestObservedXid;
02665         TransactionIdAdvance(next_expected_xid);
02666         KnownAssignedXidsAdd(next_expected_xid, xid, false);
02667 
02668         /*
02669          * Now we can advance latestObservedXid
02670          */
02671         latestObservedXid = xid;
02672 
02673         /* ShmemVariableCache->nextXid must be beyond any observed xid */
02674         next_expected_xid = latestObservedXid;
02675         TransactionIdAdvance(next_expected_xid);
02676         LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
02677         ShmemVariableCache->nextXid = next_expected_xid;
02678         LWLockRelease(XidGenLock);
02679     }
02680 }
02681 
02682 /*
02683  * ExpireTreeKnownAssignedTransactionIds
02684  *      Remove the given XIDs from KnownAssignedXids.
02685  *
02686  * Called during recovery in analogy with and in place of ProcArrayEndTransaction()
02687  */
02688 void
02689 ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids,
02690                                TransactionId *subxids, TransactionId max_xid)
02691 {
02692     Assert(standbyState >= STANDBY_INITIALIZED);
02693 
02694     /*
02695      * Uses same locking as transaction commit
02696      */
02697     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
02698 
02699     KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
02700 
02701     /* As in ProcArrayEndTransaction, advance latestCompletedXid */
02702     if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
02703                               max_xid))
02704         ShmemVariableCache->latestCompletedXid = max_xid;
02705 
02706     LWLockRelease(ProcArrayLock);
02707 }
02708 
02709 /*
02710  * ExpireAllKnownAssignedTransactionIds
02711  *      Remove all entries in KnownAssignedXids
02712  */
02713 void
02714 ExpireAllKnownAssignedTransactionIds(void)
02715 {
02716     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
02717     KnownAssignedXidsRemovePreceding(InvalidTransactionId);
02718     LWLockRelease(ProcArrayLock);
02719 }
02720 
02721 /*
02722  * ExpireOldKnownAssignedTransactionIds
02723  *      Remove KnownAssignedXids entries preceding the given XID
02724  */
02725 void
02726 ExpireOldKnownAssignedTransactionIds(TransactionId xid)
02727 {
02728     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
02729     KnownAssignedXidsRemovePreceding(xid);
02730     LWLockRelease(ProcArrayLock);
02731 }
02732 
02733 
02734 /*
02735  * Private module functions to manipulate KnownAssignedXids
02736  *
02737  * There are 5 main uses of the KnownAssignedXids data structure:
02738  *
02739  *  * backends taking snapshots - all valid XIDs need to be copied out
02740  *  * backends seeking to determine presence of a specific XID
02741  *  * startup process adding new known-assigned XIDs
02742  *  * startup process removing specific XIDs as transactions end
02743  *  * startup process pruning array when special WAL records arrive
02744  *
02745  * This data structure is known to be a hot spot during Hot Standby, so we
02746  * go to some lengths to make these operations as efficient and as concurrent
02747  * as possible.
02748  *
02749  * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes
02750  * order, to be exact --- to allow binary search for specific XIDs.  Note:
02751  * in general TransactionIdPrecedes would not provide a total order, but
02752  * we know that the entries present at any instant should not extend across
02753  * a large enough fraction of XID space to wrap around (the master would
02754  * shut down for fear of XID wrap long before that happens).  So it's OK to
02755  * use TransactionIdPrecedes as a binary-search comparator.
02756  *
02757  * It's cheap to maintain the sortedness during insertions, since new known
02758  * XIDs are always reported in XID order; we just append them at the right.
02759  *
02760  * To keep individual deletions cheap, we need to allow gaps in the array.
02761  * This is implemented by marking array elements as valid or invalid using
02762  * the parallel boolean array KnownAssignedXidsValid[].  A deletion is done
02763  * by setting KnownAssignedXidsValid[i] to false, *without* clearing the
02764  * XID entry itself.  This preserves the property that the XID entries are
02765  * sorted, so we can do binary searches easily.  Periodically we compress
02766  * out the unused entries; that's much cheaper than having to compress the
02767  * array immediately on every deletion.
02768  *
02769  * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
02770  * are those with indexes tail <= i < head; items outside this subscript range
02771  * have unspecified contents.  When head reaches the end of the array, we
02772  * force compression of unused entries rather than wrapping around, since
02773  * allowing wraparound would greatly complicate the search logic.  We maintain
02774  * an explicit tail pointer so that pruning of old XIDs can be done without
02775  * immediately moving the array contents.  In most cases only a small fraction
02776  * of the array contains valid entries at any instant.
02777  *
02778  * Although only the startup process can ever change the KnownAssignedXids
02779  * data structure, we still need interlocking so that standby backends will
02780  * not observe invalid intermediate states.  The convention is that backends
02781  * must hold shared ProcArrayLock to examine the array.  To remove XIDs from
02782  * the array, the startup process must hold ProcArrayLock exclusively, for
02783  * the usual transactional reasons (compare commit/abort of a transaction
02784  * during normal running).  Compressing unused entries out of the array
02785  * likewise requires exclusive lock.  To add XIDs to the array, we just insert
02786  * them into slots to the right of the head pointer and then advance the head
02787  * pointer.  This wouldn't require any lock at all, except that on machines
02788  * with weak memory ordering we need to be careful that other processors
02789  * see the array element changes before they see the head pointer change.
02790  * We handle this by using a spinlock to protect reads and writes of the
02791  * head/tail pointers.  (We could dispense with the spinlock if we were to
02792  * create suitable memory access barrier primitives and use those instead.)
02793  * The spinlock must be taken to read or write the head/tail pointers unless
02794  * the caller holds ProcArrayLock exclusively.
02795  *
02796  * Algorithmic analysis:
02797  *
02798  * If we have a maximum of M slots, with N XIDs currently spread across
02799  * S elements then we have N <= S <= M always.
02800  *
02801  *  * Adding a new XID is O(1) and needs little locking (unless compression
02802  *      must happen)
02803  *  * Compressing the array is O(S) and requires exclusive lock
02804  *  * Removing an XID is O(logS) and requires exclusive lock
02805  *  * Taking a snapshot is O(S) and requires shared lock
02806  *  * Checking for an XID is O(logS) and requires shared lock
02807  *
02808  * In comparison, using a hash table for KnownAssignedXids would mean that
02809  * taking snapshots would be O(M). If we can maintain S << M then the
02810  * sorted array technique will deliver significantly faster snapshots.
02811  * If we try to keep S too small then we will spend too much time compressing,
02812  * so there is an optimal point for any workload mix. We use a heuristic to
02813  * decide when to compress the array, though trimming also helps reduce
02814  * frequency of compressing. The heuristic requires us to track the number of
02815  * currently valid XIDs in the array.
02816  */
02817 
02818 
02819 /*
02820  * Compress KnownAssignedXids by shifting valid data down to the start of the
02821  * array, removing any gaps.
02822  *
02823  * A compression step is forced if "force" is true, otherwise we do it
02824  * only if a heuristic indicates it's a good time to do it.
02825  *
02826  * Caller must hold ProcArrayLock in exclusive mode.
02827  */
02828 static void
02829 KnownAssignedXidsCompress(bool force)
02830 {
02831     /* use volatile pointer to prevent code rearrangement */
02832     volatile ProcArrayStruct *pArray = procArray;
02833     int         head,
02834                 tail;
02835     int         compress_index;
02836     int         i;
02837 
02838     /* no spinlock required since we hold ProcArrayLock exclusively */
02839     head = pArray->headKnownAssignedXids;
02840     tail = pArray->tailKnownAssignedXids;
02841 
02842     if (!force)
02843     {
02844         /*
02845          * If we can choose how much to compress, use a heuristic to avoid
02846          * compressing too often or not often enough.
02847          *
02848          * Heuristic is if we have a large enough current spread and less than
02849          * 50% of the elements are currently in use, then compress. This
02850          * should ensure we compress fairly infrequently. We could compress
02851          * less often though the virtual array would spread out more and
02852          * snapshots would become more expensive.
02853          */
02854         int         nelements = head - tail;
02855 
02856         if (nelements < 4 * PROCARRAY_MAXPROCS ||
02857             nelements < 2 * pArray->numKnownAssignedXids)
02858             return;
02859     }
02860 
02861     /*
02862      * We compress the array by reading the valid values from tail to head,
02863      * re-aligning data to 0th element.
02864      */
02865     compress_index = 0;
02866     for (i = tail; i < head; i++)
02867     {
02868         if (KnownAssignedXidsValid[i])
02869         {
02870             KnownAssignedXids[compress_index] = KnownAssignedXids[i];
02871             KnownAssignedXidsValid[compress_index] = true;
02872             compress_index++;
02873         }
02874     }
02875 
02876     pArray->tailKnownAssignedXids = 0;
02877     pArray->headKnownAssignedXids = compress_index;
02878 }
02879 
02880 /*
02881  * Add xids into KnownAssignedXids at the head of the array.
02882  *
02883  * xids from from_xid to to_xid, inclusive, are added to the array.
02884  *
02885  * If exclusive_lock is true then caller already holds ProcArrayLock in
02886  * exclusive mode, so we need no extra locking here.  Else caller holds no
02887  * lock, so we need to be sure we maintain sufficient interlocks against
02888  * concurrent readers.  (Only the startup process ever calls this, so no need
02889  * to worry about concurrent writers.)
02890  */
02891 static void
02892 KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
02893                      bool exclusive_lock)
02894 {
02895     /* use volatile pointer to prevent code rearrangement */
02896     volatile ProcArrayStruct *pArray = procArray;
02897     TransactionId next_xid;
02898     int         head,
02899                 tail;
02900     int         nxids;
02901     int         i;
02902 
02903     Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));
02904 
02905     /*
02906      * Calculate how many array slots we'll need.  Normally this is cheap; in
02907      * the unusual case where the XIDs cross the wrap point, we do it the hard
02908      * way.
02909      */
02910     if (to_xid >= from_xid)
02911         nxids = to_xid - from_xid + 1;
02912     else
02913     {
02914         nxids = 1;
02915         next_xid = from_xid;
02916         while (TransactionIdPrecedes(next_xid, to_xid))
02917         {
02918             nxids++;
02919             TransactionIdAdvance(next_xid);
02920         }
02921     }
02922 
02923     /*
02924      * Since only the startup process modifies the head/tail pointers, we
02925      * don't need a lock to read them here.
02926      */
02927     head = pArray->headKnownAssignedXids;
02928     tail = pArray->tailKnownAssignedXids;
02929 
02930     Assert(head >= 0 && head <= pArray->maxKnownAssignedXids);
02931     Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids);
02932 
02933     /*
02934      * Verify that insertions occur in TransactionId sequence.  Note that even
02935      * if the last existing element is marked invalid, it must still have a
02936      * correctly sequenced XID value.
02937      */
02938     if (head > tail &&
02939         TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid))
02940     {
02941         KnownAssignedXidsDisplay(LOG);
02942         elog(ERROR, "out-of-order XID insertion in KnownAssignedXids");
02943     }
02944 
02945     /*
02946      * If our xids won't fit in the remaining space, compress out free space
02947      */
02948     if (head + nxids > pArray->maxKnownAssignedXids)
02949     {
02950         /* must hold lock to compress */
02951         if (!exclusive_lock)
02952             LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
02953 
02954         KnownAssignedXidsCompress(true);
02955 
02956         head = pArray->headKnownAssignedXids;
02957         /* note: we no longer care about the tail pointer */
02958 
02959         if (!exclusive_lock)
02960             LWLockRelease(ProcArrayLock);
02961 
02962         /*
02963          * If it still won't fit then we're out of memory
02964          */
02965         if (head + nxids > pArray->maxKnownAssignedXids)
02966             elog(ERROR, "too many KnownAssignedXids");
02967     }
02968 
02969     /* Now we can insert the xids into the space starting at head */
02970     next_xid = from_xid;
02971     for (i = 0; i < nxids; i++)
02972     {
02973         KnownAssignedXids[head] = next_xid;
02974         KnownAssignedXidsValid[head] = true;
02975         TransactionIdAdvance(next_xid);
02976         head++;
02977     }
02978 
02979     /* Adjust count of number of valid entries */
02980     pArray->numKnownAssignedXids += nxids;
02981 
02982     /*
02983      * Now update the head pointer.  We use a spinlock to protect this
02984      * pointer, not because the update is likely to be non-atomic, but to
02985      * ensure that other processors see the above array updates before they
02986      * see the head pointer change.
02987      *
02988      * If we're holding ProcArrayLock exclusively, there's no need to take the
02989      * spinlock.
02990      */
02991     if (exclusive_lock)
02992         pArray->headKnownAssignedXids = head;
02993     else
02994     {
02995         SpinLockAcquire(&pArray->known_assigned_xids_lck);
02996         pArray->headKnownAssignedXids = head;
02997         SpinLockRelease(&pArray->known_assigned_xids_lck);
02998     }
02999 }
03000 
03001 /*
03002  * KnownAssignedXidsSearch
03003  *
03004  * Searches KnownAssignedXids for a specific xid and optionally removes it.
03005  * Returns true if it was found, false if not.
03006  *
03007  * Caller must hold ProcArrayLock in shared or exclusive mode.
03008  * Exclusive lock must be held for remove = true.
03009  */
03010 static bool
03011 KnownAssignedXidsSearch(TransactionId xid, bool remove)
03012 {
03013     /* use volatile pointer to prevent code rearrangement */
03014     volatile ProcArrayStruct *pArray = procArray;
03015     int         first,
03016                 last;
03017     int         head;
03018     int         tail;
03019     int         result_index = -1;
03020 
03021     if (remove)
03022     {
03023         /* we hold ProcArrayLock exclusively, so no need for spinlock */
03024         tail = pArray->tailKnownAssignedXids;
03025         head = pArray->headKnownAssignedXids;
03026     }
03027     else
03028     {
03029         /* take spinlock to ensure we see up-to-date array contents */
03030         SpinLockAcquire(&pArray->known_assigned_xids_lck);
03031         tail = pArray->tailKnownAssignedXids;
03032         head = pArray->headKnownAssignedXids;
03033         SpinLockRelease(&pArray->known_assigned_xids_lck);
03034     }
03035 
03036     /*
03037      * Standard binary search.  Note we can ignore the KnownAssignedXidsValid
03038      * array here, since even invalid entries will contain sorted XIDs.
03039      */
03040     first = tail;
03041     last = head - 1;
03042     while (first <= last)
03043     {
03044         int         mid_index;
03045         TransactionId mid_xid;
03046 
03047         mid_index = (first + last) / 2;
03048         mid_xid = KnownAssignedXids[mid_index];
03049 
03050         if (xid == mid_xid)
03051         {
03052             result_index = mid_index;
03053             break;
03054         }
03055         else if (TransactionIdPrecedes(xid, mid_xid))
03056             last = mid_index - 1;
03057         else
03058             first = mid_index + 1;
03059     }
03060 
03061     if (result_index < 0)
03062         return false;           /* not in array */
03063 
03064     if (!KnownAssignedXidsValid[result_index])
03065         return false;           /* in array, but invalid */
03066 
03067     if (remove)
03068     {
03069         KnownAssignedXidsValid[result_index] = false;
03070 
03071         pArray->numKnownAssignedXids--;
03072         Assert(pArray->numKnownAssignedXids >= 0);
03073 
03074         /*
03075          * If we're removing the tail element then advance tail pointer over
03076          * any invalid elements.  This will speed future searches.
03077          */
03078         if (result_index == tail)
03079         {
03080             tail++;
03081             while (tail < head && !KnownAssignedXidsValid[tail])
03082                 tail++;
03083             if (tail >= head)
03084             {
03085                 /* Array is empty, so we can reset both pointers */
03086                 pArray->headKnownAssignedXids = 0;
03087                 pArray->tailKnownAssignedXids = 0;
03088             }
03089             else
03090             {
03091                 pArray->tailKnownAssignedXids = tail;
03092             }
03093         }
03094     }
03095 
03096     return true;
03097 }
03098 
03099 /*
03100  * Is the specified XID present in KnownAssignedXids[]?
03101  *
03102  * Caller must hold ProcArrayLock in shared or exclusive mode.
03103  */
03104 static bool
03105 KnownAssignedXidExists(TransactionId xid)
03106 {
03107     Assert(TransactionIdIsValid(xid));
03108 
03109     return KnownAssignedXidsSearch(xid, false);
03110 }
03111 
03112 /*
03113  * Remove the specified XID from KnownAssignedXids[].
03114  *
03115  * Caller must hold ProcArrayLock in exclusive mode.
03116  */
03117 static void
03118 KnownAssignedXidsRemove(TransactionId xid)
03119 {
03120     Assert(TransactionIdIsValid(xid));
03121 
03122     elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
03123 
03124     /*
03125      * Note: we cannot consider it an error to remove an XID that's not
03126      * present.  We intentionally remove subxact IDs while processing
03127      * XLOG_XACT_ASSIGNMENT, to avoid array overflow.  Then those XIDs will be
03128      * removed again when the top-level xact commits or aborts.
03129      *
03130      * It might be possible to track such XIDs to distinguish this case from
03131      * actual errors, but it would be complicated and probably not worth it.
03132      * So, just ignore the search result.
03133      */
03134     (void) KnownAssignedXidsSearch(xid, true);
03135 }
03136 
03137 /*
03138  * KnownAssignedXidsRemoveTree
03139  *      Remove xid (if it's not InvalidTransactionId) and all the subxids.
03140  *
03141  * Caller must hold ProcArrayLock in exclusive mode.
03142  */
03143 static void
03144 KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
03145                             TransactionId *subxids)
03146 {
03147     int         i;
03148 
03149     if (TransactionIdIsValid(xid))
03150         KnownAssignedXidsRemove(xid);
03151 
03152     for (i = 0; i < nsubxids; i++)
03153         KnownAssignedXidsRemove(subxids[i]);
03154 
03155     /* Opportunistically compress the array */
03156     KnownAssignedXidsCompress(false);
03157 }
03158 
03159 /*
03160  * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid
03161  * then clear the whole table.
03162  *
03163  * Caller must hold ProcArrayLock in exclusive mode.
03164  */
03165 static void
03166 KnownAssignedXidsRemovePreceding(TransactionId removeXid)
03167 {
03168     /* use volatile pointer to prevent code rearrangement */
03169     volatile ProcArrayStruct *pArray = procArray;
03170     int         count = 0;
03171     int         head,
03172                 tail,
03173                 i;
03174 
03175     if (!TransactionIdIsValid(removeXid))
03176     {
03177         elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
03178         pArray->numKnownAssignedXids = 0;
03179         pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0;
03180         return;
03181     }
03182 
03183     elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid);
03184 
03185     /*
03186      * Mark entries invalid starting at the tail.  Since array is sorted, we
03187      * can stop as soon as we reach a entry >= removeXid.
03188      */
03189     tail = pArray->tailKnownAssignedXids;
03190     head = pArray->headKnownAssignedXids;
03191 
03192     for (i = tail; i < head; i++)
03193     {
03194         if (KnownAssignedXidsValid[i])
03195         {
03196             TransactionId knownXid = KnownAssignedXids[i];
03197 
03198             if (TransactionIdFollowsOrEquals(knownXid, removeXid))
03199                 break;
03200 
03201             if (!StandbyTransactionIdIsPrepared(knownXid))
03202             {
03203                 KnownAssignedXidsValid[i] = false;
03204                 count++;
03205             }
03206         }
03207     }
03208 
03209     pArray->numKnownAssignedXids -= count;
03210     Assert(pArray->numKnownAssignedXids >= 0);
03211 
03212     /*
03213      * Advance the tail pointer if we've marked the tail item invalid.
03214      */
03215     for (i = tail; i < head; i++)
03216     {
03217         if (KnownAssignedXidsValid[i])
03218             break;
03219     }
03220     if (i >= head)
03221     {
03222         /* Array is empty, so we can reset both pointers */
03223         pArray->headKnownAssignedXids = 0;
03224         pArray->tailKnownAssignedXids = 0;
03225     }
03226     else
03227     {
03228         pArray->tailKnownAssignedXids = i;
03229     }
03230 
03231     /* Opportunistically compress the array */
03232     KnownAssignedXidsCompress(false);
03233 }
03234 
03235 /*
03236  * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
03237  * We filter out anything >= xmax.
03238  *
03239  * Returns the number of XIDs stored into xarray[].  Caller is responsible
03240  * that array is large enough.
03241  *
03242  * Caller must hold ProcArrayLock in (at least) shared mode.
03243  */
03244 static int
03245 KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax)
03246 {
03247     TransactionId xtmp = InvalidTransactionId;
03248 
03249     return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax);
03250 }
03251 
03252 /*
03253  * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus
03254  * we reduce *xmin to the lowest xid value seen if not already lower.
03255  *
03256  * Caller must hold ProcArrayLock in (at least) shared mode.
03257  */
03258 static int
03259 KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
03260                                TransactionId xmax)
03261 {
03262     /* use volatile pointer to prevent code rearrangement */
03263     volatile ProcArrayStruct *pArray = procArray;
03264     int         count = 0;
03265     int         head,
03266                 tail;
03267     int         i;
03268 
03269     /*
03270      * Fetch head just once, since it may change while we loop. We can stop
03271      * once we reach the initially seen head, since we are certain that an xid
03272      * cannot enter and then leave the array while we hold ProcArrayLock.  We
03273      * might miss newly-added xids, but they should be >= xmax so irrelevant
03274      * anyway.
03275      *
03276      * Must take spinlock to ensure we see up-to-date array contents.
03277      */
03278     SpinLockAcquire(&pArray->known_assigned_xids_lck);
03279     tail = pArray->tailKnownAssignedXids;
03280     head = pArray->headKnownAssignedXids;
03281     SpinLockRelease(&pArray->known_assigned_xids_lck);
03282 
03283     for (i = tail; i < head; i++)
03284     {
03285         /* Skip any gaps in the array */
03286         if (KnownAssignedXidsValid[i])
03287         {
03288             TransactionId knownXid = KnownAssignedXids[i];
03289 
03290             /*
03291              * Update xmin if required.  Only the first XID need be checked,
03292              * since the array is sorted.
03293              */
03294             if (count == 0 &&
03295                 TransactionIdPrecedes(knownXid, *xmin))
03296                 *xmin = knownXid;
03297 
03298             /*
03299              * Filter out anything >= xmax, again relying on sorted property
03300              * of array.
03301              */
03302             if (TransactionIdIsValid(xmax) &&
03303                 TransactionIdFollowsOrEquals(knownXid, xmax))
03304                 break;
03305 
03306             /* Add knownXid into output array */
03307             xarray[count++] = knownXid;
03308         }
03309     }
03310 
03311     return count;
03312 }
03313 
03314 /*
03315  * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId
03316  * if nothing there.
03317  */
03318 static TransactionId
03319 KnownAssignedXidsGetOldestXmin(void)
03320 {
03321     /* use volatile pointer to prevent code rearrangement */
03322     volatile ProcArrayStruct *pArray = procArray;
03323     int         head,
03324                 tail;
03325     int         i;
03326 
03327     /*
03328      * Fetch head just once, since it may change while we loop.
03329      */
03330     SpinLockAcquire(&pArray->known_assigned_xids_lck);
03331     tail = pArray->tailKnownAssignedXids;
03332     head = pArray->headKnownAssignedXids;
03333     SpinLockRelease(&pArray->known_assigned_xids_lck);
03334 
03335     for (i = tail; i < head; i++)
03336     {
03337         /* Skip any gaps in the array */
03338         if (KnownAssignedXidsValid[i])
03339             return KnownAssignedXids[i];
03340     }
03341 
03342     return InvalidTransactionId;
03343 }
03344 
03345 /*
03346  * Display KnownAssignedXids to provide debug trail
03347  *
03348  * Currently this is only called within startup process, so we need no
03349  * special locking.
03350  *
03351  * Note this is pretty expensive, and much of the expense will be incurred
03352  * even if the elog message will get discarded.  It's not currently called
03353  * in any performance-critical places, however, so no need to be tenser.
03354  */
03355 static void
03356 KnownAssignedXidsDisplay(int trace_level)
03357 {
03358     /* use volatile pointer to prevent code rearrangement */
03359     volatile ProcArrayStruct *pArray = procArray;
03360     StringInfoData buf;
03361     int         head,
03362                 tail,
03363                 i;
03364     int         nxids = 0;
03365 
03366     tail = pArray->tailKnownAssignedXids;
03367     head = pArray->headKnownAssignedXids;
03368 
03369     initStringInfo(&buf);
03370 
03371     for (i = tail; i < head; i++)
03372     {
03373         if (KnownAssignedXidsValid[i])
03374         {
03375             nxids++;
03376             appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
03377         }
03378     }
03379 
03380     elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",
03381          nxids,
03382          pArray->numKnownAssignedXids,
03383          pArray->tailKnownAssignedXids,
03384          pArray->headKnownAssignedXids,
03385          buf.data);
03386 
03387     pfree(buf.data);
03388 }
03389 
03390 /*
03391  * KnownAssignedXidsReset
03392  *      Resets KnownAssignedXids to be empty
03393  */
03394 static void
03395 KnownAssignedXidsReset(void)
03396 {
03397     /* use volatile pointer to prevent code rearrangement */
03398     volatile ProcArrayStruct *pArray = procArray;
03399 
03400     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
03401 
03402     pArray->numKnownAssignedXids = 0;
03403     pArray->tailKnownAssignedXids = 0;
03404     pArray->headKnownAssignedXids = 0;
03405 
03406     LWLockRelease(ProcArrayLock);
03407 }