Header And Logo

PostgreSQL
| The world's most advanced open source database.

genam.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * genam.c
00004  *    general index access method routines
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/access/index/genam.c
00012  *
00013  * NOTES
00014  *    many of the old access method routines have been turned into
00015  *    macros and moved to genam.h -cim 4/30/91
00016  *
00017  *-------------------------------------------------------------------------
00018  */
00019 
00020 #include "postgres.h"
00021 
00022 #include "access/relscan.h"
00023 #include "access/transam.h"
00024 #include "catalog/index.h"
00025 #include "lib/stringinfo.h"
00026 #include "miscadmin.h"
00027 #include "storage/bufmgr.h"
00028 #include "utils/builtins.h"
00029 #include "utils/lsyscache.h"
00030 #include "utils/rel.h"
00031 #include "utils/tqual.h"
00032 
00033 
00034 /* ----------------------------------------------------------------
00035  *      general access method routines
00036  *
00037  *      All indexed access methods use an identical scan structure.
00038  *      We don't know how the various AMs do locking, however, so we don't
00039  *      do anything about that here.
00040  *
00041  *      The intent is that an AM implementor will define a beginscan routine
00042  *      that calls RelationGetIndexScan, to fill in the scan, and then does
00043  *      whatever kind of locking he wants.
00044  *
00045  *      At the end of a scan, the AM's endscan routine undoes the locking,
00046  *      but does *not* call IndexScanEnd --- the higher-level index_endscan
00047  *      routine does that.  (We can't do it in the AM because index_endscan
00048  *      still needs to touch the IndexScanDesc after calling the AM.)
00049  *
00050  *      Because of this, the AM does not have a choice whether to call
00051  *      RelationGetIndexScan or not; its beginscan routine must return an
00052  *      object made by RelationGetIndexScan.  This is kinda ugly but not
00053  *      worth cleaning up now.
00054  * ----------------------------------------------------------------
00055  */
00056 
00057 /* ----------------
00058  *  RelationGetIndexScan -- Create and fill an IndexScanDesc.
00059  *
00060  *      This routine creates an index scan structure and sets up initial
00061  *      contents for it.
00062  *
00063  *      Parameters:
00064  *              indexRelation -- index relation for scan.
00065  *              nkeys -- count of scan keys (index qual conditions).
00066  *              norderbys -- count of index order-by operators.
00067  *
00068  *      Returns:
00069  *              An initialized IndexScanDesc.
00070  * ----------------
00071  */
00072 IndexScanDesc
00073 RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
00074 {
00075     IndexScanDesc scan;
00076 
00077     scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData));
00078 
00079     scan->heapRelation = NULL;  /* may be set later */
00080     scan->indexRelation = indexRelation;
00081     scan->xs_snapshot = SnapshotNow;    /* may be set later */
00082     scan->numberOfKeys = nkeys;
00083     scan->numberOfOrderBys = norderbys;
00084 
00085     /*
00086      * We allocate key workspace here, but it won't get filled until amrescan.
00087      */
00088     if (nkeys > 0)
00089         scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
00090     else
00091         scan->keyData = NULL;
00092     if (norderbys > 0)
00093         scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys);
00094     else
00095         scan->orderByData = NULL;
00096 
00097     scan->xs_want_itup = false; /* may be set later */
00098 
00099     /*
00100      * During recovery we ignore killed tuples and don't bother to kill them
00101      * either. We do this because the xmin on the primary node could easily be
00102      * later than the xmin on the standby node, so that what the primary
00103      * thinks is killed is supposed to be visible on standby. So for correct
00104      * MVCC for queries during recovery we must ignore these hints and check
00105      * all tuples. Do *not* set ignore_killed_tuples to true when running in a
00106      * transaction that was started during recovery. xactStartedInRecovery
00107      * should not be altered by index AMs.
00108      */
00109     scan->kill_prior_tuple = false;
00110     scan->xactStartedInRecovery = TransactionStartedDuringRecovery();
00111     scan->ignore_killed_tuples = !scan->xactStartedInRecovery;
00112 
00113     scan->opaque = NULL;
00114 
00115     scan->xs_itup = NULL;
00116     scan->xs_itupdesc = NULL;
00117 
00118     ItemPointerSetInvalid(&scan->xs_ctup.t_self);
00119     scan->xs_ctup.t_data = NULL;
00120     scan->xs_cbuf = InvalidBuffer;
00121     scan->xs_continue_hot = false;
00122 
00123     return scan;
00124 }
00125 
00126 /* ----------------
00127  *  IndexScanEnd -- End an index scan.
00128  *
00129  *      This routine just releases the storage acquired by
00130  *      RelationGetIndexScan().  Any AM-level resources are
00131  *      assumed to already have been released by the AM's
00132  *      endscan routine.
00133  *
00134  *  Returns:
00135  *      None.
00136  * ----------------
00137  */
00138 void
00139 IndexScanEnd(IndexScanDesc scan)
00140 {
00141     if (scan->keyData != NULL)
00142         pfree(scan->keyData);
00143     if (scan->orderByData != NULL)
00144         pfree(scan->orderByData);
00145 
00146     pfree(scan);
00147 }
00148 
00149 /*
00150  * BuildIndexValueDescription
00151  *
00152  * Construct a string describing the contents of an index entry, in the
00153  * form "(key_name, ...)=(key_value, ...)".  This is currently used
00154  * for building unique-constraint and exclusion-constraint error messages.
00155  *
00156  * The passed-in values/nulls arrays are the "raw" input to the index AM,
00157  * e.g. results of FormIndexDatum --- this is not necessarily what is stored
00158  * in the index, but it's what the user perceives to be stored.
00159  */
00160 char *
00161 BuildIndexValueDescription(Relation indexRelation,
00162                            Datum *values, bool *isnull)
00163 {
00164     StringInfoData buf;
00165     int         natts = indexRelation->rd_rel->relnatts;
00166     int         i;
00167 
00168     initStringInfo(&buf);
00169     appendStringInfo(&buf, "(%s)=(",
00170                      pg_get_indexdef_columns(RelationGetRelid(indexRelation),
00171                                              true));
00172 
00173     for (i = 0; i < natts; i++)
00174     {
00175         char       *val;
00176 
00177         if (isnull[i])
00178             val = "null";
00179         else
00180         {
00181             Oid         foutoid;
00182             bool        typisvarlena;
00183 
00184             /*
00185              * The provided data is not necessarily of the type stored in the
00186              * index; rather it is of the index opclass's input type. So look
00187              * at rd_opcintype not the index tupdesc.
00188              *
00189              * Note: this is a bit shaky for opclasses that have pseudotype
00190              * input types such as ANYARRAY or RECORD.  Currently, the
00191              * typoutput functions associated with the pseudotypes will work
00192              * okay, but we might have to try harder in future.
00193              */
00194             getTypeOutputInfo(indexRelation->rd_opcintype[i],
00195                               &foutoid, &typisvarlena);
00196             val = OidOutputFunctionCall(foutoid, values[i]);
00197         }
00198 
00199         if (i > 0)
00200             appendStringInfoString(&buf, ", ");
00201         appendStringInfoString(&buf, val);
00202     }
00203 
00204     appendStringInfoChar(&buf, ')');
00205 
00206     return buf.data;
00207 }
00208 
00209 
00210 /* ----------------------------------------------------------------
00211  *      heap-or-index-scan access to system catalogs
00212  *
00213  *      These functions support system catalog accesses that normally use
00214  *      an index but need to be capable of being switched to heap scans
00215  *      if the system indexes are unavailable.
00216  *
00217  *      The specified scan keys must be compatible with the named index.
00218  *      Generally this means that they must constrain either all columns
00219  *      of the index, or the first K columns of an N-column index.
00220  *
00221  *      These routines could work with non-system tables, actually,
00222  *      but they're only useful when there is a known index to use with
00223  *      the given scan keys; so in practice they're only good for
00224  *      predetermined types of scans of system catalogs.
00225  * ----------------------------------------------------------------
00226  */
00227 
00228 /*
00229  * systable_beginscan --- set up for heap-or-index scan
00230  *
00231  *  rel: catalog to scan, already opened and suitably locked
00232  *  indexId: OID of index to conditionally use
00233  *  indexOK: if false, forces a heap scan (see notes below)
00234  *  snapshot: time qual to use (usually should be SnapshotNow)
00235  *  nkeys, key: scan keys
00236  *
00237  * The attribute numbers in the scan key should be set for the heap case.
00238  * If we choose to index, we reset them to 1..n to reference the index
00239  * columns.  Note this means there must be one scankey qualification per
00240  * index column!  This is checked by the Asserts in the normal, index-using
00241  * case, but won't be checked if the heapscan path is taken.
00242  *
00243  * The routine checks the normal cases for whether an indexscan is safe,
00244  * but caller can make additional checks and pass indexOK=false if needed.
00245  * In standard case indexOK can simply be constant TRUE.
00246  */
00247 SysScanDesc
00248 systable_beginscan(Relation heapRelation,
00249                    Oid indexId,
00250                    bool indexOK,
00251                    Snapshot snapshot,
00252                    int nkeys, ScanKey key)
00253 {
00254     SysScanDesc sysscan;
00255     Relation    irel;
00256 
00257     if (indexOK &&
00258         !IgnoreSystemIndexes &&
00259         !ReindexIsProcessingIndex(indexId))
00260         irel = index_open(indexId, AccessShareLock);
00261     else
00262         irel = NULL;
00263 
00264     sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
00265 
00266     sysscan->heap_rel = heapRelation;
00267     sysscan->irel = irel;
00268 
00269     if (irel)
00270     {
00271         int         i;
00272 
00273         /* Change attribute numbers to be index column numbers. */
00274         for (i = 0; i < nkeys; i++)
00275         {
00276             int         j;
00277 
00278             for (j = 0; j < irel->rd_index->indnatts; j++)
00279             {
00280                 if (key[i].sk_attno == irel->rd_index->indkey.values[j])
00281                 {
00282                     key[i].sk_attno = j + 1;
00283                     break;
00284                 }
00285             }
00286             if (j == irel->rd_index->indnatts)
00287                 elog(ERROR, "column is not in index");
00288         }
00289 
00290         sysscan->iscan = index_beginscan(heapRelation, irel,
00291                                          snapshot, nkeys, 0);
00292         index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
00293         sysscan->scan = NULL;
00294     }
00295     else
00296     {
00297         /*
00298          * We disallow synchronized scans when forced to use a heapscan on a
00299          * catalog.  In most cases the desired rows are near the front, so
00300          * that the unpredictable start point of a syncscan is a serious
00301          * disadvantage; and there are no compensating advantages, because
00302          * it's unlikely that such scans will occur in parallel.
00303          */
00304         sysscan->scan = heap_beginscan_strat(heapRelation, snapshot,
00305                                              nkeys, key,
00306                                              true, false);
00307         sysscan->iscan = NULL;
00308     }
00309 
00310     return sysscan;
00311 }
00312 
00313 /*
00314  * systable_getnext --- get next tuple in a heap-or-index scan
00315  *
00316  * Returns NULL if no more tuples available.
00317  *
00318  * Note that returned tuple is a reference to data in a disk buffer;
00319  * it must not be modified, and should be presumed inaccessible after
00320  * next getnext() or endscan() call.
00321  */
00322 HeapTuple
00323 systable_getnext(SysScanDesc sysscan)
00324 {
00325     HeapTuple   htup;
00326 
00327     if (sysscan->irel)
00328     {
00329         htup = index_getnext(sysscan->iscan, ForwardScanDirection);
00330 
00331         /*
00332          * We currently don't need to support lossy index operators for any
00333          * system catalog scan.  It could be done here, using the scan keys to
00334          * drive the operator calls, if we arranged to save the heap attnums
00335          * during systable_beginscan(); this is practical because we still
00336          * wouldn't need to support indexes on expressions.
00337          */
00338         if (htup && sysscan->iscan->xs_recheck)
00339             elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
00340     }
00341     else
00342         htup = heap_getnext(sysscan->scan, ForwardScanDirection);
00343 
00344     return htup;
00345 }
00346 
00347 /*
00348  * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
00349  *
00350  * This is useful to test whether an object was deleted while we waited to
00351  * acquire lock on it.
00352  *
00353  * Note: we don't actually *need* the tuple to be passed in, but it's a
00354  * good crosscheck that the caller is interested in the right tuple.
00355  */
00356 bool
00357 systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
00358 {
00359     bool        result;
00360 
00361     if (sysscan->irel)
00362     {
00363         IndexScanDesc scan = sysscan->iscan;
00364 
00365         Assert(tup == &scan->xs_ctup);
00366         Assert(BufferIsValid(scan->xs_cbuf));
00367         /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
00368         LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
00369         result = HeapTupleSatisfiesVisibility(tup, scan->xs_snapshot,
00370                                               scan->xs_cbuf);
00371         LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
00372     }
00373     else
00374     {
00375         HeapScanDesc scan = sysscan->scan;
00376 
00377         Assert(tup == &scan->rs_ctup);
00378         Assert(BufferIsValid(scan->rs_cbuf));
00379         /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
00380         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
00381         result = HeapTupleSatisfiesVisibility(tup, scan->rs_snapshot,
00382                                               scan->rs_cbuf);
00383         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
00384     }
00385     return result;
00386 }
00387 
00388 /*
00389  * systable_endscan --- close scan, release resources
00390  *
00391  * Note that it's still up to the caller to close the heap relation.
00392  */
00393 void
00394 systable_endscan(SysScanDesc sysscan)
00395 {
00396     if (sysscan->irel)
00397     {
00398         index_endscan(sysscan->iscan);
00399         index_close(sysscan->irel, AccessShareLock);
00400     }
00401     else
00402         heap_endscan(sysscan->scan);
00403 
00404     pfree(sysscan);
00405 }
00406 
00407 
00408 /*
00409  * systable_beginscan_ordered --- set up for ordered catalog scan
00410  *
00411  * These routines have essentially the same API as systable_beginscan etc,
00412  * except that they guarantee to return multiple matching tuples in
00413  * index order.  Also, for largely historical reasons, the index to use
00414  * is opened and locked by the caller, not here.
00415  *
00416  * Currently we do not support non-index-based scans here.  (In principle
00417  * we could do a heapscan and sort, but the uses are in places that
00418  * probably don't need to still work with corrupted catalog indexes.)
00419  * For the moment, therefore, these functions are merely the thinnest of
00420  * wrappers around index_beginscan/index_getnext.  The main reason for their
00421  * existence is to centralize possible future support of lossy operators
00422  * in catalog scans.
00423  */
00424 SysScanDesc
00425 systable_beginscan_ordered(Relation heapRelation,
00426                            Relation indexRelation,
00427                            Snapshot snapshot,
00428                            int nkeys, ScanKey key)
00429 {
00430     SysScanDesc sysscan;
00431     int         i;
00432 
00433     /* REINDEX can probably be a hard error here ... */
00434     if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation)))
00435         elog(ERROR, "cannot do ordered scan on index \"%s\", because it is being reindexed",
00436              RelationGetRelationName(indexRelation));
00437     /* ... but we only throw a warning about violating IgnoreSystemIndexes */
00438     if (IgnoreSystemIndexes)
00439         elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes",
00440              RelationGetRelationName(indexRelation));
00441 
00442     sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
00443 
00444     sysscan->heap_rel = heapRelation;
00445     sysscan->irel = indexRelation;
00446 
00447     /* Change attribute numbers to be index column numbers. */
00448     for (i = 0; i < nkeys; i++)
00449     {
00450         int         j;
00451 
00452         for (j = 0; j < indexRelation->rd_index->indnatts; j++)
00453         {
00454             if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j])
00455             {
00456                 key[i].sk_attno = j + 1;
00457                 break;
00458             }
00459         }
00460         if (j == indexRelation->rd_index->indnatts)
00461             elog(ERROR, "column is not in index");
00462     }
00463 
00464     sysscan->iscan = index_beginscan(heapRelation, indexRelation,
00465                                      snapshot, nkeys, 0);
00466     index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
00467     sysscan->scan = NULL;
00468 
00469     return sysscan;
00470 }
00471 
00472 /*
00473  * systable_getnext_ordered --- get next tuple in an ordered catalog scan
00474  */
00475 HeapTuple
00476 systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
00477 {
00478     HeapTuple   htup;
00479 
00480     Assert(sysscan->irel);
00481     htup = index_getnext(sysscan->iscan, direction);
00482     /* See notes in systable_getnext */
00483     if (htup && sysscan->iscan->xs_recheck)
00484         elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
00485 
00486     return htup;
00487 }
00488 
00489 /*
00490  * systable_endscan_ordered --- close scan, release resources
00491  */
00492 void
00493 systable_endscan_ordered(SysScanDesc sysscan)
00494 {
00495     Assert(sysscan->irel);
00496     index_endscan(sysscan->iscan);
00497     pfree(sysscan);
00498 }