Header And Logo

PostgreSQL
| The world's most advanced open source database.

nodeIndexonlyscan.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * nodeIndexonlyscan.c
00004  *    Routines to support index-only scans
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/executor/nodeIndexonlyscan.c
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 /*
00016  * INTERFACE ROUTINES
00017  *      ExecIndexOnlyScan           scans an index
00018  *      IndexOnlyNext               retrieve next tuple
00019  *      ExecInitIndexOnlyScan       creates and initializes state info.
00020  *      ExecReScanIndexOnlyScan     rescans the indexed relation.
00021  *      ExecEndIndexOnlyScan        releases all storage.
00022  *      ExecIndexOnlyMarkPos        marks scan position.
00023  *      ExecIndexOnlyRestrPos       restores scan position.
00024  */
00025 #include "postgres.h"
00026 
00027 #include "access/relscan.h"
00028 #include "access/visibilitymap.h"
00029 #include "executor/execdebug.h"
00030 #include "executor/nodeIndexonlyscan.h"
00031 #include "executor/nodeIndexscan.h"
00032 #include "storage/bufmgr.h"
00033 #include "storage/predicate.h"
00034 #include "utils/memutils.h"
00035 #include "utils/rel.h"
00036 
00037 
00038 static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
00039 static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup,
00040                 TupleDesc itupdesc);
00041 
00042 
00043 /* ----------------------------------------------------------------
00044  *      IndexOnlyNext
00045  *
00046  *      Retrieve a tuple from the IndexOnlyScan node's index.
00047  * ----------------------------------------------------------------
00048  */
00049 static TupleTableSlot *
00050 IndexOnlyNext(IndexOnlyScanState *node)
00051 {
00052     EState     *estate;
00053     ExprContext *econtext;
00054     ScanDirection direction;
00055     IndexScanDesc scandesc;
00056     TupleTableSlot *slot;
00057     ItemPointer tid;
00058 
00059     /*
00060      * extract necessary information from index scan node
00061      */
00062     estate = node->ss.ps.state;
00063     direction = estate->es_direction;
00064     /* flip direction if this is an overall backward scan */
00065     if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir))
00066     {
00067         if (ScanDirectionIsForward(direction))
00068             direction = BackwardScanDirection;
00069         else if (ScanDirectionIsBackward(direction))
00070             direction = ForwardScanDirection;
00071     }
00072     scandesc = node->ioss_ScanDesc;
00073     econtext = node->ss.ps.ps_ExprContext;
00074     slot = node->ss.ss_ScanTupleSlot;
00075 
00076     /*
00077      * OK, now that we have what we need, fetch the next tuple.
00078      */
00079     while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
00080     {
00081         HeapTuple   tuple = NULL;
00082 
00083         /*
00084          * We can skip the heap fetch if the TID references a heap page on
00085          * which all tuples are known visible to everybody.  In any case,
00086          * we'll use the index tuple not the heap tuple as the data source.
00087          *
00088          * Note on Memory Ordering Effects: visibilitymap_test does not lock
00089          * the visibility map buffer, and therefore the result we read here
00090          * could be slightly stale.  However, it can't be stale enough to
00091          * matter.  It suffices to show that (1) there is a read barrier
00092          * between the time we read the index TID and the time we test the
00093          * visibility map; and (2) there is a write barrier between the time
00094          * some other concurrent process clears the visibility map bit and the
00095          * time it inserts the index TID.  Since acquiring or releasing a
00096          * LWLock interposes a full barrier, this is easy to show: (1) is
00097          * satisfied by the release of the index buffer content lock after
00098          * reading the TID; and (2) is satisfied by the acquisition of the
00099          * buffer content lock in order to insert the TID.
00100          */
00101         if (!visibilitymap_test(scandesc->heapRelation,
00102                                 ItemPointerGetBlockNumber(tid),
00103                                 &node->ioss_VMBuffer))
00104         {
00105             /*
00106              * Rats, we have to visit the heap to check visibility.
00107              */
00108             node->ioss_HeapFetches++;
00109             tuple = index_fetch_heap(scandesc);
00110             if (tuple == NULL)
00111                 continue;       /* no visible tuple, try next index entry */
00112 
00113             /*
00114              * Only MVCC snapshots are supported here, so there should be no
00115              * need to keep following the HOT chain once a visible entry has
00116              * been found.  If we did want to allow that, we'd need to keep
00117              * more state to remember not to call index_getnext_tid next time.
00118              */
00119             if (scandesc->xs_continue_hot)
00120                 elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
00121 
00122             /*
00123              * Note: at this point we are holding a pin on the heap page, as
00124              * recorded in scandesc->xs_cbuf.  We could release that pin now,
00125              * but it's not clear whether it's a win to do so.  The next index
00126              * entry might require a visit to the same heap page.
00127              */
00128         }
00129 
00130         /*
00131          * Fill the scan tuple slot with data from the index.
00132          */
00133         StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
00134 
00135         /*
00136          * If the index was lossy, we have to recheck the index quals.
00137          * (Currently, this can never happen, but we should support the case
00138          * for possible future use, eg with GiST indexes.)
00139          */
00140         if (scandesc->xs_recheck)
00141         {
00142             econtext->ecxt_scantuple = slot;
00143             ResetExprContext(econtext);
00144             if (!ExecQual(node->indexqual, econtext, false))
00145             {
00146                 /* Fails recheck, so drop it and loop back for another */
00147                 InstrCountFiltered2(node, 1);
00148                 continue;
00149             }
00150         }
00151 
00152         /*
00153          * Predicate locks for index-only scans must be acquired at the page
00154          * level when the heap is not accessed, since tuple-level predicate
00155          * locks need the tuple's xmin value.  If we had to visit the tuple
00156          * anyway, then we already have the tuple-level lock and can skip the
00157          * page lock.
00158          */
00159         if (tuple == NULL)
00160             PredicateLockPage(scandesc->heapRelation,
00161                               ItemPointerGetBlockNumber(tid),
00162                               estate->es_snapshot);
00163 
00164         return slot;
00165     }
00166 
00167     /*
00168      * if we get here it means the index scan failed so we are at the end of
00169      * the scan..
00170      */
00171     return ExecClearTuple(slot);
00172 }
00173 
00174 /*
00175  * StoreIndexTuple
00176  *      Fill the slot with data from the index tuple.
00177  *
00178  * At some point this might be generally-useful functionality, but
00179  * right now we don't need it elsewhere.
00180  */
00181 static void
00182 StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
00183 {
00184     int         nindexatts = itupdesc->natts;
00185     Datum      *values = slot->tts_values;
00186     bool       *isnull = slot->tts_isnull;
00187     int         i;
00188 
00189     /*
00190      * Note: we must use the tupdesc supplied by the AM in index_getattr, not
00191      * the slot's tupdesc, in case the latter has different datatypes (this
00192      * happens for btree name_ops in particular).  They'd better have the same
00193      * number of columns though, as well as being datatype-compatible which is
00194      * something we can't so easily check.
00195      */
00196     Assert(slot->tts_tupleDescriptor->natts == nindexatts);
00197 
00198     ExecClearTuple(slot);
00199     for (i = 0; i < nindexatts; i++)
00200         values[i] = index_getattr(itup, i + 1, itupdesc, &isnull[i]);
00201     ExecStoreVirtualTuple(slot);
00202 }
00203 
00204 /*
00205  * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
00206  *
00207  * This can't really happen, since an index can't supply CTID which would
00208  * be necessary data for any potential EvalPlanQual target relation.  If it
00209  * did happen, the EPQ code would pass us the wrong data, namely a heap
00210  * tuple not an index tuple.  So throw an error.
00211  */
00212 static bool
00213 IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
00214 {
00215     elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
00216     return false;               /* keep compiler quiet */
00217 }
00218 
00219 /* ----------------------------------------------------------------
00220  *      ExecIndexOnlyScan(node)
00221  * ----------------------------------------------------------------
00222  */
00223 TupleTableSlot *
00224 ExecIndexOnlyScan(IndexOnlyScanState *node)
00225 {
00226     /*
00227      * If we have runtime keys and they've not already been set up, do it now.
00228      */
00229     if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
00230         ExecReScan((PlanState *) node);
00231 
00232     return ExecScan(&node->ss,
00233                     (ExecScanAccessMtd) IndexOnlyNext,
00234                     (ExecScanRecheckMtd) IndexOnlyRecheck);
00235 }
00236 
00237 /* ----------------------------------------------------------------
00238  *      ExecReScanIndexOnlyScan(node)
00239  *
00240  *      Recalculates the values of any scan keys whose value depends on
00241  *      information known at runtime, then rescans the indexed relation.
00242  *
00243  *      Updating the scan key was formerly done separately in
00244  *      ExecUpdateIndexScanKeys. Integrating it into ReScan makes
00245  *      rescans of indices and relations/general streams more uniform.
00246  * ----------------------------------------------------------------
00247  */
00248 void
00249 ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
00250 {
00251     /*
00252      * If we are doing runtime key calculations (ie, any of the index key
00253      * values weren't simple Consts), compute the new key values.  But first,
00254      * reset the context so we don't leak memory as each outer tuple is
00255      * scanned.  Note this assumes that we will recalculate *all* runtime keys
00256      * on each call.
00257      */
00258     if (node->ioss_NumRuntimeKeys != 0)
00259     {
00260         ExprContext *econtext = node->ioss_RuntimeContext;
00261 
00262         ResetExprContext(econtext);
00263         ExecIndexEvalRuntimeKeys(econtext,
00264                                  node->ioss_RuntimeKeys,
00265                                  node->ioss_NumRuntimeKeys);
00266     }
00267     node->ioss_RuntimeKeysReady = true;
00268 
00269     /* reset index scan */
00270     index_rescan(node->ioss_ScanDesc,
00271                  node->ioss_ScanKeys, node->ioss_NumScanKeys,
00272                  node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
00273 
00274     ExecScanReScan(&node->ss);
00275 }
00276 
00277 
00278 /* ----------------------------------------------------------------
00279  *      ExecEndIndexOnlyScan
00280  * ----------------------------------------------------------------
00281  */
00282 void
00283 ExecEndIndexOnlyScan(IndexOnlyScanState *node)
00284 {
00285     Relation    indexRelationDesc;
00286     IndexScanDesc indexScanDesc;
00287     Relation    relation;
00288 
00289     /*
00290      * extract information from the node
00291      */
00292     indexRelationDesc = node->ioss_RelationDesc;
00293     indexScanDesc = node->ioss_ScanDesc;
00294     relation = node->ss.ss_currentRelation;
00295 
00296     /* Release VM buffer pin, if any. */
00297     if (node->ioss_VMBuffer != InvalidBuffer)
00298     {
00299         ReleaseBuffer(node->ioss_VMBuffer);
00300         node->ioss_VMBuffer = InvalidBuffer;
00301     }
00302 
00303     /*
00304      * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
00305      */
00306 #ifdef NOT_USED
00307     ExecFreeExprContext(&node->ss.ps);
00308     if (node->ioss_RuntimeContext)
00309         FreeExprContext(node->ioss_RuntimeContext, true);
00310 #endif
00311 
00312     /*
00313      * clear out tuple table slots
00314      */
00315     ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
00316     ExecClearTuple(node->ss.ss_ScanTupleSlot);
00317 
00318     /*
00319      * close the index relation (no-op if we didn't open it)
00320      */
00321     if (indexScanDesc)
00322         index_endscan(indexScanDesc);
00323     if (indexRelationDesc)
00324         index_close(indexRelationDesc, NoLock);
00325 
00326     /*
00327      * close the heap relation.
00328      */
00329     ExecCloseScanRelation(relation);
00330 }
00331 
00332 /* ----------------------------------------------------------------
00333  *      ExecIndexOnlyMarkPos
00334  * ----------------------------------------------------------------
00335  */
00336 void
00337 ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
00338 {
00339     index_markpos(node->ioss_ScanDesc);
00340 }
00341 
00342 /* ----------------------------------------------------------------
00343  *      ExecIndexOnlyRestrPos
00344  * ----------------------------------------------------------------
00345  */
00346 void
00347 ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
00348 {
00349     index_restrpos(node->ioss_ScanDesc);
00350 }
00351 
00352 /* ----------------------------------------------------------------
00353  *      ExecInitIndexOnlyScan
00354  *
00355  *      Initializes the index scan's state information, creates
00356  *      scan keys, and opens the base and index relations.
00357  *
00358  *      Note: index scans have 2 sets of state information because
00359  *            we have to keep track of the base relation and the
00360  *            index relation.
00361  * ----------------------------------------------------------------
00362  */
00363 IndexOnlyScanState *
00364 ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
00365 {
00366     IndexOnlyScanState *indexstate;
00367     Relation    currentRelation;
00368     bool        relistarget;
00369     TupleDesc   tupDesc;
00370 
00371     /*
00372      * create state structure
00373      */
00374     indexstate = makeNode(IndexOnlyScanState);
00375     indexstate->ss.ps.plan = (Plan *) node;
00376     indexstate->ss.ps.state = estate;
00377     indexstate->ioss_HeapFetches = 0;
00378 
00379     /*
00380      * Miscellaneous initialization
00381      *
00382      * create expression context for node
00383      */
00384     ExecAssignExprContext(estate, &indexstate->ss.ps);
00385 
00386     indexstate->ss.ps.ps_TupFromTlist = false;
00387 
00388     /*
00389      * initialize child expressions
00390      *
00391      * Note: we don't initialize all of the indexorderby expression, only the
00392      * sub-parts corresponding to runtime keys (see below).
00393      */
00394     indexstate->ss.ps.targetlist = (List *)
00395         ExecInitExpr((Expr *) node->scan.plan.targetlist,
00396                      (PlanState *) indexstate);
00397     indexstate->ss.ps.qual = (List *)
00398         ExecInitExpr((Expr *) node->scan.plan.qual,
00399                      (PlanState *) indexstate);
00400     indexstate->indexqual = (List *)
00401         ExecInitExpr((Expr *) node->indexqual,
00402                      (PlanState *) indexstate);
00403 
00404     /*
00405      * tuple table initialization
00406      */
00407     ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
00408     ExecInitScanTupleSlot(estate, &indexstate->ss);
00409 
00410     /*
00411      * open the base relation and acquire appropriate lock on it.
00412      */
00413     currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
00414 
00415     indexstate->ss.ss_currentRelation = currentRelation;
00416     indexstate->ss.ss_currentScanDesc = NULL;   /* no heap scan here */
00417 
00418     /*
00419      * Build the scan tuple type using the indextlist generated by the
00420      * planner.  We use this, rather than the index's physical tuple
00421      * descriptor, because the latter contains storage column types not the
00422      * types of the original datums.  (It's the AM's responsibility to return
00423      * suitable data anyway.)
00424      */
00425     tupDesc = ExecTypeFromTL(node->indextlist, false);
00426     ExecAssignScanType(&indexstate->ss, tupDesc);
00427 
00428     /*
00429      * Initialize result tuple type and projection info.
00430      */
00431     ExecAssignResultTypeFromTL(&indexstate->ss.ps);
00432     ExecAssignScanProjectionInfo(&indexstate->ss);
00433 
00434     /*
00435      * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
00436      * here.  This allows an index-advisor plugin to EXPLAIN a plan containing
00437      * references to nonexistent indexes.
00438      */
00439     if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
00440         return indexstate;
00441 
00442     /*
00443      * Open the index relation.
00444      *
00445      * If the parent table is one of the target relations of the query, then
00446      * InitPlan already opened and write-locked the index, so we can avoid
00447      * taking another lock here.  Otherwise we need a normal reader's lock.
00448      */
00449     relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
00450     indexstate->ioss_RelationDesc = index_open(node->indexid,
00451                                      relistarget ? NoLock : AccessShareLock);
00452 
00453     /*
00454      * Initialize index-specific scan state
00455      */
00456     indexstate->ioss_RuntimeKeysReady = false;
00457     indexstate->ioss_RuntimeKeys = NULL;
00458     indexstate->ioss_NumRuntimeKeys = 0;
00459 
00460     /*
00461      * build the index scan keys from the index qualification
00462      */
00463     ExecIndexBuildScanKeys((PlanState *) indexstate,
00464                            indexstate->ioss_RelationDesc,
00465                            node->indexqual,
00466                            false,
00467                            &indexstate->ioss_ScanKeys,
00468                            &indexstate->ioss_NumScanKeys,
00469                            &indexstate->ioss_RuntimeKeys,
00470                            &indexstate->ioss_NumRuntimeKeys,
00471                            NULL,    /* no ArrayKeys */
00472                            NULL);
00473 
00474     /*
00475      * any ORDER BY exprs have to be turned into scankeys in the same way
00476      */
00477     ExecIndexBuildScanKeys((PlanState *) indexstate,
00478                            indexstate->ioss_RelationDesc,
00479                            node->indexorderby,
00480                            true,
00481                            &indexstate->ioss_OrderByKeys,
00482                            &indexstate->ioss_NumOrderByKeys,
00483                            &indexstate->ioss_RuntimeKeys,
00484                            &indexstate->ioss_NumRuntimeKeys,
00485                            NULL,    /* no ArrayKeys */
00486                            NULL);
00487 
00488     /*
00489      * If we have runtime keys, we need an ExprContext to evaluate them. The
00490      * node's standard context won't do because we want to reset that context
00491      * for every tuple.  So, build another context just like the other one...
00492      * -tgl 7/11/00
00493      */
00494     if (indexstate->ioss_NumRuntimeKeys != 0)
00495     {
00496         ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
00497 
00498         ExecAssignExprContext(estate, &indexstate->ss.ps);
00499         indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
00500         indexstate->ss.ps.ps_ExprContext = stdecontext;
00501     }
00502     else
00503     {
00504         indexstate->ioss_RuntimeContext = NULL;
00505     }
00506 
00507     /*
00508      * Initialize scan descriptor.
00509      */
00510     indexstate->ioss_ScanDesc = index_beginscan(currentRelation,
00511                                                 indexstate->ioss_RelationDesc,
00512                                                 estate->es_snapshot,
00513                                                 indexstate->ioss_NumScanKeys,
00514                                             indexstate->ioss_NumOrderByKeys);
00515 
00516     /* Set it up for index-only scan */
00517     indexstate->ioss_ScanDesc->xs_want_itup = true;
00518     indexstate->ioss_VMBuffer = InvalidBuffer;
00519 
00520     /*
00521      * If no run-time keys to calculate, go ahead and pass the scankeys to the
00522      * index AM.
00523      */
00524     if (indexstate->ioss_NumRuntimeKeys == 0)
00525         index_rescan(indexstate->ioss_ScanDesc,
00526                      indexstate->ioss_ScanKeys,
00527                      indexstate->ioss_NumScanKeys,
00528                      indexstate->ioss_OrderByKeys,
00529                      indexstate->ioss_NumOrderByKeys);
00530 
00531     /*
00532      * all done.
00533      */
00534     return indexstate;
00535 }