nodeSetOp.c
/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *    Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking nor projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/executor/nodeSetOp.c
 *
 *-------------------------------------------------------------------------
 */
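
/*
 * Illustrative example (an editor's addition, not from the original file
 * comments): a query such as
 *
 *      SELECT a, b FROM t1 INTERSECT SELECT a, b FROM t2;
 *
 * is planned with the two inputs appended into one dataset, each extended
 * with the junk flag column described above, feeding a SetOp node.
 * Depending on the strategy the planner chooses, EXPLAIN typically shows
 * this as "HashSetOp Intersect" (SETOP_HASHED) or as "SetOp Intersect"
 * below a Sort (SETOP_SORTED).
 */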

#include "postgres.h"

#include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "utils/memutils.h"


/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
    long        numLeft;        /* number of left-input dups in group */
    long        numRight;       /* number of right-input dups in group */
} SetOpStatePerGroupData;

/*
 * To implement hashed mode, we need a hashtable that stores a
 * representative tuple and the duplicate counts for each distinct set
 * of grouping columns.  We compute the hash key from the grouping columns.
 */
typedef struct SetOpHashEntryData *SetOpHashEntry;

typedef struct SetOpHashEntryData
{
    TupleHashEntryData shared;  /* common header for hash table entries */
    SetOpStatePerGroupData pergroup;
}   SetOpHashEntryData;


static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);


/*
 * Initialize state for a new group of input values.
 */
static inline void
initialize_counts(SetOpStatePerGroup pergroup)
{
    pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static inline void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
    if (flag)
        pergroup->numRight++;
    else
        pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;
    int         flag;
    bool        isNull;

    flag = DatumGetInt32(slot_getattr(inputslot,
                                      node->flagColIdx,
                                      &isNull));
    Assert(!isNull);
    Assert(flag == 0 || flag == 1);
    return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;

    Assert(node->strategy == SETOP_HASHED);
    Assert(node->numGroups > 0);

    setopstate->hashtable = BuildTupleHashTable(node->numCols,
                                                node->dupColIdx,
                                                setopstate->eqfunctions,
                                                setopstate->hashfunctions,
                                                node->numGroups,
                                                sizeof(SetOpHashEntryData),
                                                setopstate->tableContext,
                                                setopstate->tempContext);
}

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
    SetOp      *plannode = (SetOp *) setopstate->ps.plan;

    switch (plannode->cmd)
    {
        case SETOPCMD_INTERSECT:
            if (pergroup->numLeft > 0 && pergroup->numRight > 0)
                setopstate->numOutput = 1;
            else
                setopstate->numOutput = 0;
            break;
        case SETOPCMD_INTERSECT_ALL:
            setopstate->numOutput =
                (pergroup->numLeft < pergroup->numRight) ?
                pergroup->numLeft : pergroup->numRight;
            break;
        case SETOPCMD_EXCEPT:
            if (pergroup->numLeft > 0 && pergroup->numRight == 0)
                setopstate->numOutput = 1;
            else
                setopstate->numOutput = 0;
            break;
        case SETOPCMD_EXCEPT_ALL:
            setopstate->numOutput =
                (pergroup->numLeft < pergroup->numRight) ?
                0 : (pergroup->numLeft - pergroup->numRight);
            break;
        default:
            elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
            break;
    }
}
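
/*
 * Worked example of the rules above (illustrative only): a group with
 * numLeft = 3 and numRight = 2 yields
 *
 *      INTERSECT       -> 1
 *      INTERSECT ALL   -> 2    (the smaller of 3 and 2)
 *      EXCEPT          -> 0    (numRight is not zero)
 *      EXCEPT ALL      -> 1    (3 - 2)
 *
 * and a group with numLeft = 0 produces no output for any of the four
 * commands, which is what justifies skipping such groups in hashed mode.
 */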


/* ----------------------------------------------------------------
 *      ExecSetOp
 * ----------------------------------------------------------------
 */
TupleTableSlot *                /* return: a tuple or NULL */
ExecSetOp(SetOpState *node)
{
    SetOp      *plannode = (SetOp *) node->ps.plan;
    TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

    /*
     * If the previously-returned tuple needs to be returned more than once,
     * keep returning it.
     */
    if (node->numOutput > 0)
    {
        node->numOutput--;
        return resultTupleSlot;
    }

    /* Otherwise, we're done if we are out of groups */
    if (node->setop_done)
        return NULL;

    /* Fetch the next tuple group according to the correct strategy */
    if (plannode->strategy == SETOP_HASHED)
    {
        if (!node->table_filled)
            setop_fill_hash_table(node);
        return setop_retrieve_hash_table(node);
    }
    else
        return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;
    PlanState  *outerPlan;
    SetOpStatePerGroup pergroup;
    TupleTableSlot *outerslot;
    TupleTableSlot *resultTupleSlot;

    /*
     * get state info from node
     */
    outerPlan = outerPlanState(setopstate);
    pergroup = setopstate->pergroup;
    resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

    /*
     * We loop retrieving groups until we find one we should return
     */
    while (!setopstate->setop_done)
    {
        /*
         * If we don't already have the first tuple of the new group, fetch it
         * from the outer plan.
         */
        if (setopstate->grp_firstTuple == NULL)
        {
            outerslot = ExecProcNode(outerPlan);
            if (!TupIsNull(outerslot))
            {
                /* Make a copy of the first input tuple */
                setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
            }
            else
            {
                /* outer plan produced no tuples at all */
                setopstate->setop_done = true;
                return NULL;
            }
        }

        /*
         * Store the copied first input tuple in the tuple table slot reserved
         * for it.  The tuple will be deleted when it is cleared from the
         * slot.
         */
        ExecStoreTuple(setopstate->grp_firstTuple,
                       resultTupleSlot,
                       InvalidBuffer,
                       true);
        setopstate->grp_firstTuple = NULL;      /* don't keep two pointers */

        /* Initialize working state for a new input tuple group */
        initialize_counts(pergroup);

        /* Count the first input tuple */
        advance_counts(pergroup,
                       fetch_tuple_flag(setopstate, resultTupleSlot));

        /*
         * Scan the outer plan until we exhaust it or cross a group boundary.
         */
        for (;;)
        {
            outerslot = ExecProcNode(outerPlan);
            if (TupIsNull(outerslot))
            {
                /* no more outer-plan tuples available */
                setopstate->setop_done = true;
                break;
            }

            /*
             * Check whether we've crossed a group boundary.
             */
            if (!execTuplesMatch(resultTupleSlot,
                                 outerslot,
                                 node->numCols, node->dupColIdx,
                                 setopstate->eqfunctions,
                                 setopstate->tempContext))
            {
                /*
                 * Save the first input tuple of the next group.
                 */
                setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
                break;
            }

            /* Still in same group, so count this tuple */
            advance_counts(pergroup,
                           fetch_tuple_flag(setopstate, outerslot));
        }

        /*
         * Done scanning input tuple group.  See if we should emit any copies
         * of result tuple, and if so return the first copy.
         */
        set_output_count(setopstate, pergroup);

        if (setopstate->numOutput > 0)
        {
            setopstate->numOutput--;
            return resultTupleSlot;
        }
    }

    /* No more groups */
    ExecClearTuple(resultTupleSlot);
    return NULL;
}
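
/*
 * Illustrative trace of the sorted case (editor's sketch): if the sorted
 * input arrives as (grouping value, source) = (1,L) (1,R) (2,L) (2,L) (3,R),
 * the loop above sees three groups with counts {numLeft=1, numRight=1},
 * {2, 0} and {0, 1}.  set_output_count then emits only the group-2 row for
 * EXCEPT, and only the group-1 row for INTERSECT.
 */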

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;
    PlanState  *outerPlan;
    int         firstFlag;
    bool        in_first_rel PG_USED_FOR_ASSERTS_ONLY;

    /*
     * get state info from node
     */
    outerPlan = outerPlanState(setopstate);
    firstFlag = node->firstFlag;
    /* verify planner didn't mess up */
    Assert(firstFlag == 0 ||
           (firstFlag == 1 &&
            (node->cmd == SETOPCMD_INTERSECT ||
             node->cmd == SETOPCMD_INTERSECT_ALL)));

    /*
     * Process each outer-plan tuple, and then fetch the next one, until we
     * exhaust the outer plan.
     */
    in_first_rel = true;
    for (;;)
    {
        TupleTableSlot *outerslot;
        int         flag;
        SetOpHashEntry entry;
        bool        isnew;

        outerslot = ExecProcNode(outerPlan);
        if (TupIsNull(outerslot))
            break;

        /* Identify whether it's left or right input */
        flag = fetch_tuple_flag(setopstate, outerslot);

        if (flag == firstFlag)
        {
            /* (still) in first input relation */
            Assert(in_first_rel);

            /* Find or build hashtable entry for this tuple's group */
            entry = (SetOpHashEntry)
                LookupTupleHashEntry(setopstate->hashtable, outerslot, &isnew);

            /* If new tuple group, initialize counts */
            if (isnew)
                initialize_counts(&entry->pergroup);

            /* Advance the counts */
            advance_counts(&entry->pergroup, flag);
        }
        else
        {
            /* reached second relation */
            in_first_rel = false;

            /* For tuples not seen previously, do not make hashtable entry */
            entry = (SetOpHashEntry)
                LookupTupleHashEntry(setopstate->hashtable, outerslot, NULL);

            /* Advance the counts if entry is already present */
            if (entry)
                advance_counts(&entry->pergroup, flag);
        }

        /* Must reset temp context after each hashtable lookup */
        MemoryContextReset(setopstate->tempContext);
    }

    setopstate->table_filled = true;
    /* Initialize to walk the hash table */
    ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}
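
/*
 * Illustrative trace of the hashed case (editor's sketch): for an EXCEPT
 * with left (first) input {A, A, B} and right input {B, C}, the loop above
 * creates entries A {numLeft=2} and B {numLeft=1} while reading the first
 * relation; the right-input B then finds its entry and bumps numRight,
 * while C finds no entry and is discarded.  Skipping C is safe because a
 * group with numLeft = 0 can never produce output, as noted in the file
 * header.
 */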

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
    SetOpHashEntry entry;
    TupleTableSlot *resultTupleSlot;

    /*
     * get state info from node
     */
    resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

    /*
     * We loop retrieving groups until we find one we should return
     */
    while (!setopstate->setop_done)
    {
        /*
         * Find the next entry in the hash table
         */
        entry = (SetOpHashEntry) ScanTupleHashTable(&setopstate->hashiter);
        if (entry == NULL)
        {
            /* No more entries in hashtable, so done */
            setopstate->setop_done = true;
            return NULL;
        }

        /*
         * See if we should emit any copies of this tuple, and if so return
         * the first copy.
         */
        set_output_count(setopstate, &entry->pergroup);

        if (setopstate->numOutput > 0)
        {
            setopstate->numOutput--;
            return ExecStoreMinimalTuple(entry->shared.firstTuple,
                                         resultTupleSlot,
                                         false);
        }
    }

    /* No more groups */
    ExecClearTuple(resultTupleSlot);
    return NULL;
}

/* ----------------------------------------------------------------
 *      ExecInitSetOp
 *
 *      This initializes the setop node state structures and
 *      the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
    SetOpState *setopstate;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * create state structure
     */
    setopstate = makeNode(SetOpState);
    setopstate->ps.plan = (Plan *) node;
    setopstate->ps.state = estate;

    setopstate->eqfunctions = NULL;
    setopstate->hashfunctions = NULL;
    setopstate->setop_done = false;
    setopstate->numOutput = 0;
    setopstate->pergroup = NULL;
    setopstate->grp_firstTuple = NULL;
    setopstate->hashtable = NULL;
    setopstate->tableContext = NULL;

    /*
     * Miscellaneous initialization
     *
     * SetOp nodes have no ExprContext initialization because they never call
     * ExecQual or ExecProject.  But they do need a per-tuple memory context
     * anyway for calling execTuplesMatch.
     */
    setopstate->tempContext =
        AllocSetContextCreate(CurrentMemoryContext,
                              "SetOp",
                              ALLOCSET_DEFAULT_MINSIZE,
                              ALLOCSET_DEFAULT_INITSIZE,
                              ALLOCSET_DEFAULT_MAXSIZE);

    /*
     * If hashing, we also need a longer-lived context to store the hash
     * table.  The table can't just be kept in the per-query context because
     * we want to be able to throw it away in ExecReScanSetOp.
     */
    if (node->strategy == SETOP_HASHED)
        setopstate->tableContext =
            AllocSetContextCreate(CurrentMemoryContext,
                                  "SetOp hash table",
                                  ALLOCSET_DEFAULT_MINSIZE,
                                  ALLOCSET_DEFAULT_INITSIZE,
                                  ALLOCSET_DEFAULT_MAXSIZE);

    /*
     * Tuple table initialization
     */
    ExecInitResultTupleSlot(estate, &setopstate->ps);

    /*
     * initialize child nodes
     *
     * If we are hashing then the child plan does not need to handle REWIND
     * efficiently; see ExecReScanSetOp.
     */
    if (node->strategy == SETOP_HASHED)
        eflags &= ~EXEC_FLAG_REWIND;
    outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);

    /*
     * setop nodes do no projections, so initialize projection info for this
     * node appropriately
     */
    ExecAssignResultTypeFromTL(&setopstate->ps);
    setopstate->ps.ps_ProjInfo = NULL;

    /*
     * Precompute fmgr lookup data for inner loop. We need both equality and
     * hashing functions to do it by hashing, but only equality if not
     * hashing.
     */
    if (node->strategy == SETOP_HASHED)
        execTuplesHashPrepare(node->numCols,
                              node->dupOperators,
                              &setopstate->eqfunctions,
                              &setopstate->hashfunctions);
    else
        setopstate->eqfunctions =
            execTuplesMatchPrepare(node->numCols,
                                   node->dupOperators);

    if (node->strategy == SETOP_HASHED)
    {
        build_hash_table(setopstate);
        setopstate->table_filled = false;
    }
    else
    {
        setopstate->pergroup =
            (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
    }

    return setopstate;
}

/* ----------------------------------------------------------------
 *      ExecEndSetOp
 *
 *      This shuts down the subplan and frees resources allocated
 *      to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
    /* clean up tuple table */
    ExecClearTuple(node->ps.ps_ResultTupleSlot);

    /* free subsidiary stuff including hashtable */
    MemoryContextDelete(node->tempContext);
    if (node->tableContext)
        MemoryContextDelete(node->tableContext);

    ExecEndNode(outerPlanState(node));
}


void
ExecReScanSetOp(SetOpState *node)
{
    ExecClearTuple(node->ps.ps_ResultTupleSlot);
    node->setop_done = false;
    node->numOutput = 0;

    if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
    {
        /*
         * In the hashed case, if we haven't yet built the hash table then we
         * can just return; nothing done yet, so nothing to undo. If subnode's
         * chgParam is not NULL then it will be re-scanned by ExecProcNode,
         * else no reason to re-scan it at all.
         */
        if (!node->table_filled)
            return;

        /*
         * If we do have the hash table and the subplan does not have any
         * parameter changes, then we can just rescan the existing hash table;
         * no need to build it again.
         */
        if (node->ps.lefttree->chgParam == NULL)
        {
            ResetTupleHashIterator(node->hashtable, &node->hashiter);
            return;
        }
    }

    /* Release first tuple of group, if we have made a copy */
    if (node->grp_firstTuple != NULL)
    {
        heap_freetuple(node->grp_firstTuple);
        node->grp_firstTuple = NULL;
    }

    /* Release any hashtable storage */
    if (node->tableContext)
        MemoryContextResetAndDeleteChildren(node->tableContext);

    /* And rebuild empty hashtable if needed */
    if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
    {
        build_hash_table(node);
        node->table_filled = false;
    }

    /*
     * if chgParam of subnode is not null then plan will be re-scanned by
     * first ExecProcNode.
     */
    if (node->ps.lefttree->chgParam == NULL)
        ExecReScan(node->ps.lefttree);
}