Header And Logo

PostgreSQL
| The world's most advanced open source database.

nodeHashjoin.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * nodeHashjoin.c
00004  *    Routines to handle hash join nodes
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/executor/nodeHashjoin.c
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 
00016 #include "postgres.h"
00017 
00018 #include "access/htup_details.h"
00019 #include "executor/executor.h"
00020 #include "executor/hashjoin.h"
00021 #include "executor/nodeHash.h"
00022 #include "executor/nodeHashjoin.h"
00023 #include "miscadmin.h"
00024 #include "utils/memutils.h"
00025 
00026 
00027 /*
00028  * States of the ExecHashJoin state machine
00029  */
00030 #define HJ_BUILD_HASHTABLE      1
00031 #define HJ_NEED_NEW_OUTER       2
00032 #define HJ_SCAN_BUCKET          3
00033 #define HJ_FILL_OUTER_TUPLE     4
00034 #define HJ_FILL_INNER_TUPLES    5
00035 #define HJ_NEED_NEW_BATCH       6
00036 
00037 /* Returns true if doing null-fill on outer relation */
00038 #define HJ_FILL_OUTER(hjstate)  ((hjstate)->hj_NullInnerTupleSlot != NULL)
00039 /* Returns true if doing null-fill on inner relation */
00040 #define HJ_FILL_INNER(hjstate)  ((hjstate)->hj_NullOuterTupleSlot != NULL)
00041 
00042 static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
00043                           HashJoinState *hjstate,
00044                           uint32 *hashvalue);
00045 static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
00046                           BufFile *file,
00047                           uint32 *hashvalue,
00048                           TupleTableSlot *tupleSlot);
00049 static bool ExecHashJoinNewBatch(HashJoinState *hjstate);
00050 
00051 
00052 /* ----------------------------------------------------------------
00053  *      ExecHashJoin
00054  *
00055  *      This function implements the Hybrid Hashjoin algorithm.
00056  *
00057  *      Note: the relation we build hash table on is the "inner"
00058  *            the other one is "outer".
00059  * ----------------------------------------------------------------
00060  */
00061 TupleTableSlot *                /* return: a tuple or NULL */
00062 ExecHashJoin(HashJoinState *node)
00063 {
00064     PlanState  *outerNode;
00065     HashState  *hashNode;
00066     List       *joinqual;
00067     List       *otherqual;
00068     ExprContext *econtext;
00069     ExprDoneCond isDone;
00070     HashJoinTable hashtable;
00071     TupleTableSlot *outerTupleSlot;
00072     uint32      hashvalue;
00073     int         batchno;
00074 
00075     /*
00076      * get information from HashJoin node
00077      */
00078     joinqual = node->js.joinqual;
00079     otherqual = node->js.ps.qual;
00080     hashNode = (HashState *) innerPlanState(node);
00081     outerNode = outerPlanState(node);
00082     hashtable = node->hj_HashTable;
00083     econtext = node->js.ps.ps_ExprContext;
00084 
00085     /*
00086      * Check to see if we're still projecting out tuples from a previous join
00087      * tuple (because there is a function-returning-set in the projection
00088      * expressions).  If so, try to project another one.
00089      */
00090     if (node->js.ps.ps_TupFromTlist)
00091     {
00092         TupleTableSlot *result;
00093 
00094         result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
00095         if (isDone == ExprMultipleResult)
00096             return result;
00097         /* Done with that source tuple... */
00098         node->js.ps.ps_TupFromTlist = false;
00099     }
00100 
00101     /*
00102      * Reset per-tuple memory context to free any expression evaluation
00103      * storage allocated in the previous tuple cycle.  Note this can't happen
00104      * until we're done projecting out tuples from a join tuple.
00105      */
00106     ResetExprContext(econtext);
00107 
00108     /*
00109      * run the hash join state machine
00110      */
00111     for (;;)
00112     {
00113         switch (node->hj_JoinState)
00114         {
00115             case HJ_BUILD_HASHTABLE:
00116 
00117                 /*
00118                  * First time through: build hash table for inner relation.
00119                  */
00120                 Assert(hashtable == NULL);
00121 
00122                 /*
00123                  * If the outer relation is completely empty, and it's not
00124                  * right/full join, we can quit without building the hash
00125                  * table.  However, for an inner join it is only a win to
00126                  * check this when the outer relation's startup cost is less
00127                  * than the projected cost of building the hash table.
00128                  * Otherwise it's best to build the hash table first and see
00129                  * if the inner relation is empty.  (When it's a left join, we
00130                  * should always make this check, since we aren't going to be
00131                  * able to skip the join on the strength of an empty inner
00132                  * relation anyway.)
00133                  *
00134                  * If we are rescanning the join, we make use of information
00135                  * gained on the previous scan: don't bother to try the
00136                  * prefetch if the previous scan found the outer relation
00137                  * nonempty. This is not 100% reliable since with new
00138                  * parameters the outer relation might yield different
00139                  * results, but it's a good heuristic.
00140                  *
00141                  * The only way to make the check is to try to fetch a tuple
00142                  * from the outer plan node.  If we succeed, we have to stash
00143                  * it away for later consumption by ExecHashJoinOuterGetTuple.
00144                  */
00145                 if (HJ_FILL_INNER(node))
00146                 {
00147                     /* no chance to not build the hash table */
00148                     node->hj_FirstOuterTupleSlot = NULL;
00149                 }
00150                 else if (HJ_FILL_OUTER(node) ||
00151                          (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
00152                           !node->hj_OuterNotEmpty))
00153                 {
00154                     node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
00155                     if (TupIsNull(node->hj_FirstOuterTupleSlot))
00156                     {
00157                         node->hj_OuterNotEmpty = false;
00158                         return NULL;
00159                     }
00160                     else
00161                         node->hj_OuterNotEmpty = true;
00162                 }
00163                 else
00164                     node->hj_FirstOuterTupleSlot = NULL;
00165 
00166                 /*
00167                  * create the hash table
00168                  */
00169                 hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
00170                                                 node->hj_HashOperators,
00171                                                 HJ_FILL_INNER(node));
00172                 node->hj_HashTable = hashtable;
00173 
00174                 /*
00175                  * execute the Hash node, to build the hash table
00176                  */
00177                 hashNode->hashtable = hashtable;
00178                 (void) MultiExecProcNode((PlanState *) hashNode);
00179 
00180                 /*
00181                  * If the inner relation is completely empty, and we're not
00182                  * doing a left outer join, we can quit without scanning the
00183                  * outer relation.
00184                  */
00185                 if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
00186                     return NULL;
00187 
00188                 /*
00189                  * need to remember whether nbatch has increased since we
00190                  * began scanning the outer relation
00191                  */
00192                 hashtable->nbatch_outstart = hashtable->nbatch;
00193 
00194                 /*
00195                  * Reset OuterNotEmpty for scan.  (It's OK if we fetched a
00196                  * tuple above, because ExecHashJoinOuterGetTuple will
00197                  * immediately set it again.)
00198                  */
00199                 node->hj_OuterNotEmpty = false;
00200 
00201                 node->hj_JoinState = HJ_NEED_NEW_OUTER;
00202 
00203                 /* FALL THRU */
00204 
00205             case HJ_NEED_NEW_OUTER:
00206 
00207                 /*
00208                  * We don't have an outer tuple, try to get the next one
00209                  */
00210                 outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
00211                                                            node,
00212                                                            &hashvalue);
00213                 if (TupIsNull(outerTupleSlot))
00214                 {
00215                     /* end of batch, or maybe whole join */
00216                     if (HJ_FILL_INNER(node))
00217                     {
00218                         /* set up to scan for unmatched inner tuples */
00219                         ExecPrepHashTableForUnmatched(node);
00220                         node->hj_JoinState = HJ_FILL_INNER_TUPLES;
00221                     }
00222                     else
00223                         node->hj_JoinState = HJ_NEED_NEW_BATCH;
00224                     continue;
00225                 }
00226 
00227                 econtext->ecxt_outertuple = outerTupleSlot;
00228                 node->hj_MatchedOuter = false;
00229 
00230                 /*
00231                  * Find the corresponding bucket for this tuple in the main
00232                  * hash table or skew hash table.
00233                  */
00234                 node->hj_CurHashValue = hashvalue;
00235                 ExecHashGetBucketAndBatch(hashtable, hashvalue,
00236                                           &node->hj_CurBucketNo, &batchno);
00237                 node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
00238                                                                  hashvalue);
00239                 node->hj_CurTuple = NULL;
00240 
00241                 /*
00242                  * The tuple might not belong to the current batch (where
00243                  * "current batch" includes the skew buckets if any).
00244                  */
00245                 if (batchno != hashtable->curbatch &&
00246                     node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
00247                 {
00248                     /*
00249                      * Need to postpone this outer tuple to a later batch.
00250                      * Save it in the corresponding outer-batch file.
00251                      */
00252                     Assert(batchno > hashtable->curbatch);
00253                     ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
00254                                           hashvalue,
00255                                         &hashtable->outerBatchFile[batchno]);
00256                     /* Loop around, staying in HJ_NEED_NEW_OUTER state */
00257                     continue;
00258                 }
00259 
00260                 /* OK, let's scan the bucket for matches */
00261                 node->hj_JoinState = HJ_SCAN_BUCKET;
00262 
00263                 /* FALL THRU */
00264 
00265             case HJ_SCAN_BUCKET:
00266 
00267                 /*
00268                  * We check for interrupts here because this corresponds to
00269                  * where we'd fetch a row from a child plan node in other join
00270                  * types.
00271                  */
00272                 CHECK_FOR_INTERRUPTS();
00273 
00274                 /*
00275                  * Scan the selected hash bucket for matches to current outer
00276                  */
00277                 if (!ExecScanHashBucket(node, econtext))
00278                 {
00279                     /* out of matches; check for possible outer-join fill */
00280                     node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
00281                     continue;
00282                 }
00283 
00284                 /*
00285                  * We've got a match, but still need to test non-hashed quals.
00286                  * ExecScanHashBucket already set up all the state needed to
00287                  * call ExecQual.
00288                  *
00289                  * If we pass the qual, then save state for next call and have
00290                  * ExecProject form the projection, store it in the tuple
00291                  * table, and return the slot.
00292                  *
00293                  * Only the joinquals determine tuple match status, but all
00294                  * quals must pass to actually return the tuple.
00295                  */
00296                 if (joinqual == NIL || ExecQual(joinqual, econtext, false))
00297                 {
00298                     node->hj_MatchedOuter = true;
00299                     HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
00300 
00301                     /* In an antijoin, we never return a matched tuple */
00302                     if (node->js.jointype == JOIN_ANTI)
00303                     {
00304                         node->hj_JoinState = HJ_NEED_NEW_OUTER;
00305                         continue;
00306                     }
00307 
00308                     /*
00309                      * In a semijoin, we'll consider returning the first
00310                      * match, but after that we're done with this outer tuple.
00311                      */
00312                     if (node->js.jointype == JOIN_SEMI)
00313                         node->hj_JoinState = HJ_NEED_NEW_OUTER;
00314 
00315                     if (otherqual == NIL ||
00316                         ExecQual(otherqual, econtext, false))
00317                     {
00318                         TupleTableSlot *result;
00319 
00320                         result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
00321 
00322                         if (isDone != ExprEndResult)
00323                         {
00324                             node->js.ps.ps_TupFromTlist =
00325                                 (isDone == ExprMultipleResult);
00326                             return result;
00327                         }
00328                     }
00329                     else
00330                         InstrCountFiltered2(node, 1);
00331                 }
00332                 else
00333                     InstrCountFiltered1(node, 1);
00334                 break;
00335 
00336             case HJ_FILL_OUTER_TUPLE:
00337 
00338                 /*
00339                  * The current outer tuple has run out of matches, so check
00340                  * whether to emit a dummy outer-join tuple.  Whether we emit
00341                  * one or not, the next state is NEED_NEW_OUTER.
00342                  */
00343                 node->hj_JoinState = HJ_NEED_NEW_OUTER;
00344 
00345                 if (!node->hj_MatchedOuter &&
00346                     HJ_FILL_OUTER(node))
00347                 {
00348                     /*
00349                      * Generate a fake join tuple with nulls for the inner
00350                      * tuple, and return it if it passes the non-join quals.
00351                      */
00352                     econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
00353 
00354                     if (otherqual == NIL ||
00355                         ExecQual(otherqual, econtext, false))
00356                     {
00357                         TupleTableSlot *result;
00358 
00359                         result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
00360 
00361                         if (isDone != ExprEndResult)
00362                         {
00363                             node->js.ps.ps_TupFromTlist =
00364                                 (isDone == ExprMultipleResult);
00365                             return result;
00366                         }
00367                     }
00368                     else
00369                         InstrCountFiltered2(node, 1);
00370                 }
00371                 break;
00372 
00373             case HJ_FILL_INNER_TUPLES:
00374 
00375                 /*
00376                  * We have finished a batch, but we are doing right/full join,
00377                  * so any unmatched inner tuples in the hashtable have to be
00378                  * emitted before we continue to the next batch.
00379                  */
00380                 if (!ExecScanHashTableForUnmatched(node, econtext))
00381                 {
00382                     /* no more unmatched tuples */
00383                     node->hj_JoinState = HJ_NEED_NEW_BATCH;
00384                     continue;
00385                 }
00386 
00387                 /*
00388                  * Generate a fake join tuple with nulls for the outer tuple,
00389                  * and return it if it passes the non-join quals.
00390                  */
00391                 econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
00392 
00393                 if (otherqual == NIL ||
00394                     ExecQual(otherqual, econtext, false))
00395                 {
00396                     TupleTableSlot *result;
00397 
00398                     result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
00399 
00400                     if (isDone != ExprEndResult)
00401                     {
00402                         node->js.ps.ps_TupFromTlist =
00403                             (isDone == ExprMultipleResult);
00404                         return result;
00405                     }
00406                 }
00407                 else
00408                     InstrCountFiltered2(node, 1);
00409                 break;
00410 
00411             case HJ_NEED_NEW_BATCH:
00412 
00413                 /*
00414                  * Try to advance to next batch.  Done if there are no more.
00415                  */
00416                 if (!ExecHashJoinNewBatch(node))
00417                     return NULL;    /* end of join */
00418                 node->hj_JoinState = HJ_NEED_NEW_OUTER;
00419                 break;
00420 
00421             default:
00422                 elog(ERROR, "unrecognized hashjoin state: %d",
00423                      (int) node->hj_JoinState);
00424         }
00425     }
00426 }
00427 
00428 /* ----------------------------------------------------------------
00429  *      ExecInitHashJoin
00430  *
00431  *      Init routine for HashJoin node.
00432  * ----------------------------------------------------------------
00433  */
00434 HashJoinState *
00435 ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
00436 {
00437     HashJoinState *hjstate;
00438     Plan       *outerNode;
00439     Hash       *hashNode;
00440     List       *lclauses;
00441     List       *rclauses;
00442     List       *hoperators;
00443     ListCell   *l;
00444 
00445     /* check for unsupported flags */
00446     Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
00447 
00448     /*
00449      * create state structure
00450      */
00451     hjstate = makeNode(HashJoinState);
00452     hjstate->js.ps.plan = (Plan *) node;
00453     hjstate->js.ps.state = estate;
00454 
00455     /*
00456      * Miscellaneous initialization
00457      *
00458      * create expression context for node
00459      */
00460     ExecAssignExprContext(estate, &hjstate->js.ps);
00461 
00462     /*
00463      * initialize child expressions
00464      */
00465     hjstate->js.ps.targetlist = (List *)
00466         ExecInitExpr((Expr *) node->join.plan.targetlist,
00467                      (PlanState *) hjstate);
00468     hjstate->js.ps.qual = (List *)
00469         ExecInitExpr((Expr *) node->join.plan.qual,
00470                      (PlanState *) hjstate);
00471     hjstate->js.jointype = node->join.jointype;
00472     hjstate->js.joinqual = (List *)
00473         ExecInitExpr((Expr *) node->join.joinqual,
00474                      (PlanState *) hjstate);
00475     hjstate->hashclauses = (List *)
00476         ExecInitExpr((Expr *) node->hashclauses,
00477                      (PlanState *) hjstate);
00478 
00479     /*
00480      * initialize child nodes
00481      *
00482      * Note: we could suppress the REWIND flag for the inner input, which
00483      * would amount to betting that the hash will be a single batch.  Not
00484      * clear if this would be a win or not.
00485      */
00486     outerNode = outerPlan(node);
00487     hashNode = (Hash *) innerPlan(node);
00488 
00489     outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
00490     innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
00491 
00492     /*
00493      * tuple table initialization
00494      */
00495     ExecInitResultTupleSlot(estate, &hjstate->js.ps);
00496     hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
00497 
00498     /* set up null tuples for outer joins, if needed */
00499     switch (node->join.jointype)
00500     {
00501         case JOIN_INNER:
00502         case JOIN_SEMI:
00503             break;
00504         case JOIN_LEFT:
00505         case JOIN_ANTI:
00506             hjstate->hj_NullInnerTupleSlot =
00507                 ExecInitNullTupleSlot(estate,
00508                                  ExecGetResultType(innerPlanState(hjstate)));
00509             break;
00510         case JOIN_RIGHT:
00511             hjstate->hj_NullOuterTupleSlot =
00512                 ExecInitNullTupleSlot(estate,
00513                                  ExecGetResultType(outerPlanState(hjstate)));
00514             break;
00515         case JOIN_FULL:
00516             hjstate->hj_NullOuterTupleSlot =
00517                 ExecInitNullTupleSlot(estate,
00518                                  ExecGetResultType(outerPlanState(hjstate)));
00519             hjstate->hj_NullInnerTupleSlot =
00520                 ExecInitNullTupleSlot(estate,
00521                                  ExecGetResultType(innerPlanState(hjstate)));
00522             break;
00523         default:
00524             elog(ERROR, "unrecognized join type: %d",
00525                  (int) node->join.jointype);
00526     }
00527 
00528     /*
00529      * now for some voodoo.  our temporary tuple slot is actually the result
00530      * tuple slot of the Hash node (which is our inner plan).  we can do this
00531      * because Hash nodes don't return tuples via ExecProcNode() -- instead
00532      * the hash join node uses ExecScanHashBucket() to get at the contents of
00533      * the hash table.  -cim 6/9/91
00534      */
00535     {
00536         HashState  *hashstate = (HashState *) innerPlanState(hjstate);
00537         TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
00538 
00539         hjstate->hj_HashTupleSlot = slot;
00540     }
00541 
00542     /*
00543      * initialize tuple type and projection info
00544      */
00545     ExecAssignResultTypeFromTL(&hjstate->js.ps);
00546     ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
00547 
00548     ExecSetSlotDescriptor(hjstate->hj_OuterTupleSlot,
00549                           ExecGetResultType(outerPlanState(hjstate)));
00550 
00551     /*
00552      * initialize hash-specific info
00553      */
00554     hjstate->hj_HashTable = NULL;
00555     hjstate->hj_FirstOuterTupleSlot = NULL;
00556 
00557     hjstate->hj_CurHashValue = 0;
00558     hjstate->hj_CurBucketNo = 0;
00559     hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
00560     hjstate->hj_CurTuple = NULL;
00561 
00562     /*
00563      * Deconstruct the hash clauses into outer and inner argument values, so
00564      * that we can evaluate those subexpressions separately.  Also make a list
00565      * of the hash operator OIDs, in preparation for looking up the hash
00566      * functions to use.
00567      */
00568     lclauses = NIL;
00569     rclauses = NIL;
00570     hoperators = NIL;
00571     foreach(l, hjstate->hashclauses)
00572     {
00573         FuncExprState *fstate = (FuncExprState *) lfirst(l);
00574         OpExpr     *hclause;
00575 
00576         Assert(IsA(fstate, FuncExprState));
00577         hclause = (OpExpr *) fstate->xprstate.expr;
00578         Assert(IsA(hclause, OpExpr));
00579         lclauses = lappend(lclauses, linitial(fstate->args));
00580         rclauses = lappend(rclauses, lsecond(fstate->args));
00581         hoperators = lappend_oid(hoperators, hclause->opno);
00582     }
00583     hjstate->hj_OuterHashKeys = lclauses;
00584     hjstate->hj_InnerHashKeys = rclauses;
00585     hjstate->hj_HashOperators = hoperators;
00586     /* child Hash node needs to evaluate inner hash keys, too */
00587     ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
00588 
00589     hjstate->js.ps.ps_TupFromTlist = false;
00590     hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
00591     hjstate->hj_MatchedOuter = false;
00592     hjstate->hj_OuterNotEmpty = false;
00593 
00594     return hjstate;
00595 }
00596 
00597 /* ----------------------------------------------------------------
00598  *      ExecEndHashJoin
00599  *
00600  *      clean up routine for HashJoin node
00601  * ----------------------------------------------------------------
00602  */
00603 void
00604 ExecEndHashJoin(HashJoinState *node)
00605 {
00606     /*
00607      * Free hash table
00608      */
00609     if (node->hj_HashTable)
00610     {
00611         ExecHashTableDestroy(node->hj_HashTable);
00612         node->hj_HashTable = NULL;
00613     }
00614 
00615     /*
00616      * Free the exprcontext
00617      */
00618     ExecFreeExprContext(&node->js.ps);
00619 
00620     /*
00621      * clean out the tuple table
00622      */
00623     ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
00624     ExecClearTuple(node->hj_OuterTupleSlot);
00625     ExecClearTuple(node->hj_HashTupleSlot);
00626 
00627     /*
00628      * clean up subtrees
00629      */
00630     ExecEndNode(outerPlanState(node));
00631     ExecEndNode(innerPlanState(node));
00632 }
00633 
00634 /*
00635  * ExecHashJoinOuterGetTuple
00636  *
00637  *      get the next outer tuple for hashjoin: either by
00638  *      executing the outer plan node in the first pass, or from
00639  *      the temp files for the hashjoin batches.
00640  *
00641  * Returns a null slot if no more outer tuples (within the current batch).
00642  *
00643  * On success, the tuple's hash value is stored at *hashvalue --- this is
00644  * either originally computed, or re-read from the temp file.
00645  */
00646 static TupleTableSlot *
00647 ExecHashJoinOuterGetTuple(PlanState *outerNode,
00648                           HashJoinState *hjstate,
00649                           uint32 *hashvalue)
00650 {
00651     HashJoinTable hashtable = hjstate->hj_HashTable;
00652     int         curbatch = hashtable->curbatch;
00653     TupleTableSlot *slot;
00654 
00655     if (curbatch == 0)          /* if it is the first pass */
00656     {
00657         /*
00658          * Check to see if first outer tuple was already fetched by
00659          * ExecHashJoin() and not used yet.
00660          */
00661         slot = hjstate->hj_FirstOuterTupleSlot;
00662         if (!TupIsNull(slot))
00663             hjstate->hj_FirstOuterTupleSlot = NULL;
00664         else
00665             slot = ExecProcNode(outerNode);
00666 
00667         while (!TupIsNull(slot))
00668         {
00669             /*
00670              * We have to compute the tuple's hash value.
00671              */
00672             ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
00673 
00674             econtext->ecxt_outertuple = slot;
00675             if (ExecHashGetHashValue(hashtable, econtext,
00676                                      hjstate->hj_OuterHashKeys,
00677                                      true,      /* outer tuple */
00678                                      HJ_FILL_OUTER(hjstate),
00679                                      hashvalue))
00680             {
00681                 /* remember outer relation is not empty for possible rescan */
00682                 hjstate->hj_OuterNotEmpty = true;
00683 
00684                 return slot;
00685             }
00686 
00687             /*
00688              * That tuple couldn't match because of a NULL, so discard it and
00689              * continue with the next one.
00690              */
00691             slot = ExecProcNode(outerNode);
00692         }
00693     }
00694     else if (curbatch < hashtable->nbatch)
00695     {
00696         BufFile    *file = hashtable->outerBatchFile[curbatch];
00697 
00698         /*
00699          * In outer-join cases, we could get here even though the batch file
00700          * is empty.
00701          */
00702         if (file == NULL)
00703             return NULL;
00704 
00705         slot = ExecHashJoinGetSavedTuple(hjstate,
00706                                          file,
00707                                          hashvalue,
00708                                          hjstate->hj_OuterTupleSlot);
00709         if (!TupIsNull(slot))
00710             return slot;
00711     }
00712 
00713     /* End of this batch */
00714     return NULL;
00715 }
00716 
00717 /*
00718  * ExecHashJoinNewBatch
00719  *      switch to a new hashjoin batch
00720  *
00721  * Returns true if successful, false if there are no more batches.
00722  */
00723 static bool
00724 ExecHashJoinNewBatch(HashJoinState *hjstate)
00725 {
00726     HashJoinTable hashtable = hjstate->hj_HashTable;
00727     int         nbatch;
00728     int         curbatch;
00729     BufFile    *innerFile;
00730     TupleTableSlot *slot;
00731     uint32      hashvalue;
00732 
00733     nbatch = hashtable->nbatch;
00734     curbatch = hashtable->curbatch;
00735 
00736     if (curbatch > 0)
00737     {
00738         /*
00739          * We no longer need the previous outer batch file; close it right
00740          * away to free disk space.
00741          */
00742         if (hashtable->outerBatchFile[curbatch])
00743             BufFileClose(hashtable->outerBatchFile[curbatch]);
00744         hashtable->outerBatchFile[curbatch] = NULL;
00745     }
00746     else    /* we just finished the first batch */
00747     {
00748         /*
00749          * Reset some of the skew optimization state variables, since we no
00750          * longer need to consider skew tuples after the first batch. The
00751          * memory context reset we are about to do will release the skew
00752          * hashtable itself.
00753          */
00754         hashtable->skewEnabled = false;
00755         hashtable->skewBucket = NULL;
00756         hashtable->skewBucketNums = NULL;
00757         hashtable->nSkewBuckets = 0;
00758         hashtable->spaceUsedSkew = 0;
00759     }
00760 
00761     /*
00762      * We can always skip over any batches that are completely empty on both
00763      * sides.  We can sometimes skip over batches that are empty on only one
00764      * side, but there are exceptions:
00765      *
00766      * 1. In a left/full outer join, we have to process outer batches even if
00767      * the inner batch is empty.  Similarly, in a right/full outer join, we
00768      * have to process inner batches even if the outer batch is empty.
00769      *
00770      * 2. If we have increased nbatch since the initial estimate, we have to
00771      * scan inner batches since they might contain tuples that need to be
00772      * reassigned to later inner batches.
00773      *
00774      * 3. Similarly, if we have increased nbatch since starting the outer
00775      * scan, we have to rescan outer batches in case they contain tuples that
00776      * need to be reassigned.
00777      */
00778     curbatch++;
00779     while (curbatch < nbatch &&
00780            (hashtable->outerBatchFile[curbatch] == NULL ||
00781             hashtable->innerBatchFile[curbatch] == NULL))
00782     {
00783         if (hashtable->outerBatchFile[curbatch] &&
00784             HJ_FILL_OUTER(hjstate))
00785             break;              /* must process due to rule 1 */
00786         if (hashtable->innerBatchFile[curbatch] &&
00787             HJ_FILL_INNER(hjstate))
00788             break;              /* must process due to rule 1 */
00789         if (hashtable->innerBatchFile[curbatch] &&
00790             nbatch != hashtable->nbatch_original)
00791             break;              /* must process due to rule 2 */
00792         if (hashtable->outerBatchFile[curbatch] &&
00793             nbatch != hashtable->nbatch_outstart)
00794             break;              /* must process due to rule 3 */
00795         /* We can ignore this batch. */
00796         /* Release associated temp files right away. */
00797         if (hashtable->innerBatchFile[curbatch])
00798             BufFileClose(hashtable->innerBatchFile[curbatch]);
00799         hashtable->innerBatchFile[curbatch] = NULL;
00800         if (hashtable->outerBatchFile[curbatch])
00801             BufFileClose(hashtable->outerBatchFile[curbatch]);
00802         hashtable->outerBatchFile[curbatch] = NULL;
00803         curbatch++;
00804     }
00805 
00806     if (curbatch >= nbatch)
00807         return false;           /* no more batches */
00808 
00809     hashtable->curbatch = curbatch;
00810 
00811     /*
00812      * Reload the hash table with the new inner batch (which could be empty)
00813      */
00814     ExecHashTableReset(hashtable);
00815 
00816     innerFile = hashtable->innerBatchFile[curbatch];
00817 
00818     if (innerFile != NULL)
00819     {
00820         if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
00821             ereport(ERROR,
00822                     (errcode_for_file_access(),
00823                    errmsg("could not rewind hash-join temporary file: %m")));
00824 
00825         while ((slot = ExecHashJoinGetSavedTuple(hjstate,
00826                                                  innerFile,
00827                                                  &hashvalue,
00828                                                  hjstate->hj_HashTupleSlot)))
00829         {
00830             /*
00831              * NOTE: some tuples may be sent to future batches.  Also, it is
00832              * possible for hashtable->nbatch to be increased here!
00833              */
00834             ExecHashTableInsert(hashtable, slot, hashvalue);
00835         }
00836 
00837         /*
00838          * after we build the hash table, the inner batch file is no longer
00839          * needed
00840          */
00841         BufFileClose(innerFile);
00842         hashtable->innerBatchFile[curbatch] = NULL;
00843     }
00844 
00845     /*
00846      * Rewind outer batch file (if present), so that we can start reading it.
00847      */
00848     if (hashtable->outerBatchFile[curbatch] != NULL)
00849     {
00850         if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
00851             ereport(ERROR,
00852                     (errcode_for_file_access(),
00853                    errmsg("could not rewind hash-join temporary file: %m")));
00854     }
00855 
00856     return true;
00857 }
00858 
00859 /*
00860  * ExecHashJoinSaveTuple
00861  *      save a tuple to a batch file.
00862  *
00863  * The data recorded in the file for each tuple is its hash value,
00864  * then the tuple in MinimalTuple format.
00865  *
00866  * Note: it is important always to call this in the regular executor
00867  * context, not in a shorter-lived context; else the temp file buffers
00868  * will get messed up.
00869  */
00870 void
00871 ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
00872                       BufFile **fileptr)
00873 {
00874     BufFile    *file = *fileptr;
00875     size_t      written;
00876 
00877     if (file == NULL)
00878     {
00879         /* First write to this batch file, so open it. */
00880         file = BufFileCreateTemp(false);
00881         *fileptr = file;
00882     }
00883 
00884     written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
00885     if (written != sizeof(uint32))
00886         ereport(ERROR,
00887                 (errcode_for_file_access(),
00888                  errmsg("could not write to hash-join temporary file: %m")));
00889 
00890     written = BufFileWrite(file, (void *) tuple, tuple->t_len);
00891     if (written != tuple->t_len)
00892         ereport(ERROR,
00893                 (errcode_for_file_access(),
00894                  errmsg("could not write to hash-join temporary file: %m")));
00895 }
00896 
00897 /*
00898  * ExecHashJoinGetSavedTuple
00899  *      read the next tuple from a batch file.  Return NULL if no more.
00900  *
00901  * On success, *hashvalue is set to the tuple's hash value, and the tuple
00902  * itself is stored in the given slot.
00903  */
00904 static TupleTableSlot *
00905 ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
00906                           BufFile *file,
00907                           uint32 *hashvalue,
00908                           TupleTableSlot *tupleSlot)
00909 {
00910     uint32      header[2];
00911     size_t      nread;
00912     MinimalTuple tuple;
00913 
00914     /*
00915      * Since both the hash value and the MinimalTuple length word are uint32,
00916      * we can read them both in one BufFileRead() call without any type
00917      * cheating.
00918      */
00919     nread = BufFileRead(file, (void *) header, sizeof(header));
00920     if (nread == 0)             /* end of file */
00921     {
00922         ExecClearTuple(tupleSlot);
00923         return NULL;
00924     }
00925     if (nread != sizeof(header))
00926         ereport(ERROR,
00927                 (errcode_for_file_access(),
00928                  errmsg("could not read from hash-join temporary file: %m")));
00929     *hashvalue = header[0];
00930     tuple = (MinimalTuple) palloc(header[1]);
00931     tuple->t_len = header[1];
00932     nread = BufFileRead(file,
00933                         (void *) ((char *) tuple + sizeof(uint32)),
00934                         header[1] - sizeof(uint32));
00935     if (nread != header[1] - sizeof(uint32))
00936         ereport(ERROR,
00937                 (errcode_for_file_access(),
00938                  errmsg("could not read from hash-join temporary file: %m")));
00939     return ExecStoreMinimalTuple(tuple, tupleSlot, true);
00940 }
00941 
00942 
00943 void
00944 ExecReScanHashJoin(HashJoinState *node)
00945 {
00946     /*
00947      * In a multi-batch join, we currently have to do rescans the hard way,
00948      * primarily because batch temp files may have already been released. But
00949      * if it's a single-batch join, and there is no parameter change for the
00950      * inner subnode, then we can just re-use the existing hash table without
00951      * rebuilding it.
00952      */
00953     if (node->hj_HashTable != NULL)
00954     {
00955         if (node->hj_HashTable->nbatch == 1 &&
00956             node->js.ps.righttree->chgParam == NULL)
00957         {
00958             /*
00959              * Okay to reuse the hash table; needn't rescan inner, either.
00960              *
00961              * However, if it's a right/full join, we'd better reset the
00962              * inner-tuple match flags contained in the table.
00963              */
00964             if (HJ_FILL_INNER(node))
00965                 ExecHashTableResetMatchFlags(node->hj_HashTable);
00966 
00967             /*
00968              * Also, we need to reset our state about the emptiness of the
00969              * outer relation, so that the new scan of the outer will update
00970              * it correctly if it turns out to be empty this time. (There's no
00971              * harm in clearing it now because ExecHashJoin won't need the
00972              * info.  In the other cases, where the hash table doesn't exist
00973              * or we are destroying it, we leave this state alone because
00974              * ExecHashJoin will need it the first time through.)
00975              */
00976             node->hj_OuterNotEmpty = false;
00977 
00978             /* ExecHashJoin can skip the BUILD_HASHTABLE step */
00979             node->hj_JoinState = HJ_NEED_NEW_OUTER;
00980         }
00981         else
00982         {
00983             /* must destroy and rebuild hash table */
00984             ExecHashTableDestroy(node->hj_HashTable);
00985             node->hj_HashTable = NULL;
00986             node->hj_JoinState = HJ_BUILD_HASHTABLE;
00987 
00988             /*
00989              * if chgParam of subnode is not null then plan will be re-scanned
00990              * by first ExecProcNode.
00991              */
00992             if (node->js.ps.righttree->chgParam == NULL)
00993                 ExecReScan(node->js.ps.righttree);
00994         }
00995     }
00996 
00997     /* Always reset intra-tuple state */
00998     node->hj_CurHashValue = 0;
00999     node->hj_CurBucketNo = 0;
01000     node->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
01001     node->hj_CurTuple = NULL;
01002 
01003     node->js.ps.ps_TupFromTlist = false;
01004     node->hj_MatchedOuter = false;
01005     node->hj_FirstOuterTupleSlot = NULL;
01006 
01007     /*
01008      * if chgParam of subnode is not null then plan will be re-scanned by
01009      * first ExecProcNode.
01010      */
01011     if (node->js.ps.lefttree->chgParam == NULL)
01012         ExecReScan(node->js.ps.lefttree);
01013 }