#include "postgres.h"#include "access/htup_details.h"#include "executor/executor.h"#include "executor/hashjoin.h"#include "executor/nodeHash.h"#include "executor/nodeHashjoin.h"#include "miscadmin.h"#include "utils/memutils.h"
Go to the source code of this file.
Defines | |
| #define | HJ_BUILD_HASHTABLE 1 |
| #define | HJ_NEED_NEW_OUTER 2 |
| #define | HJ_SCAN_BUCKET 3 |
| #define | HJ_FILL_OUTER_TUPLE 4 |
| #define | HJ_FILL_INNER_TUPLES 5 |
| #define | HJ_NEED_NEW_BATCH 6 |
| #define | HJ_FILL_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL) |
| #define | HJ_FILL_INNER(hjstate) ((hjstate)->hj_NullOuterTupleSlot != NULL) |
Functions | |
| static TupleTableSlot * | ExecHashJoinOuterGetTuple (PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue) |
| static TupleTableSlot * | ExecHashJoinGetSavedTuple (HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot) |
| static bool | ExecHashJoinNewBatch (HashJoinState *hjstate) |
| TupleTableSlot * | ExecHashJoin (HashJoinState *node) |
| HashJoinState * | ExecInitHashJoin (HashJoin *node, EState *estate, int eflags) |
| void | ExecEndHashJoin (HashJoinState *node) |
| void | ExecHashJoinSaveTuple (MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr) |
| void | ExecReScanHashJoin (HashJoinState *node) |
| #define HJ_BUILD_HASHTABLE 1 |
Definition at line 30 of file nodeHashjoin.c.
Referenced by ExecHashJoin().
| #define HJ_FILL_INNER | ( | hjstate | ) | ((hjstate)->hj_NullOuterTupleSlot != NULL) |
Definition at line 40 of file nodeHashjoin.c.
Referenced by ExecHashJoin(), ExecHashJoinNewBatch(), and ExecReScanHashJoin().
| #define HJ_FILL_INNER_TUPLES 5 |
Definition at line 34 of file nodeHashjoin.c.
Referenced by ExecHashJoin().
| #define HJ_FILL_OUTER | ( | hjstate | ) | ((hjstate)->hj_NullInnerTupleSlot != NULL) |
Definition at line 38 of file nodeHashjoin.c.
Referenced by ExecHashJoin(), ExecHashJoinNewBatch(), and ExecHashJoinOuterGetTuple().
| #define HJ_FILL_OUTER_TUPLE 4 |
Definition at line 33 of file nodeHashjoin.c.
Referenced by ExecHashJoin().
| #define HJ_NEED_NEW_BATCH 6 |
Definition at line 35 of file nodeHashjoin.c.
Referenced by ExecHashJoin().
| #define HJ_NEED_NEW_OUTER 2 |
Definition at line 31 of file nodeHashjoin.c.
Referenced by ExecHashJoin().
| #define HJ_SCAN_BUCKET 3 |
Definition at line 32 of file nodeHashjoin.c.
Referenced by ExecHashJoin().
| void ExecEndHashJoin | ( | HashJoinState * | node | ) |
Definition at line 604 of file nodeHashjoin.c.
References ExecClearTuple(), ExecEndNode(), ExecFreeExprContext(), ExecHashTableDestroy(), HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HashJoinState::hj_OuterTupleSlot, innerPlanState, HashJoinState::js, outerPlanState, JoinState::ps, and PlanState::ps_ResultTupleSlot.
Referenced by ExecEndNode().
{
/*
* Free hash table
*/
if (node->hj_HashTable)
{
ExecHashTableDestroy(node->hj_HashTable);
node->hj_HashTable = NULL;
}
/*
* Free the exprcontext
*/
ExecFreeExprContext(&node->js.ps);
/*
* clean out the tuple table
*/
ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
ExecClearTuple(node->hj_OuterTupleSlot);
ExecClearTuple(node->hj_HashTupleSlot);
/*
* clean up subtrees
*/
ExecEndNode(outerPlanState(node));
ExecEndNode(innerPlanState(node));
}
| TupleTableSlot* ExecHashJoin | ( | HashJoinState * | node | ) |
Definition at line 62 of file nodeHashjoin.c.
References Assert, CHECK_FOR_INTERRUPTS, HashJoinTableData::curbatch, ExprContext::ecxt_innertuple, ExprContext::ecxt_outertuple, elog, ERROR, ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecHashGetSkewBucket(), ExecHashJoinNewBatch(), ExecHashJoinOuterGetTuple(), ExecHashJoinSaveTuple(), ExecHashTableCreate(), ExecPrepHashTableForUnmatched(), ExecProcNode(), ExecProject(), ExecQual(), ExecScanHashBucket(), ExecScanHashTableForUnmatched(), ExprEndResult, ExprMultipleResult, HashState::hashtable, HeapTupleHeaderSetMatch, HJ_BUILD_HASHTABLE, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HJ_FILL_INNER_TUPLES, HJ_FILL_OUTER, HJ_FILL_OUTER_TUPLE, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashOperators, HashJoinState::hj_HashTable, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HJ_NEED_NEW_BATCH, HJ_NEED_NEW_OUTER, HashJoinState::hj_NullInnerTupleSlot, HashJoinState::hj_NullOuterTupleSlot, HashJoinState::hj_OuterNotEmpty, HJ_SCAN_BUCKET, HJTUPLE_MINTUPLE, innerPlanState, InstrCountFiltered1, InstrCountFiltered2, INVALID_SKEW_BUCKET_NO, JOIN_ANTI, JOIN_SEMI, JoinState::joinqual, JoinState::jointype, HashJoinState::js, MultiExecProcNode(), HashJoinTableData::nbatch, HashJoinTableData::nbatch_outstart, NIL, NULL, HashJoinTableData::outerBatchFile, outerPlanState, PlanState::plan, HashState::ps, JoinState::ps, PlanState::ps_ExprContext, PlanState::ps_ProjInfo, PlanState::ps_TupFromTlist, PlanState::qual, ResetExprContext, Plan::startup_cost, Plan::total_cost, HashJoinTableData::totalTuples, and TupIsNull.
Referenced by ExecProcNode().
{
PlanState *outerNode;
HashState *hashNode;
List *joinqual;
List *otherqual;
ExprContext *econtext;
ExprDoneCond isDone;
HashJoinTable hashtable;
TupleTableSlot *outerTupleSlot;
uint32 hashvalue;
int batchno;
/*
* get information from HashJoin node
*/
joinqual = node->js.joinqual;
otherqual = node->js.ps.qual;
hashNode = (HashState *) innerPlanState(node);
outerNode = outerPlanState(node);
hashtable = node->hj_HashTable;
econtext = node->js.ps.ps_ExprContext;
/*
* Check to see if we're still projecting out tuples from a previous join
* tuple (because there is a function-returning-set in the projection
* expressions). If so, try to project another one.
*/
if (node->js.ps.ps_TupFromTlist)
{
TupleTableSlot *result;
result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
if (isDone == ExprMultipleResult)
return result;
/* Done with that source tuple... */
node->js.ps.ps_TupFromTlist = false;
}
/*
* Reset per-tuple memory context to free any expression evaluation
* storage allocated in the previous tuple cycle. Note this can't happen
* until we're done projecting out tuples from a join tuple.
*/
ResetExprContext(econtext);
/*
* run the hash join state machine
*/
for (;;)
{
switch (node->hj_JoinState)
{
case HJ_BUILD_HASHTABLE:
/*
* First time through: build hash table for inner relation.
*/
Assert(hashtable == NULL);
/*
* If the outer relation is completely empty, and it's not
* right/full join, we can quit without building the hash
* table. However, for an inner join it is only a win to
* check this when the outer relation's startup cost is less
* than the projected cost of building the hash table.
* Otherwise it's best to build the hash table first and see
* if the inner relation is empty. (When it's a left join, we
* should always make this check, since we aren't going to be
* able to skip the join on the strength of an empty inner
* relation anyway.)
*
* If we are rescanning the join, we make use of information
* gained on the previous scan: don't bother to try the
* prefetch if the previous scan found the outer relation
* nonempty. This is not 100% reliable since with new
* parameters the outer relation might yield different
* results, but it's a good heuristic.
*
* The only way to make the check is to try to fetch a tuple
* from the outer plan node. If we succeed, we have to stash
* it away for later consumption by ExecHashJoinOuterGetTuple.
*/
if (HJ_FILL_INNER(node))
{
/* no chance to not build the hash table */
node->hj_FirstOuterTupleSlot = NULL;
}
else if (HJ_FILL_OUTER(node) ||
(outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
!node->hj_OuterNotEmpty))
{
node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
if (TupIsNull(node->hj_FirstOuterTupleSlot))
{
node->hj_OuterNotEmpty = false;
return NULL;
}
else
node->hj_OuterNotEmpty = true;
}
else
node->hj_FirstOuterTupleSlot = NULL;
/*
* create the hash table
*/
hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
node->hj_HashOperators,
HJ_FILL_INNER(node));
node->hj_HashTable = hashtable;
/*
* execute the Hash node, to build the hash table
*/
hashNode->hashtable = hashtable;
(void) MultiExecProcNode((PlanState *) hashNode);
/*
* If the inner relation is completely empty, and we're not
* doing a left outer join, we can quit without scanning the
* outer relation.
*/
if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
return NULL;
/*
* need to remember whether nbatch has increased since we
* began scanning the outer relation
*/
hashtable->nbatch_outstart = hashtable->nbatch;
/*
* Reset OuterNotEmpty for scan. (It's OK if we fetched a
* tuple above, because ExecHashJoinOuterGetTuple will
* immediately set it again.)
*/
node->hj_OuterNotEmpty = false;
node->hj_JoinState = HJ_NEED_NEW_OUTER;
/* FALL THRU */
case HJ_NEED_NEW_OUTER:
/*
* We don't have an outer tuple, try to get the next one
*/
outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
node,
&hashvalue);
if (TupIsNull(outerTupleSlot))
{
/* end of batch, or maybe whole join */
if (HJ_FILL_INNER(node))
{
/* set up to scan for unmatched inner tuples */
ExecPrepHashTableForUnmatched(node);
node->hj_JoinState = HJ_FILL_INNER_TUPLES;
}
else
node->hj_JoinState = HJ_NEED_NEW_BATCH;
continue;
}
econtext->ecxt_outertuple = outerTupleSlot;
node->hj_MatchedOuter = false;
/*
* Find the corresponding bucket for this tuple in the main
* hash table or skew hash table.
*/
node->hj_CurHashValue = hashvalue;
ExecHashGetBucketAndBatch(hashtable, hashvalue,
&node->hj_CurBucketNo, &batchno);
node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
hashvalue);
node->hj_CurTuple = NULL;
/*
* The tuple might not belong to the current batch (where
* "current batch" includes the skew buckets if any).
*/
if (batchno != hashtable->curbatch &&
node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
{
/*
* Need to postpone this outer tuple to a later batch.
* Save it in the corresponding outer-batch file.
*/
Assert(batchno > hashtable->curbatch);
ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
hashvalue,
&hashtable->outerBatchFile[batchno]);
/* Loop around, staying in HJ_NEED_NEW_OUTER state */
continue;
}
/* OK, let's scan the bucket for matches */
node->hj_JoinState = HJ_SCAN_BUCKET;
/* FALL THRU */
case HJ_SCAN_BUCKET:
/*
* We check for interrupts here because this corresponds to
* where we'd fetch a row from a child plan node in other join
* types.
*/
CHECK_FOR_INTERRUPTS();
/*
* Scan the selected hash bucket for matches to current outer
*/
if (!ExecScanHashBucket(node, econtext))
{
/* out of matches; check for possible outer-join fill */
node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
continue;
}
/*
* We've got a match, but still need to test non-hashed quals.
* ExecScanHashBucket already set up all the state needed to
* call ExecQual.
*
* If we pass the qual, then save state for next call and have
* ExecProject form the projection, store it in the tuple
* table, and return the slot.
*
* Only the joinquals determine tuple match status, but all
* quals must pass to actually return the tuple.
*/
if (joinqual == NIL || ExecQual(joinqual, econtext, false))
{
node->hj_MatchedOuter = true;
HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
/* In an antijoin, we never return a matched tuple */
if (node->js.jointype == JOIN_ANTI)
{
node->hj_JoinState = HJ_NEED_NEW_OUTER;
continue;
}
/*
* In a semijoin, we'll consider returning the first
* match, but after that we're done with this outer tuple.
*/
if (node->js.jointype == JOIN_SEMI)
node->hj_JoinState = HJ_NEED_NEW_OUTER;
if (otherqual == NIL ||
ExecQual(otherqual, econtext, false))
{
TupleTableSlot *result;
result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
if (isDone != ExprEndResult)
{
node->js.ps.ps_TupFromTlist =
(isDone == ExprMultipleResult);
return result;
}
}
else
InstrCountFiltered2(node, 1);
}
else
InstrCountFiltered1(node, 1);
break;
case HJ_FILL_OUTER_TUPLE:
/*
* The current outer tuple has run out of matches, so check
* whether to emit a dummy outer-join tuple. Whether we emit
* one or not, the next state is NEED_NEW_OUTER.
*/
node->hj_JoinState = HJ_NEED_NEW_OUTER;
if (!node->hj_MatchedOuter &&
HJ_FILL_OUTER(node))
{
/*
* Generate a fake join tuple with nulls for the inner
* tuple, and return it if it passes the non-join quals.
*/
econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
if (otherqual == NIL ||
ExecQual(otherqual, econtext, false))
{
TupleTableSlot *result;
result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
if (isDone != ExprEndResult)
{
node->js.ps.ps_TupFromTlist =
(isDone == ExprMultipleResult);
return result;
}
}
else
InstrCountFiltered2(node, 1);
}
break;
case HJ_FILL_INNER_TUPLES:
/*
* We have finished a batch, but we are doing right/full join,
* so any unmatched inner tuples in the hashtable have to be
* emitted before we continue to the next batch.
*/
if (!ExecScanHashTableForUnmatched(node, econtext))
{
/* no more unmatched tuples */
node->hj_JoinState = HJ_NEED_NEW_BATCH;
continue;
}
/*
* Generate a fake join tuple with nulls for the outer tuple,
* and return it if it passes the non-join quals.
*/
econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
if (otherqual == NIL ||
ExecQual(otherqual, econtext, false))
{
TupleTableSlot *result;
result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
if (isDone != ExprEndResult)
{
node->js.ps.ps_TupFromTlist =
(isDone == ExprMultipleResult);
return result;
}
}
else
InstrCountFiltered2(node, 1);
break;
case HJ_NEED_NEW_BATCH:
/*
* Try to advance to next batch. Done if there are no more.
*/
if (!ExecHashJoinNewBatch(node))
return NULL; /* end of join */
node->hj_JoinState = HJ_NEED_NEW_OUTER;
break;
default:
elog(ERROR, "unrecognized hashjoin state: %d",
(int) node->hj_JoinState);
}
}
}
| static TupleTableSlot * ExecHashJoinGetSavedTuple | ( | HashJoinState * | hjstate, | |
| BufFile * | file, | |||
| uint32 * | hashvalue, | |||
| TupleTableSlot * | tupleSlot | |||
| ) | [static] |
Definition at line 905 of file nodeHashjoin.c.
References BufFileRead(), ereport, errcode_for_file_access(), errmsg(), ERROR, ExecClearTuple(), ExecStoreMinimalTuple(), header(), palloc(), and MinimalTupleData::t_len.
Referenced by ExecHashJoinNewBatch(), and ExecHashJoinOuterGetTuple().
{
uint32 header[2];
size_t nread;
MinimalTuple tuple;
/*
* Since both the hash value and the MinimalTuple length word are uint32,
* we can read them both in one BufFileRead() call without any type
* cheating.
*/
nread = BufFileRead(file, (void *) header, sizeof(header));
if (nread == 0) /* end of file */
{
ExecClearTuple(tupleSlot);
return NULL;
}
if (nread != sizeof(header))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read from hash-join temporary file: %m")));
*hashvalue = header[0];
tuple = (MinimalTuple) palloc(header[1]);
tuple->t_len = header[1];
nread = BufFileRead(file,
(void *) ((char *) tuple + sizeof(uint32)),
header[1] - sizeof(uint32));
if (nread != header[1] - sizeof(uint32))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read from hash-join temporary file: %m")));
return ExecStoreMinimalTuple(tuple, tupleSlot, true);
}
| static bool ExecHashJoinNewBatch | ( | HashJoinState * | hjstate | ) | [static] |
Definition at line 724 of file nodeHashjoin.c.
References BufFileClose(), BufFileSeek(), HashJoinTableData::curbatch, ereport, errcode_for_file_access(), errmsg(), ERROR, ExecHashJoinGetSavedTuple(), ExecHashTableInsert(), ExecHashTableReset(), HJ_FILL_INNER, HJ_FILL_OUTER, HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HashJoinTableData::innerBatchFile, HashJoinTableData::nbatch, HashJoinTableData::nbatch_original, HashJoinTableData::nbatch_outstart, HashJoinTableData::nSkewBuckets, NULL, HashJoinTableData::outerBatchFile, HashJoinTableData::skewBucket, HashJoinTableData::skewBucketNums, HashJoinTableData::skewEnabled, and HashJoinTableData::spaceUsedSkew.
Referenced by ExecHashJoin().
{
HashJoinTable hashtable = hjstate->hj_HashTable;
int nbatch;
int curbatch;
BufFile *innerFile;
TupleTableSlot *slot;
uint32 hashvalue;
nbatch = hashtable->nbatch;
curbatch = hashtable->curbatch;
if (curbatch > 0)
{
/*
* We no longer need the previous outer batch file; close it right
* away to free disk space.
*/
if (hashtable->outerBatchFile[curbatch])
BufFileClose(hashtable->outerBatchFile[curbatch]);
hashtable->outerBatchFile[curbatch] = NULL;
}
else /* we just finished the first batch */
{
/*
* Reset some of the skew optimization state variables, since we no
* longer need to consider skew tuples after the first batch. The
* memory context reset we are about to do will release the skew
* hashtable itself.
*/
hashtable->skewEnabled = false;
hashtable->skewBucket = NULL;
hashtable->skewBucketNums = NULL;
hashtable->nSkewBuckets = 0;
hashtable->spaceUsedSkew = 0;
}
/*
* We can always skip over any batches that are completely empty on both
* sides. We can sometimes skip over batches that are empty on only one
* side, but there are exceptions:
*
* 1. In a left/full outer join, we have to process outer batches even if
* the inner batch is empty. Similarly, in a right/full outer join, we
* have to process inner batches even if the outer batch is empty.
*
* 2. If we have increased nbatch since the initial estimate, we have to
* scan inner batches since they might contain tuples that need to be
* reassigned to later inner batches.
*
* 3. Similarly, if we have increased nbatch since starting the outer
* scan, we have to rescan outer batches in case they contain tuples that
* need to be reassigned.
*/
curbatch++;
while (curbatch < nbatch &&
(hashtable->outerBatchFile[curbatch] == NULL ||
hashtable->innerBatchFile[curbatch] == NULL))
{
if (hashtable->outerBatchFile[curbatch] &&
HJ_FILL_OUTER(hjstate))
break; /* must process due to rule 1 */
if (hashtable->innerBatchFile[curbatch] &&
HJ_FILL_INNER(hjstate))
break; /* must process due to rule 1 */
if (hashtable->innerBatchFile[curbatch] &&
nbatch != hashtable->nbatch_original)
break; /* must process due to rule 2 */
if (hashtable->outerBatchFile[curbatch] &&
nbatch != hashtable->nbatch_outstart)
break; /* must process due to rule 3 */
/* We can ignore this batch. */
/* Release associated temp files right away. */
if (hashtable->innerBatchFile[curbatch])
BufFileClose(hashtable->innerBatchFile[curbatch]);
hashtable->innerBatchFile[curbatch] = NULL;
if (hashtable->outerBatchFile[curbatch])
BufFileClose(hashtable->outerBatchFile[curbatch]);
hashtable->outerBatchFile[curbatch] = NULL;
curbatch++;
}
if (curbatch >= nbatch)
return false; /* no more batches */
hashtable->curbatch = curbatch;
/*
* Reload the hash table with the new inner batch (which could be empty)
*/
ExecHashTableReset(hashtable);
innerFile = hashtable->innerBatchFile[curbatch];
if (innerFile != NULL)
{
if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not rewind hash-join temporary file: %m")));
while ((slot = ExecHashJoinGetSavedTuple(hjstate,
innerFile,
&hashvalue,
hjstate->hj_HashTupleSlot)))
{
/*
* NOTE: some tuples may be sent to future batches. Also, it is
* possible for hashtable->nbatch to be increased here!
*/
ExecHashTableInsert(hashtable, slot, hashvalue);
}
/*
* after we build the hash table, the inner batch file is no longer
* needed
*/
BufFileClose(innerFile);
hashtable->innerBatchFile[curbatch] = NULL;
}
/*
* Rewind outer batch file (if present), so that we can start reading it.
*/
if (hashtable->outerBatchFile[curbatch] != NULL)
{
if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not rewind hash-join temporary file: %m")));
}
return true;
}
| static TupleTableSlot * ExecHashJoinOuterGetTuple | ( | PlanState * | outerNode, | |
| HashJoinState * | hjstate, | |||
| uint32 * | hashvalue | |||
| ) | [static] |
Definition at line 647 of file nodeHashjoin.c.
References HashJoinTableData::curbatch, ExprContext::ecxt_outertuple, ExecHashGetHashValue(), ExecHashJoinGetSavedTuple(), ExecProcNode(), HJ_FILL_OUTER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_OuterHashKeys, HashJoinState::hj_OuterNotEmpty, HashJoinState::hj_OuterTupleSlot, HashJoinState::js, NULL, HashJoinTableData::outerBatchFile, JoinState::ps, PlanState::ps_ExprContext, and TupIsNull.
Referenced by ExecHashJoin().
{
HashJoinTable hashtable = hjstate->hj_HashTable;
int curbatch = hashtable->curbatch;
TupleTableSlot *slot;
if (curbatch == 0) /* if it is the first pass */
{
/*
* Check to see if first outer tuple was already fetched by
* ExecHashJoin() and not used yet.
*/
slot = hjstate->hj_FirstOuterTupleSlot;
if (!TupIsNull(slot))
hjstate->hj_FirstOuterTupleSlot = NULL;
else
slot = ExecProcNode(outerNode);
while (!TupIsNull(slot))
{
/*
* We have to compute the tuple's hash value.
*/
ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
econtext->ecxt_outertuple = slot;
if (ExecHashGetHashValue(hashtable, econtext,
hjstate->hj_OuterHashKeys,
true, /* outer tuple */
HJ_FILL_OUTER(hjstate),
hashvalue))
{
/* remember outer relation is not empty for possible rescan */
hjstate->hj_OuterNotEmpty = true;
return slot;
}
/*
* That tuple couldn't match because of a NULL, so discard it and
* continue with the next one.
*/
slot = ExecProcNode(outerNode);
}
}
else if (curbatch < hashtable->nbatch)
{
BufFile *file = hashtable->outerBatchFile[curbatch];
/*
* In outer-join cases, we could get here even though the batch file
* is empty.
*/
if (file == NULL)
return NULL;
slot = ExecHashJoinGetSavedTuple(hjstate,
file,
hashvalue,
hjstate->hj_OuterTupleSlot);
if (!TupIsNull(slot))
return slot;
}
/* End of this batch */
return NULL;
}
| void ExecHashJoinSaveTuple | ( | MinimalTuple | tuple, | |
| uint32 | hashvalue, | |||
| BufFile ** | fileptr | |||
| ) |
Definition at line 871 of file nodeHashjoin.c.
References BufFileCreateTemp(), BufFileWrite(), ereport, errcode_for_file_access(), errmsg(), ERROR, NULL, and MinimalTupleData::t_len.
Referenced by ExecHashIncreaseNumBatches(), ExecHashJoin(), ExecHashRemoveNextSkewBucket(), and ExecHashTableInsert().
{
BufFile *file = *fileptr;
size_t written;
if (file == NULL)
{
/* First write to this batch file, so open it. */
file = BufFileCreateTemp(false);
*fileptr = file;
}
written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
if (written != sizeof(uint32))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to hash-join temporary file: %m")));
written = BufFileWrite(file, (void *) tuple, tuple->t_len);
if (written != tuple->t_len)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to hash-join temporary file: %m")));
}
| HashJoinState* ExecInitHashJoin | ( | HashJoin * | node, | |
| EState * | estate, | |||
| int | eflags | |||
| ) |
Definition at line 435 of file nodeHashjoin.c.
References FuncExprState::args, Assert, elog, ERROR, EXEC_FLAG_BACKWARD, EXEC_FLAG_MARK, ExecAssignExprContext(), ExecAssignProjectionInfo(), ExecAssignResultTypeFromTL(), ExecGetResultType(), ExecInitExpr(), ExecInitExtraTupleSlot(), ExecInitNode(), ExecInitNullTupleSlot(), ExecInitResultTupleSlot(), ExecSetSlotDescriptor(), ExprState::expr, HashJoin::hashclauses, HashJoinState::hashclauses, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashOperators, HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HashJoinState::hj_InnerHashKeys, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HashJoinState::hj_NullInnerTupleSlot, HashJoinState::hj_NullOuterTupleSlot, HashJoinState::hj_OuterHashKeys, HashJoinState::hj_OuterNotEmpty, HashJoinState::hj_OuterTupleSlot, innerPlan, innerPlanState, IsA, HashJoin::join, JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_RIGHT, JOIN_SEMI, Join::joinqual, JoinState::joinqual, Join::jointype, JoinState::jointype, HashJoinState::js, lappend(), lappend_oid(), lfirst, linitial, lsecond, makeNode, NULL, OpExpr::opno, outerPlan, outerPlanState, Join::plan, PlanState::plan, HashState::ps, JoinState::ps, PlanState::ps_ResultTupleSlot, PlanState::ps_TupFromTlist, Plan::qual, PlanState::qual, PlanState::state, Plan::targetlist, PlanState::targetlist, and FuncExprState::xprstate.
Referenced by ExecInitNode().
{
HashJoinState *hjstate;
Plan *outerNode;
Hash *hashNode;
List *lclauses;
List *rclauses;
List *hoperators;
ListCell *l;
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
/*
* create state structure
*/
hjstate = makeNode(HashJoinState);
hjstate->js.ps.plan = (Plan *) node;
hjstate->js.ps.state = estate;
/*
* Miscellaneous initialization
*
* create expression context for node
*/
ExecAssignExprContext(estate, &hjstate->js.ps);
/*
* initialize child expressions
*/
hjstate->js.ps.targetlist = (List *)
ExecInitExpr((Expr *) node->join.plan.targetlist,
(PlanState *) hjstate);
hjstate->js.ps.qual = (List *)
ExecInitExpr((Expr *) node->join.plan.qual,
(PlanState *) hjstate);
hjstate->js.jointype = node->join.jointype;
hjstate->js.joinqual = (List *)
ExecInitExpr((Expr *) node->join.joinqual,
(PlanState *) hjstate);
hjstate->hashclauses = (List *)
ExecInitExpr((Expr *) node->hashclauses,
(PlanState *) hjstate);
/*
* initialize child nodes
*
* Note: we could suppress the REWIND flag for the inner input, which
* would amount to betting that the hash will be a single batch. Not
* clear if this would be a win or not.
*/
outerNode = outerPlan(node);
hashNode = (Hash *) innerPlan(node);
outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
/*
* tuple table initialization
*/
ExecInitResultTupleSlot(estate, &hjstate->js.ps);
hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
/* set up null tuples for outer joins, if needed */
switch (node->join.jointype)
{
case JOIN_INNER:
case JOIN_SEMI:
break;
case JOIN_LEFT:
case JOIN_ANTI:
hjstate->hj_NullInnerTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(innerPlanState(hjstate)));
break;
case JOIN_RIGHT:
hjstate->hj_NullOuterTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(outerPlanState(hjstate)));
break;
case JOIN_FULL:
hjstate->hj_NullOuterTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(outerPlanState(hjstate)));
hjstate->hj_NullInnerTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(innerPlanState(hjstate)));
break;
default:
elog(ERROR, "unrecognized join type: %d",
(int) node->join.jointype);
}
/*
* now for some voodoo. our temporary tuple slot is actually the result
* tuple slot of the Hash node (which is our inner plan). we can do this
* because Hash nodes don't return tuples via ExecProcNode() -- instead
* the hash join node uses ExecScanHashBucket() to get at the contents of
* the hash table. -cim 6/9/91
*/
{
HashState *hashstate = (HashState *) innerPlanState(hjstate);
TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
hjstate->hj_HashTupleSlot = slot;
}
/*
* initialize tuple type and projection info
*/
ExecAssignResultTypeFromTL(&hjstate->js.ps);
ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
ExecSetSlotDescriptor(hjstate->hj_OuterTupleSlot,
ExecGetResultType(outerPlanState(hjstate)));
/*
* initialize hash-specific info
*/
hjstate->hj_HashTable = NULL;
hjstate->hj_FirstOuterTupleSlot = NULL;
hjstate->hj_CurHashValue = 0;
hjstate->hj_CurBucketNo = 0;
hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
hjstate->hj_CurTuple = NULL;
/*
* Deconstruct the hash clauses into outer and inner argument values, so
* that we can evaluate those subexpressions separately. Also make a list
* of the hash operator OIDs, in preparation for looking up the hash
* functions to use.
*/
lclauses = NIL;
rclauses = NIL;
hoperators = NIL;
foreach(l, hjstate->hashclauses)
{
FuncExprState *fstate = (FuncExprState *) lfirst(l);
OpExpr *hclause;
Assert(IsA(fstate, FuncExprState));
hclause = (OpExpr *) fstate->xprstate.expr;
Assert(IsA(hclause, OpExpr));
lclauses = lappend(lclauses, linitial(fstate->args));
rclauses = lappend(rclauses, lsecond(fstate->args));
hoperators = lappend_oid(hoperators, hclause->opno);
}
hjstate->hj_OuterHashKeys = lclauses;
hjstate->hj_InnerHashKeys = rclauses;
hjstate->hj_HashOperators = hoperators;
/* child Hash node needs to evaluate inner hash keys, too */
((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
hjstate->js.ps.ps_TupFromTlist = false;
hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
hjstate->hj_MatchedOuter = false;
hjstate->hj_OuterNotEmpty = false;
return hjstate;
}
| void ExecReScanHashJoin | ( | HashJoinState * | node | ) |
Definition at line 944 of file nodeHashjoin.c.
References PlanState::chgParam, ExecHashTableDestroy(), ExecHashTableResetMatchFlags(), ExecReScan(), HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HashJoinState::hj_OuterNotEmpty, HashJoinState::js, PlanState::lefttree, HashJoinTableData::nbatch, NULL, JoinState::ps, PlanState::ps_TupFromTlist, and PlanState::righttree.
Referenced by ExecReScan().
{
/*
* In a multi-batch join, we currently have to do rescans the hard way,
* primarily because batch temp files may have already been released. But
* if it's a single-batch join, and there is no parameter change for the
* inner subnode, then we can just re-use the existing hash table without
* rebuilding it.
*/
if (node->hj_HashTable != NULL)
{
if (node->hj_HashTable->nbatch == 1 &&
node->js.ps.righttree->chgParam == NULL)
{
/*
* Okay to reuse the hash table; needn't rescan inner, either.
*
* However, if it's a right/full join, we'd better reset the
* inner-tuple match flags contained in the table.
*/
if (HJ_FILL_INNER(node))
ExecHashTableResetMatchFlags(node->hj_HashTable);
/*
* Also, we need to reset our state about the emptiness of the
* outer relation, so that the new scan of the outer will update
* it correctly if it turns out to be empty this time. (There's no
* harm in clearing it now because ExecHashJoin won't need the
* info. In the other cases, where the hash table doesn't exist
* or we are destroying it, we leave this state alone because
* ExecHashJoin will need it the first time through.)
*/
node->hj_OuterNotEmpty = false;
/* ExecHashJoin can skip the BUILD_HASHTABLE step */
node->hj_JoinState = HJ_NEED_NEW_OUTER;
}
else
{
/* must destroy and rebuild hash table */
ExecHashTableDestroy(node->hj_HashTable);
node->hj_HashTable = NULL;
node->hj_JoinState = HJ_BUILD_HASHTABLE;
/*
* if chgParam of subnode is not null then plan will be re-scanned
* by first ExecProcNode.
*/
if (node->js.ps.righttree->chgParam == NULL)
ExecReScan(node->js.ps.righttree);
}
}
/* Always reset intra-tuple state */
node->hj_CurHashValue = 0;
node->hj_CurBucketNo = 0;
node->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
node->hj_CurTuple = NULL;
node->js.ps.ps_TupFromTlist = false;
node->hj_MatchedOuter = false;
node->hj_FirstOuterTupleSlot = NULL;
/*
* if chgParam of subnode is not null then plan will be re-scanned by
* first ExecProcNode.
*/
if (node->js.ps.lefttree->chgParam == NULL)
ExecReScan(node->js.ps.lefttree);
}
1.7.1