#include "postgres.h"#include <math.h>#include "executor/executor.h"#include "optimizer/cost.h"#include "optimizer/pathnode.h"#include "optimizer/paths.h"
Go to the source code of this file.
Defines | |
| #define | PATH_PARAM_BY_REL(path, rel) ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) |
Functions | |
| static void | sort_inner_and_outer (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype, SpecialJoinInfo *sjinfo, Relids param_source_rels) |
| static void | match_unsorted_outer (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels) |
| static void | hash_inner_and_outer (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels) |
| static List * | select_mergejoin_clauses (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, bool *mergejoin_allowed) |
| void | add_paths_to_joinrel (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, SpecialJoinInfo *sjinfo, List *restrictlist) |
| static void | try_nestloop_path (PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys) |
| static void | try_mergejoin_path (PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, SpecialJoinInfo *sjinfo, Relids param_source_rels, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys, List *mergeclauses, List *outersortkeys, List *innersortkeys) |
| static void | try_hashjoin_path (PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, Path *outer_path, Path *inner_path, List *restrict_clauses, List *hashclauses) |
| static bool | clause_sides_match_join (RestrictInfo *rinfo, RelOptInfo *outerrel, RelOptInfo *innerrel) |
| #define PATH_PARAM_BY_REL | ( | path, | ||
| rel | ||||
| ) | ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) |
Definition at line 25 of file joinpath.c.
Referenced by hash_inner_and_outer(), match_unsorted_outer(), and sort_inner_and_outer().
| void add_paths_to_joinrel | ( | PlannerInfo * | root, | |
| RelOptInfo * | joinrel, | |||
| RelOptInfo * | outerrel, | |||
| RelOptInfo * | innerrel, | |||
| JoinType | jointype, | |||
| SpecialJoinInfo * | sjinfo, | |||
| List * | restrictlist | |||
| ) |
Definition at line 78 of file joinpath.c.
References PlannerInfo::all_baserels, bms_difference(), bms_is_member(), bms_join(), bms_overlap(), compute_semi_anti_join_factors(), enable_hashjoin, enable_mergejoin, hash_inner_and_outer(), JOIN_ANTI, JOIN_FULL, PlannerInfo::join_info_list, JOIN_SEMI, SpecialJoinInfo::jointype, PlannerInfo::lateral_info_list, LateralJoinInfo::lateral_lhs, LateralJoinInfo::lateral_rhs, lfirst, match_unsorted_outer(), SpecialJoinInfo::min_lefthand, SpecialJoinInfo::min_righthand, RelOptInfo::relids, select_mergejoin_clauses(), and sort_inner_and_outer().
Referenced by make_join_rel().
{
List *mergeclause_list = NIL;
bool mergejoin_allowed = true;
SemiAntiJoinFactors semifactors;
Relids param_source_rels = NULL;
ListCell *lc;
/*
* Find potential mergejoin clauses. We can skip this if we are not
* interested in doing a mergejoin. However, mergejoin may be our only
* way of implementing a full outer join, so override enable_mergejoin if
* it's a full join.
*/
if (enable_mergejoin || jointype == JOIN_FULL)
mergeclause_list = select_mergejoin_clauses(root,
joinrel,
outerrel,
innerrel,
restrictlist,
jointype,
&mergejoin_allowed);
/*
* If it's SEMI or ANTI join, compute correction factors for cost
* estimation. These will be the same for all paths.
*/
if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
compute_semi_anti_join_factors(root, outerrel, innerrel,
jointype, sjinfo, restrictlist,
&semifactors);
/*
* Decide whether it's sensible to generate parameterized paths for this
* joinrel, and if so, which relations such paths should require. There
* is no need to create a parameterized result path unless there is a join
* order restriction that prevents joining one of our input rels directly
* to the parameter source rel instead of joining to the other input rel.
* This restriction reduces the number of parameterized paths we have to
* deal with at higher join levels, without compromising the quality of
* the resulting plan. We express the restriction as a Relids set that
* must overlap the parameterization of any proposed join path.
*/
foreach(lc, root->join_info_list)
{
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
/*
* SJ is relevant to this join if we have some part of its RHS
* (possibly not all of it), and haven't yet joined to its LHS. (This
* test is pretty simplistic, but should be sufficient considering the
* join has already been proven legal.) If the SJ is relevant, it
* presents constraints for joining to anything not in its RHS.
*/
if (bms_overlap(joinrel->relids, sjinfo->min_righthand) &&
!bms_overlap(joinrel->relids, sjinfo->min_lefthand))
param_source_rels = bms_join(param_source_rels,
bms_difference(root->all_baserels,
sjinfo->min_righthand));
/* full joins constrain both sides symmetrically */
if (sjinfo->jointype == JOIN_FULL &&
bms_overlap(joinrel->relids, sjinfo->min_lefthand) &&
!bms_overlap(joinrel->relids, sjinfo->min_righthand))
param_source_rels = bms_join(param_source_rels,
bms_difference(root->all_baserels,
sjinfo->min_lefthand));
}
/*
* However, when a LATERAL subquery is involved, we have to be a bit
* laxer, because there will simply not be any paths for the joinrel that
* aren't parameterized by whatever the subquery is parameterized by,
* unless its parameterization is resolved within the joinrel. Hence, add
* to param_source_rels anything that is laterally referenced in either
* input and is not in the join already.
*/
foreach(lc, root->lateral_info_list)
{
LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc);
if (bms_is_member(ljinfo->lateral_rhs, joinrel->relids))
param_source_rels = bms_join(param_source_rels,
bms_difference(ljinfo->lateral_lhs,
joinrel->relids));
}
/*
* 1. Consider mergejoin paths where both relations must be explicitly
* sorted. Skip this if we can't mergejoin.
*/
if (mergejoin_allowed)
sort_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype,
sjinfo, param_source_rels);
/*
* 2. Consider paths where the outer relation need not be explicitly
* sorted. This includes both nestloops and mergejoins where the outer
* path is already ordered. Again, skip this if we can't mergejoin.
* (That's okay because we know that nestloop can't handle right/full
* joins at all, so it wouldn't work in the prohibited cases either.)
*/
if (mergejoin_allowed)
match_unsorted_outer(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype,
sjinfo, &semifactors, param_source_rels);
#ifdef NOT_USED
/*
* 3. Consider paths where the inner relation need not be explicitly
* sorted. This includes mergejoins only (nestloops were already built in
* match_unsorted_outer).
*
* Diked out as redundant 2/13/2000 -- tgl. There isn't any really
* significant difference between the inner and outer side of a mergejoin,
* so match_unsorted_inner creates no paths that aren't equivalent to
* those made by match_unsorted_outer when add_paths_to_joinrel() is
* invoked with the two rels given in the other order.
*/
if (mergejoin_allowed)
match_unsorted_inner(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype,
sjinfo, &semifactors, param_source_rels);
#endif
/*
* 4. Consider paths where both outer and inner relations must be hashed
* before being joined. As above, disregard enable_hashjoin for full
* joins, because there may be no other alternative.
*/
if (enable_hashjoin || jointype == JOIN_FULL)
hash_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, jointype,
sjinfo, &semifactors, param_source_rels);
}
| static bool clause_sides_match_join | ( | RestrictInfo * | rinfo, | |
| RelOptInfo * | outerrel, | |||
| RelOptInfo * | innerrel | |||
| ) | [inline, static] |
Definition at line 451 of file joinpath.c.
References bms_is_subset(), RestrictInfo::left_relids, RestrictInfo::outer_is_left, RelOptInfo::relids, and RestrictInfo::right_relids.
Referenced by hash_inner_and_outer(), and select_mergejoin_clauses().
{
if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
bms_is_subset(rinfo->right_relids, innerrel->relids))
{
/* lefthand side is outer */
rinfo->outer_is_left = true;
return true;
}
else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
bms_is_subset(rinfo->right_relids, outerrel->relids))
{
/* righthand side is outer */
rinfo->outer_is_left = false;
return true;
}
return false; /* no good for these input relations */
}
| static void hash_inner_and_outer | ( | PlannerInfo * | root, | |
| RelOptInfo * | joinrel, | |||
| RelOptInfo * | outerrel, | |||
| RelOptInfo * | innerrel, | |||
| List * | restrictlist, | |||
| JoinType | jointype, | |||
| SpecialJoinInfo * | sjinfo, | |||
| SemiAntiJoinFactors * | semifactors, | |||
| Relids | param_source_rels | |||
| ) | [static] |
Definition at line 1085 of file joinpath.c.
References Assert, RestrictInfo::can_join, RelOptInfo::cheapest_parameterized_paths, RelOptInfo::cheapest_startup_path, RelOptInfo::cheapest_total_path, clause_sides_match_join(), create_unique_path(), RestrictInfo::hashjoinoperator, InvalidOid, IS_OUTER_JOIN, RestrictInfo::is_pushed_down, JOIN_UNIQUE_INNER, JOIN_UNIQUE_OUTER, lappend(), lfirst, NULL, PATH_PARAM_BY_REL, and try_hashjoin_path().
Referenced by add_paths_to_joinrel().
{
bool isouterjoin = IS_OUTER_JOIN(jointype);
List *hashclauses;
ListCell *l;
/*
* We need to build only one hashclauses list for any given pair of outer
* and inner relations; all of the hashable clauses will be used as keys.
*
* Scan the join's restrictinfo list to find hashjoinable clauses that are
* usable with this pair of sub-relations.
*/
hashclauses = NIL;
foreach(l, restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
/*
* If processing an outer join, only use its own join clauses for
* hashing. For inner joins we need not be so picky.
*/
if (isouterjoin && restrictinfo->is_pushed_down)
continue;
if (!restrictinfo->can_join ||
restrictinfo->hashjoinoperator == InvalidOid)
continue; /* not hashjoinable */
/*
* Check if clause has the form "outer op inner" or "inner op outer".
*/
if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
continue; /* no good for these input relations */
hashclauses = lappend(hashclauses, restrictinfo);
}
/* If we found any usable hashclauses, make paths */
if (hashclauses)
{
/*
* We consider both the cheapest-total-cost and cheapest-startup-cost
* outer paths. There's no need to consider any but the
* cheapest-total-cost inner path, however.
*/
Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
Path *cheapest_total_outer = outerrel->cheapest_total_path;
Path *cheapest_total_inner = innerrel->cheapest_total_path;
/*
* If either cheapest-total path is parameterized by the other rel, we
* can't use a hashjoin. (There's no use looking for alternative
* input paths, since these should already be the least-parameterized
* available paths.)
*/
if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
return;
/* Unique-ify if need be; we ignore parameterized possibilities */
if (jointype == JOIN_UNIQUE_OUTER)
{
cheapest_total_outer = (Path *)
create_unique_path(root, outerrel,
cheapest_total_outer, sjinfo);
Assert(cheapest_total_outer);
jointype = JOIN_INNER;
try_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
cheapest_total_outer,
cheapest_total_inner,
restrictlist,
hashclauses);
/* no possibility of cheap startup here */
}
else if (jointype == JOIN_UNIQUE_INNER)
{
cheapest_total_inner = (Path *)
create_unique_path(root, innerrel,
cheapest_total_inner, sjinfo);
Assert(cheapest_total_inner);
jointype = JOIN_INNER;
try_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
cheapest_total_outer,
cheapest_total_inner,
restrictlist,
hashclauses);
if (cheapest_startup_outer != NULL &&
cheapest_startup_outer != cheapest_total_outer)
try_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
cheapest_startup_outer,
cheapest_total_inner,
restrictlist,
hashclauses);
}
else
{
/*
* For other jointypes, we consider the cheapest startup outer
* together with the cheapest total inner, and then consider
* pairings of cheapest-total paths including parameterized ones.
* There is no use in generating parameterized paths on the basis
* of possibly cheap startup cost, so this is sufficient.
*/
ListCell *lc1;
ListCell *lc2;
if (cheapest_startup_outer != NULL)
try_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
cheapest_startup_outer,
cheapest_total_inner,
restrictlist,
hashclauses);
foreach(lc1, outerrel->cheapest_parameterized_paths)
{
Path *outerpath = (Path *) lfirst(lc1);
/*
* We cannot use an outer path that is parameterized by the
* inner rel.
*/
if (PATH_PARAM_BY_REL(outerpath, innerrel))
continue;
foreach(lc2, innerrel->cheapest_parameterized_paths)
{
Path *innerpath = (Path *) lfirst(lc2);
/*
* We cannot use an inner path that is parameterized by
* the outer rel, either.
*/
if (PATH_PARAM_BY_REL(innerpath, outerrel))
continue;
if (outerpath == cheapest_startup_outer &&
innerpath == cheapest_total_inner)
continue; /* already tried it */
try_hashjoin_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
outerpath,
innerpath,
restrictlist,
hashclauses);
}
}
}
}
}
| static void match_unsorted_outer | ( | PlannerInfo * | root, | |
| RelOptInfo * | joinrel, | |||
| RelOptInfo * | outerrel, | |||
| RelOptInfo * | innerrel, | |||
| List * | restrictlist, | |||
| List * | mergeclause_list, | |||
| JoinType | jointype, | |||
| SpecialJoinInfo * | sjinfo, | |||
| SemiAntiJoinFactors * | semifactors, | |||
| Relids | param_source_rels | |||
| ) | [static] |
Definition at line 673 of file joinpath.c.
References Assert, build_join_pathkeys(), RelOptInfo::cheapest_parameterized_paths, RelOptInfo::cheapest_total_path, compare_path_costs(), create_material_path(), create_unique_path(), elog, enable_material, ERROR, ExecMaterializesOutput(), find_mergeclauses_for_pathkeys(), get_cheapest_path_for_pathkeys(), JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_RIGHT, JOIN_SEMI, JOIN_UNIQUE_INNER, JOIN_UNIQUE_OUTER, lfirst, list_copy(), list_length(), list_truncate(), make_inner_pathkeys_for_merge(), NIL, NULL, PATH_PARAM_BY_REL, Path::pathkeys, pathkeys_contained_in(), RelOptInfo::pathlist, Path::pathtype, STARTUP_COST, TOTAL_COST, try_mergejoin_path(), and try_nestloop_path().
Referenced by add_paths_to_joinrel().
{
JoinType save_jointype = jointype;
bool nestjoinOK;
bool useallclauses;
Path *inner_cheapest_total = innerrel->cheapest_total_path;
Path *matpath = NULL;
ListCell *lc1;
/*
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
* are doing a right or full mergejoin, we must use *all* the mergeclauses
* as join clauses, else we will not have a valid plan. (Although these
* two flags are currently inverses, keep them separate for clarity and
* possible future changes.)
*/
switch (jointype)
{
case JOIN_INNER:
case JOIN_LEFT:
case JOIN_SEMI:
case JOIN_ANTI:
nestjoinOK = true;
useallclauses = false;
break;
case JOIN_RIGHT:
case JOIN_FULL:
nestjoinOK = false;
useallclauses = true;
break;
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
jointype = JOIN_INNER;
nestjoinOK = true;
useallclauses = false;
break;
default:
elog(ERROR, "unrecognized join type: %d",
(int) jointype);
nestjoinOK = false; /* keep compiler quiet */
useallclauses = false;
break;
}
/*
* If inner_cheapest_total is parameterized by the outer rel, ignore it;
* we will consider it below as a member of cheapest_parameterized_paths,
* but the other possibilities considered in this routine aren't usable.
*/
if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
inner_cheapest_total = NULL;
/*
* If we need to unique-ify the inner path, we will consider only the
* cheapest-total inner.
*/
if (save_jointype == JOIN_UNIQUE_INNER)
{
/* No way to do this with an inner path parameterized by outer rel */
if (inner_cheapest_total == NULL)
return;
inner_cheapest_total = (Path *)
create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
Assert(inner_cheapest_total);
}
else if (nestjoinOK)
{
/*
* Consider materializing the cheapest inner path, unless
* enable_material is off or the path in question materializes its
* output anyway.
*/
if (enable_material && inner_cheapest_total != NULL &&
!ExecMaterializesOutput(inner_cheapest_total->pathtype))
matpath = (Path *)
create_material_path(innerrel, inner_cheapest_total);
}
foreach(lc1, outerrel->pathlist)
{
Path *outerpath = (Path *) lfirst(lc1);
List *merge_pathkeys;
List *mergeclauses;
List *innersortkeys;
List *trialsortkeys;
Path *cheapest_startup_inner;
Path *cheapest_total_inner;
int num_sortkeys;
int sortkeycnt;
/*
* We cannot use an outer path that is parameterized by the inner rel.
*/
if (PATH_PARAM_BY_REL(outerpath, innerrel))
continue;
/*
* If we need to unique-ify the outer path, it's pointless to consider
* any but the cheapest outer. (XXX we don't consider parameterized
* outers, nor inners, for unique-ified cases. Should we?)
*/
if (save_jointype == JOIN_UNIQUE_OUTER)
{
if (outerpath != outerrel->cheapest_total_path)
continue;
outerpath = (Path *) create_unique_path(root, outerrel,
outerpath, sjinfo);
Assert(outerpath);
}
/*
* The result will have this sort order (even if it is implemented as
* a nestloop, and even if some of the mergeclauses are implemented by
* qpquals rather than as true mergeclauses):
*/
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerpath->pathkeys);
if (save_jointype == JOIN_UNIQUE_INNER)
{
/*
* Consider nestloop join, but only with the unique-ified cheapest
* inner path
*/
try_nestloop_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
outerpath,
inner_cheapest_total,
restrictlist,
merge_pathkeys);
}
else if (nestjoinOK)
{
/*
* Consider nestloop joins using this outer path and various
* available paths for the inner relation. We consider the
* cheapest-total paths for each available parameterization of the
* inner relation, including the unparameterized case.
*/
ListCell *lc2;
foreach(lc2, innerrel->cheapest_parameterized_paths)
{
Path *innerpath = (Path *) lfirst(lc2);
try_nestloop_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
outerpath,
innerpath,
restrictlist,
merge_pathkeys);
}
/* Also consider materialized form of the cheapest inner path */
if (matpath != NULL)
try_nestloop_path(root,
joinrel,
jointype,
sjinfo,
semifactors,
param_source_rels,
outerpath,
matpath,
restrictlist,
merge_pathkeys);
}
/* Can't do anything else if outer path needs to be unique'd */
if (save_jointype == JOIN_UNIQUE_OUTER)
continue;
/* Can't do anything else if inner rel is parameterized by outer */
if (inner_cheapest_total == NULL)
continue;
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(root,
outerpath->pathkeys,
true,
mergeclause_list);
/*
* Done with this outer path if no chance for a mergejoin.
*
* Special corner case: for "x FULL JOIN y ON true", there will be no
* join clauses at all. Ordinarily we'd generate a clauseless
* nestloop path, but since mergejoin is our only join type that
* supports FULL JOIN without any join clauses, it's necessary to
* generate a clauseless mergejoin path instead.
*/
if (mergeclauses == NIL)
{
if (jointype == JOIN_FULL)
/* okay to try for mergejoin */ ;
else
continue;
}
if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
continue;
/* Compute the required ordering of the inner path */
innersortkeys = make_inner_pathkeys_for_merge(root,
mergeclauses,
outerpath->pathkeys);
/*
* Generate a mergejoin on the basis of sorting the cheapest inner.
* Since a sort will be needed, only cheapest total cost matters. (But
* try_mergejoin_path will do the right thing if inner_cheapest_total
* is already correctly sorted.)
*/
try_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
param_source_rels,
outerpath,
inner_cheapest_total,
restrictlist,
merge_pathkeys,
mergeclauses,
NIL,
innersortkeys);
/* Can't do anything else if inner path needs to be unique'd */
if (save_jointype == JOIN_UNIQUE_INNER)
continue;
/*
* Look for presorted inner paths that satisfy the innersortkey list
* --- or any truncation thereof, if we are allowed to build a
* mergejoin using a subset of the merge clauses. Here, we consider
* both cheap startup cost and cheap total cost.
*
* Currently we do not consider parameterized inner paths here. This
* interacts with decisions elsewhere that also discriminate against
* mergejoins with parameterized inputs; see comments in
* src/backend/optimizer/README.
*
* As we shorten the sortkey list, we should consider only paths that
* are strictly cheaper than (in particular, not the same as) any path
* found in an earlier iteration. Otherwise we'd be intentionally
* using fewer merge keys than a given path allows (treating the rest
* as plain joinquals), which is unlikely to be a good idea. Also,
* eliminating paths here on the basis of compare_path_costs is a lot
* cheaper than building the mergejoin path only to throw it away.
*
* If inner_cheapest_total is well enough sorted to have not required
* a sort in the path made above, we shouldn't make a duplicate path
* with it, either. We handle that case with the same logic that
* handles the previous consideration, by initializing the variables
* that track cheapest-so-far properly. Note that we do NOT reject
* inner_cheapest_total if we find it matches some shorter set of
* pathkeys. That case corresponds to using fewer mergekeys to avoid
* sorting inner_cheapest_total, whereas we did sort it above, so the
* plans being considered are different.
*/
if (pathkeys_contained_in(innersortkeys,
inner_cheapest_total->pathkeys))
{
/* inner_cheapest_total didn't require a sort */
cheapest_startup_inner = inner_cheapest_total;
cheapest_total_inner = inner_cheapest_total;
}
else
{
/* it did require a sort, at least for the full set of keys */
cheapest_startup_inner = NULL;
cheapest_total_inner = NULL;
}
num_sortkeys = list_length(innersortkeys);
if (num_sortkeys > 1 && !useallclauses)
trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
else
trialsortkeys = innersortkeys; /* won't really truncate */
for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
{
Path *innerpath;
List *newclauses = NIL;
/*
* Look for an inner path ordered well enough for the first
* 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
* destructively, which is why we made a copy...
*/
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
NULL,
TOTAL_COST);
if (innerpath != NULL &&
(cheapest_total_inner == NULL ||
compare_path_costs(innerpath, cheapest_total_inner,
TOTAL_COST) < 0))
{
/* Found a cheap (or even-cheaper) sorted path */
/* Select the right mergeclauses, if we didn't already */
if (sortkeycnt < num_sortkeys)
{
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
try_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
param_source_rels,
outerpath,
innerpath,
restrictlist,
merge_pathkeys,
newclauses,
NIL,
NIL);
cheapest_total_inner = innerpath;
}
/* Same on the basis of cheapest startup cost ... */
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
NULL,
STARTUP_COST);
if (innerpath != NULL &&
(cheapest_startup_inner == NULL ||
compare_path_costs(innerpath, cheapest_startup_inner,
STARTUP_COST) < 0))
{
/* Found a cheap (or even-cheaper) sorted path */
if (innerpath != cheapest_total_inner)
{
/*
* Avoid rebuilding clause list if we already made one;
* saves memory in big join trees...
*/
if (newclauses == NIL)
{
if (sortkeycnt < num_sortkeys)
{
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
}
try_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
param_source_rels,
outerpath,
innerpath,
restrictlist,
merge_pathkeys,
newclauses,
NIL,
NIL);
}
cheapest_startup_inner = innerpath;
}
/*
* Don't consider truncated sortkeys if we need all clauses.
*/
if (useallclauses)
break;
}
}
}
| static List * select_mergejoin_clauses | ( | PlannerInfo * | root, | |
| RelOptInfo * | joinrel, | |||
| RelOptInfo * | outerrel, | |||
| RelOptInfo * | innerrel, | |||
| List * | restrictlist, | |||
| JoinType | jointype, | |||
| bool * | mergejoin_allowed | |||
| ) | [static] |
Definition at line 1292 of file joinpath.c.
References RestrictInfo::can_join, RestrictInfo::clause, clause_sides_match_join(), EC_MUST_BE_REDUNDANT, IS_OUTER_JOIN, RestrictInfo::is_pushed_down, IsA, JOIN_FULL, JOIN_RIGHT, lappend(), RestrictInfo::left_ec, lfirst, RestrictInfo::mergeopfamilies, NIL, RestrictInfo::right_ec, and update_mergeclause_eclasses().
Referenced by add_paths_to_joinrel().
{
List *result_list = NIL;
bool isouterjoin = IS_OUTER_JOIN(jointype);
bool have_nonmergeable_joinclause = false;
ListCell *l;
foreach(l, restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
/*
* If processing an outer join, only use its own join clauses in the
* merge. For inner joins we can use pushed-down clauses too. (Note:
* we don't set have_nonmergeable_joinclause here because pushed-down
* clauses will become otherquals not joinquals.)
*/
if (isouterjoin && restrictinfo->is_pushed_down)
continue;
/* Check that clause is a mergeable operator clause */
if (!restrictinfo->can_join ||
restrictinfo->mergeopfamilies == NIL)
{
/*
* The executor can handle extra joinquals that are constants, but
* not anything else, when doing right/full merge join. (The
* reason to support constants is so we can do FULL JOIN ON
* FALSE.)
*/
if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
have_nonmergeable_joinclause = true;
continue; /* not mergejoinable */
}
/*
* Check if clause has the form "outer op inner" or "inner op outer".
*/
if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
{
have_nonmergeable_joinclause = true;
continue; /* no good for these input relations */
}
/*
* Insist that each side have a non-redundant eclass. This
* restriction is needed because various bits of the planner expect
* that each clause in a merge be associatable with some pathkey in a
* canonical pathkey list, but redundant eclasses can't appear in
* canonical sort orderings. (XXX it might be worth relaxing this,
* but not enough time to address it for 8.3.)
*
* Note: it would be bad if this condition failed for an otherwise
* mergejoinable FULL JOIN clause, since that would result in
* undesirable planner failure. I believe that is not possible
* however; a variable involved in a full join could only appear in
* below_outer_join eclasses, which aren't considered redundant.
*
* This case *can* happen for left/right join clauses: the outer-side
* variable could be equated to a constant. Because we will propagate
* that constant across the join clause, the loss of ability to do a
* mergejoin is not really all that big a deal, and so it's not clear
* that improving this is important.
*/
update_mergeclause_eclasses(root, restrictinfo);
if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
{
have_nonmergeable_joinclause = true;
continue; /* can't handle redundant eclasses */
}
result_list = lappend(result_list, restrictinfo);
}
/*
* Report whether mergejoin is allowed (see comment at top of function).
*/
switch (jointype)
{
case JOIN_RIGHT:
case JOIN_FULL:
*mergejoin_allowed = !have_nonmergeable_joinclause;
break;
default:
*mergejoin_allowed = true;
break;
}
return result_list;
}
| static void sort_inner_and_outer | ( | PlannerInfo * | root, | |
| RelOptInfo * | joinrel, | |||
| RelOptInfo * | outerrel, | |||
| RelOptInfo * | innerrel, | |||
| List * | restrictlist, | |||
| List * | mergeclause_list, | |||
| JoinType | jointype, | |||
| SpecialJoinInfo * | sjinfo, | |||
| Relids | param_source_rels | |||
| ) | [static] |
Definition at line 488 of file joinpath.c.
References Assert, build_join_pathkeys(), RelOptInfo::cheapest_total_path, create_unique_path(), find_mergeclauses_for_pathkeys(), JOIN_UNIQUE_INNER, JOIN_UNIQUE_OUTER, lcons(), lfirst, list_copy(), list_delete_ptr(), list_head(), list_length(), make_inner_pathkeys_for_merge(), PATH_PARAM_BY_REL, select_outer_pathkeys_for_merge(), and try_mergejoin_path().
Referenced by add_paths_to_joinrel().
{
Path *outer_path;
Path *inner_path;
List *all_pathkeys;
ListCell *l;
/*
* We only consider the cheapest-total-cost input paths, since we are
* assuming here that a sort is required. We will consider
* cheapest-startup-cost input paths later, and only if they don't need a
* sort.
*
* This function intentionally does not consider parameterized input
* paths, except when the cheapest-total is parameterized. If we did so,
* we'd have a combinatorial explosion of mergejoin paths of dubious
* value. This interacts with decisions elsewhere that also discriminate
* against mergejoins with parameterized inputs; see comments in
* src/backend/optimizer/README.
*/
outer_path = outerrel->cheapest_total_path;
inner_path = innerrel->cheapest_total_path;
/*
* If either cheapest-total path is parameterized by the other rel, we
* can't use a mergejoin. (There's no use looking for alternative input
* paths, since these should already be the least-parameterized available
* paths.)
*/
if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
PATH_PARAM_BY_REL(inner_path, outerrel))
return;
/*
* If unique-ification is requested, do it and then handle as a plain
* inner join.
*/
if (jointype == JOIN_UNIQUE_OUTER)
{
outer_path = (Path *) create_unique_path(root, outerrel,
outer_path, sjinfo);
Assert(outer_path);
jointype = JOIN_INNER;
}
else if (jointype == JOIN_UNIQUE_INNER)
{
inner_path = (Path *) create_unique_path(root, innerrel,
inner_path, sjinfo);
Assert(inner_path);
jointype = JOIN_INNER;
}
/*
* Each possible ordering of the available mergejoin clauses will generate
* a differently-sorted result path at essentially the same cost. We have
* no basis for choosing one over another at this level of joining, but
* some sort orders may be more useful than others for higher-level
* mergejoins, so it's worth considering multiple orderings.
*
* Actually, it's not quite true that every mergeclause ordering will
* generate a different path order, because some of the clauses may be
* partially redundant (refer to the same EquivalenceClasses). Therefore,
* what we do is convert the mergeclause list to a list of canonical
* pathkeys, and then consider different orderings of the pathkeys.
*
* Generating a path for *every* permutation of the pathkeys doesn't seem
* like a winning strategy; the cost in planning time is too high. For
* now, we generate one path for each pathkey, listing that pathkey first
* and the rest in random order. This should allow at least a one-clause
* mergejoin without re-sorting against any other possible mergejoin
* partner path. But if we've not guessed the right ordering of secondary
* keys, we may end up evaluating clauses as qpquals when they could have
* been done as mergeclauses. (In practice, it's rare that there's more
* than two or three mergeclauses, so expending a huge amount of thought
* on that is probably not worth it.)
*
* The pathkey order returned by select_outer_pathkeys_for_merge() has
* some heuristics behind it (see that function), so be sure to try it
* exactly as-is as well as making variants.
*/
all_pathkeys = select_outer_pathkeys_for_merge(root,
mergeclause_list,
joinrel);
foreach(l, all_pathkeys)
{
List *front_pathkey = (List *) lfirst(l);
List *cur_mergeclauses;
List *outerkeys;
List *innerkeys;
List *merge_pathkeys;
/* Make a pathkey list with this guy first */
if (l != list_head(all_pathkeys))
outerkeys = lcons(front_pathkey,
list_delete_ptr(list_copy(all_pathkeys),
front_pathkey));
else
outerkeys = all_pathkeys; /* no work at first one... */
/* Sort the mergeclauses into the corresponding ordering */
cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
outerkeys,
true,
mergeclause_list);
/* Should have used them all... */
Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list));
/* Build sort pathkeys for the inner side */
innerkeys = make_inner_pathkeys_for_merge(root,
cur_mergeclauses,
outerkeys);
/* Build pathkeys representing output sort order */
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerkeys);
/*
* And now we can make the path.
*
* Note: it's possible that the cheapest paths will already be sorted
* properly. try_mergejoin_path will detect that case and suppress an
* explicit sort step, so we needn't do so here.
*/
try_mergejoin_path(root,
joinrel,
jointype,
sjinfo,
param_source_rels,
outer_path,
inner_path,
restrictlist,
merge_pathkeys,
cur_mergeclauses,
outerkeys,
innerkeys);
}
}
| static void try_hashjoin_path | ( | PlannerInfo * | root, | |
| RelOptInfo * | joinrel, | |||
| JoinType | jointype, | |||
| SpecialJoinInfo * | sjinfo, | |||
| SemiAntiJoinFactors * | semifactors, | |||
| Relids | param_source_rels, | |||
| Path * | outer_path, | |||
| Path * | inner_path, | |||
| List * | restrict_clauses, | |||
| List * | hashclauses | |||
| ) | [static] |
Definition at line 380 of file joinpath.c.
References add_path(), add_path_precheck(), bms_free(), bms_overlap(), calc_non_nestloop_required_outer(), create_hashjoin_path(), initial_cost_hashjoin(), NIL, JoinCostWorkspace::startup_cost, and JoinCostWorkspace::total_cost.
Referenced by hash_inner_and_outer().
{
Relids required_outer;
JoinCostWorkspace workspace;
/*
* Check to see if proposed path is still parameterized, and reject if the
* parameterization wouldn't be sensible.
*/
required_outer = calc_non_nestloop_required_outer(outer_path,
inner_path);
if (required_outer &&
!bms_overlap(required_outer, param_source_rels))
{
/* Waste no memory when we reject a path here */
bms_free(required_outer);
return;
}
/*
* See comments in try_nestloop_path(). Also note that hashjoin paths
* never have any output pathkeys, per comments in create_hashjoin_path.
*/
initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
outer_path, inner_path,
sjinfo, semifactors);
if (add_path_precheck(joinrel,
workspace.startup_cost, workspace.total_cost,
NIL, required_outer))
{
add_path(joinrel, (Path *)
create_hashjoin_path(root,
joinrel,
jointype,
&workspace,
sjinfo,
semifactors,
outer_path,
inner_path,
restrict_clauses,
required_outer,
hashclauses));
}
else
{
/* Waste no memory when we reject a path here */
bms_free(required_outer);
}
}
| static void try_mergejoin_path | ( | PlannerInfo * | root, | |
| RelOptInfo * | joinrel, | |||
| JoinType | jointype, | |||
| SpecialJoinInfo * | sjinfo, | |||
| Relids | param_source_rels, | |||
| Path * | outer_path, | |||
| Path * | inner_path, | |||
| List * | restrict_clauses, | |||
| List * | pathkeys, | |||
| List * | mergeclauses, | |||
| List * | outersortkeys, | |||
| List * | innersortkeys | |||
| ) | [static] |
Definition at line 299 of file joinpath.c.
References add_path(), add_path_precheck(), bms_free(), bms_overlap(), calc_non_nestloop_required_outer(), create_mergejoin_path(), initial_cost_mergejoin(), Path::pathkeys, pathkeys_contained_in(), JoinCostWorkspace::startup_cost, and JoinCostWorkspace::total_cost.
Referenced by match_unsorted_outer(), and sort_inner_and_outer().
{
Relids required_outer;
JoinCostWorkspace workspace;
/*
* Check to see if proposed path is still parameterized, and reject if the
* parameterization wouldn't be sensible.
*/
required_outer = calc_non_nestloop_required_outer(outer_path,
inner_path);
if (required_outer &&
!bms_overlap(required_outer, param_source_rels))
{
/* Waste no memory when we reject a path here */
bms_free(required_outer);
return;
}
/*
* If the given paths are already well enough ordered, we can skip doing
* an explicit sort.
*/
if (outersortkeys &&
pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
outersortkeys = NIL;
if (innersortkeys &&
pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
innersortkeys = NIL;
/*
* See comments in try_nestloop_path().
*/
initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
outer_path, inner_path,
outersortkeys, innersortkeys,
sjinfo);
if (add_path_precheck(joinrel,
workspace.startup_cost, workspace.total_cost,
pathkeys, required_outer))
{
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
jointype,
&workspace,
sjinfo,
outer_path,
inner_path,
restrict_clauses,
pathkeys,
required_outer,
mergeclauses,
outersortkeys,
innersortkeys));
}
else
{
/* Waste no memory when we reject a path here */
bms_free(required_outer);
}
}
| static void try_nestloop_path | ( | PlannerInfo * | root, | |
| RelOptInfo * | joinrel, | |||
| JoinType | jointype, | |||
| SpecialJoinInfo * | sjinfo, | |||
| SemiAntiJoinFactors * | semifactors, | |||
| Relids | param_source_rels, | |||
| Path * | outer_path, | |||
| Path * | inner_path, | |||
| List * | restrict_clauses, | |||
| List * | pathkeys | |||
| ) | [static] |
Definition at line 228 of file joinpath.c.
References add_path(), add_path_precheck(), bms_free(), bms_overlap(), calc_nestloop_required_outer(), create_nestloop_path(), initial_cost_nestloop(), JoinCostWorkspace::startup_cost, and JoinCostWorkspace::total_cost.
Referenced by match_unsorted_outer().
{
Relids required_outer;
JoinCostWorkspace workspace;
/*
* Check to see if proposed path is still parameterized, and reject if the
* parameterization wouldn't be sensible.
*/
required_outer = calc_nestloop_required_outer(outer_path,
inner_path);
if (required_outer &&
!bms_overlap(required_outer, param_source_rels))
{
/* Waste no memory when we reject a path here */
bms_free(required_outer);
return;
}
/*
* Do a precheck to quickly eliminate obviously-inferior paths. We
* calculate a cheap lower bound on the path's cost and then use
* add_path_precheck() to see if the path is clearly going to be dominated
* by some existing path for the joinrel. If not, do the full pushup with
* creating a fully valid path structure and submitting it to add_path().
* The latter two steps are expensive enough to make this two-phase
* methodology worthwhile.
*/
initial_cost_nestloop(root, &workspace, jointype,
outer_path, inner_path,
sjinfo, semifactors);
if (add_path_precheck(joinrel,
workspace.startup_cost, workspace.total_cost,
pathkeys, required_outer))
{
add_path(joinrel, (Path *)
create_nestloop_path(root,
joinrel,
jointype,
&workspace,
sjinfo,
semifactors,
outer_path,
inner_path,
restrict_clauses,
pathkeys,
required_outer));
}
else
{
/* Waste no memory when we reject a path here */
bms_free(required_outer);
}
}
1.7.1