#include "postgres.h"
#include <math.h>
#include "executor/executor.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
Go to the source code of this file.
Defines | |
#define | PATH_PARAM_BY_REL(path, rel) ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) |
Functions | |
static void | sort_inner_and_outer (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype, SpecialJoinInfo *sjinfo, Relids param_source_rels) |
static void | match_unsorted_outer (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels) |
static void | hash_inner_and_outer (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels) |
static List * | select_mergejoin_clauses (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, bool *mergejoin_allowed) |
void | add_paths_to_joinrel (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, SpecialJoinInfo *sjinfo, List *restrictlist) |
static void | try_nestloop_path (PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys) |
static void | try_mergejoin_path (PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, SpecialJoinInfo *sjinfo, Relids param_source_rels, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys, List *mergeclauses, List *outersortkeys, List *innersortkeys) |
static void | try_hashjoin_path (PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, Path *outer_path, Path *inner_path, List *restrict_clauses, List *hashclauses) |
static bool | clause_sides_match_join (RestrictInfo *rinfo, RelOptInfo *outerrel, RelOptInfo *innerrel) |
#define PATH_PARAM_BY_REL | ( | path, | ||
rel | ||||
) | ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) |
Definition at line 25 of file joinpath.c.
Referenced by hash_inner_and_outer(), match_unsorted_outer(), and sort_inner_and_outer().
void add_paths_to_joinrel | ( | PlannerInfo * | root, | |
RelOptInfo * | joinrel, | |||
RelOptInfo * | outerrel, | |||
RelOptInfo * | innerrel, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo, | |||
List * | restrictlist | |||
) |
Definition at line 78 of file joinpath.c.
References PlannerInfo::all_baserels, bms_difference(), bms_is_member(), bms_join(), bms_overlap(), compute_semi_anti_join_factors(), enable_hashjoin, enable_mergejoin, hash_inner_and_outer(), JOIN_ANTI, JOIN_FULL, PlannerInfo::join_info_list, JOIN_SEMI, SpecialJoinInfo::jointype, PlannerInfo::lateral_info_list, LateralJoinInfo::lateral_lhs, LateralJoinInfo::lateral_rhs, lfirst, match_unsorted_outer(), SpecialJoinInfo::min_lefthand, SpecialJoinInfo::min_righthand, RelOptInfo::relids, select_mergejoin_clauses(), and sort_inner_and_outer().
Referenced by make_join_rel().
{ List *mergeclause_list = NIL; bool mergejoin_allowed = true; SemiAntiJoinFactors semifactors; Relids param_source_rels = NULL; ListCell *lc; /* * Find potential mergejoin clauses. We can skip this if we are not * interested in doing a mergejoin. However, mergejoin may be our only * way of implementing a full outer join, so override enable_mergejoin if * it's a full join. */ if (enable_mergejoin || jointype == JOIN_FULL) mergeclause_list = select_mergejoin_clauses(root, joinrel, outerrel, innerrel, restrictlist, jointype, &mergejoin_allowed); /* * If it's SEMI or ANTI join, compute correction factors for cost * estimation. These will be the same for all paths. */ if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) compute_semi_anti_join_factors(root, outerrel, innerrel, jointype, sjinfo, restrictlist, &semifactors); /* * Decide whether it's sensible to generate parameterized paths for this * joinrel, and if so, which relations such paths should require. There * is no need to create a parameterized result path unless there is a join * order restriction that prevents joining one of our input rels directly * to the parameter source rel instead of joining to the other input rel. * This restriction reduces the number of parameterized paths we have to * deal with at higher join levels, without compromising the quality of * the resulting plan. We express the restriction as a Relids set that * must overlap the parameterization of any proposed join path. */ foreach(lc, root->join_info_list) { SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); /* * SJ is relevant to this join if we have some part of its RHS * (possibly not all of it), and haven't yet joined to its LHS. (This * test is pretty simplistic, but should be sufficient considering the * join has already been proven legal.) If the SJ is relevant, it * presents constraints for joining to anything not in its RHS. */ if (bms_overlap(joinrel->relids, sjinfo->min_righthand) && !bms_overlap(joinrel->relids, sjinfo->min_lefthand)) param_source_rels = bms_join(param_source_rels, bms_difference(root->all_baserels, sjinfo->min_righthand)); /* full joins constrain both sides symmetrically */ if (sjinfo->jointype == JOIN_FULL && bms_overlap(joinrel->relids, sjinfo->min_lefthand) && !bms_overlap(joinrel->relids, sjinfo->min_righthand)) param_source_rels = bms_join(param_source_rels, bms_difference(root->all_baserels, sjinfo->min_lefthand)); } /* * However, when a LATERAL subquery is involved, we have to be a bit * laxer, because there will simply not be any paths for the joinrel that * aren't parameterized by whatever the subquery is parameterized by, * unless its parameterization is resolved within the joinrel. Hence, add * to param_source_rels anything that is laterally referenced in either * input and is not in the join already. */ foreach(lc, root->lateral_info_list) { LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc); if (bms_is_member(ljinfo->lateral_rhs, joinrel->relids)) param_source_rels = bms_join(param_source_rels, bms_difference(ljinfo->lateral_lhs, joinrel->relids)); } /* * 1. Consider mergejoin paths where both relations must be explicitly * sorted. Skip this if we can't mergejoin. */ if (mergejoin_allowed) sort_inner_and_outer(root, joinrel, outerrel, innerrel, restrictlist, mergeclause_list, jointype, sjinfo, param_source_rels); /* * 2. Consider paths where the outer relation need not be explicitly * sorted. This includes both nestloops and mergejoins where the outer * path is already ordered. Again, skip this if we can't mergejoin. * (That's okay because we know that nestloop can't handle right/full * joins at all, so it wouldn't work in the prohibited cases either.) */ if (mergejoin_allowed) match_unsorted_outer(root, joinrel, outerrel, innerrel, restrictlist, mergeclause_list, jointype, sjinfo, &semifactors, param_source_rels); #ifdef NOT_USED /* * 3. Consider paths where the inner relation need not be explicitly * sorted. This includes mergejoins only (nestloops were already built in * match_unsorted_outer). * * Diked out as redundant 2/13/2000 -- tgl. There isn't any really * significant difference between the inner and outer side of a mergejoin, * so match_unsorted_inner creates no paths that aren't equivalent to * those made by match_unsorted_outer when add_paths_to_joinrel() is * invoked with the two rels given in the other order. */ if (mergejoin_allowed) match_unsorted_inner(root, joinrel, outerrel, innerrel, restrictlist, mergeclause_list, jointype, sjinfo, &semifactors, param_source_rels); #endif /* * 4. Consider paths where both outer and inner relations must be hashed * before being joined. As above, disregard enable_hashjoin for full * joins, because there may be no other alternative. */ if (enable_hashjoin || jointype == JOIN_FULL) hash_inner_and_outer(root, joinrel, outerrel, innerrel, restrictlist, jointype, sjinfo, &semifactors, param_source_rels); }
static bool clause_sides_match_join | ( | RestrictInfo * | rinfo, | |
RelOptInfo * | outerrel, | |||
RelOptInfo * | innerrel | |||
) | [inline, static] |
Definition at line 451 of file joinpath.c.
References bms_is_subset(), RestrictInfo::left_relids, RestrictInfo::outer_is_left, RelOptInfo::relids, and RestrictInfo::right_relids.
Referenced by hash_inner_and_outer(), and select_mergejoin_clauses().
{ if (bms_is_subset(rinfo->left_relids, outerrel->relids) && bms_is_subset(rinfo->right_relids, innerrel->relids)) { /* lefthand side is outer */ rinfo->outer_is_left = true; return true; } else if (bms_is_subset(rinfo->left_relids, innerrel->relids) && bms_is_subset(rinfo->right_relids, outerrel->relids)) { /* righthand side is outer */ rinfo->outer_is_left = false; return true; } return false; /* no good for these input relations */ }
static void hash_inner_and_outer | ( | PlannerInfo * | root, | |
RelOptInfo * | joinrel, | |||
RelOptInfo * | outerrel, | |||
RelOptInfo * | innerrel, | |||
List * | restrictlist, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo, | |||
SemiAntiJoinFactors * | semifactors, | |||
Relids | param_source_rels | |||
) | [static] |
Definition at line 1085 of file joinpath.c.
References Assert, RestrictInfo::can_join, RelOptInfo::cheapest_parameterized_paths, RelOptInfo::cheapest_startup_path, RelOptInfo::cheapest_total_path, clause_sides_match_join(), create_unique_path(), RestrictInfo::hashjoinoperator, InvalidOid, IS_OUTER_JOIN, RestrictInfo::is_pushed_down, JOIN_UNIQUE_INNER, JOIN_UNIQUE_OUTER, lappend(), lfirst, NULL, PATH_PARAM_BY_REL, and try_hashjoin_path().
Referenced by add_paths_to_joinrel().
{ bool isouterjoin = IS_OUTER_JOIN(jointype); List *hashclauses; ListCell *l; /* * We need to build only one hashclauses list for any given pair of outer * and inner relations; all of the hashable clauses will be used as keys. * * Scan the join's restrictinfo list to find hashjoinable clauses that are * usable with this pair of sub-relations. */ hashclauses = NIL; foreach(l, restrictlist) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); /* * If processing an outer join, only use its own join clauses for * hashing. For inner joins we need not be so picky. */ if (isouterjoin && restrictinfo->is_pushed_down) continue; if (!restrictinfo->can_join || restrictinfo->hashjoinoperator == InvalidOid) continue; /* not hashjoinable */ /* * Check if clause has the form "outer op inner" or "inner op outer". */ if (!clause_sides_match_join(restrictinfo, outerrel, innerrel)) continue; /* no good for these input relations */ hashclauses = lappend(hashclauses, restrictinfo); } /* If we found any usable hashclauses, make paths */ if (hashclauses) { /* * We consider both the cheapest-total-cost and cheapest-startup-cost * outer paths. There's no need to consider any but the * cheapest-total-cost inner path, however. */ Path *cheapest_startup_outer = outerrel->cheapest_startup_path; Path *cheapest_total_outer = outerrel->cheapest_total_path; Path *cheapest_total_inner = innerrel->cheapest_total_path; /* * If either cheapest-total path is parameterized by the other rel, we * can't use a hashjoin. (There's no use looking for alternative * input paths, since these should already be the least-parameterized * available paths.) */ if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) || PATH_PARAM_BY_REL(cheapest_total_inner, outerrel)) return; /* Unique-ify if need be; we ignore parameterized possibilities */ if (jointype == JOIN_UNIQUE_OUTER) { cheapest_total_outer = (Path *) create_unique_path(root, outerrel, cheapest_total_outer, sjinfo); Assert(cheapest_total_outer); jointype = JOIN_INNER; try_hashjoin_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, cheapest_total_outer, cheapest_total_inner, restrictlist, hashclauses); /* no possibility of cheap startup here */ } else if (jointype == JOIN_UNIQUE_INNER) { cheapest_total_inner = (Path *) create_unique_path(root, innerrel, cheapest_total_inner, sjinfo); Assert(cheapest_total_inner); jointype = JOIN_INNER; try_hashjoin_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, cheapest_total_outer, cheapest_total_inner, restrictlist, hashclauses); if (cheapest_startup_outer != NULL && cheapest_startup_outer != cheapest_total_outer) try_hashjoin_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, cheapest_startup_outer, cheapest_total_inner, restrictlist, hashclauses); } else { /* * For other jointypes, we consider the cheapest startup outer * together with the cheapest total inner, and then consider * pairings of cheapest-total paths including parameterized ones. * There is no use in generating parameterized paths on the basis * of possibly cheap startup cost, so this is sufficient. */ ListCell *lc1; ListCell *lc2; if (cheapest_startup_outer != NULL) try_hashjoin_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, cheapest_startup_outer, cheapest_total_inner, restrictlist, hashclauses); foreach(lc1, outerrel->cheapest_parameterized_paths) { Path *outerpath = (Path *) lfirst(lc1); /* * We cannot use an outer path that is parameterized by the * inner rel. */ if (PATH_PARAM_BY_REL(outerpath, innerrel)) continue; foreach(lc2, innerrel->cheapest_parameterized_paths) { Path *innerpath = (Path *) lfirst(lc2); /* * We cannot use an inner path that is parameterized by * the outer rel, either. */ if (PATH_PARAM_BY_REL(innerpath, outerrel)) continue; if (outerpath == cheapest_startup_outer && innerpath == cheapest_total_inner) continue; /* already tried it */ try_hashjoin_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, outerpath, innerpath, restrictlist, hashclauses); } } } } }
static void match_unsorted_outer | ( | PlannerInfo * | root, | |
RelOptInfo * | joinrel, | |||
RelOptInfo * | outerrel, | |||
RelOptInfo * | innerrel, | |||
List * | restrictlist, | |||
List * | mergeclause_list, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo, | |||
SemiAntiJoinFactors * | semifactors, | |||
Relids | param_source_rels | |||
) | [static] |
Definition at line 673 of file joinpath.c.
References Assert, build_join_pathkeys(), RelOptInfo::cheapest_parameterized_paths, RelOptInfo::cheapest_total_path, compare_path_costs(), create_material_path(), create_unique_path(), elog, enable_material, ERROR, ExecMaterializesOutput(), find_mergeclauses_for_pathkeys(), get_cheapest_path_for_pathkeys(), JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_RIGHT, JOIN_SEMI, JOIN_UNIQUE_INNER, JOIN_UNIQUE_OUTER, lfirst, list_copy(), list_length(), list_truncate(), make_inner_pathkeys_for_merge(), NIL, NULL, PATH_PARAM_BY_REL, Path::pathkeys, pathkeys_contained_in(), RelOptInfo::pathlist, Path::pathtype, STARTUP_COST, TOTAL_COST, try_mergejoin_path(), and try_nestloop_path().
Referenced by add_paths_to_joinrel().
{ JoinType save_jointype = jointype; bool nestjoinOK; bool useallclauses; Path *inner_cheapest_total = innerrel->cheapest_total_path; Path *matpath = NULL; ListCell *lc1; /* * Nestloop only supports inner, left, semi, and anti joins. Also, if we * are doing a right or full mergejoin, we must use *all* the mergeclauses * as join clauses, else we will not have a valid plan. (Although these * two flags are currently inverses, keep them separate for clarity and * possible future changes.) */ switch (jointype) { case JOIN_INNER: case JOIN_LEFT: case JOIN_SEMI: case JOIN_ANTI: nestjoinOK = true; useallclauses = false; break; case JOIN_RIGHT: case JOIN_FULL: nestjoinOK = false; useallclauses = true; break; case JOIN_UNIQUE_OUTER: case JOIN_UNIQUE_INNER: jointype = JOIN_INNER; nestjoinOK = true; useallclauses = false; break; default: elog(ERROR, "unrecognized join type: %d", (int) jointype); nestjoinOK = false; /* keep compiler quiet */ useallclauses = false; break; } /* * If inner_cheapest_total is parameterized by the outer rel, ignore it; * we will consider it below as a member of cheapest_parameterized_paths, * but the other possibilities considered in this routine aren't usable. */ if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel)) inner_cheapest_total = NULL; /* * If we need to unique-ify the inner path, we will consider only the * cheapest-total inner. */ if (save_jointype == JOIN_UNIQUE_INNER) { /* No way to do this with an inner path parameterized by outer rel */ if (inner_cheapest_total == NULL) return; inner_cheapest_total = (Path *) create_unique_path(root, innerrel, inner_cheapest_total, sjinfo); Assert(inner_cheapest_total); } else if (nestjoinOK) { /* * Consider materializing the cheapest inner path, unless * enable_material is off or the path in question materializes its * output anyway. */ if (enable_material && inner_cheapest_total != NULL && !ExecMaterializesOutput(inner_cheapest_total->pathtype)) matpath = (Path *) create_material_path(innerrel, inner_cheapest_total); } foreach(lc1, outerrel->pathlist) { Path *outerpath = (Path *) lfirst(lc1); List *merge_pathkeys; List *mergeclauses; List *innersortkeys; List *trialsortkeys; Path *cheapest_startup_inner; Path *cheapest_total_inner; int num_sortkeys; int sortkeycnt; /* * We cannot use an outer path that is parameterized by the inner rel. */ if (PATH_PARAM_BY_REL(outerpath, innerrel)) continue; /* * If we need to unique-ify the outer path, it's pointless to consider * any but the cheapest outer. (XXX we don't consider parameterized * outers, nor inners, for unique-ified cases. Should we?) */ if (save_jointype == JOIN_UNIQUE_OUTER) { if (outerpath != outerrel->cheapest_total_path) continue; outerpath = (Path *) create_unique_path(root, outerrel, outerpath, sjinfo); Assert(outerpath); } /* * The result will have this sort order (even if it is implemented as * a nestloop, and even if some of the mergeclauses are implemented by * qpquals rather than as true mergeclauses): */ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerpath->pathkeys); if (save_jointype == JOIN_UNIQUE_INNER) { /* * Consider nestloop join, but only with the unique-ified cheapest * inner path */ try_nestloop_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, outerpath, inner_cheapest_total, restrictlist, merge_pathkeys); } else if (nestjoinOK) { /* * Consider nestloop joins using this outer path and various * available paths for the inner relation. We consider the * cheapest-total paths for each available parameterization of the * inner relation, including the unparameterized case. */ ListCell *lc2; foreach(lc2, innerrel->cheapest_parameterized_paths) { Path *innerpath = (Path *) lfirst(lc2); try_nestloop_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, outerpath, innerpath, restrictlist, merge_pathkeys); } /* Also consider materialized form of the cheapest inner path */ if (matpath != NULL) try_nestloop_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, outerpath, matpath, restrictlist, merge_pathkeys); } /* Can't do anything else if outer path needs to be unique'd */ if (save_jointype == JOIN_UNIQUE_OUTER) continue; /* Can't do anything else if inner rel is parameterized by outer */ if (inner_cheapest_total == NULL) continue; /* Look for useful mergeclauses (if any) */ mergeclauses = find_mergeclauses_for_pathkeys(root, outerpath->pathkeys, true, mergeclause_list); /* * Done with this outer path if no chance for a mergejoin. * * Special corner case: for "x FULL JOIN y ON true", there will be no * join clauses at all. Ordinarily we'd generate a clauseless * nestloop path, but since mergejoin is our only join type that * supports FULL JOIN without any join clauses, it's necessary to * generate a clauseless mergejoin path instead. */ if (mergeclauses == NIL) { if (jointype == JOIN_FULL) /* okay to try for mergejoin */ ; else continue; } if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list)) continue; /* Compute the required ordering of the inner path */ innersortkeys = make_inner_pathkeys_for_merge(root, mergeclauses, outerpath->pathkeys); /* * Generate a mergejoin on the basis of sorting the cheapest inner. * Since a sort will be needed, only cheapest total cost matters. (But * try_mergejoin_path will do the right thing if inner_cheapest_total * is already correctly sorted.) */ try_mergejoin_path(root, joinrel, jointype, sjinfo, param_source_rels, outerpath, inner_cheapest_total, restrictlist, merge_pathkeys, mergeclauses, NIL, innersortkeys); /* Can't do anything else if inner path needs to be unique'd */ if (save_jointype == JOIN_UNIQUE_INNER) continue; /* * Look for presorted inner paths that satisfy the innersortkey list * --- or any truncation thereof, if we are allowed to build a * mergejoin using a subset of the merge clauses. Here, we consider * both cheap startup cost and cheap total cost. * * Currently we do not consider parameterized inner paths here. This * interacts with decisions elsewhere that also discriminate against * mergejoins with parameterized inputs; see comments in * src/backend/optimizer/README. * * As we shorten the sortkey list, we should consider only paths that * are strictly cheaper than (in particular, not the same as) any path * found in an earlier iteration. Otherwise we'd be intentionally * using fewer merge keys than a given path allows (treating the rest * as plain joinquals), which is unlikely to be a good idea. Also, * eliminating paths here on the basis of compare_path_costs is a lot * cheaper than building the mergejoin path only to throw it away. * * If inner_cheapest_total is well enough sorted to have not required * a sort in the path made above, we shouldn't make a duplicate path * with it, either. We handle that case with the same logic that * handles the previous consideration, by initializing the variables * that track cheapest-so-far properly. Note that we do NOT reject * inner_cheapest_total if we find it matches some shorter set of * pathkeys. That case corresponds to using fewer mergekeys to avoid * sorting inner_cheapest_total, whereas we did sort it above, so the * plans being considered are different. */ if (pathkeys_contained_in(innersortkeys, inner_cheapest_total->pathkeys)) { /* inner_cheapest_total didn't require a sort */ cheapest_startup_inner = inner_cheapest_total; cheapest_total_inner = inner_cheapest_total; } else { /* it did require a sort, at least for the full set of keys */ cheapest_startup_inner = NULL; cheapest_total_inner = NULL; } num_sortkeys = list_length(innersortkeys); if (num_sortkeys > 1 && !useallclauses) trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */ else trialsortkeys = innersortkeys; /* won't really truncate */ for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) { Path *innerpath; List *newclauses = NIL; /* * Look for an inner path ordered well enough for the first * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified * destructively, which is why we made a copy... */ trialsortkeys = list_truncate(trialsortkeys, sortkeycnt); innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, NULL, TOTAL_COST); if (innerpath != NULL && (cheapest_total_inner == NULL || compare_path_costs(innerpath, cheapest_total_inner, TOTAL_COST) < 0)) { /* Found a cheap (or even-cheaper) sorted path */ /* Select the right mergeclauses, if we didn't already */ if (sortkeycnt < num_sortkeys) { newclauses = find_mergeclauses_for_pathkeys(root, trialsortkeys, false, mergeclauses); Assert(newclauses != NIL); } else newclauses = mergeclauses; try_mergejoin_path(root, joinrel, jointype, sjinfo, param_source_rels, outerpath, innerpath, restrictlist, merge_pathkeys, newclauses, NIL, NIL); cheapest_total_inner = innerpath; } /* Same on the basis of cheapest startup cost ... */ innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, NULL, STARTUP_COST); if (innerpath != NULL && (cheapest_startup_inner == NULL || compare_path_costs(innerpath, cheapest_startup_inner, STARTUP_COST) < 0)) { /* Found a cheap (or even-cheaper) sorted path */ if (innerpath != cheapest_total_inner) { /* * Avoid rebuilding clause list if we already made one; * saves memory in big join trees... */ if (newclauses == NIL) { if (sortkeycnt < num_sortkeys) { newclauses = find_mergeclauses_for_pathkeys(root, trialsortkeys, false, mergeclauses); Assert(newclauses != NIL); } else newclauses = mergeclauses; } try_mergejoin_path(root, joinrel, jointype, sjinfo, param_source_rels, outerpath, innerpath, restrictlist, merge_pathkeys, newclauses, NIL, NIL); } cheapest_startup_inner = innerpath; } /* * Don't consider truncated sortkeys if we need all clauses. */ if (useallclauses) break; } } }
static List * select_mergejoin_clauses | ( | PlannerInfo * | root, | |
RelOptInfo * | joinrel, | |||
RelOptInfo * | outerrel, | |||
RelOptInfo * | innerrel, | |||
List * | restrictlist, | |||
JoinType | jointype, | |||
bool * | mergejoin_allowed | |||
) | [static] |
Definition at line 1292 of file joinpath.c.
References RestrictInfo::can_join, RestrictInfo::clause, clause_sides_match_join(), EC_MUST_BE_REDUNDANT, IS_OUTER_JOIN, RestrictInfo::is_pushed_down, IsA, JOIN_FULL, JOIN_RIGHT, lappend(), RestrictInfo::left_ec, lfirst, RestrictInfo::mergeopfamilies, NIL, RestrictInfo::right_ec, and update_mergeclause_eclasses().
Referenced by add_paths_to_joinrel().
{ List *result_list = NIL; bool isouterjoin = IS_OUTER_JOIN(jointype); bool have_nonmergeable_joinclause = false; ListCell *l; foreach(l, restrictlist) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); /* * If processing an outer join, only use its own join clauses in the * merge. For inner joins we can use pushed-down clauses too. (Note: * we don't set have_nonmergeable_joinclause here because pushed-down * clauses will become otherquals not joinquals.) */ if (isouterjoin && restrictinfo->is_pushed_down) continue; /* Check that clause is a mergeable operator clause */ if (!restrictinfo->can_join || restrictinfo->mergeopfamilies == NIL) { /* * The executor can handle extra joinquals that are constants, but * not anything else, when doing right/full merge join. (The * reason to support constants is so we can do FULL JOIN ON * FALSE.) */ if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const)) have_nonmergeable_joinclause = true; continue; /* not mergejoinable */ } /* * Check if clause has the form "outer op inner" or "inner op outer". */ if (!clause_sides_match_join(restrictinfo, outerrel, innerrel)) { have_nonmergeable_joinclause = true; continue; /* no good for these input relations */ } /* * Insist that each side have a non-redundant eclass. This * restriction is needed because various bits of the planner expect * that each clause in a merge be associatable with some pathkey in a * canonical pathkey list, but redundant eclasses can't appear in * canonical sort orderings. (XXX it might be worth relaxing this, * but not enough time to address it for 8.3.) * * Note: it would be bad if this condition failed for an otherwise * mergejoinable FULL JOIN clause, since that would result in * undesirable planner failure. I believe that is not possible * however; a variable involved in a full join could only appear in * below_outer_join eclasses, which aren't considered redundant. * * This case *can* happen for left/right join clauses: the outer-side * variable could be equated to a constant. Because we will propagate * that constant across the join clause, the loss of ability to do a * mergejoin is not really all that big a deal, and so it's not clear * that improving this is important. */ update_mergeclause_eclasses(root, restrictinfo); if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) || EC_MUST_BE_REDUNDANT(restrictinfo->right_ec)) { have_nonmergeable_joinclause = true; continue; /* can't handle redundant eclasses */ } result_list = lappend(result_list, restrictinfo); } /* * Report whether mergejoin is allowed (see comment at top of function). */ switch (jointype) { case JOIN_RIGHT: case JOIN_FULL: *mergejoin_allowed = !have_nonmergeable_joinclause; break; default: *mergejoin_allowed = true; break; } return result_list; }
static void sort_inner_and_outer | ( | PlannerInfo * | root, | |
RelOptInfo * | joinrel, | |||
RelOptInfo * | outerrel, | |||
RelOptInfo * | innerrel, | |||
List * | restrictlist, | |||
List * | mergeclause_list, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo, | |||
Relids | param_source_rels | |||
) | [static] |
Definition at line 488 of file joinpath.c.
References Assert, build_join_pathkeys(), RelOptInfo::cheapest_total_path, create_unique_path(), find_mergeclauses_for_pathkeys(), JOIN_UNIQUE_INNER, JOIN_UNIQUE_OUTER, lcons(), lfirst, list_copy(), list_delete_ptr(), list_head(), list_length(), make_inner_pathkeys_for_merge(), PATH_PARAM_BY_REL, select_outer_pathkeys_for_merge(), and try_mergejoin_path().
Referenced by add_paths_to_joinrel().
{ Path *outer_path; Path *inner_path; List *all_pathkeys; ListCell *l; /* * We only consider the cheapest-total-cost input paths, since we are * assuming here that a sort is required. We will consider * cheapest-startup-cost input paths later, and only if they don't need a * sort. * * This function intentionally does not consider parameterized input * paths, except when the cheapest-total is parameterized. If we did so, * we'd have a combinatorial explosion of mergejoin paths of dubious * value. This interacts with decisions elsewhere that also discriminate * against mergejoins with parameterized inputs; see comments in * src/backend/optimizer/README. */ outer_path = outerrel->cheapest_total_path; inner_path = innerrel->cheapest_total_path; /* * If either cheapest-total path is parameterized by the other rel, we * can't use a mergejoin. (There's no use looking for alternative input * paths, since these should already be the least-parameterized available * paths.) */ if (PATH_PARAM_BY_REL(outer_path, innerrel) || PATH_PARAM_BY_REL(inner_path, outerrel)) return; /* * If unique-ification is requested, do it and then handle as a plain * inner join. */ if (jointype == JOIN_UNIQUE_OUTER) { outer_path = (Path *) create_unique_path(root, outerrel, outer_path, sjinfo); Assert(outer_path); jointype = JOIN_INNER; } else if (jointype == JOIN_UNIQUE_INNER) { inner_path = (Path *) create_unique_path(root, innerrel, inner_path, sjinfo); Assert(inner_path); jointype = JOIN_INNER; } /* * Each possible ordering of the available mergejoin clauses will generate * a differently-sorted result path at essentially the same cost. We have * no basis for choosing one over another at this level of joining, but * some sort orders may be more useful than others for higher-level * mergejoins, so it's worth considering multiple orderings. * * Actually, it's not quite true that every mergeclause ordering will * generate a different path order, because some of the clauses may be * partially redundant (refer to the same EquivalenceClasses). Therefore, * what we do is convert the mergeclause list to a list of canonical * pathkeys, and then consider different orderings of the pathkeys. * * Generating a path for *every* permutation of the pathkeys doesn't seem * like a winning strategy; the cost in planning time is too high. For * now, we generate one path for each pathkey, listing that pathkey first * and the rest in random order. This should allow at least a one-clause * mergejoin without re-sorting against any other possible mergejoin * partner path. But if we've not guessed the right ordering of secondary * keys, we may end up evaluating clauses as qpquals when they could have * been done as mergeclauses. (In practice, it's rare that there's more * than two or three mergeclauses, so expending a huge amount of thought * on that is probably not worth it.) * * The pathkey order returned by select_outer_pathkeys_for_merge() has * some heuristics behind it (see that function), so be sure to try it * exactly as-is as well as making variants. */ all_pathkeys = select_outer_pathkeys_for_merge(root, mergeclause_list, joinrel); foreach(l, all_pathkeys) { List *front_pathkey = (List *) lfirst(l); List *cur_mergeclauses; List *outerkeys; List *innerkeys; List *merge_pathkeys; /* Make a pathkey list with this guy first */ if (l != list_head(all_pathkeys)) outerkeys = lcons(front_pathkey, list_delete_ptr(list_copy(all_pathkeys), front_pathkey)); else outerkeys = all_pathkeys; /* no work at first one... */ /* Sort the mergeclauses into the corresponding ordering */ cur_mergeclauses = find_mergeclauses_for_pathkeys(root, outerkeys, true, mergeclause_list); /* Should have used them all... */ Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list)); /* Build sort pathkeys for the inner side */ innerkeys = make_inner_pathkeys_for_merge(root, cur_mergeclauses, outerkeys); /* Build pathkeys representing output sort order */ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerkeys); /* * And now we can make the path. * * Note: it's possible that the cheapest paths will already be sorted * properly. try_mergejoin_path will detect that case and suppress an * explicit sort step, so we needn't do so here. */ try_mergejoin_path(root, joinrel, jointype, sjinfo, param_source_rels, outer_path, inner_path, restrictlist, merge_pathkeys, cur_mergeclauses, outerkeys, innerkeys); } }
static void try_hashjoin_path | ( | PlannerInfo * | root, | |
RelOptInfo * | joinrel, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo, | |||
SemiAntiJoinFactors * | semifactors, | |||
Relids | param_source_rels, | |||
Path * | outer_path, | |||
Path * | inner_path, | |||
List * | restrict_clauses, | |||
List * | hashclauses | |||
) | [static] |
Definition at line 380 of file joinpath.c.
References add_path(), add_path_precheck(), bms_free(), bms_overlap(), calc_non_nestloop_required_outer(), create_hashjoin_path(), initial_cost_hashjoin(), NIL, JoinCostWorkspace::startup_cost, and JoinCostWorkspace::total_cost.
Referenced by hash_inner_and_outer().
{ Relids required_outer; JoinCostWorkspace workspace; /* * Check to see if proposed path is still parameterized, and reject if the * parameterization wouldn't be sensible. */ required_outer = calc_non_nestloop_required_outer(outer_path, inner_path); if (required_outer && !bms_overlap(required_outer, param_source_rels)) { /* Waste no memory when we reject a path here */ bms_free(required_outer); return; } /* * See comments in try_nestloop_path(). Also note that hashjoin paths * never have any output pathkeys, per comments in create_hashjoin_path. */ initial_cost_hashjoin(root, &workspace, jointype, hashclauses, outer_path, inner_path, sjinfo, semifactors); if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, NIL, required_outer)) { add_path(joinrel, (Path *) create_hashjoin_path(root, joinrel, jointype, &workspace, sjinfo, semifactors, outer_path, inner_path, restrict_clauses, required_outer, hashclauses)); } else { /* Waste no memory when we reject a path here */ bms_free(required_outer); } }
static void try_mergejoin_path | ( | PlannerInfo * | root, | |
RelOptInfo * | joinrel, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo, | |||
Relids | param_source_rels, | |||
Path * | outer_path, | |||
Path * | inner_path, | |||
List * | restrict_clauses, | |||
List * | pathkeys, | |||
List * | mergeclauses, | |||
List * | outersortkeys, | |||
List * | innersortkeys | |||
) | [static] |
Definition at line 299 of file joinpath.c.
References add_path(), add_path_precheck(), bms_free(), bms_overlap(), calc_non_nestloop_required_outer(), create_mergejoin_path(), initial_cost_mergejoin(), Path::pathkeys, pathkeys_contained_in(), JoinCostWorkspace::startup_cost, and JoinCostWorkspace::total_cost.
Referenced by match_unsorted_outer(), and sort_inner_and_outer().
{ Relids required_outer; JoinCostWorkspace workspace; /* * Check to see if proposed path is still parameterized, and reject if the * parameterization wouldn't be sensible. */ required_outer = calc_non_nestloop_required_outer(outer_path, inner_path); if (required_outer && !bms_overlap(required_outer, param_source_rels)) { /* Waste no memory when we reject a path here */ bms_free(required_outer); return; } /* * If the given paths are already well enough ordered, we can skip doing * an explicit sort. */ if (outersortkeys && pathkeys_contained_in(outersortkeys, outer_path->pathkeys)) outersortkeys = NIL; if (innersortkeys && pathkeys_contained_in(innersortkeys, inner_path->pathkeys)) innersortkeys = NIL; /* * See comments in try_nestloop_path(). */ initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, outer_path, inner_path, outersortkeys, innersortkeys, sjinfo); if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, pathkeys, required_outer)) { add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, jointype, &workspace, sjinfo, outer_path, inner_path, restrict_clauses, pathkeys, required_outer, mergeclauses, outersortkeys, innersortkeys)); } else { /* Waste no memory when we reject a path here */ bms_free(required_outer); } }
static void try_nestloop_path | ( | PlannerInfo * | root, | |
RelOptInfo * | joinrel, | |||
JoinType | jointype, | |||
SpecialJoinInfo * | sjinfo, | |||
SemiAntiJoinFactors * | semifactors, | |||
Relids | param_source_rels, | |||
Path * | outer_path, | |||
Path * | inner_path, | |||
List * | restrict_clauses, | |||
List * | pathkeys | |||
) | [static] |
Definition at line 228 of file joinpath.c.
References add_path(), add_path_precheck(), bms_free(), bms_overlap(), calc_nestloop_required_outer(), create_nestloop_path(), initial_cost_nestloop(), JoinCostWorkspace::startup_cost, and JoinCostWorkspace::total_cost.
Referenced by match_unsorted_outer().
{ Relids required_outer; JoinCostWorkspace workspace; /* * Check to see if proposed path is still parameterized, and reject if the * parameterization wouldn't be sensible. */ required_outer = calc_nestloop_required_outer(outer_path, inner_path); if (required_outer && !bms_overlap(required_outer, param_source_rels)) { /* Waste no memory when we reject a path here */ bms_free(required_outer); return; } /* * Do a precheck to quickly eliminate obviously-inferior paths. We * calculate a cheap lower bound on the path's cost and then use * add_path_precheck() to see if the path is clearly going to be dominated * by some existing path for the joinrel. If not, do the full pushup with * creating a fully valid path structure and submitting it to add_path(). * The latter two steps are expensive enough to make this two-phase * methodology worthwhile. */ initial_cost_nestloop(root, &workspace, jointype, outer_path, inner_path, sjinfo, semifactors); if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, pathkeys, required_outer)) { add_path(joinrel, (Path *) create_nestloop_path(root, joinrel, jointype, &workspace, sjinfo, semifactors, outer_path, inner_path, restrict_clauses, pathkeys, required_outer)); } else { /* Waste no memory when we reject a path here */ bms_free(required_outer); } }