Header And Logo

PostgreSQL
| The world's most advanced open source database.

Defines | Functions

joinpath.c File Reference

#include "postgres.h"
#include <math.h>
#include "executor/executor.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
Include dependency graph for joinpath.c:

Go to the source code of this file.

Defines

#define PATH_PARAM_BY_REL(path, rel)   ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))

Functions

static void sort_inner_and_outer (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype, SpecialJoinInfo *sjinfo, Relids param_source_rels)
static void match_unsorted_outer (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels)
static void hash_inner_and_outer (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels)
static Listselect_mergejoin_clauses (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, bool *mergejoin_allowed)
void add_paths_to_joinrel (PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, SpecialJoinInfo *sjinfo, List *restrictlist)
static void try_nestloop_path (PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys)
static void try_mergejoin_path (PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, SpecialJoinInfo *sjinfo, Relids param_source_rels, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys, List *mergeclauses, List *outersortkeys, List *innersortkeys)
static void try_hashjoin_path (PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, Path *outer_path, Path *inner_path, List *restrict_clauses, List *hashclauses)
static bool clause_sides_match_join (RestrictInfo *rinfo, RelOptInfo *outerrel, RelOptInfo *innerrel)

Define Documentation

#define PATH_PARAM_BY_REL (   path,
  rel 
)    ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))

Definition at line 25 of file joinpath.c.

Referenced by hash_inner_and_outer(), match_unsorted_outer(), and sort_inner_and_outer().


Function Documentation

void add_paths_to_joinrel ( PlannerInfo root,
RelOptInfo joinrel,
RelOptInfo outerrel,
RelOptInfo innerrel,
JoinType  jointype,
SpecialJoinInfo sjinfo,
List restrictlist 
)

Definition at line 78 of file joinpath.c.

References PlannerInfo::all_baserels, bms_difference(), bms_is_member(), bms_join(), bms_overlap(), compute_semi_anti_join_factors(), enable_hashjoin, enable_mergejoin, hash_inner_and_outer(), JOIN_ANTI, JOIN_FULL, PlannerInfo::join_info_list, JOIN_SEMI, SpecialJoinInfo::jointype, PlannerInfo::lateral_info_list, LateralJoinInfo::lateral_lhs, LateralJoinInfo::lateral_rhs, lfirst, match_unsorted_outer(), SpecialJoinInfo::min_lefthand, SpecialJoinInfo::min_righthand, RelOptInfo::relids, select_mergejoin_clauses(), and sort_inner_and_outer().

Referenced by make_join_rel().

{
    List       *mergeclause_list = NIL;
    bool        mergejoin_allowed = true;
    SemiAntiJoinFactors semifactors;
    Relids      param_source_rels = NULL;
    ListCell   *lc;

    /*
     * Find potential mergejoin clauses.  We can skip this if we are not
     * interested in doing a mergejoin.  However, mergejoin may be our only
     * way of implementing a full outer join, so override enable_mergejoin if
     * it's a full join.
     */
    if (enable_mergejoin || jointype == JOIN_FULL)
        mergeclause_list = select_mergejoin_clauses(root,
                                                    joinrel,
                                                    outerrel,
                                                    innerrel,
                                                    restrictlist,
                                                    jointype,
                                                    &mergejoin_allowed);

    /*
     * If it's SEMI or ANTI join, compute correction factors for cost
     * estimation.  These will be the same for all paths.
     */
    if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
        compute_semi_anti_join_factors(root, outerrel, innerrel,
                                       jointype, sjinfo, restrictlist,
                                       &semifactors);

    /*
     * Decide whether it's sensible to generate parameterized paths for this
     * joinrel, and if so, which relations such paths should require.  There
     * is no need to create a parameterized result path unless there is a join
     * order restriction that prevents joining one of our input rels directly
     * to the parameter source rel instead of joining to the other input rel.
     * This restriction reduces the number of parameterized paths we have to
     * deal with at higher join levels, without compromising the quality of
     * the resulting plan.  We express the restriction as a Relids set that
     * must overlap the parameterization of any proposed join path.
     */
    foreach(lc, root->join_info_list)
    {
        SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);

        /*
         * SJ is relevant to this join if we have some part of its RHS
         * (possibly not all of it), and haven't yet joined to its LHS.  (This
         * test is pretty simplistic, but should be sufficient considering the
         * join has already been proven legal.)  If the SJ is relevant, it
         * presents constraints for joining to anything not in its RHS.
         */
        if (bms_overlap(joinrel->relids, sjinfo->min_righthand) &&
            !bms_overlap(joinrel->relids, sjinfo->min_lefthand))
            param_source_rels = bms_join(param_source_rels,
                                         bms_difference(root->all_baserels,
                                                     sjinfo->min_righthand));

        /* full joins constrain both sides symmetrically */
        if (sjinfo->jointype == JOIN_FULL &&
            bms_overlap(joinrel->relids, sjinfo->min_lefthand) &&
            !bms_overlap(joinrel->relids, sjinfo->min_righthand))
            param_source_rels = bms_join(param_source_rels,
                                         bms_difference(root->all_baserels,
                                                      sjinfo->min_lefthand));
    }

    /*
     * However, when a LATERAL subquery is involved, we have to be a bit
     * laxer, because there will simply not be any paths for the joinrel that
     * aren't parameterized by whatever the subquery is parameterized by,
     * unless its parameterization is resolved within the joinrel.  Hence, add
     * to param_source_rels anything that is laterally referenced in either
     * input and is not in the join already.
     */
    foreach(lc, root->lateral_info_list)
    {
        LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc);

        if (bms_is_member(ljinfo->lateral_rhs, joinrel->relids))
            param_source_rels = bms_join(param_source_rels,
                                         bms_difference(ljinfo->lateral_lhs,
                                                        joinrel->relids));
    }

    /*
     * 1. Consider mergejoin paths where both relations must be explicitly
     * sorted.  Skip this if we can't mergejoin.
     */
    if (mergejoin_allowed)
        sort_inner_and_outer(root, joinrel, outerrel, innerrel,
                             restrictlist, mergeclause_list, jointype,
                             sjinfo, param_source_rels);

    /*
     * 2. Consider paths where the outer relation need not be explicitly
     * sorted. This includes both nestloops and mergejoins where the outer
     * path is already ordered.  Again, skip this if we can't mergejoin.
     * (That's okay because we know that nestloop can't handle right/full
     * joins at all, so it wouldn't work in the prohibited cases either.)
     */
    if (mergejoin_allowed)
        match_unsorted_outer(root, joinrel, outerrel, innerrel,
                             restrictlist, mergeclause_list, jointype,
                             sjinfo, &semifactors, param_source_rels);

#ifdef NOT_USED

    /*
     * 3. Consider paths where the inner relation need not be explicitly
     * sorted.  This includes mergejoins only (nestloops were already built in
     * match_unsorted_outer).
     *
     * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
     * significant difference between the inner and outer side of a mergejoin,
     * so match_unsorted_inner creates no paths that aren't equivalent to
     * those made by match_unsorted_outer when add_paths_to_joinrel() is
     * invoked with the two rels given in the other order.
     */
    if (mergejoin_allowed)
        match_unsorted_inner(root, joinrel, outerrel, innerrel,
                             restrictlist, mergeclause_list, jointype,
                             sjinfo, &semifactors, param_source_rels);
#endif

    /*
     * 4. Consider paths where both outer and inner relations must be hashed
     * before being joined.  As above, disregard enable_hashjoin for full
     * joins, because there may be no other alternative.
     */
    if (enable_hashjoin || jointype == JOIN_FULL)
        hash_inner_and_outer(root, joinrel, outerrel, innerrel,
                             restrictlist, jointype,
                             sjinfo, &semifactors, param_source_rels);
}

static bool clause_sides_match_join ( RestrictInfo rinfo,
RelOptInfo outerrel,
RelOptInfo innerrel 
) [inline, static]

Definition at line 451 of file joinpath.c.

References bms_is_subset(), RestrictInfo::left_relids, RestrictInfo::outer_is_left, RelOptInfo::relids, and RestrictInfo::right_relids.

Referenced by hash_inner_and_outer(), and select_mergejoin_clauses().

{
    if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
        bms_is_subset(rinfo->right_relids, innerrel->relids))
    {
        /* lefthand side is outer */
        rinfo->outer_is_left = true;
        return true;
    }
    else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
             bms_is_subset(rinfo->right_relids, outerrel->relids))
    {
        /* righthand side is outer */
        rinfo->outer_is_left = false;
        return true;
    }
    return false;               /* no good for these input relations */
}

static void hash_inner_and_outer ( PlannerInfo root,
RelOptInfo joinrel,
RelOptInfo outerrel,
RelOptInfo innerrel,
List restrictlist,
JoinType  jointype,
SpecialJoinInfo sjinfo,
SemiAntiJoinFactors semifactors,
Relids  param_source_rels 
) [static]

Definition at line 1085 of file joinpath.c.

References Assert, RestrictInfo::can_join, RelOptInfo::cheapest_parameterized_paths, RelOptInfo::cheapest_startup_path, RelOptInfo::cheapest_total_path, clause_sides_match_join(), create_unique_path(), RestrictInfo::hashjoinoperator, InvalidOid, IS_OUTER_JOIN, RestrictInfo::is_pushed_down, JOIN_UNIQUE_INNER, JOIN_UNIQUE_OUTER, lappend(), lfirst, NULL, PATH_PARAM_BY_REL, and try_hashjoin_path().

Referenced by add_paths_to_joinrel().

{
    bool        isouterjoin = IS_OUTER_JOIN(jointype);
    List       *hashclauses;
    ListCell   *l;

    /*
     * We need to build only one hashclauses list for any given pair of outer
     * and inner relations; all of the hashable clauses will be used as keys.
     *
     * Scan the join's restrictinfo list to find hashjoinable clauses that are
     * usable with this pair of sub-relations.
     */
    hashclauses = NIL;
    foreach(l, restrictlist)
    {
        RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);

        /*
         * If processing an outer join, only use its own join clauses for
         * hashing.  For inner joins we need not be so picky.
         */
        if (isouterjoin && restrictinfo->is_pushed_down)
            continue;

        if (!restrictinfo->can_join ||
            restrictinfo->hashjoinoperator == InvalidOid)
            continue;           /* not hashjoinable */

        /*
         * Check if clause has the form "outer op inner" or "inner op outer".
         */
        if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
            continue;           /* no good for these input relations */

        hashclauses = lappend(hashclauses, restrictinfo);
    }

    /* If we found any usable hashclauses, make paths */
    if (hashclauses)
    {
        /*
         * We consider both the cheapest-total-cost and cheapest-startup-cost
         * outer paths.  There's no need to consider any but the
         * cheapest-total-cost inner path, however.
         */
        Path       *cheapest_startup_outer = outerrel->cheapest_startup_path;
        Path       *cheapest_total_outer = outerrel->cheapest_total_path;
        Path       *cheapest_total_inner = innerrel->cheapest_total_path;

        /*
         * If either cheapest-total path is parameterized by the other rel, we
         * can't use a hashjoin.  (There's no use looking for alternative
         * input paths, since these should already be the least-parameterized
         * available paths.)
         */
        if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
            PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
            return;

        /* Unique-ify if need be; we ignore parameterized possibilities */
        if (jointype == JOIN_UNIQUE_OUTER)
        {
            cheapest_total_outer = (Path *)
                create_unique_path(root, outerrel,
                                   cheapest_total_outer, sjinfo);
            Assert(cheapest_total_outer);
            jointype = JOIN_INNER;
            try_hashjoin_path(root,
                              joinrel,
                              jointype,
                              sjinfo,
                              semifactors,
                              param_source_rels,
                              cheapest_total_outer,
                              cheapest_total_inner,
                              restrictlist,
                              hashclauses);
            /* no possibility of cheap startup here */
        }
        else if (jointype == JOIN_UNIQUE_INNER)
        {
            cheapest_total_inner = (Path *)
                create_unique_path(root, innerrel,
                                   cheapest_total_inner, sjinfo);
            Assert(cheapest_total_inner);
            jointype = JOIN_INNER;
            try_hashjoin_path(root,
                              joinrel,
                              jointype,
                              sjinfo,
                              semifactors,
                              param_source_rels,
                              cheapest_total_outer,
                              cheapest_total_inner,
                              restrictlist,
                              hashclauses);
            if (cheapest_startup_outer != NULL &&
                cheapest_startup_outer != cheapest_total_outer)
                try_hashjoin_path(root,
                                  joinrel,
                                  jointype,
                                  sjinfo,
                                  semifactors,
                                  param_source_rels,
                                  cheapest_startup_outer,
                                  cheapest_total_inner,
                                  restrictlist,
                                  hashclauses);
        }
        else
        {
            /*
             * For other jointypes, we consider the cheapest startup outer
             * together with the cheapest total inner, and then consider
             * pairings of cheapest-total paths including parameterized ones.
             * There is no use in generating parameterized paths on the basis
             * of possibly cheap startup cost, so this is sufficient.
             */
            ListCell   *lc1;
            ListCell   *lc2;

            if (cheapest_startup_outer != NULL)
                try_hashjoin_path(root,
                                  joinrel,
                                  jointype,
                                  sjinfo,
                                  semifactors,
                                  param_source_rels,
                                  cheapest_startup_outer,
                                  cheapest_total_inner,
                                  restrictlist,
                                  hashclauses);

            foreach(lc1, outerrel->cheapest_parameterized_paths)
            {
                Path       *outerpath = (Path *) lfirst(lc1);

                /*
                 * We cannot use an outer path that is parameterized by the
                 * inner rel.
                 */
                if (PATH_PARAM_BY_REL(outerpath, innerrel))
                    continue;

                foreach(lc2, innerrel->cheapest_parameterized_paths)
                {
                    Path       *innerpath = (Path *) lfirst(lc2);

                    /*
                     * We cannot use an inner path that is parameterized by
                     * the outer rel, either.
                     */
                    if (PATH_PARAM_BY_REL(innerpath, outerrel))
                        continue;

                    if (outerpath == cheapest_startup_outer &&
                        innerpath == cheapest_total_inner)
                        continue;       /* already tried it */

                    try_hashjoin_path(root,
                                      joinrel,
                                      jointype,
                                      sjinfo,
                                      semifactors,
                                      param_source_rels,
                                      outerpath,
                                      innerpath,
                                      restrictlist,
                                      hashclauses);
                }
            }
        }
    }
}

static void match_unsorted_outer ( PlannerInfo root,
RelOptInfo joinrel,
RelOptInfo outerrel,
RelOptInfo innerrel,
List restrictlist,
List mergeclause_list,
JoinType  jointype,
SpecialJoinInfo sjinfo,
SemiAntiJoinFactors semifactors,
Relids  param_source_rels 
) [static]

Definition at line 673 of file joinpath.c.

References Assert, build_join_pathkeys(), RelOptInfo::cheapest_parameterized_paths, RelOptInfo::cheapest_total_path, compare_path_costs(), create_material_path(), create_unique_path(), elog, enable_material, ERROR, ExecMaterializesOutput(), find_mergeclauses_for_pathkeys(), get_cheapest_path_for_pathkeys(), JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_RIGHT, JOIN_SEMI, JOIN_UNIQUE_INNER, JOIN_UNIQUE_OUTER, lfirst, list_copy(), list_length(), list_truncate(), make_inner_pathkeys_for_merge(), NIL, NULL, PATH_PARAM_BY_REL, Path::pathkeys, pathkeys_contained_in(), RelOptInfo::pathlist, Path::pathtype, STARTUP_COST, TOTAL_COST, try_mergejoin_path(), and try_nestloop_path().

Referenced by add_paths_to_joinrel().

{
    JoinType    save_jointype = jointype;
    bool        nestjoinOK;
    bool        useallclauses;
    Path       *inner_cheapest_total = innerrel->cheapest_total_path;
    Path       *matpath = NULL;
    ListCell   *lc1;

    /*
     * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
     * are doing a right or full mergejoin, we must use *all* the mergeclauses
     * as join clauses, else we will not have a valid plan.  (Although these
     * two flags are currently inverses, keep them separate for clarity and
     * possible future changes.)
     */
    switch (jointype)
    {
        case JOIN_INNER:
        case JOIN_LEFT:
        case JOIN_SEMI:
        case JOIN_ANTI:
            nestjoinOK = true;
            useallclauses = false;
            break;
        case JOIN_RIGHT:
        case JOIN_FULL:
            nestjoinOK = false;
            useallclauses = true;
            break;
        case JOIN_UNIQUE_OUTER:
        case JOIN_UNIQUE_INNER:
            jointype = JOIN_INNER;
            nestjoinOK = true;
            useallclauses = false;
            break;
        default:
            elog(ERROR, "unrecognized join type: %d",
                 (int) jointype);
            nestjoinOK = false; /* keep compiler quiet */
            useallclauses = false;
            break;
    }

    /*
     * If inner_cheapest_total is parameterized by the outer rel, ignore it;
     * we will consider it below as a member of cheapest_parameterized_paths,
     * but the other possibilities considered in this routine aren't usable.
     */
    if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
        inner_cheapest_total = NULL;

    /*
     * If we need to unique-ify the inner path, we will consider only the
     * cheapest-total inner.
     */
    if (save_jointype == JOIN_UNIQUE_INNER)
    {
        /* No way to do this with an inner path parameterized by outer rel */
        if (inner_cheapest_total == NULL)
            return;
        inner_cheapest_total = (Path *)
            create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
        Assert(inner_cheapest_total);
    }
    else if (nestjoinOK)
    {
        /*
         * Consider materializing the cheapest inner path, unless
         * enable_material is off or the path in question materializes its
         * output anyway.
         */
        if (enable_material && inner_cheapest_total != NULL &&
            !ExecMaterializesOutput(inner_cheapest_total->pathtype))
            matpath = (Path *)
                create_material_path(innerrel, inner_cheapest_total);
    }

    foreach(lc1, outerrel->pathlist)
    {
        Path       *outerpath = (Path *) lfirst(lc1);
        List       *merge_pathkeys;
        List       *mergeclauses;
        List       *innersortkeys;
        List       *trialsortkeys;
        Path       *cheapest_startup_inner;
        Path       *cheapest_total_inner;
        int         num_sortkeys;
        int         sortkeycnt;

        /*
         * We cannot use an outer path that is parameterized by the inner rel.
         */
        if (PATH_PARAM_BY_REL(outerpath, innerrel))
            continue;

        /*
         * If we need to unique-ify the outer path, it's pointless to consider
         * any but the cheapest outer.  (XXX we don't consider parameterized
         * outers, nor inners, for unique-ified cases.  Should we?)
         */
        if (save_jointype == JOIN_UNIQUE_OUTER)
        {
            if (outerpath != outerrel->cheapest_total_path)
                continue;
            outerpath = (Path *) create_unique_path(root, outerrel,
                                                    outerpath, sjinfo);
            Assert(outerpath);
        }

        /*
         * The result will have this sort order (even if it is implemented as
         * a nestloop, and even if some of the mergeclauses are implemented by
         * qpquals rather than as true mergeclauses):
         */
        merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
                                             outerpath->pathkeys);

        if (save_jointype == JOIN_UNIQUE_INNER)
        {
            /*
             * Consider nestloop join, but only with the unique-ified cheapest
             * inner path
             */
            try_nestloop_path(root,
                              joinrel,
                              jointype,
                              sjinfo,
                              semifactors,
                              param_source_rels,
                              outerpath,
                              inner_cheapest_total,
                              restrictlist,
                              merge_pathkeys);
        }
        else if (nestjoinOK)
        {
            /*
             * Consider nestloop joins using this outer path and various
             * available paths for the inner relation.  We consider the
             * cheapest-total paths for each available parameterization of the
             * inner relation, including the unparameterized case.
             */
            ListCell   *lc2;

            foreach(lc2, innerrel->cheapest_parameterized_paths)
            {
                Path       *innerpath = (Path *) lfirst(lc2);

                try_nestloop_path(root,
                                  joinrel,
                                  jointype,
                                  sjinfo,
                                  semifactors,
                                  param_source_rels,
                                  outerpath,
                                  innerpath,
                                  restrictlist,
                                  merge_pathkeys);
            }

            /* Also consider materialized form of the cheapest inner path */
            if (matpath != NULL)
                try_nestloop_path(root,
                                  joinrel,
                                  jointype,
                                  sjinfo,
                                  semifactors,
                                  param_source_rels,
                                  outerpath,
                                  matpath,
                                  restrictlist,
                                  merge_pathkeys);
        }

        /* Can't do anything else if outer path needs to be unique'd */
        if (save_jointype == JOIN_UNIQUE_OUTER)
            continue;

        /* Can't do anything else if inner rel is parameterized by outer */
        if (inner_cheapest_total == NULL)
            continue;

        /* Look for useful mergeclauses (if any) */
        mergeclauses = find_mergeclauses_for_pathkeys(root,
                                                      outerpath->pathkeys,
                                                      true,
                                                      mergeclause_list);

        /*
         * Done with this outer path if no chance for a mergejoin.
         *
         * Special corner case: for "x FULL JOIN y ON true", there will be no
         * join clauses at all.  Ordinarily we'd generate a clauseless
         * nestloop path, but since mergejoin is our only join type that
         * supports FULL JOIN without any join clauses, it's necessary to
         * generate a clauseless mergejoin path instead.
         */
        if (mergeclauses == NIL)
        {
            if (jointype == JOIN_FULL)
                 /* okay to try for mergejoin */ ;
            else
                continue;
        }
        if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
            continue;

        /* Compute the required ordering of the inner path */
        innersortkeys = make_inner_pathkeys_for_merge(root,
                                                      mergeclauses,
                                                      outerpath->pathkeys);

        /*
         * Generate a mergejoin on the basis of sorting the cheapest inner.
         * Since a sort will be needed, only cheapest total cost matters. (But
         * try_mergejoin_path will do the right thing if inner_cheapest_total
         * is already correctly sorted.)
         */
        try_mergejoin_path(root,
                           joinrel,
                           jointype,
                           sjinfo,
                           param_source_rels,
                           outerpath,
                           inner_cheapest_total,
                           restrictlist,
                           merge_pathkeys,
                           mergeclauses,
                           NIL,
                           innersortkeys);

        /* Can't do anything else if inner path needs to be unique'd */
        if (save_jointype == JOIN_UNIQUE_INNER)
            continue;

        /*
         * Look for presorted inner paths that satisfy the innersortkey list
         * --- or any truncation thereof, if we are allowed to build a
         * mergejoin using a subset of the merge clauses.  Here, we consider
         * both cheap startup cost and cheap total cost.
         *
         * Currently we do not consider parameterized inner paths here. This
         * interacts with decisions elsewhere that also discriminate against
         * mergejoins with parameterized inputs; see comments in
         * src/backend/optimizer/README.
         *
         * As we shorten the sortkey list, we should consider only paths that
         * are strictly cheaper than (in particular, not the same as) any path
         * found in an earlier iteration.  Otherwise we'd be intentionally
         * using fewer merge keys than a given path allows (treating the rest
         * as plain joinquals), which is unlikely to be a good idea.  Also,
         * eliminating paths here on the basis of compare_path_costs is a lot
         * cheaper than building the mergejoin path only to throw it away.
         *
         * If inner_cheapest_total is well enough sorted to have not required
         * a sort in the path made above, we shouldn't make a duplicate path
         * with it, either.  We handle that case with the same logic that
         * handles the previous consideration, by initializing the variables
         * that track cheapest-so-far properly.  Note that we do NOT reject
         * inner_cheapest_total if we find it matches some shorter set of
         * pathkeys.  That case corresponds to using fewer mergekeys to avoid
         * sorting inner_cheapest_total, whereas we did sort it above, so the
         * plans being considered are different.
         */
        if (pathkeys_contained_in(innersortkeys,
                                  inner_cheapest_total->pathkeys))
        {
            /* inner_cheapest_total didn't require a sort */
            cheapest_startup_inner = inner_cheapest_total;
            cheapest_total_inner = inner_cheapest_total;
        }
        else
        {
            /* it did require a sort, at least for the full set of keys */
            cheapest_startup_inner = NULL;
            cheapest_total_inner = NULL;
        }
        num_sortkeys = list_length(innersortkeys);
        if (num_sortkeys > 1 && !useallclauses)
            trialsortkeys = list_copy(innersortkeys);   /* need modifiable copy */
        else
            trialsortkeys = innersortkeys;      /* won't really truncate */

        for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
        {
            Path       *innerpath;
            List       *newclauses = NIL;

            /*
             * Look for an inner path ordered well enough for the first
             * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
             * destructively, which is why we made a copy...
             */
            trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
            innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
                                                       trialsortkeys,
                                                       NULL,
                                                       TOTAL_COST);
            if (innerpath != NULL &&
                (cheapest_total_inner == NULL ||
                 compare_path_costs(innerpath, cheapest_total_inner,
                                    TOTAL_COST) < 0))
            {
                /* Found a cheap (or even-cheaper) sorted path */
                /* Select the right mergeclauses, if we didn't already */
                if (sortkeycnt < num_sortkeys)
                {
                    newclauses =
                        find_mergeclauses_for_pathkeys(root,
                                                       trialsortkeys,
                                                       false,
                                                       mergeclauses);
                    Assert(newclauses != NIL);
                }
                else
                    newclauses = mergeclauses;
                try_mergejoin_path(root,
                                   joinrel,
                                   jointype,
                                   sjinfo,
                                   param_source_rels,
                                   outerpath,
                                   innerpath,
                                   restrictlist,
                                   merge_pathkeys,
                                   newclauses,
                                   NIL,
                                   NIL);
                cheapest_total_inner = innerpath;
            }
            /* Same on the basis of cheapest startup cost ... */
            innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
                                                       trialsortkeys,
                                                       NULL,
                                                       STARTUP_COST);
            if (innerpath != NULL &&
                (cheapest_startup_inner == NULL ||
                 compare_path_costs(innerpath, cheapest_startup_inner,
                                    STARTUP_COST) < 0))
            {
                /* Found a cheap (or even-cheaper) sorted path */
                if (innerpath != cheapest_total_inner)
                {
                    /*
                     * Avoid rebuilding clause list if we already made one;
                     * saves memory in big join trees...
                     */
                    if (newclauses == NIL)
                    {
                        if (sortkeycnt < num_sortkeys)
                        {
                            newclauses =
                                find_mergeclauses_for_pathkeys(root,
                                                               trialsortkeys,
                                                               false,
                                                               mergeclauses);
                            Assert(newclauses != NIL);
                        }
                        else
                            newclauses = mergeclauses;
                    }
                    try_mergejoin_path(root,
                                       joinrel,
                                       jointype,
                                       sjinfo,
                                       param_source_rels,
                                       outerpath,
                                       innerpath,
                                       restrictlist,
                                       merge_pathkeys,
                                       newclauses,
                                       NIL,
                                       NIL);
                }
                cheapest_startup_inner = innerpath;
            }

            /*
             * Don't consider truncated sortkeys if we need all clauses.
             */
            if (useallclauses)
                break;
        }
    }
}

static List * select_mergejoin_clauses ( PlannerInfo root,
RelOptInfo joinrel,
RelOptInfo outerrel,
RelOptInfo innerrel,
List restrictlist,
JoinType  jointype,
bool mergejoin_allowed 
) [static]

Definition at line 1292 of file joinpath.c.

References RestrictInfo::can_join, RestrictInfo::clause, clause_sides_match_join(), EC_MUST_BE_REDUNDANT, IS_OUTER_JOIN, RestrictInfo::is_pushed_down, IsA, JOIN_FULL, JOIN_RIGHT, lappend(), RestrictInfo::left_ec, lfirst, RestrictInfo::mergeopfamilies, NIL, RestrictInfo::right_ec, and update_mergeclause_eclasses().

Referenced by add_paths_to_joinrel().

{
    List       *result_list = NIL;
    bool        isouterjoin = IS_OUTER_JOIN(jointype);
    bool        have_nonmergeable_joinclause = false;
    ListCell   *l;

    foreach(l, restrictlist)
    {
        RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);

        /*
         * If processing an outer join, only use its own join clauses in the
         * merge.  For inner joins we can use pushed-down clauses too. (Note:
         * we don't set have_nonmergeable_joinclause here because pushed-down
         * clauses will become otherquals not joinquals.)
         */
        if (isouterjoin && restrictinfo->is_pushed_down)
            continue;

        /* Check that clause is a mergeable operator clause */
        if (!restrictinfo->can_join ||
            restrictinfo->mergeopfamilies == NIL)
        {
            /*
             * The executor can handle extra joinquals that are constants, but
             * not anything else, when doing right/full merge join.  (The
             * reason to support constants is so we can do FULL JOIN ON
             * FALSE.)
             */
            if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
                have_nonmergeable_joinclause = true;
            continue;           /* not mergejoinable */
        }

        /*
         * Check if clause has the form "outer op inner" or "inner op outer".
         */
        if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
        {
            have_nonmergeable_joinclause = true;
            continue;           /* no good for these input relations */
        }

        /*
         * Insist that each side have a non-redundant eclass.  This
         * restriction is needed because various bits of the planner expect
         * that each clause in a merge be associatable with some pathkey in a
         * canonical pathkey list, but redundant eclasses can't appear in
         * canonical sort orderings.  (XXX it might be worth relaxing this,
         * but not enough time to address it for 8.3.)
         *
         * Note: it would be bad if this condition failed for an otherwise
         * mergejoinable FULL JOIN clause, since that would result in
         * undesirable planner failure.  I believe that is not possible
         * however; a variable involved in a full join could only appear in
         * below_outer_join eclasses, which aren't considered redundant.
         *
         * This case *can* happen for left/right join clauses: the outer-side
         * variable could be equated to a constant.  Because we will propagate
         * that constant across the join clause, the loss of ability to do a
         * mergejoin is not really all that big a deal, and so it's not clear
         * that improving this is important.
         */
        update_mergeclause_eclasses(root, restrictinfo);

        if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
            EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
        {
            have_nonmergeable_joinclause = true;
            continue;           /* can't handle redundant eclasses */
        }

        result_list = lappend(result_list, restrictinfo);
    }

    /*
     * Report whether mergejoin is allowed (see comment at top of function).
     */
    switch (jointype)
    {
        case JOIN_RIGHT:
        case JOIN_FULL:
            *mergejoin_allowed = !have_nonmergeable_joinclause;
            break;
        default:
            *mergejoin_allowed = true;
            break;
    }

    return result_list;
}

static void sort_inner_and_outer ( PlannerInfo root,
RelOptInfo joinrel,
RelOptInfo outerrel,
RelOptInfo innerrel,
List restrictlist,
List mergeclause_list,
JoinType  jointype,
SpecialJoinInfo sjinfo,
Relids  param_source_rels 
) [static]

Definition at line 488 of file joinpath.c.

References Assert, build_join_pathkeys(), RelOptInfo::cheapest_total_path, create_unique_path(), find_mergeclauses_for_pathkeys(), JOIN_UNIQUE_INNER, JOIN_UNIQUE_OUTER, lcons(), lfirst, list_copy(), list_delete_ptr(), list_head(), list_length(), make_inner_pathkeys_for_merge(), PATH_PARAM_BY_REL, select_outer_pathkeys_for_merge(), and try_mergejoin_path().

Referenced by add_paths_to_joinrel().

{
    Path       *outer_path;
    Path       *inner_path;
    List       *all_pathkeys;
    ListCell   *l;

    /*
     * We only consider the cheapest-total-cost input paths, since we are
     * assuming here that a sort is required.  We will consider
     * cheapest-startup-cost input paths later, and only if they don't need a
     * sort.
     *
     * This function intentionally does not consider parameterized input
     * paths, except when the cheapest-total is parameterized.  If we did so,
     * we'd have a combinatorial explosion of mergejoin paths of dubious
     * value.  This interacts with decisions elsewhere that also discriminate
     * against mergejoins with parameterized inputs; see comments in
     * src/backend/optimizer/README.
     */
    outer_path = outerrel->cheapest_total_path;
    inner_path = innerrel->cheapest_total_path;

    /*
     * If either cheapest-total path is parameterized by the other rel, we
     * can't use a mergejoin.  (There's no use looking for alternative input
     * paths, since these should already be the least-parameterized available
     * paths.)
     */
    if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
        PATH_PARAM_BY_REL(inner_path, outerrel))
        return;

    /*
     * If unique-ification is requested, do it and then handle as a plain
     * inner join.
     */
    if (jointype == JOIN_UNIQUE_OUTER)
    {
        outer_path = (Path *) create_unique_path(root, outerrel,
                                                 outer_path, sjinfo);
        Assert(outer_path);
        jointype = JOIN_INNER;
    }
    else if (jointype == JOIN_UNIQUE_INNER)
    {
        inner_path = (Path *) create_unique_path(root, innerrel,
                                                 inner_path, sjinfo);
        Assert(inner_path);
        jointype = JOIN_INNER;
    }

    /*
     * Each possible ordering of the available mergejoin clauses will generate
     * a differently-sorted result path at essentially the same cost.  We have
     * no basis for choosing one over another at this level of joining, but
     * some sort orders may be more useful than others for higher-level
     * mergejoins, so it's worth considering multiple orderings.
     *
     * Actually, it's not quite true that every mergeclause ordering will
     * generate a different path order, because some of the clauses may be
     * partially redundant (refer to the same EquivalenceClasses).  Therefore,
     * what we do is convert the mergeclause list to a list of canonical
     * pathkeys, and then consider different orderings of the pathkeys.
     *
     * Generating a path for *every* permutation of the pathkeys doesn't seem
     * like a winning strategy; the cost in planning time is too high. For
     * now, we generate one path for each pathkey, listing that pathkey first
     * and the rest in random order.  This should allow at least a one-clause
     * mergejoin without re-sorting against any other possible mergejoin
     * partner path.  But if we've not guessed the right ordering of secondary
     * keys, we may end up evaluating clauses as qpquals when they could have
     * been done as mergeclauses.  (In practice, it's rare that there's more
     * than two or three mergeclauses, so expending a huge amount of thought
     * on that is probably not worth it.)
     *
     * The pathkey order returned by select_outer_pathkeys_for_merge() has
     * some heuristics behind it (see that function), so be sure to try it
     * exactly as-is as well as making variants.
     */
    all_pathkeys = select_outer_pathkeys_for_merge(root,
                                                   mergeclause_list,
                                                   joinrel);

    foreach(l, all_pathkeys)
    {
        List       *front_pathkey = (List *) lfirst(l);
        List       *cur_mergeclauses;
        List       *outerkeys;
        List       *innerkeys;
        List       *merge_pathkeys;

        /* Make a pathkey list with this guy first */
        if (l != list_head(all_pathkeys))
            outerkeys = lcons(front_pathkey,
                              list_delete_ptr(list_copy(all_pathkeys),
                                              front_pathkey));
        else
            outerkeys = all_pathkeys;   /* no work at first one... */

        /* Sort the mergeclauses into the corresponding ordering */
        cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
                                                          outerkeys,
                                                          true,
                                                          mergeclause_list);

        /* Should have used them all... */
        Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list));

        /* Build sort pathkeys for the inner side */
        innerkeys = make_inner_pathkeys_for_merge(root,
                                                  cur_mergeclauses,
                                                  outerkeys);

        /* Build pathkeys representing output sort order */
        merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
                                             outerkeys);

        /*
         * And now we can make the path.
         *
         * Note: it's possible that the cheapest paths will already be sorted
         * properly.  try_mergejoin_path will detect that case and suppress an
         * explicit sort step, so we needn't do so here.
         */
        try_mergejoin_path(root,
                           joinrel,
                           jointype,
                           sjinfo,
                           param_source_rels,
                           outer_path,
                           inner_path,
                           restrictlist,
                           merge_pathkeys,
                           cur_mergeclauses,
                           outerkeys,
                           innerkeys);
    }
}

static void try_hashjoin_path ( PlannerInfo root,
RelOptInfo joinrel,
JoinType  jointype,
SpecialJoinInfo sjinfo,
SemiAntiJoinFactors semifactors,
Relids  param_source_rels,
Path outer_path,
Path inner_path,
List restrict_clauses,
List hashclauses 
) [static]

Definition at line 380 of file joinpath.c.

References add_path(), add_path_precheck(), bms_free(), bms_overlap(), calc_non_nestloop_required_outer(), create_hashjoin_path(), initial_cost_hashjoin(), NIL, JoinCostWorkspace::startup_cost, and JoinCostWorkspace::total_cost.

Referenced by hash_inner_and_outer().

{
    Relids      required_outer;
    JoinCostWorkspace workspace;

    /*
     * Check to see if proposed path is still parameterized, and reject if the
     * parameterization wouldn't be sensible.
     */
    required_outer = calc_non_nestloop_required_outer(outer_path,
                                                      inner_path);
    if (required_outer &&
        !bms_overlap(required_outer, param_source_rels))
    {
        /* Waste no memory when we reject a path here */
        bms_free(required_outer);
        return;
    }

    /*
     * See comments in try_nestloop_path().  Also note that hashjoin paths
     * never have any output pathkeys, per comments in create_hashjoin_path.
     */
    initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
                          outer_path, inner_path,
                          sjinfo, semifactors);

    if (add_path_precheck(joinrel,
                          workspace.startup_cost, workspace.total_cost,
                          NIL, required_outer))
    {
        add_path(joinrel, (Path *)
                 create_hashjoin_path(root,
                                      joinrel,
                                      jointype,
                                      &workspace,
                                      sjinfo,
                                      semifactors,
                                      outer_path,
                                      inner_path,
                                      restrict_clauses,
                                      required_outer,
                                      hashclauses));
    }
    else
    {
        /* Waste no memory when we reject a path here */
        bms_free(required_outer);
    }
}

static void try_mergejoin_path ( PlannerInfo root,
RelOptInfo joinrel,
JoinType  jointype,
SpecialJoinInfo sjinfo,
Relids  param_source_rels,
Path outer_path,
Path inner_path,
List restrict_clauses,
List pathkeys,
List mergeclauses,
List outersortkeys,
List innersortkeys 
) [static]

Definition at line 299 of file joinpath.c.

References add_path(), add_path_precheck(), bms_free(), bms_overlap(), calc_non_nestloop_required_outer(), create_mergejoin_path(), initial_cost_mergejoin(), Path::pathkeys, pathkeys_contained_in(), JoinCostWorkspace::startup_cost, and JoinCostWorkspace::total_cost.

Referenced by match_unsorted_outer(), and sort_inner_and_outer().

{
    Relids      required_outer;
    JoinCostWorkspace workspace;

    /*
     * Check to see if proposed path is still parameterized, and reject if the
     * parameterization wouldn't be sensible.
     */
    required_outer = calc_non_nestloop_required_outer(outer_path,
                                                      inner_path);
    if (required_outer &&
        !bms_overlap(required_outer, param_source_rels))
    {
        /* Waste no memory when we reject a path here */
        bms_free(required_outer);
        return;
    }

    /*
     * If the given paths are already well enough ordered, we can skip doing
     * an explicit sort.
     */
    if (outersortkeys &&
        pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
        outersortkeys = NIL;
    if (innersortkeys &&
        pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
        innersortkeys = NIL;

    /*
     * See comments in try_nestloop_path().
     */
    initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
                           outer_path, inner_path,
                           outersortkeys, innersortkeys,
                           sjinfo);

    if (add_path_precheck(joinrel,
                          workspace.startup_cost, workspace.total_cost,
                          pathkeys, required_outer))
    {
        add_path(joinrel, (Path *)
                 create_mergejoin_path(root,
                                       joinrel,
                                       jointype,
                                       &workspace,
                                       sjinfo,
                                       outer_path,
                                       inner_path,
                                       restrict_clauses,
                                       pathkeys,
                                       required_outer,
                                       mergeclauses,
                                       outersortkeys,
                                       innersortkeys));
    }
    else
    {
        /* Waste no memory when we reject a path here */
        bms_free(required_outer);
    }
}

static void try_nestloop_path ( PlannerInfo root,
RelOptInfo joinrel,
JoinType  jointype,
SpecialJoinInfo sjinfo,
SemiAntiJoinFactors semifactors,
Relids  param_source_rels,
Path outer_path,
Path inner_path,
List restrict_clauses,
List pathkeys 
) [static]

Definition at line 228 of file joinpath.c.

References add_path(), add_path_precheck(), bms_free(), bms_overlap(), calc_nestloop_required_outer(), create_nestloop_path(), initial_cost_nestloop(), JoinCostWorkspace::startup_cost, and JoinCostWorkspace::total_cost.

Referenced by match_unsorted_outer().

{
    Relids      required_outer;
    JoinCostWorkspace workspace;

    /*
     * Check to see if proposed path is still parameterized, and reject if the
     * parameterization wouldn't be sensible.
     */
    required_outer = calc_nestloop_required_outer(outer_path,
                                                  inner_path);
    if (required_outer &&
        !bms_overlap(required_outer, param_source_rels))
    {
        /* Waste no memory when we reject a path here */
        bms_free(required_outer);
        return;
    }

    /*
     * Do a precheck to quickly eliminate obviously-inferior paths.  We
     * calculate a cheap lower bound on the path's cost and then use
     * add_path_precheck() to see if the path is clearly going to be dominated
     * by some existing path for the joinrel.  If not, do the full pushup with
     * creating a fully valid path structure and submitting it to add_path().
     * The latter two steps are expensive enough to make this two-phase
     * methodology worthwhile.
     */
    initial_cost_nestloop(root, &workspace, jointype,
                          outer_path, inner_path,
                          sjinfo, semifactors);

    if (add_path_precheck(joinrel,
                          workspace.startup_cost, workspace.total_cost,
                          pathkeys, required_outer))
    {
        add_path(joinrel, (Path *)
                 create_nestloop_path(root,
                                      joinrel,
                                      jointype,
                                      &workspace,
                                      sjinfo,
                                      semifactors,
                                      outer_path,
                                      inner_path,
                                      restrict_clauses,
                                      pathkeys,
                                      required_outer));
    }
    else
    {
        /* Waste no memory when we reject a path here */
        bms_free(required_outer);
    }
}