Header And Logo

PostgreSQL
| The world's most advanced open source database.

execGrouping.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * execGrouping.c
00004  *    executor utility routines for grouping, hashing, and aggregation
00005  *
00006  * Note: we currently assume that equality and hashing functions are not
00007  * collation-sensitive, so the code in this file has no support for passing
00008  * collation settings through from callers.  That may have to change someday.
00009  *
00010  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00011  * Portions Copyright (c) 1994, Regents of the University of California
00012  *
00013  *
00014  * IDENTIFICATION
00015  *    src/backend/executor/execGrouping.c
00016  *
00017  *-------------------------------------------------------------------------
00018  */
00019 #include "postgres.h"
00020 
00021 #include "executor/executor.h"
00022 #include "miscadmin.h"
00023 #include "utils/lsyscache.h"
00024 #include "utils/memutils.h"
00025 
00026 
00027 static TupleHashTable CurTupleHashTable = NULL;
00028 
00029 static uint32 TupleHashTableHash(const void *key, Size keysize);
00030 static int TupleHashTableMatch(const void *key1, const void *key2,
00031                     Size keysize);
00032 
00033 
00034 /*****************************************************************************
00035  *      Utility routines for grouping tuples together
00036  *****************************************************************************/
00037 
00038 /*
00039  * execTuplesMatch
00040  *      Return true if two tuples match in all the indicated fields.
00041  *
00042  * This actually implements SQL's notion of "not distinct".  Two nulls
00043  * match, a null and a not-null don't match.
00044  *
00045  * slot1, slot2: the tuples to compare (must have same columns!)
00046  * numCols: the number of attributes to be examined
00047  * matchColIdx: array of attribute column numbers
00048  * eqFunctions: array of fmgr lookup info for the equality functions to use
00049  * evalContext: short-term memory context for executing the functions
00050  *
00051  * NB: evalContext is reset each time!
00052  */
00053 bool
00054 execTuplesMatch(TupleTableSlot *slot1,
00055                 TupleTableSlot *slot2,
00056                 int numCols,
00057                 AttrNumber *matchColIdx,
00058                 FmgrInfo *eqfunctions,
00059                 MemoryContext evalContext)
00060 {
00061     MemoryContext oldContext;
00062     bool        result;
00063     int         i;
00064 
00065     /* Reset and switch into the temp context. */
00066     MemoryContextReset(evalContext);
00067     oldContext = MemoryContextSwitchTo(evalContext);
00068 
00069     /*
00070      * We cannot report a match without checking all the fields, but we can
00071      * report a non-match as soon as we find unequal fields.  So, start
00072      * comparing at the last field (least significant sort key). That's the
00073      * most likely to be different if we are dealing with sorted input.
00074      */
00075     result = true;
00076 
00077     for (i = numCols; --i >= 0;)
00078     {
00079         AttrNumber  att = matchColIdx[i];
00080         Datum       attr1,
00081                     attr2;
00082         bool        isNull1,
00083                     isNull2;
00084 
00085         attr1 = slot_getattr(slot1, att, &isNull1);
00086 
00087         attr2 = slot_getattr(slot2, att, &isNull2);
00088 
00089         if (isNull1 != isNull2)
00090         {
00091             result = false;     /* one null and one not; they aren't equal */
00092             break;
00093         }
00094 
00095         if (isNull1)
00096             continue;           /* both are null, treat as equal */
00097 
00098         /* Apply the type-specific equality function */
00099 
00100         if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
00101                                         attr1, attr2)))
00102         {
00103             result = false;     /* they aren't equal */
00104             break;
00105         }
00106     }
00107 
00108     MemoryContextSwitchTo(oldContext);
00109 
00110     return result;
00111 }
00112 
00113 /*
00114  * execTuplesUnequal
00115  *      Return true if two tuples are definitely unequal in the indicated
00116  *      fields.
00117  *
00118  * Nulls are neither equal nor unequal to anything else.  A true result
00119  * is obtained only if there are non-null fields that compare not-equal.
00120  *
00121  * Parameters are identical to execTuplesMatch.
00122  */
00123 bool
00124 execTuplesUnequal(TupleTableSlot *slot1,
00125                   TupleTableSlot *slot2,
00126                   int numCols,
00127                   AttrNumber *matchColIdx,
00128                   FmgrInfo *eqfunctions,
00129                   MemoryContext evalContext)
00130 {
00131     MemoryContext oldContext;
00132     bool        result;
00133     int         i;
00134 
00135     /* Reset and switch into the temp context. */
00136     MemoryContextReset(evalContext);
00137     oldContext = MemoryContextSwitchTo(evalContext);
00138 
00139     /*
00140      * We cannot report a match without checking all the fields, but we can
00141      * report a non-match as soon as we find unequal fields.  So, start
00142      * comparing at the last field (least significant sort key). That's the
00143      * most likely to be different if we are dealing with sorted input.
00144      */
00145     result = false;
00146 
00147     for (i = numCols; --i >= 0;)
00148     {
00149         AttrNumber  att = matchColIdx[i];
00150         Datum       attr1,
00151                     attr2;
00152         bool        isNull1,
00153                     isNull2;
00154 
00155         attr1 = slot_getattr(slot1, att, &isNull1);
00156 
00157         if (isNull1)
00158             continue;           /* can't prove anything here */
00159 
00160         attr2 = slot_getattr(slot2, att, &isNull2);
00161 
00162         if (isNull2)
00163             continue;           /* can't prove anything here */
00164 
00165         /* Apply the type-specific equality function */
00166 
00167         if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
00168                                         attr1, attr2)))
00169         {
00170             result = true;      /* they are unequal */
00171             break;
00172         }
00173     }
00174 
00175     MemoryContextSwitchTo(oldContext);
00176 
00177     return result;
00178 }
00179 
00180 
00181 /*
00182  * execTuplesMatchPrepare
00183  *      Look up the equality functions needed for execTuplesMatch or
00184  *      execTuplesUnequal, given an array of equality operator OIDs.
00185  *
00186  * The result is a palloc'd array.
00187  */
00188 FmgrInfo *
00189 execTuplesMatchPrepare(int numCols,
00190                        Oid *eqOperators)
00191 {
00192     FmgrInfo   *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
00193     int         i;
00194 
00195     for (i = 0; i < numCols; i++)
00196     {
00197         Oid         eq_opr = eqOperators[i];
00198         Oid         eq_function;
00199 
00200         eq_function = get_opcode(eq_opr);
00201         fmgr_info(eq_function, &eqFunctions[i]);
00202     }
00203 
00204     return eqFunctions;
00205 }
00206 
00207 /*
00208  * execTuplesHashPrepare
00209  *      Look up the equality and hashing functions needed for a TupleHashTable.
00210  *
00211  * This is similar to execTuplesMatchPrepare, but we also need to find the
00212  * hash functions associated with the equality operators.  *eqFunctions and
00213  * *hashFunctions receive the palloc'd result arrays.
00214  *
00215  * Note: we expect that the given operators are not cross-type comparisons.
00216  */
00217 void
00218 execTuplesHashPrepare(int numCols,
00219                       Oid *eqOperators,
00220                       FmgrInfo **eqFunctions,
00221                       FmgrInfo **hashFunctions)
00222 {
00223     int         i;
00224 
00225     *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
00226     *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
00227 
00228     for (i = 0; i < numCols; i++)
00229     {
00230         Oid         eq_opr = eqOperators[i];
00231         Oid         eq_function;
00232         Oid         left_hash_function;
00233         Oid         right_hash_function;
00234 
00235         eq_function = get_opcode(eq_opr);
00236         if (!get_op_hash_functions(eq_opr,
00237                                    &left_hash_function, &right_hash_function))
00238             elog(ERROR, "could not find hash function for hash operator %u",
00239                  eq_opr);
00240         /* We're not supporting cross-type cases here */
00241         Assert(left_hash_function == right_hash_function);
00242         fmgr_info(eq_function, &(*eqFunctions)[i]);
00243         fmgr_info(right_hash_function, &(*hashFunctions)[i]);
00244     }
00245 }
00246 
00247 
00248 /*****************************************************************************
00249  *      Utility routines for all-in-memory hash tables
00250  *
00251  * These routines build hash tables for grouping tuples together (eg, for
00252  * hash aggregation).  There is one entry for each not-distinct set of tuples
00253  * presented.
00254  *****************************************************************************/
00255 
00256 /*
00257  * Construct an empty TupleHashTable
00258  *
00259  *  numCols, keyColIdx: identify the tuple fields to use as lookup key
00260  *  eqfunctions: equality comparison functions to use
00261  *  hashfunctions: datatype-specific hashing functions to use
00262  *  nbuckets: initial estimate of hashtable size
00263  *  entrysize: size of each entry (at least sizeof(TupleHashEntryData))
00264  *  tablecxt: memory context in which to store table and table entries
00265  *  tempcxt: short-lived context for evaluation hash and comparison functions
00266  *
00267  * The function arrays may be made with execTuplesHashPrepare().  Note they
00268  * are not cross-type functions, but expect to see the table datatype(s)
00269  * on both sides.
00270  *
00271  * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
00272  * storage that will live as long as the hashtable does.
00273  */
00274 TupleHashTable
00275 BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
00276                     FmgrInfo *eqfunctions,
00277                     FmgrInfo *hashfunctions,
00278                     long nbuckets, Size entrysize,
00279                     MemoryContext tablecxt, MemoryContext tempcxt)
00280 {
00281     TupleHashTable hashtable;
00282     HASHCTL     hash_ctl;
00283 
00284     Assert(nbuckets > 0);
00285     Assert(entrysize >= sizeof(TupleHashEntryData));
00286 
00287     /* Limit initial table size request to not more than work_mem */
00288     nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize));
00289 
00290     hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt,
00291                                                  sizeof(TupleHashTableData));
00292 
00293     hashtable->numCols = numCols;
00294     hashtable->keyColIdx = keyColIdx;
00295     hashtable->tab_hash_funcs = hashfunctions;
00296     hashtable->tab_eq_funcs = eqfunctions;
00297     hashtable->tablecxt = tablecxt;
00298     hashtable->tempcxt = tempcxt;
00299     hashtable->entrysize = entrysize;
00300     hashtable->tableslot = NULL;    /* will be made on first lookup */
00301     hashtable->inputslot = NULL;
00302     hashtable->in_hash_funcs = NULL;
00303     hashtable->cur_eq_funcs = NULL;
00304 
00305     MemSet(&hash_ctl, 0, sizeof(hash_ctl));
00306     hash_ctl.keysize = sizeof(TupleHashEntryData);
00307     hash_ctl.entrysize = entrysize;
00308     hash_ctl.hash = TupleHashTableHash;
00309     hash_ctl.match = TupleHashTableMatch;
00310     hash_ctl.hcxt = tablecxt;
00311     hashtable->hashtab = hash_create("TupleHashTable", nbuckets,
00312                                      &hash_ctl,
00313                     HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
00314 
00315     return hashtable;
00316 }
00317 
00318 /*
00319  * Find or create a hashtable entry for the tuple group containing the
00320  * given tuple.  The tuple must be the same type as the hashtable entries.
00321  *
00322  * If isnew is NULL, we do not create new entries; we return NULL if no
00323  * match is found.
00324  *
00325  * If isnew isn't NULL, then a new entry is created if no existing entry
00326  * matches.  On return, *isnew is true if the entry is newly created,
00327  * false if it existed already.  Any extra space in a new entry has been
00328  * zeroed.
00329  */
00330 TupleHashEntry
00331 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
00332                      bool *isnew)
00333 {
00334     TupleHashEntry entry;
00335     MemoryContext oldContext;
00336     TupleHashTable saveCurHT;
00337     TupleHashEntryData dummy;
00338     bool        found;
00339 
00340     /* If first time through, clone the input slot to make table slot */
00341     if (hashtable->tableslot == NULL)
00342     {
00343         TupleDesc   tupdesc;
00344 
00345         oldContext = MemoryContextSwitchTo(hashtable->tablecxt);
00346 
00347         /*
00348          * We copy the input tuple descriptor just for safety --- we assume
00349          * all input tuples will have equivalent descriptors.
00350          */
00351         tupdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
00352         hashtable->tableslot = MakeSingleTupleTableSlot(tupdesc);
00353         MemoryContextSwitchTo(oldContext);
00354     }
00355 
00356     /* Need to run the hash functions in short-lived context */
00357     oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
00358 
00359     /*
00360      * Set up data needed by hash and match functions
00361      *
00362      * We save and restore CurTupleHashTable just in case someone manages to
00363      * invoke this code re-entrantly.
00364      */
00365     hashtable->inputslot = slot;
00366     hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
00367     hashtable->cur_eq_funcs = hashtable->tab_eq_funcs;
00368 
00369     saveCurHT = CurTupleHashTable;
00370     CurTupleHashTable = hashtable;
00371 
00372     /* Search the hash table */
00373     dummy.firstTuple = NULL;    /* flag to reference inputslot */
00374     entry = (TupleHashEntry) hash_search(hashtable->hashtab,
00375                                          &dummy,
00376                                          isnew ? HASH_ENTER : HASH_FIND,
00377                                          &found);
00378 
00379     if (isnew)
00380     {
00381         if (found)
00382         {
00383             /* found pre-existing entry */
00384             *isnew = false;
00385         }
00386         else
00387         {
00388             /*
00389              * created new entry
00390              *
00391              * Zero any caller-requested space in the entry.  (This zaps the
00392              * "key data" dynahash.c copied into the new entry, but we don't
00393              * care since we're about to overwrite it anyway.)
00394              */
00395             MemSet(entry, 0, hashtable->entrysize);
00396 
00397             /* Copy the first tuple into the table context */
00398             MemoryContextSwitchTo(hashtable->tablecxt);
00399             entry->firstTuple = ExecCopySlotMinimalTuple(slot);
00400 
00401             *isnew = true;
00402         }
00403     }
00404 
00405     CurTupleHashTable = saveCurHT;
00406 
00407     MemoryContextSwitchTo(oldContext);
00408 
00409     return entry;
00410 }
00411 
00412 /*
00413  * Search for a hashtable entry matching the given tuple.  No entry is
00414  * created if there's not a match.  This is similar to the non-creating
00415  * case of LookupTupleHashEntry, except that it supports cross-type
00416  * comparisons, in which the given tuple is not of the same type as the
00417  * table entries.  The caller must provide the hash functions to use for
00418  * the input tuple, as well as the equality functions, since these may be
00419  * different from the table's internal functions.
00420  */
00421 TupleHashEntry
00422 FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
00423                    FmgrInfo *eqfunctions,
00424                    FmgrInfo *hashfunctions)
00425 {
00426     TupleHashEntry entry;
00427     MemoryContext oldContext;
00428     TupleHashTable saveCurHT;
00429     TupleHashEntryData dummy;
00430 
00431     /* Need to run the hash functions in short-lived context */
00432     oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
00433 
00434     /*
00435      * Set up data needed by hash and match functions
00436      *
00437      * We save and restore CurTupleHashTable just in case someone manages to
00438      * invoke this code re-entrantly.
00439      */
00440     hashtable->inputslot = slot;
00441     hashtable->in_hash_funcs = hashfunctions;
00442     hashtable->cur_eq_funcs = eqfunctions;
00443 
00444     saveCurHT = CurTupleHashTable;
00445     CurTupleHashTable = hashtable;
00446 
00447     /* Search the hash table */
00448     dummy.firstTuple = NULL;    /* flag to reference inputslot */
00449     entry = (TupleHashEntry) hash_search(hashtable->hashtab,
00450                                          &dummy,
00451                                          HASH_FIND,
00452                                          NULL);
00453 
00454     CurTupleHashTable = saveCurHT;
00455 
00456     MemoryContextSwitchTo(oldContext);
00457 
00458     return entry;
00459 }
00460 
00461 /*
00462  * Compute the hash value for a tuple
00463  *
00464  * The passed-in key is a pointer to TupleHashEntryData.  In an actual hash
00465  * table entry, the firstTuple field points to a tuple (in MinimalTuple
00466  * format).  LookupTupleHashEntry sets up a dummy TupleHashEntryData with a
00467  * NULL firstTuple field --- that cues us to look at the inputslot instead.
00468  * This convention avoids the need to materialize virtual input tuples unless
00469  * they actually need to get copied into the table.
00470  *
00471  * CurTupleHashTable must be set before calling this, since dynahash.c
00472  * doesn't provide any API that would let us get at the hashtable otherwise.
00473  *
00474  * Also, the caller must select an appropriate memory context for running
00475  * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
00476  */
00477 static uint32
00478 TupleHashTableHash(const void *key, Size keysize)
00479 {
00480     MinimalTuple tuple = ((const TupleHashEntryData *) key)->firstTuple;
00481     TupleTableSlot *slot;
00482     TupleHashTable hashtable = CurTupleHashTable;
00483     int         numCols = hashtable->numCols;
00484     AttrNumber *keyColIdx = hashtable->keyColIdx;
00485     FmgrInfo   *hashfunctions;
00486     uint32      hashkey = 0;
00487     int         i;
00488 
00489     if (tuple == NULL)
00490     {
00491         /* Process the current input tuple for the table */
00492         slot = hashtable->inputslot;
00493         hashfunctions = hashtable->in_hash_funcs;
00494     }
00495     else
00496     {
00497         /* Process a tuple already stored in the table */
00498         /* (this case never actually occurs in current dynahash.c code) */
00499         slot = hashtable->tableslot;
00500         ExecStoreMinimalTuple(tuple, slot, false);
00501         hashfunctions = hashtable->tab_hash_funcs;
00502     }
00503 
00504     for (i = 0; i < numCols; i++)
00505     {
00506         AttrNumber  att = keyColIdx[i];
00507         Datum       attr;
00508         bool        isNull;
00509 
00510         /* rotate hashkey left 1 bit at each step */
00511         hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
00512 
00513         attr = slot_getattr(slot, att, &isNull);
00514 
00515         if (!isNull)            /* treat nulls as having hash key 0 */
00516         {
00517             uint32      hkey;
00518 
00519             hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
00520                                                 attr));
00521             hashkey ^= hkey;
00522         }
00523     }
00524 
00525     return hashkey;
00526 }
00527 
00528 /*
00529  * See whether two tuples (presumably of the same hash value) match
00530  *
00531  * As above, the passed pointers are pointers to TupleHashEntryData.
00532  *
00533  * CurTupleHashTable must be set before calling this, since dynahash.c
00534  * doesn't provide any API that would let us get at the hashtable otherwise.
00535  *
00536  * Also, the caller must select an appropriate memory context for running
00537  * the compare functions.  (dynahash.c doesn't change CurrentMemoryContext.)
00538  */
00539 static int
00540 TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
00541 {
00542     MinimalTuple tuple1 = ((const TupleHashEntryData *) key1)->firstTuple;
00543 
00544 #ifdef USE_ASSERT_CHECKING
00545     MinimalTuple tuple2 = ((const TupleHashEntryData *) key2)->firstTuple;
00546 #endif
00547     TupleTableSlot *slot1;
00548     TupleTableSlot *slot2;
00549     TupleHashTable hashtable = CurTupleHashTable;
00550 
00551     /*
00552      * We assume that dynahash.c will only ever call us with the first
00553      * argument being an actual table entry, and the second argument being
00554      * LookupTupleHashEntry's dummy TupleHashEntryData.  The other direction
00555      * could be supported too, but is not currently used by dynahash.c.
00556      */
00557     Assert(tuple1 != NULL);
00558     slot1 = hashtable->tableslot;
00559     ExecStoreMinimalTuple(tuple1, slot1, false);
00560     Assert(tuple2 == NULL);
00561     slot2 = hashtable->inputslot;
00562 
00563     /* For crosstype comparisons, the inputslot must be first */
00564     if (execTuplesMatch(slot2,
00565                         slot1,
00566                         hashtable->numCols,
00567                         hashtable->keyColIdx,
00568                         hashtable->cur_eq_funcs,
00569                         hashtable->tempcxt))
00570         return 0;
00571     else
00572         return 1;
00573 }