Header And Logo

PostgreSQL
| The world's most advanced open source database.

tuplestore.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * tuplestore.c
00004  *    Generalized routines for temporary tuple storage.
00005  *
00006  * This module handles temporary storage of tuples for purposes such
00007  * as Materialize nodes, hashjoin batch files, etc.  It is essentially
00008  * a dumbed-down version of tuplesort.c; it does no sorting of tuples
00009  * but can only store and regurgitate a sequence of tuples.  However,
00010  * because no sort is required, it is allowed to start reading the sequence
00011  * before it has all been written.  This is particularly useful for cursors,
00012  * because it allows random access within the already-scanned portion of
00013  * a query without having to process the underlying scan to completion.
00014  * Also, it is possible to support multiple independent read pointers.
00015  *
00016  * A temporary file is used to handle the data if it exceeds the
00017  * space limit specified by the caller.
00018  *
00019  * The (approximate) amount of memory allowed to the tuplestore is specified
00020  * in kilobytes by the caller.  We absorb tuples and simply store them in an
00021  * in-memory array as long as we haven't exceeded maxKBytes.  If we do exceed
00022  * maxKBytes, we dump all the tuples into a temp file and then read from that
00023  * when needed.
00024  *
00025  * Upon creation, a tuplestore supports a single read pointer, numbered 0.
00026  * Additional read pointers can be created using tuplestore_alloc_read_pointer.
00027  * Mark/restore behavior is supported by copying read pointers.
00028  *
00029  * When the caller requests backward-scan capability, we write the temp file
00030  * in a format that allows either forward or backward scan.  Otherwise, only
00031  * forward scan is allowed.  A request for backward scan must be made before
00032  * putting any tuples into the tuplestore.  Rewind is normally allowed but
00033  * can be turned off via tuplestore_set_eflags; turning off rewind for all
00034  * read pointers enables truncation of the tuplestore at the oldest read point
00035  * for minimal memory usage.  (The caller must explicitly call tuplestore_trim
00036  * at appropriate times for truncation to actually happen.)
00037  *
00038  * Note: in TSS_WRITEFILE state, the temp file's seek position is the
00039  * current write position, and the write-position variables in the tuplestore
00040  * aren't kept up to date.  Similarly, in TSS_READFILE state the temp file's
00041  * seek position is the active read pointer's position, and that read pointer
00042  * isn't kept up to date.  We update the appropriate variables using ftell()
00043  * before switching to the other state or activating a different read pointer.
00044  *
00045  *
00046  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00047  * Portions Copyright (c) 1994, Regents of the University of California
00048  *
00049  * IDENTIFICATION
00050  *    src/backend/utils/sort/tuplestore.c
00051  *
00052  *-------------------------------------------------------------------------
00053  */
00054 
00055 #include "postgres.h"
00056 
00057 #include "access/htup_details.h"
00058 #include "commands/tablespace.h"
00059 #include "executor/executor.h"
00060 #include "storage/buffile.h"
00061 #include "utils/memutils.h"
00062 #include "utils/resowner.h"
00063 
00064 
/*
 * Possible states of a Tuplestore object.  These denote the states that
 * persist between calls of Tuplestore routines.
 *
 * See the file header notes for the seek-position conventions that apply
 * in the WRITEFILE and READFILE states.
 */
typedef enum
{
    TSS_INMEM,                  /* Tuples still fit in memory */
    TSS_WRITEFILE,              /* Writing to temp file */
    TSS_READFILE                /* Reading from temp file */
} TupStoreStatus;
00075 
/*
 * State for a single read pointer.  If we are in state INMEM then all the
 * read pointers' "current" fields denote the read positions.  In state
 * WRITEFILE, the file/offset fields denote the read positions.  In state
 * READFILE, inactive read pointers have valid file/offset, but the active
 * read pointer implicitly has position equal to the temp file's seek position.
 *
 * Special case: if eof_reached is true, then the pointer's read position is
 * implicitly equal to the write position, and current/file/offset aren't
 * maintained.  This way we need not update all the read pointers each time
 * we write.
 */
typedef struct
{
    int         eflags;         /* capability flags (EXEC_FLAG_REWIND etc.) */
    bool        eof_reached;    /* read has reached EOF */
    int         current;        /* next array index to read (INMEM only) */
    int         file;           /* temp file# (only after spill to disk) */
    off_t       offset;         /* byte offset in file (only after spill) */
} TSReadPointer;
00096 
/*
 * Private state of a Tuplestore operation.
 */
struct Tuplestorestate
{
    TupStoreStatus status;      /* enumerated value as shown above */
    int         eflags;         /* capability flags (OR of pointers' flags) */
    bool        backward;       /* store extra length words in file? */
    bool        interXact;      /* keep open through transactions? */
    bool        truncated;      /* tuplestore_trim has removed tuples? */
    long        availMem;       /* remaining memory available, in bytes;
                                 * may go negative, see LACKMEM() */
    long        allowedMem;     /* total memory allowed, in bytes */
    BufFile    *myfile;         /* underlying file, or NULL if none */
    MemoryContext context;      /* memory context for holding tuples */
    ResourceOwner resowner;     /* resowner for holding temp files */

    /*
     * These function pointers decouple the routines that must know what kind
     * of tuple we are handling from the routines that don't need to know it.
     * They are set up by the tuplestore_begin_xxx routines.
     *
     * (Although tuplestore.c currently only supports heap tuples, I've copied
     * this part of tuplesort.c so that extension to other kinds of objects
     * will be easy if it's ever needed.)
     *
     * Function to copy a supplied input tuple into palloc'd space. (NB: we
     * assume that a single pfree() is enough to release the tuple later, so
     * the representation must be "flat" in one palloc chunk.) state->availMem
     * must be decreased by the amount of space used.
     */
    void       *(*copytup) (Tuplestorestate *state, void *tup);

    /*
     * Function to write a stored tuple onto tape.  The representation of the
     * tuple on tape need not be the same as it is in memory; requirements on
     * the tape representation are given below.  After writing the tuple,
     * pfree() it, and increase state->availMem by the amount of memory space
     * thereby released.
     */
    void        (*writetup) (Tuplestorestate *state, void *tup);

    /*
     * Function to read a stored tuple from tape back into memory. 'len' is
     * the already-read length of the stored tuple.  Create and return a
     * palloc'd copy, and decrease state->availMem by the amount of memory
     * space consumed.
     */
    void       *(*readtup) (Tuplestorestate *state, unsigned int len);

    /*
     * This array holds pointers to tuples in memory if we are in state INMEM.
     * In states WRITEFILE and READFILE it's not used.
     *
     * When memtupdeleted > 0, the first memtupdeleted pointers are already
     * released due to a tuplestore_trim() operation, but we haven't expended
     * the effort to slide the remaining pointers down.  These unused pointers
     * are set to NULL to catch any invalid accesses.  Note that memtupcount
     * includes the deleted pointers.
     */
    void      **memtuples;      /* array of pointers to palloc'd tuples */
    int         memtupdeleted;  /* the first N slots are currently unused */
    int         memtupcount;    /* number of tuples currently present */
    int         memtupsize;     /* allocated length of memtuples array */
    bool        growmemtuples;  /* memtuples' growth still underway? */

    /*
     * These variables are used to keep track of the current positions.
     *
     * In state WRITEFILE, the current file seek position is the write point;
     * in state READFILE, the write position is remembered in writepos_xxx.
     * (The write position is the same as EOF, but since BufFileSeek doesn't
     * currently implement SEEK_END, we have to remember it explicitly.)
     */
    TSReadPointer *readptrs;    /* array of read pointers */
    int         activeptr;      /* index of the active read pointer */
    int         readptrcount;   /* number of pointers currently valid */
    int         readptrsize;    /* allocated length of readptrs array */

    int         writepos_file;  /* file# (valid if READFILE state) */
    off_t       writepos_offset;    /* offset (valid if READFILE state) */
};
00178 
/*
 * Convenience macros: the first three dispatch through the per-tuple-kind
 * function pointers; the last three maintain the memory-budget accounting
 * against availMem (which may legitimately go negative, hence LACKMEM).
 */
#define COPYTUP(state,tup)  ((*(state)->copytup) (state, tup))
#define WRITETUP(state,tup) ((*(state)->writetup) (state, tup))
#define READTUP(state,len)  ((*(state)->readtup) (state, len))
#define LACKMEM(state)      ((state)->availMem < 0)
#define USEMEM(state,amt)   ((state)->availMem -= (amt))
#define FREEMEM(state,amt)  ((state)->availMem += (amt))
00185 
00186 /*--------------------
00187  *
00188  * NOTES about on-tape representation of tuples:
00189  *
00190  * We require the first "unsigned int" of a stored tuple to be the total size
00191  * on-tape of the tuple, including itself (so it is never zero).
00192  * The remainder of the stored tuple
00193  * may or may not match the in-memory representation of the tuple ---
00194  * any conversion needed is the job of the writetup and readtup routines.
00195  *
00196  * If state->backward is true, then the stored representation of
00197  * the tuple must be followed by another "unsigned int" that is a copy of the
00198  * length --- so the total tape space used is actually sizeof(unsigned int)
00199  * more than the stored length value.  This allows read-backwards.  When
00200  * state->backward is not set, the write/read routines may omit the extra
00201  * length word.
00202  *
00203  * writetup is expected to write both length words as well as the tuple
00204  * data.  When readtup is called, the tape is positioned just after the
00205  * front length word; readtup must read the tuple data and advance past
00206  * the back length word (if present).
00207  *
00208  * The write/read routines can make use of the tuple description data
00209  * stored in the Tuplestorestate record, if needed. They are also expected
00210  * to adjust state->availMem by the amount of memory space (not tape space!)
00211  * released or consumed.  There is no error return from either writetup
00212  * or readtup; they should ereport() on failure.
00213  *
00214  *
00215  * NOTES about memory consumption calculations:
00216  *
00217  * We count space allocated for tuples against the maxKBytes limit,
00218  * plus the space used by the variable-size array memtuples.
00219  * Fixed-size space (primarily the BufFile I/O buffer) is not counted.
00220  * We don't worry about the size of the read pointer array, either.
00221  *
00222  * Note that we count actual space used (as shown by GetMemoryChunkSpace)
00223  * rather than the originally-requested size.  This is important since
00224  * palloc can add substantial overhead.  It's not a complete answer since
00225  * we won't count any wasted space in palloc allocation blocks, but it's
00226  * a lot better than what we were doing before 7.3.
00227  *
00228  *--------------------
00229  */
00230 
00231 
/* forward declarations of local routines */
static Tuplestorestate *tuplestore_begin_common(int eflags,
                        bool interXact,
                        int maxKBytes);
static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple);
static void dumptuples(Tuplestorestate *state);
static unsigned int getlen(Tuplestorestate *state, bool eofOK);
static void *copytup_heap(Tuplestorestate *state, void *tup);
static void writetup_heap(Tuplestorestate *state, void *tup);
static void *readtup_heap(Tuplestorestate *state, unsigned int len);
00241 
00242 
00243 /*
00244  *      tuplestore_begin_xxx
00245  *
00246  * Initialize for a tuple store operation.
00247  */
00248 static Tuplestorestate *
00249 tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
00250 {
00251     Tuplestorestate *state;
00252 
00253     state = (Tuplestorestate *) palloc0(sizeof(Tuplestorestate));
00254 
00255     state->status = TSS_INMEM;
00256     state->eflags = eflags;
00257     state->interXact = interXact;
00258     state->truncated = false;
00259     state->allowedMem = maxKBytes * 1024L;
00260     state->availMem = state->allowedMem;
00261     state->myfile = NULL;
00262     state->context = CurrentMemoryContext;
00263     state->resowner = CurrentResourceOwner;
00264 
00265     state->memtupdeleted = 0;
00266     state->memtupcount = 0;
00267     state->memtupsize = 1024;   /* initial guess */
00268     state->growmemtuples = true;
00269     state->memtuples = (void **) palloc(state->memtupsize * sizeof(void *));
00270 
00271     USEMEM(state, GetMemoryChunkSpace(state->memtuples));
00272 
00273     state->activeptr = 0;
00274     state->readptrcount = 1;
00275     state->readptrsize = 8;     /* arbitrary */
00276     state->readptrs = (TSReadPointer *)
00277         palloc(state->readptrsize * sizeof(TSReadPointer));
00278 
00279     state->readptrs[0].eflags = eflags;
00280     state->readptrs[0].eof_reached = false;
00281     state->readptrs[0].current = 0;
00282 
00283     return state;
00284 }
00285 
00286 /*
00287  * tuplestore_begin_heap
00288  *
00289  * Create a new tuplestore; other types of tuple stores (other than
00290  * "heap" tuple stores, for heap tuples) are possible, but not presently
00291  * implemented.
00292  *
00293  * randomAccess: if true, both forward and backward accesses to the
00294  * tuple store are allowed.
00295  *
00296  * interXact: if true, the files used for on-disk storage persist beyond the
00297  * end of the current transaction.  NOTE: It's the caller's responsibility to
00298  * create such a tuplestore in a memory context and resource owner that will
00299  * also survive transaction boundaries, and to ensure the tuplestore is closed
00300  * when it's no longer wanted.
00301  *
00302  * maxKBytes: how much data to store in memory (any data beyond this
00303  * amount is paged to disk).  When in doubt, use work_mem.
00304  */
00305 Tuplestorestate *
00306 tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
00307 {
00308     Tuplestorestate *state;
00309     int         eflags;
00310 
00311     /*
00312      * This interpretation of the meaning of randomAccess is compatible with
00313      * the pre-8.3 behavior of tuplestores.
00314      */
00315     eflags = randomAccess ?
00316         (EXEC_FLAG_BACKWARD | EXEC_FLAG_REWIND) :
00317         (EXEC_FLAG_REWIND);
00318 
00319     state = tuplestore_begin_common(eflags, interXact, maxKBytes);
00320 
00321     state->copytup = copytup_heap;
00322     state->writetup = writetup_heap;
00323     state->readtup = readtup_heap;
00324 
00325     return state;
00326 }
00327 
00328 /*
00329  * tuplestore_set_eflags
00330  *
00331  * Set the capability flags for read pointer 0 at a finer grain than is
00332  * allowed by tuplestore_begin_xxx.  This must be called before inserting
00333  * any data into the tuplestore.
00334  *
00335  * eflags is a bitmask following the meanings used for executor node
00336  * startup flags (see executor.h).  tuplestore pays attention to these bits:
00337  *      EXEC_FLAG_REWIND        need rewind to start
00338  *      EXEC_FLAG_BACKWARD      need backward fetch
00339  * If tuplestore_set_eflags is not called, REWIND is allowed, and BACKWARD
00340  * is set per "randomAccess" in the tuplestore_begin_xxx call.
00341  *
00342  * NOTE: setting BACKWARD without REWIND means the pointer can read backwards,
00343  * but not further than the truncation point (the furthest-back read pointer
00344  * position at the time of the last tuplestore_trim call).
00345  */
00346 void
00347 tuplestore_set_eflags(Tuplestorestate *state, int eflags)
00348 {
00349     int         i;
00350 
00351     if (state->status != TSS_INMEM || state->memtupcount != 0)
00352         elog(ERROR, "too late to call tuplestore_set_eflags");
00353 
00354     state->readptrs[0].eflags = eflags;
00355     for (i = 1; i < state->readptrcount; i++)
00356         eflags |= state->readptrs[i].eflags;
00357     state->eflags = eflags;
00358 }
00359 
00360 /*
00361  * tuplestore_alloc_read_pointer - allocate another read pointer.
00362  *
00363  * Returns the pointer's index.
00364  *
00365  * The new pointer initially copies the position of read pointer 0.
00366  * It can have its own eflags, but if any data has been inserted into
00367  * the tuplestore, these eflags must not represent an increase in
00368  * requirements.
00369  */
00370 int
00371 tuplestore_alloc_read_pointer(Tuplestorestate *state, int eflags)
00372 {
00373     /* Check for possible increase of requirements */
00374     if (state->status != TSS_INMEM || state->memtupcount != 0)
00375     {
00376         if ((state->eflags | eflags) != state->eflags)
00377             elog(ERROR, "too late to require new tuplestore eflags");
00378     }
00379 
00380     /* Make room for another read pointer if needed */
00381     if (state->readptrcount >= state->readptrsize)
00382     {
00383         int         newcnt = state->readptrsize * 2;
00384 
00385         state->readptrs = (TSReadPointer *)
00386             repalloc(state->readptrs, newcnt * sizeof(TSReadPointer));
00387         state->readptrsize = newcnt;
00388     }
00389 
00390     /* And set it up */
00391     state->readptrs[state->readptrcount] = state->readptrs[0];
00392     state->readptrs[state->readptrcount].eflags = eflags;
00393 
00394     state->eflags |= eflags;
00395 
00396     return state->readptrcount++;
00397 }
00398 
00399 /*
00400  * tuplestore_clear
00401  *
00402  *  Delete all the contents of a tuplestore, and reset its read pointers
00403  *  to the start.
00404  */
00405 void
00406 tuplestore_clear(Tuplestorestate *state)
00407 {
00408     int         i;
00409     TSReadPointer *readptr;
00410 
00411     if (state->myfile)
00412         BufFileClose(state->myfile);
00413     state->myfile = NULL;
00414     if (state->memtuples)
00415     {
00416         for (i = state->memtupdeleted; i < state->memtupcount; i++)
00417         {
00418             FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
00419             pfree(state->memtuples[i]);
00420         }
00421     }
00422     state->status = TSS_INMEM;
00423     state->truncated = false;
00424     state->memtupdeleted = 0;
00425     state->memtupcount = 0;
00426     readptr = state->readptrs;
00427     for (i = 0; i < state->readptrcount; readptr++, i++)
00428     {
00429         readptr->eof_reached = false;
00430         readptr->current = 0;
00431     }
00432 }
00433 
00434 /*
00435  * tuplestore_end
00436  *
00437  *  Release resources and clean up.
00438  */
00439 void
00440 tuplestore_end(Tuplestorestate *state)
00441 {
00442     int         i;
00443 
00444     if (state->myfile)
00445         BufFileClose(state->myfile);
00446     if (state->memtuples)
00447     {
00448         for (i = state->memtupdeleted; i < state->memtupcount; i++)
00449             pfree(state->memtuples[i]);
00450         pfree(state->memtuples);
00451     }
00452     pfree(state->readptrs);
00453     pfree(state);
00454 }
00455 
/*
 * tuplestore_select_read_pointer - make the specified read pointer active
 *
 * In TSS_READFILE state the temp file's seek position belongs to the active
 * pointer (see file header notes), so switching pointers requires saving
 * the old position and seeking to the new one.  In the other states the
 * positions live entirely in the TSReadPointer structs, so no file work
 * is needed.
 */
void
tuplestore_select_read_pointer(Tuplestorestate *state, int ptr)
{
    TSReadPointer *readptr;
    TSReadPointer *oldptr;

    Assert(ptr >= 0 && ptr < state->readptrcount);

    /* No work if already active */
    if (ptr == state->activeptr)
        return;

    readptr = &state->readptrs[ptr];
    oldptr = &state->readptrs[state->activeptr];

    switch (state->status)
    {
        case TSS_INMEM:
        case TSS_WRITEFILE:
            /* no work */
            break;
        case TSS_READFILE:

            /*
             * First, save the current read position in the pointer about to
             * become inactive.
             */
            if (!oldptr->eof_reached)
                BufFileTell(state->myfile,
                            &oldptr->file,
                            &oldptr->offset);

            /*
             * We have to make the temp file's seek position equal to the
             * logical position of the new read pointer.  In eof_reached
             * state, that's the EOF, which we have available from the saved
             * write position.
             */
            if (readptr->eof_reached)
            {
                if (BufFileSeek(state->myfile,
                                state->writepos_file,
                                state->writepos_offset,
                                SEEK_SET) != 0)
                    elog(ERROR, "tuplestore seek failed");
            }
            else
            {
                if (BufFileSeek(state->myfile,
                                readptr->file,
                                readptr->offset,
                                SEEK_SET) != 0)
                    elog(ERROR, "tuplestore seek failed");
            }
            break;
        default:
            elog(ERROR, "invalid tuplestore state");
            break;
    }

    state->activeptr = ptr;
}
00521 
00522 /*
00523  * tuplestore_ateof
00524  *
00525  * Returns the active read pointer's eof_reached state.
00526  */
00527 bool
00528 tuplestore_ateof(Tuplestorestate *state)
00529 {
00530     return state->readptrs[state->activeptr].eof_reached;
00531 }
00532 
/*
 * Grow the memtuples[] array, if possible within our memory constraint.
 * Return TRUE if we were able to enlarge the array, FALSE if not.
 *
 * Normally, at each increment we double the size of the array.  When we no
 * longer have enough memory to do that, we attempt one last, smaller increase
 * (and then clear the growmemtuples flag so we don't try any more).  That
 * allows us to use allowedMem as fully as possible; sticking to the pure
 * doubling rule could result in almost half of allowedMem going unused.
 * Because availMem moves around with tuple addition/removal, we need some
 * rule to prevent making repeated small increases in memtupsize, which would
 * just be useless thrashing.  The growmemtuples flag accomplishes that and
 * also prevents useless recalculations in this function.
 */
static bool
grow_memtuples(Tuplestorestate *state)
{
    int         newmemtupsize;
    int         memtupsize = state->memtupsize;
    long        memNowUsed = state->allowedMem - state->availMem;

    /* Forget it if we've already maxed out memtuples, per comment above */
    if (!state->growmemtuples)
        return false;

    /* Select new value of memtupsize */
    if (memNowUsed <= state->availMem)
    {
        /*
         * It is surely safe to double memtupsize if we've used no more than
         * half of allowedMem.
         *
         * Note: it might seem that we need to worry about memtupsize * 2
         * overflowing an int, but the MaxAllocSize clamp applied below
         * ensures the existing memtupsize can't be large enough for that.
         */
        newmemtupsize = memtupsize * 2;
    }
    else
    {
        /*
         * This will be the last increment of memtupsize.  Abandon doubling
         * strategy and instead increase as much as we safely can.
         *
         * To stay within allowedMem, we can't increase memtupsize by more
         * than availMem / sizeof(void *) elements. In practice, we want
         * to increase it by considerably less, because we need to leave some
         * space for the tuples to which the new array slots will refer.  We
         * assume the new tuples will be about the same size as the tuples
         * we've already seen, and thus we can extrapolate from the space
         * consumption so far to estimate an appropriate new size for the
         * memtuples array.  The optimal value might be higher or lower than
         * this estimate, but it's hard to know that in advance.
         *
         * This calculation is safe against enlarging the array so much that
         * LACKMEM becomes true, because the memory currently used includes
         * the present array; thus, there would be enough allowedMem for the
         * new array elements even if no other memory were currently used.
         *
         * We do the arithmetic in float8, because otherwise the product of
         * memtupsize and allowedMem could overflow.  (A little algebra shows
         * that grow_ratio must be less than 2 here, so we are not risking
         * integer overflow this way.)  Any inaccuracy in the result should be
         * insignificant; but even if we computed a completely insane result,
         * the checks below will prevent anything really bad from happening.
         */
        double      grow_ratio;

        grow_ratio = (double) state->allowedMem / (double) memNowUsed;
        newmemtupsize = (int) (memtupsize * grow_ratio);

        /* We won't make any further enlargement attempts */
        state->growmemtuples = false;
    }

    /* Must enlarge array by at least one element, else report failure */
    if (newmemtupsize <= memtupsize)
        goto noalloc;

    /*
     * On a 64-bit machine, allowedMem could be more than MaxAllocSize.  Clamp
     * to ensure our request won't be rejected by palloc.
     */
    if ((Size) newmemtupsize >= MaxAllocSize / sizeof(void *))
    {
        newmemtupsize = (int) (MaxAllocSize / sizeof(void *));
        state->growmemtuples = false;   /* can't grow any more */
    }

    /*
     * We need to be sure that we do not cause LACKMEM to become true, else
     * the space management algorithm will go nuts.  The code above should
     * never generate a dangerous request, but to be safe, check explicitly
     * that the array growth fits within availMem.  (We could still cause
     * LACKMEM if the memory chunk overhead associated with the memtuples
     * array were to increase.  That shouldn't happen with any sane value of
     * allowedMem, because at any array size large enough to risk LACKMEM,
     * palloc would be treating both old and new arrays as separate chunks.
     * But we'll check LACKMEM explicitly below just in case.)
     */
    if (state->availMem < (long) ((newmemtupsize - memtupsize) * sizeof(void *)))
        goto noalloc;

    /* OK, do it: swap the accounting from the old array size to the new */
    FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
    state->memtupsize = newmemtupsize;
    state->memtuples = (void **)
        repalloc(state->memtuples,
                 state->memtupsize * sizeof(void *));
    USEMEM(state, GetMemoryChunkSpace(state->memtuples));
    if (LACKMEM(state))
        elog(ERROR, "unexpected out-of-memory situation during sort");
    return true;

noalloc:
    /* If for any reason we didn't realloc, shut off future attempts */
    state->growmemtuples = false;
    return false;
}
00652 
00653 /*
00654  * Accept one tuple and append it to the tuplestore.
00655  *
00656  * Note that the input tuple is always copied; the caller need not save it.
00657  *
00658  * If the active read pointer is currently "at EOF", it remains so (the read
00659  * pointer implicitly advances along with the write pointer); otherwise the
00660  * read pointer is unchanged.  Non-active read pointers do not move, which
00661  * means they are certain to not be "at EOF" immediately after puttuple.
00662  * This curious-seeming behavior is for the convenience of nodeMaterial.c and
00663  * nodeCtescan.c, which would otherwise need to do extra pointer repositioning
00664  * steps.
00665  *
00666  * tuplestore_puttupleslot() is a convenience routine to collect data from
00667  * a TupleTableSlot without an extra copy operation.
00668  */
00669 void
00670 tuplestore_puttupleslot(Tuplestorestate *state,
00671                         TupleTableSlot *slot)
00672 {
00673     MinimalTuple tuple;
00674     MemoryContext oldcxt = MemoryContextSwitchTo(state->context);
00675 
00676     /*
00677      * Form a MinimalTuple in working memory
00678      */
00679     tuple = ExecCopySlotMinimalTuple(slot);
00680     USEMEM(state, GetMemoryChunkSpace(tuple));
00681 
00682     tuplestore_puttuple_common(state, (void *) tuple);
00683 
00684     MemoryContextSwitchTo(oldcxt);
00685 }
00686 
00687 /*
00688  * "Standard" case to copy from a HeapTuple.  This is actually now somewhat
00689  * deprecated, but not worth getting rid of in view of the number of callers.
00690  */
00691 void
00692 tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple)
00693 {
00694     MemoryContext oldcxt = MemoryContextSwitchTo(state->context);
00695 
00696     /*
00697      * Copy the tuple.  (Must do this even in WRITEFILE case.  Note that
00698      * COPYTUP includes USEMEM, so we needn't do that here.)
00699      */
00700     tuple = COPYTUP(state, tuple);
00701 
00702     tuplestore_puttuple_common(state, (void *) tuple);
00703 
00704     MemoryContextSwitchTo(oldcxt);
00705 }
00706 
00707 /*
00708  * Similar to tuplestore_puttuple(), but work from values + nulls arrays.
00709  * This avoids an extra tuple-construction operation.
00710  */
00711 void
00712 tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc,
00713                      Datum *values, bool *isnull)
00714 {
00715     MinimalTuple tuple;
00716     MemoryContext oldcxt = MemoryContextSwitchTo(state->context);
00717 
00718     tuple = heap_form_minimal_tuple(tdesc, values, isnull);
00719     USEMEM(state, GetMemoryChunkSpace(tuple));
00720 
00721     tuplestore_puttuple_common(state, (void *) tuple);
00722 
00723     MemoryContextSwitchTo(oldcxt);
00724 }
00725 
/*
 * tuplestore_puttuple_common
 *
 * Shared guts of the puttuple variants.  "tuple" must already be a
 * tuplestore-owned copy (formed in state->context and counted with USEMEM
 * by the caller); this routine takes ownership of it.  Depending on the
 * current status we either stash it in the in-memory array --- possibly
 * spilling everything to a temp file if memory is exhausted --- or append
 * it to the temp file.
 */
static void
tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
{
    TSReadPointer *readptr;
    int         i;
    ResourceOwner oldowner;

    switch (state->status)
    {
        case TSS_INMEM:

            /*
             * Update read pointers as needed; see API spec above.
             * (A non-active pointer that had hit EOF is re-aimed at the
             * tuple we are about to append, so it will see data added
             * after it reached end-of-store.)
             */
            readptr = state->readptrs;
            for (i = 0; i < state->readptrcount; readptr++, i++)
            {
                if (readptr->eof_reached && i != state->activeptr)
                {
                    readptr->eof_reached = false;
                    readptr->current = state->memtupcount;
                }
            }

            /*
             * Grow the array as needed.  Note that we try to grow the array
             * when there is still one free slot remaining --- if we fail,
             * there'll still be room to store the incoming tuple, and then
             * we'll switch to tape-based operation.
             */
            if (state->memtupcount >= state->memtupsize - 1)
            {
                (void) grow_memtuples(state);
                Assert(state->memtupcount < state->memtupsize);
            }

            /* Stash the tuple in the in-memory array */
            state->memtuples[state->memtupcount++] = tuple;

            /*
             * Done if we still fit in available memory and have array slots.
             */
            if (state->memtupcount < state->memtupsize && !LACKMEM(state))
                return;

            /*
             * Nope; time to switch to tape-based operation.  Make sure that
             * the temp file(s) are created in suitable temp tablespaces.
             */
            PrepareTempTablespaces();

            /* associate the file with the store's resource owner */
            oldowner = CurrentResourceOwner;
            CurrentResourceOwner = state->resowner;

            state->myfile = BufFileCreateTemp(state->interXact);

            /* restore whoever was responsible before the swap */
            CurrentResourceOwner = oldowner;

            /*
             * Freeze the decision about whether trailing length words will be
             * used.  We can't change this choice once data is on tape, even
             * though callers might drop the requirement.
             */
            state->backward = (state->eflags & EXEC_FLAG_BACKWARD) != 0;
            state->status = TSS_WRITEFILE;
            dumptuples(state);
            break;
        case TSS_WRITEFILE:

            /*
             * Update read pointers as needed; see API spec above. Note:
             * BufFileTell is quite cheap, so not worth trying to avoid
             * multiple calls.
             */
            readptr = state->readptrs;
            for (i = 0; i < state->readptrcount; readptr++, i++)
            {
                if (readptr->eof_reached && i != state->activeptr)
                {
                    readptr->eof_reached = false;
                    /* current write position == position of incoming tuple */
                    BufFileTell(state->myfile,
                                &readptr->file,
                                &readptr->offset);
                }
            }

            WRITETUP(state, tuple);
            break;
        case TSS_READFILE:

            /*
             * Switch from reading to writing.  First save the active read
             * pointer's seek position, then seek to the saved end-of-data
             * position before appending.
             */
            if (!state->readptrs[state->activeptr].eof_reached)
                BufFileTell(state->myfile,
                            &state->readptrs[state->activeptr].file,
                            &state->readptrs[state->activeptr].offset);
            if (BufFileSeek(state->myfile,
                            state->writepos_file, state->writepos_offset,
                            SEEK_SET) != 0)
                elog(ERROR, "tuplestore seek to EOF failed");
            state->status = TSS_WRITEFILE;

            /*
             * Update read pointers as needed; see API spec above.
             * (Here the append position is the saved write position, so we
             * can assign it directly rather than calling BufFileTell.)
             */
            readptr = state->readptrs;
            for (i = 0; i < state->readptrcount; readptr++, i++)
            {
                if (readptr->eof_reached && i != state->activeptr)
                {
                    readptr->eof_reached = false;
                    readptr->file = state->writepos_file;
                    readptr->offset = state->writepos_offset;
                }
            }

            WRITETUP(state, tuple);
            break;
        default:
            elog(ERROR, "invalid tuplestore state");
            break;
    }
}
00851 
00852 /*
00853  * Fetch the next tuple in either forward or back direction.
00854  * Returns NULL if no more tuples.  If should_free is set, the
00855  * caller must pfree the returned tuple when done with it.
00856  *
00857  * Backward scan is only allowed if randomAccess was set true or
00858  * EXEC_FLAG_BACKWARD was specified to tuplestore_set_eflags().
00859  */
static void *
tuplestore_gettuple(Tuplestorestate *state, bool forward,
                    bool *should_free)
{
    TSReadPointer *readptr = &state->readptrs[state->activeptr];
    unsigned int tuplen;
    void       *tup;

    Assert(forward || (readptr->eflags & EXEC_FLAG_BACKWARD));

    switch (state->status)
    {
        case TSS_INMEM:
            /* in-memory tuples are handed out as direct pointers, not copies */
            *should_free = false;
            if (forward)
            {
                if (readptr->eof_reached)
                    return NULL;
                if (readptr->current < state->memtupcount)
                {
                    /* We have another tuple, so return it */
                    return state->memtuples[readptr->current++];
                }
                readptr->eof_reached = true;
                return NULL;
            }
            else
            {
                /*
                 * if all tuples are fetched already then we return last
                 * tuple, else tuple before last returned.
                 */
                if (readptr->eof_reached)
                {
                    readptr->current = state->memtupcount;
                    readptr->eof_reached = false;
                }
                else
                {
                    /* can't back up into territory removed by tuplestore_trim */
                    if (readptr->current <= state->memtupdeleted)
                    {
                        Assert(!state->truncated);
                        return NULL;
                    }
                    readptr->current--; /* last returned tuple */
                }
                /* re-check: the tuple before "current" may be trimmed away too */
                if (readptr->current <= state->memtupdeleted)
                {
                    Assert(!state->truncated);
                    return NULL;
                }
                return state->memtuples[readptr->current - 1];
            }
            break;

        case TSS_WRITEFILE:
            /* Skip state change if we'll just return NULL */
            if (readptr->eof_reached && forward)
                return NULL;

            /*
             * Switch from writing to reading: remember where the next write
             * should resume, then seek to the active pointer's position.
             */
            BufFileTell(state->myfile,
                        &state->writepos_file, &state->writepos_offset);
            if (!readptr->eof_reached)
                if (BufFileSeek(state->myfile,
                                readptr->file, readptr->offset,
                                SEEK_SET) != 0)
                    elog(ERROR, "tuplestore seek failed");
            state->status = TSS_READFILE;
            /* FALL THRU into READFILE case */

        case TSS_READFILE:
            /* tuples read from file are freshly palloc'd; caller must free */
            *should_free = true;
            if (forward)
            {
                if ((tuplen = getlen(state, true)) != 0)
                {
                    tup = READTUP(state, tuplen);
                    return tup;
                }
                else
                {
                    readptr->eof_reached = true;
                    return NULL;
                }
            }

            /*
             * Backward.
             *
             * if all tuples are fetched already then we return last tuple,
             * else tuple before last returned.
             *
             * Back up to fetch previously-returned tuple's ending length
             * word. If seek fails, assume we are at start of file.
             */
            if (BufFileSeek(state->myfile, 0, -(long) sizeof(unsigned int),
                            SEEK_CUR) != 0)
            {
                /* even a failed backwards fetch gets you out of eof state */
                readptr->eof_reached = false;
                Assert(!state->truncated);
                return NULL;
            }
            tuplen = getlen(state, false);

            if (readptr->eof_reached)
            {
                readptr->eof_reached = false;
                /* We will return the tuple returned before returning NULL */
            }
            else
            {
                /*
                 * Back up to get ending length word of tuple before it.
                 * (Skip: the tuple body, its leading length word, and the
                 * previous tuple's trailing length word.)
                 */
                if (BufFileSeek(state->myfile, 0,
                                -(long) (tuplen + 2 * sizeof(unsigned int)),
                                SEEK_CUR) != 0)
                {
                    /*
                     * If that fails, presumably the prev tuple is the first
                     * in the file.  Back up so that it becomes next to read
                     * in forward direction (not obviously right, but that is
                     * what in-memory case does).
                     */
                    if (BufFileSeek(state->myfile, 0,
                                    -(long) (tuplen + sizeof(unsigned int)),
                                    SEEK_CUR) != 0)
                        elog(ERROR, "bogus tuple length in backward scan");
                    Assert(!state->truncated);
                    return NULL;
                }
                tuplen = getlen(state, false);
            }

            /*
             * Now we have the length of the prior tuple, back up and read it.
             * Note: READTUP expects we are positioned after the initial
             * length word of the tuple, so back up to that point.
             */
            if (BufFileSeek(state->myfile, 0,
                            -(long) tuplen,
                            SEEK_CUR) != 0)
                elog(ERROR, "bogus tuple length in backward scan");
            tup = READTUP(state, tuplen);
            return tup;

        default:
            elog(ERROR, "invalid tuplestore state");
            return NULL;        /* keep compiler quiet */
    }
}
01015 
01016 /*
01017  * tuplestore_gettupleslot - exported function to fetch a MinimalTuple
01018  *
01019  * If successful, put tuple in slot and return TRUE; else, clear the slot
01020  * and return FALSE.
01021  *
01022  * If copy is TRUE, the slot receives a copied tuple (allocated in current
01023  * memory context) that will stay valid regardless of future manipulations of
01024  * the tuplestore's state.  If copy is FALSE, the slot may just receive a
01025  * pointer to a tuple held within the tuplestore.  The latter is more
01026  * efficient but the slot contents may be corrupted if additional writes to
01027  * the tuplestore occur.  (If using tuplestore_trim, see comments therein.)
01028  */
01029 bool
01030 tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
01031                         bool copy, TupleTableSlot *slot)
01032 {
01033     MinimalTuple tuple;
01034     bool        should_free;
01035 
01036     tuple = (MinimalTuple) tuplestore_gettuple(state, forward, &should_free);
01037 
01038     if (tuple)
01039     {
01040         if (copy && !should_free)
01041         {
01042             tuple = heap_copy_minimal_tuple(tuple);
01043             should_free = true;
01044         }
01045         ExecStoreMinimalTuple(tuple, slot, should_free);
01046         return true;
01047     }
01048     else
01049     {
01050         ExecClearTuple(slot);
01051         return false;
01052     }
01053 }
01054 
01055 /*
01056  * tuplestore_advance - exported function to adjust position without fetching
01057  *
01058  * We could optimize this case to avoid palloc/pfree overhead, but for the
01059  * moment it doesn't seem worthwhile.  (XXX this probably needs to be
01060  * reconsidered given the needs of window functions.)
01061  */
01062 bool
01063 tuplestore_advance(Tuplestorestate *state, bool forward)
01064 {
01065     void       *tuple;
01066     bool        should_free;
01067 
01068     tuple = tuplestore_gettuple(state, forward, &should_free);
01069 
01070     if (tuple)
01071     {
01072         if (should_free)
01073             pfree(tuple);
01074         return true;
01075     }
01076     else
01077     {
01078         return false;
01079     }
01080 }
01081 
01082 /*
01083  * dumptuples - remove tuples from memory and write to tape
01084  *
01085  * As a side effect, we must convert each read pointer's position from
01086  * "current" to file/offset format.  But eof_reached pointers don't
01087  * need to change state.
01088  */
static void
dumptuples(Tuplestorestate *state)
{
    int         i;

    /*
     * Note: the loop deliberately iterates one position past the last tuple
     * (i == memtupcount) so that read pointers standing at end-of-data also
     * get their file/offset captured before we break out.
     */
    for (i = state->memtupdeleted;; i++)
    {
        TSReadPointer *readptr = state->readptrs;
        int         j;

        /*
         * Any read pointer positioned exactly at tuple i must have the
         * current file position recorded *before* tuple i is written, so
         * that it will resume reading at that tuple.
         */
        for (j = 0; j < state->readptrcount; readptr++, j++)
        {
            if (i == readptr->current && !readptr->eof_reached)
                BufFileTell(state->myfile,
                            &readptr->file, &readptr->offset);
        }
        if (i >= state->memtupcount)
            break;
        WRITETUP(state, state->memtuples[i]);
    }
    /* The in-memory array is now logically empty */
    state->memtupdeleted = 0;
    state->memtupcount = 0;
}
01112 
01113 /*
01114  * tuplestore_rescan        - rewind the active read pointer to start
01115  */
01116 void
01117 tuplestore_rescan(Tuplestorestate *state)
01118 {
01119     TSReadPointer *readptr = &state->readptrs[state->activeptr];
01120 
01121     Assert(readptr->eflags & EXEC_FLAG_REWIND);
01122     Assert(!state->truncated);
01123 
01124     switch (state->status)
01125     {
01126         case TSS_INMEM:
01127             readptr->eof_reached = false;
01128             readptr->current = 0;
01129             break;
01130         case TSS_WRITEFILE:
01131             readptr->eof_reached = false;
01132             readptr->file = 0;
01133             readptr->offset = 0L;
01134             break;
01135         case TSS_READFILE:
01136             readptr->eof_reached = false;
01137             if (BufFileSeek(state->myfile, 0, 0L, SEEK_SET) != 0)
01138                 elog(ERROR, "tuplestore seek to start failed");
01139             break;
01140         default:
01141             elog(ERROR, "invalid tuplestore state");
01142             break;
01143     }
01144 }
01145 
01146 /*
01147  * tuplestore_copy_read_pointer - copy a read pointer's state to another
01148  */
void
tuplestore_copy_read_pointer(Tuplestorestate *state,
                             int srcptr, int destptr)
{
    TSReadPointer *sptr = &state->readptrs[srcptr];
    TSReadPointer *dptr = &state->readptrs[destptr];

    Assert(srcptr >= 0 && srcptr < state->readptrcount);
    Assert(destptr >= 0 && destptr < state->readptrcount);

    /* Assigning to self is a no-op */
    if (srcptr == destptr)
        return;

    if (dptr->eflags != sptr->eflags)
    {
        /*
         * Possible change of overall eflags, so copy and then recompute:
         * state->eflags is maintained as the union of all pointers' eflags.
         */
        int         eflags;
        int         i;

        *dptr = *sptr;
        eflags = state->readptrs[0].eflags;
        for (i = 1; i < state->readptrcount; i++)
            eflags |= state->readptrs[i].eflags;
        state->eflags = eflags;
    }
    else
        *dptr = *sptr;

    switch (state->status)
    {
        case TSS_INMEM:
        case TSS_WRITEFILE:
            /* no work */
            break;
        case TSS_READFILE:

            /*
             * This case is a bit tricky since the active read pointer's
             * position corresponds to the seek point, not what is in its
             * variables.  Assigning to the active requires a seek, and
             * assigning from the active requires a tell, except when
             * eof_reached.
             */
            if (destptr == state->activeptr)
            {
                if (dptr->eof_reached)
                {
                    /* EOF means "at the saved end-of-data position" */
                    if (BufFileSeek(state->myfile,
                                    state->writepos_file,
                                    state->writepos_offset,
                                    SEEK_SET) != 0)
                        elog(ERROR, "tuplestore seek failed");
                }
                else
                {
                    if (BufFileSeek(state->myfile,
                                    dptr->file, dptr->offset,
                                    SEEK_SET) != 0)
                        elog(ERROR, "tuplestore seek failed");
                }
            }
            else if (srcptr == state->activeptr)
            {
                /* materialize the active pointer's true position into dptr */
                if (!dptr->eof_reached)
                    BufFileTell(state->myfile,
                                &dptr->file,
                                &dptr->offset);
            }
            break;
        default:
            elog(ERROR, "invalid tuplestore state");
            break;
    }
}
01224 
01225 /*
01226  * tuplestore_trim  - remove all no-longer-needed tuples
01227  *
01228  * Calling this function authorizes the tuplestore to delete all tuples
01229  * before the oldest read pointer, if no read pointer is marked as requiring
01230  * REWIND capability.
01231  *
01232  * Note: this is obviously safe if no pointer has BACKWARD capability either.
01233  * If a pointer is marked as BACKWARD but not REWIND capable, it means that
01234  * the pointer can be moved backward but not before the oldest other read
01235  * pointer.
01236  */
void
tuplestore_trim(Tuplestorestate *state)
{
    int         oldest;
    int         nremove;
    int         i;

    /*
     * Truncation is disallowed if any read pointer requires rewind
     * capability.  (state->eflags is the union over all read pointers.)
     */
    if (state->eflags & EXEC_FLAG_REWIND)
        return;

    /*
     * We don't bother trimming temp files since it usually would mean more
     * work than just letting them sit in kernel buffers until they age out.
     */
    if (state->status != TSS_INMEM)
        return;

    /* Find the oldest read pointer (eof'd pointers impose no constraint) */
    oldest = state->memtupcount;
    for (i = 0; i < state->readptrcount; i++)
    {
        if (!state->readptrs[i].eof_reached)
            oldest = Min(oldest, state->readptrs[i].current);
    }

    /*
     * Note: you might think we could remove all the tuples before the oldest
     * "current", since that one is the next to be returned.  However, since
     * tuplestore_gettuple returns a direct pointer to our internal copy of
     * the tuple, it's likely that the caller has still got the tuple just
     * before "current" referenced in a slot. So we keep one extra tuple
     * before the oldest "current".  (Strictly speaking, we could require such
     * callers to use the "copy" flag to tuplestore_gettupleslot, but for
     * efficiency we allow this one case to not use "copy".)
     */
    nremove = oldest - 1;
    if (nremove <= 0)
        return;                 /* nothing to do */

    Assert(nremove >= state->memtupdeleted);
    Assert(nremove <= state->memtupcount);

    /* Release no-longer-needed tuples */
    for (i = state->memtupdeleted; i < nremove; i++)
    {
        FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
        pfree(state->memtuples[i]);
        state->memtuples[i] = NULL;
    }
    state->memtupdeleted = nremove;

    /* mark tuplestore as truncated (used for Assert crosschecks only) */
    state->truncated = true;

    /*
     * If nremove is less than 1/8th memtupcount, just stop here, leaving the
     * "deleted" slots as NULL.  This prevents us from expending O(N^2) time
     * repeatedly memmove-ing a large pointer array.  The worst case space
     * wastage is pretty small, since it's just pointers and not whole tuples.
     */
    if (nremove < state->memtupcount / 8)
        return;

    /*
     * Slide the array down and readjust pointers.
     *
     * In mergejoin's current usage, it's demonstrable that there will always
     * be exactly one non-removed tuple; so optimize that case.
     */
    if (nremove + 1 == state->memtupcount)
        state->memtuples[0] = state->memtuples[nremove];
    else
        memmove(state->memtuples, state->memtuples + nremove,
                (state->memtupcount - nremove) * sizeof(void *));

    state->memtupdeleted = 0;
    state->memtupcount -= nremove;
    /* shift every live read pointer's index down by the amount removed */
    for (i = 0; i < state->readptrcount; i++)
    {
        if (!state->readptrs[i].eof_reached)
            state->readptrs[i].current -= nremove;
    }
}
01324 
01325 /*
01326  * tuplestore_in_memory
01327  *
01328  * Returns true if the tuplestore has not spilled to disk.
01329  *
01330  * XXX exposing this is a violation of modularity ... should get rid of it.
01331  */
01332 bool
01333 tuplestore_in_memory(Tuplestorestate *state)
01334 {
01335     return (state->status == TSS_INMEM);
01336 }
01337 
01338 
01339 /*
01340  * Tape interface routines
01341  */
01342 
01343 static unsigned int
01344 getlen(Tuplestorestate *state, bool eofOK)
01345 {
01346     unsigned int len;
01347     size_t      nbytes;
01348 
01349     nbytes = BufFileRead(state->myfile, (void *) &len, sizeof(len));
01350     if (nbytes == sizeof(len))
01351         return len;
01352     if (nbytes != 0)
01353         elog(ERROR, "unexpected end of tape");
01354     if (!eofOK)
01355         elog(ERROR, "unexpected end of data");
01356     return 0;
01357 }
01358 
01359 
01360 /*
01361  * Routines specialized for HeapTuple case
01362  *
01363  * The stored form is actually a MinimalTuple, but for largely historical
01364  * reasons we allow COPYTUP to work from a HeapTuple.
01365  *
01366  * Since MinimalTuple already has length in its first word, we don't need
01367  * to write that separately.
01368  */
01369 
01370 static void *
01371 copytup_heap(Tuplestorestate *state, void *tup)
01372 {
01373     MinimalTuple tuple;
01374 
01375     tuple = minimal_tuple_from_heap_tuple((HeapTuple) tup);
01376     USEMEM(state, GetMemoryChunkSpace(tuple));
01377     return (void *) tuple;
01378 }
01379 
01380 static void
01381 writetup_heap(Tuplestorestate *state, void *tup)
01382 {
01383     MinimalTuple tuple = (MinimalTuple) tup;
01384 
01385     /* the part of the MinimalTuple we'll write: */
01386     char       *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
01387     unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET;
01388 
01389     /* total on-disk footprint: */
01390     unsigned int tuplen = tupbodylen + sizeof(int);
01391 
01392     if (BufFileWrite(state->myfile, (void *) &tuplen,
01393                      sizeof(tuplen)) != sizeof(tuplen))
01394         elog(ERROR, "write failed");
01395     if (BufFileWrite(state->myfile, (void *) tupbody,
01396                      tupbodylen) != (size_t) tupbodylen)
01397         elog(ERROR, "write failed");
01398     if (state->backward)        /* need trailing length word? */
01399         if (BufFileWrite(state->myfile, (void *) &tuplen,
01400                          sizeof(tuplen)) != sizeof(tuplen))
01401             elog(ERROR, "write failed");
01402 
01403     FREEMEM(state, GetMemoryChunkSpace(tuple));
01404     heap_free_minimal_tuple(tuple);
01405 }
01406 
01407 static void *
01408 readtup_heap(Tuplestorestate *state, unsigned int len)
01409 {
01410     unsigned int tupbodylen = len - sizeof(int);
01411     unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET;
01412     MinimalTuple tuple = (MinimalTuple) palloc(tuplen);
01413     char       *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
01414 
01415     USEMEM(state, GetMemoryChunkSpace(tuple));
01416     /* read in the tuple proper */
01417     tuple->t_len = tuplen;
01418     if (BufFileRead(state->myfile, (void *) tupbody,
01419                     tupbodylen) != (size_t) tupbodylen)
01420         elog(ERROR, "unexpected end of data");
01421     if (state->backward)        /* need trailing length word? */
01422         if (BufFileRead(state->myfile, (void *) &tuplen,
01423                         sizeof(tuplen)) != sizeof(tuplen))
01424             elog(ERROR, "unexpected end of data");
01425     return (void *) tuple;
01426 }