Header And Logo

PostgreSQL
| The world's most advanced open source database.

toasting.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * toasting.c
00004  *    This file contains routines to support creation of toast tables
00005  *
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994, Regents of the University of California
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/catalog/toasting.c
00012  *
00013  *-------------------------------------------------------------------------
00014  */
00015 #include "postgres.h"
00016 
00017 #include "access/tuptoaster.h"
00018 #include "access/xact.h"
00019 #include "catalog/dependency.h"
00020 #include "catalog/heap.h"
00021 #include "catalog/index.h"
00022 #include "catalog/namespace.h"
00023 #include "catalog/pg_namespace.h"
00024 #include "catalog/pg_opclass.h"
00025 #include "catalog/pg_type.h"
00026 #include "catalog/toasting.h"
00027 #include "miscadmin.h"
00028 #include "nodes/makefuncs.h"
00029 #include "utils/builtins.h"
00030 #include "utils/rel.h"
00031 #include "utils/syscache.h"
00032 
00033 /* Potentially set by contrib/pg_upgrade_support functions */
00034 extern Oid  binary_upgrade_next_toast_pg_class_oid;
00035 
00036 Oid         binary_upgrade_next_toast_pg_type_oid = InvalidOid;
00037 
00038 static bool create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
00039                    Datum reloptions);
00040 static bool needs_toast_table(Relation rel);
00041 
00042 
00043 /*
00044  * AlterTableCreateToastTable
00045  *      If the table needs a toast table, and doesn't already have one,
00046  *      then create a toast table for it.
00047  *
00048  * reloptions for the toast table can be passed, too.  Pass (Datum) 0
00049  * for default reloptions.
00050  *
00051  * We expect the caller to have verified that the relation is a table and have
00052  * already done any necessary permission checks.  Callers expect this function
00053  * to end with CommandCounterIncrement if it makes any changes.
00054  */
00055 void
00056 AlterTableCreateToastTable(Oid relOid, Datum reloptions)
00057 {
00058     Relation    rel;
00059 
00060     /*
00061      * Grab an exclusive lock on the target table, since we'll update its
00062      * pg_class tuple. This is redundant for all present uses, since caller
00063      * will have such a lock already.  But the lock is needed to ensure that
00064      * concurrent readers of the pg_class tuple won't have visibility issues,
00065      * so let's be safe.
00066      */
00067     rel = heap_open(relOid, AccessExclusiveLock);
00068 
00069     /* create_toast_table does all the work */
00070     (void) create_toast_table(rel, InvalidOid, InvalidOid, reloptions);
00071 
00072     heap_close(rel, NoLock);
00073 }
00074 
00075 /*
00076  * Create a toast table during bootstrap
00077  *
00078  * Here we need to prespecify the OIDs of the toast table and its index
00079  */
00080 void
00081 BootstrapToastTable(char *relName, Oid toastOid, Oid toastIndexOid)
00082 {
00083     Relation    rel;
00084 
00085     rel = heap_openrv(makeRangeVar(NULL, relName, -1), AccessExclusiveLock);
00086 
00087     if (rel->rd_rel->relkind != RELKIND_RELATION &&
00088         rel->rd_rel->relkind != RELKIND_MATVIEW)
00089         ereport(ERROR,
00090                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
00091                  errmsg("\"%s\" is not a table or materialized view",
00092                         relName)));
00093 
00094     /* create_toast_table does all the work */
00095     if (!create_toast_table(rel, toastOid, toastIndexOid, (Datum) 0))
00096         elog(ERROR, "\"%s\" does not require a toast table",
00097              relName);
00098 
00099     heap_close(rel, NoLock);
00100 }
00101 
00102 
00103 /*
00104  * create_toast_table --- internal workhorse
00105  *
00106  * rel is already opened and locked
00107  * toastOid and toastIndexOid are normally InvalidOid, but during
00108  * bootstrap they can be nonzero to specify hand-assigned OIDs
00109  */
00110 static bool
00111 create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, Datum reloptions)
00112 {
00113     Oid         relOid = RelationGetRelid(rel);
00114     HeapTuple   reltup;
00115     TupleDesc   tupdesc;
00116     bool        shared_relation;
00117     bool        mapped_relation;
00118     Relation    toast_rel;
00119     Relation    class_rel;
00120     Oid         toast_relid;
00121     Oid         toast_typid = InvalidOid;
00122     Oid         namespaceid;
00123     char        toast_relname[NAMEDATALEN];
00124     char        toast_idxname[NAMEDATALEN];
00125     IndexInfo  *indexInfo;
00126     Oid         collationObjectId[2];
00127     Oid         classObjectId[2];
00128     int16       coloptions[2];
00129     ObjectAddress baseobject,
00130                 toastobject;
00131 
00132     /*
00133      * Toast table is shared if and only if its parent is.
00134      *
00135      * We cannot allow toasting a shared relation after initdb (because
00136      * there's no way to mark it toasted in other databases' pg_class).
00137      */
00138     shared_relation = rel->rd_rel->relisshared;
00139     if (shared_relation && !IsBootstrapProcessingMode())
00140         ereport(ERROR,
00141                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
00142                  errmsg("shared tables cannot be toasted after initdb")));
00143 
00144     /* It's mapped if and only if its parent is, too */
00145     mapped_relation = RelationIsMapped(rel);
00146 
00147     /*
00148      * Is it already toasted?
00149      */
00150     if (rel->rd_rel->reltoastrelid != InvalidOid)
00151         return false;
00152 
00153     /*
00154      * Check to see whether the table actually needs a TOAST table.
00155      *
00156      * If an update-in-place toast relfilenode is specified, force toast file
00157      * creation even if it seems not to need one.
00158      */
00159     if (!needs_toast_table(rel) &&
00160         (!IsBinaryUpgrade ||
00161          !OidIsValid(binary_upgrade_next_toast_pg_class_oid)))
00162         return false;
00163 
00164     /*
00165      * Create the toast table and its index
00166      */
00167     snprintf(toast_relname, sizeof(toast_relname),
00168              "pg_toast_%u", relOid);
00169     snprintf(toast_idxname, sizeof(toast_idxname),
00170              "pg_toast_%u_index", relOid);
00171 
00172     /* this is pretty painful...  need a tuple descriptor */
00173     tupdesc = CreateTemplateTupleDesc(3, false);
00174     TupleDescInitEntry(tupdesc, (AttrNumber) 1,
00175                        "chunk_id",
00176                        OIDOID,
00177                        -1, 0);
00178     TupleDescInitEntry(tupdesc, (AttrNumber) 2,
00179                        "chunk_seq",
00180                        INT4OID,
00181                        -1, 0);
00182     TupleDescInitEntry(tupdesc, (AttrNumber) 3,
00183                        "chunk_data",
00184                        BYTEAOID,
00185                        -1, 0);
00186 
00187     /*
00188      * Ensure that the toast table doesn't itself get toasted, or we'll be
00189      * toast :-(.  This is essential for chunk_data because type bytea is
00190      * toastable; hit the other two just to be sure.
00191      */
00192     tupdesc->attrs[0]->attstorage = 'p';
00193     tupdesc->attrs[1]->attstorage = 'p';
00194     tupdesc->attrs[2]->attstorage = 'p';
00195 
00196     /*
00197      * Toast tables for regular relations go in pg_toast; those for temp
00198      * relations go into the per-backend temp-toast-table namespace.
00199      */
00200     if (isTempOrToastNamespace(rel->rd_rel->relnamespace))
00201         namespaceid = GetTempToastNamespace();
00202     else
00203         namespaceid = PG_TOAST_NAMESPACE;
00204 
00205     /* Use binary-upgrade override for pg_type.oid, if supplied. */
00206     if (IsBinaryUpgrade && OidIsValid(binary_upgrade_next_toast_pg_type_oid))
00207     {
00208         toast_typid = binary_upgrade_next_toast_pg_type_oid;
00209         binary_upgrade_next_toast_pg_type_oid = InvalidOid;
00210     }
00211 
00212     toast_relid = heap_create_with_catalog(toast_relname,
00213                                            namespaceid,
00214                                            rel->rd_rel->reltablespace,
00215                                            toastOid,
00216                                            toast_typid,
00217                                            InvalidOid,
00218                                            rel->rd_rel->relowner,
00219                                            tupdesc,
00220                                            NIL,
00221                                            RELKIND_TOASTVALUE,
00222                                            rel->rd_rel->relpersistence,
00223                                            shared_relation,
00224                                            mapped_relation,
00225                                            true,
00226                                            0,
00227                                            ONCOMMIT_NOOP,
00228                                            reloptions,
00229                                            false,
00230                                            true,
00231                                            true);
00232     Assert(toast_relid != InvalidOid);
00233 
00234     /* make the toast relation visible, else heap_open will fail */
00235     CommandCounterIncrement();
00236 
00237     /* ShareLock is not really needed here, but take it anyway */
00238     toast_rel = heap_open(toast_relid, ShareLock);
00239 
00240     /*
00241      * Create unique index on chunk_id, chunk_seq.
00242      *
00243      * NOTE: the normal TOAST access routines could actually function with a
00244      * single-column index on chunk_id only. However, the slice access
00245      * routines use both columns for faster access to an individual chunk. In
00246      * addition, we want it to be unique as a check against the possibility of
00247      * duplicate TOAST chunk OIDs. The index might also be a little more
00248      * efficient this way, since btree isn't all that happy with large numbers
00249      * of equal keys.
00250      */
00251 
00252     indexInfo = makeNode(IndexInfo);
00253     indexInfo->ii_NumIndexAttrs = 2;
00254     indexInfo->ii_KeyAttrNumbers[0] = 1;
00255     indexInfo->ii_KeyAttrNumbers[1] = 2;
00256     indexInfo->ii_Expressions = NIL;
00257     indexInfo->ii_ExpressionsState = NIL;
00258     indexInfo->ii_Predicate = NIL;
00259     indexInfo->ii_PredicateState = NIL;
00260     indexInfo->ii_ExclusionOps = NULL;
00261     indexInfo->ii_ExclusionProcs = NULL;
00262     indexInfo->ii_ExclusionStrats = NULL;
00263     indexInfo->ii_Unique = true;
00264     indexInfo->ii_ReadyForInserts = true;
00265     indexInfo->ii_Concurrent = false;
00266     indexInfo->ii_BrokenHotChain = false;
00267 
00268     collationObjectId[0] = InvalidOid;
00269     collationObjectId[1] = InvalidOid;
00270 
00271     classObjectId[0] = OID_BTREE_OPS_OID;
00272     classObjectId[1] = INT4_BTREE_OPS_OID;
00273 
00274     coloptions[0] = 0;
00275     coloptions[1] = 0;
00276 
00277     index_create(toast_rel, toast_idxname, toastIndexOid, InvalidOid,
00278                  indexInfo,
00279                  list_make2("chunk_id", "chunk_seq"),
00280                  BTREE_AM_OID,
00281                  rel->rd_rel->reltablespace,
00282                  collationObjectId, classObjectId, coloptions, (Datum) 0,
00283                  true, false, false, false,
00284                  true, false, false, true);
00285 
00286     heap_close(toast_rel, NoLock);
00287 
00288     /*
00289      * Store the toast table's OID in the parent relation's pg_class row
00290      */
00291     class_rel = heap_open(RelationRelationId, RowExclusiveLock);
00292 
00293     reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid));
00294     if (!HeapTupleIsValid(reltup))
00295         elog(ERROR, "cache lookup failed for relation %u", relOid);
00296 
00297     ((Form_pg_class) GETSTRUCT(reltup))->reltoastrelid = toast_relid;
00298 
00299     if (!IsBootstrapProcessingMode())
00300     {
00301         /* normal case, use a transactional update */
00302         simple_heap_update(class_rel, &reltup->t_self, reltup);
00303 
00304         /* Keep catalog indexes current */
00305         CatalogUpdateIndexes(class_rel, reltup);
00306     }
00307     else
00308     {
00309         /* While bootstrapping, we cannot UPDATE, so overwrite in-place */
00310         heap_inplace_update(class_rel, reltup);
00311     }
00312 
00313     heap_freetuple(reltup);
00314 
00315     heap_close(class_rel, RowExclusiveLock);
00316 
00317     /*
00318      * Register dependency from the toast table to the master, so that the
00319      * toast table will be deleted if the master is.  Skip this in bootstrap
00320      * mode.
00321      */
00322     if (!IsBootstrapProcessingMode())
00323     {
00324         baseobject.classId = RelationRelationId;
00325         baseobject.objectId = relOid;
00326         baseobject.objectSubId = 0;
00327         toastobject.classId = RelationRelationId;
00328         toastobject.objectId = toast_relid;
00329         toastobject.objectSubId = 0;
00330 
00331         recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
00332     }
00333 
00334     /*
00335      * Make changes visible
00336      */
00337     CommandCounterIncrement();
00338 
00339     return true;
00340 }
00341 
00342 /*
00343  * Check to see whether the table needs a TOAST table.  It does only if
00344  * (1) there are any toastable attributes, and (2) the maximum length
00345  * of a tuple could exceed TOAST_TUPLE_THRESHOLD.  (We don't want to
00346  * create a toast table for something like "f1 varchar(20)".)
00347  */
00348 static bool
00349 needs_toast_table(Relation rel)
00350 {
00351     int32       data_length = 0;
00352     bool        maxlength_unknown = false;
00353     bool        has_toastable_attrs = false;
00354     TupleDesc   tupdesc;
00355     Form_pg_attribute *att;
00356     int32       tuple_length;
00357     int         i;
00358 
00359     tupdesc = rel->rd_att;
00360     att = tupdesc->attrs;
00361 
00362     for (i = 0; i < tupdesc->natts; i++)
00363     {
00364         if (att[i]->attisdropped)
00365             continue;
00366         data_length = att_align_nominal(data_length, att[i]->attalign);
00367         if (att[i]->attlen > 0)
00368         {
00369             /* Fixed-length types are never toastable */
00370             data_length += att[i]->attlen;
00371         }
00372         else
00373         {
00374             int32       maxlen = type_maximum_size(att[i]->atttypid,
00375                                                    att[i]->atttypmod);
00376 
00377             if (maxlen < 0)
00378                 maxlength_unknown = true;
00379             else
00380                 data_length += maxlen;
00381             if (att[i]->attstorage != 'p')
00382                 has_toastable_attrs = true;
00383         }
00384     }
00385     if (!has_toastable_attrs)
00386         return false;           /* nothing to toast? */
00387     if (maxlength_unknown)
00388         return true;            /* any unlimited-length attrs? */
00389     tuple_length = MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) +
00390                             BITMAPLEN(tupdesc->natts)) +
00391         MAXALIGN(data_length);
00392     return (tuple_length > TOAST_TUPLE_THRESHOLD);
00393 }