Header And Logo

PostgreSQL
| The world's most advanced open source database.

index.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * index.c
00004  *    code to create and destroy POSTGRES index relations
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  * Portions Copyright (c) 1994, Regents of the University of California
00008  *
00009  *
00010  * IDENTIFICATION
00011  *    src/backend/catalog/index.c
00012  *
00013  *
00014  * INTERFACE ROUTINES
00015  *      index_create()          - Create a cataloged index relation
00016  *      index_drop()            - Removes index relation from catalogs
00017  *      BuildIndexInfo()        - Prepare to insert index tuples
00018  *      FormIndexDatum()        - Construct datum vector for one index tuple
00019  *
00020  *-------------------------------------------------------------------------
00021  */
00022 #include "postgres.h"
00023 
00024 #include <unistd.h>
00025 
00026 #include "access/multixact.h"
00027 #include "access/relscan.h"
00028 #include "access/sysattr.h"
00029 #include "access/transam.h"
00030 #include "access/visibilitymap.h"
00031 #include "access/xact.h"
00032 #include "bootstrap/bootstrap.h"
00033 #include "catalog/catalog.h"
00034 #include "catalog/dependency.h"
00035 #include "catalog/heap.h"
00036 #include "catalog/index.h"
00037 #include "catalog/objectaccess.h"
00038 #include "catalog/pg_collation.h"
00039 #include "catalog/pg_constraint.h"
00040 #include "catalog/pg_operator.h"
00041 #include "catalog/pg_opclass.h"
00042 #include "catalog/pg_tablespace.h"
00043 #include "catalog/pg_trigger.h"
00044 #include "catalog/pg_type.h"
00045 #include "catalog/storage.h"
00046 #include "commands/tablecmds.h"
00047 #include "commands/trigger.h"
00048 #include "executor/executor.h"
00049 #include "miscadmin.h"
00050 #include "nodes/makefuncs.h"
00051 #include "nodes/nodeFuncs.h"
00052 #include "optimizer/clauses.h"
00053 #include "parser/parser.h"
00054 #include "storage/bufmgr.h"
00055 #include "storage/lmgr.h"
00056 #include "storage/predicate.h"
00057 #include "storage/procarray.h"
00058 #include "storage/smgr.h"
00059 #include "utils/builtins.h"
00060 #include "utils/fmgroids.h"
00061 #include "utils/guc.h"
00062 #include "utils/inval.h"
00063 #include "utils/lsyscache.h"
00064 #include "utils/memutils.h"
00065 #include "utils/syscache.h"
00066 #include "utils/tuplesort.h"
00067 #include "utils/snapmgr.h"
00068 #include "utils/tqual.h"
00069 
00070 
00071 /* Potentially set by contrib/pg_upgrade_support functions */
00072 Oid         binary_upgrade_next_index_pg_class_oid = InvalidOid;
00073 
00074 /* state info for validate_index bulkdelete callback */
00075 typedef struct
00076 {
00077     Tuplesortstate *tuplesort;  /* for sorting the index TIDs */
00078     /* statistics (for debug purposes only): */
00079     double      htups,
00080                 itups,
00081                 tups_inserted;
00082 } v_i_state;
00083 
00084 /* non-export function prototypes */
00085 static bool relationHasPrimaryKey(Relation rel);
00086 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
00087                          IndexInfo *indexInfo,
00088                          List *indexColNames,
00089                          Oid accessMethodObjectId,
00090                          Oid *collationObjectId,
00091                          Oid *classObjectId);
00092 static void InitializeAttributeOids(Relation indexRelation,
00093                         int numatts, Oid indexoid);
00094 static void AppendAttributeTuples(Relation indexRelation, int numatts);
00095 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
00096                     IndexInfo *indexInfo,
00097                     Oid *collationOids,
00098                     Oid *classOids,
00099                     int16 *coloptions,
00100                     bool primary,
00101                     bool isexclusion,
00102                     bool immediate,
00103                     bool isvalid);
00104 static void index_update_stats(Relation rel,
00105                    bool hasindex, bool isprimary,
00106                    Oid reltoastidxid, double reltuples);
00107 static void IndexCheckExclusion(Relation heapRelation,
00108                     Relation indexRelation,
00109                     IndexInfo *indexInfo);
00110 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
00111 static void validate_index_heapscan(Relation heapRelation,
00112                         Relation indexRelation,
00113                         IndexInfo *indexInfo,
00114                         Snapshot snapshot,
00115                         v_i_state *state);
00116 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
00117 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
00118 static void ResetReindexProcessing(void);
00119 static void SetReindexPending(List *indexes);
00120 static void RemoveReindexPending(Oid indexOid);
00121 static void ResetReindexPending(void);
00122 
00123 
00124 /*
00125  * relationHasPrimaryKey
00126  *      See whether an existing relation has a primary key.
00127  *
00128  * Caller must have suitable lock on the relation.
00129  *
00130  * Note: we intentionally do not check IndexIsValid here; that's because this
00131  * is used to enforce the rule that there can be only one indisprimary index,
00132  * and we want that to be true even if said index is invalid.
00133  */
00134 static bool
00135 relationHasPrimaryKey(Relation rel)
00136 {
00137     bool        result = false;
00138     List       *indexoidlist;
00139     ListCell   *indexoidscan;
00140 
00141     /*
00142      * Get the list of index OIDs for the table from the relcache, and look up
00143      * each one in the pg_index syscache until we find one marked primary key
00144      * (hopefully there isn't more than one such).
00145      */
00146     indexoidlist = RelationGetIndexList(rel);
00147 
00148     foreach(indexoidscan, indexoidlist)
00149     {
00150         Oid         indexoid = lfirst_oid(indexoidscan);
00151         HeapTuple   indexTuple;
00152 
00153         indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
00154         if (!HeapTupleIsValid(indexTuple))      /* should not happen */
00155             elog(ERROR, "cache lookup failed for index %u", indexoid);
00156         result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
00157         ReleaseSysCache(indexTuple);
00158         if (result)
00159             break;
00160     }
00161 
00162     list_free(indexoidlist);
00163 
00164     return result;
00165 }
00166 
00167 /*
00168  * index_check_primary_key
00169  *      Apply special checks needed before creating a PRIMARY KEY index
00170  *
00171  * This processing used to be in DefineIndex(), but has been split out
00172  * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
00173  *
00174  * We check for a pre-existing primary key, and that all columns of the index
00175  * are simple column references (not expressions), and that all those
00176  * columns are marked NOT NULL.  If they aren't (which can only happen during
00177  * ALTER TABLE ADD CONSTRAINT, since the parser forces such columns to be
00178  * created NOT NULL during CREATE TABLE), do an ALTER SET NOT NULL to mark
00179  * them so --- or fail if they are not in fact nonnull.
00180  *
00181  * Caller had better have at least ShareLock on the table, else the not-null
00182  * checking isn't trustworthy.
00183  */
00184 void
00185 index_check_primary_key(Relation heapRel,
00186                         IndexInfo *indexInfo,
00187                         bool is_alter_table)
00188 {
00189     List       *cmds;
00190     int         i;
00191 
00192     /*
00193      * If ALTER TABLE, check that there isn't already a PRIMARY KEY. In CREATE
00194      * TABLE, we have faith that the parser rejected multiple pkey clauses;
00195      * and CREATE INDEX doesn't have a way to say PRIMARY KEY, so it's no
00196      * problem either.
00197      */
00198     if (is_alter_table &&
00199         relationHasPrimaryKey(heapRel))
00200     {
00201         ereport(ERROR,
00202                 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
00203              errmsg("multiple primary keys for table \"%s\" are not allowed",
00204                     RelationGetRelationName(heapRel))));
00205     }
00206 
00207     /*
00208      * Check that all of the attributes in a primary key are marked as not
00209      * null, otherwise attempt to ALTER TABLE .. SET NOT NULL
00210      */
00211     cmds = NIL;
00212     for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
00213     {
00214         AttrNumber  attnum = indexInfo->ii_KeyAttrNumbers[i];
00215         HeapTuple   atttuple;
00216         Form_pg_attribute attform;
00217 
00218         if (attnum == 0)
00219             ereport(ERROR,
00220                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00221                      errmsg("primary keys cannot be expressions")));
00222 
00223         /* System attributes are never null, so no need to check */
00224         if (attnum < 0)
00225             continue;
00226 
00227         atttuple = SearchSysCache2(ATTNUM,
00228                                  ObjectIdGetDatum(RelationGetRelid(heapRel)),
00229                                    Int16GetDatum(attnum));
00230         if (!HeapTupleIsValid(atttuple))
00231             elog(ERROR, "cache lookup failed for attribute %d of relation %u",
00232                  attnum, RelationGetRelid(heapRel));
00233         attform = (Form_pg_attribute) GETSTRUCT(atttuple);
00234 
00235         if (!attform->attnotnull)
00236         {
00237             /* Add a subcommand to make this one NOT NULL */
00238             AlterTableCmd *cmd = makeNode(AlterTableCmd);
00239 
00240             cmd->subtype = AT_SetNotNull;
00241             cmd->name = pstrdup(NameStr(attform->attname));
00242             cmds = lappend(cmds, cmd);
00243         }
00244 
00245         ReleaseSysCache(atttuple);
00246     }
00247 
00248     /*
00249      * XXX: Shouldn't the ALTER TABLE .. SET NOT NULL cascade to child tables?
00250      * Currently, since the PRIMARY KEY itself doesn't cascade, we don't
00251      * cascade the notnull constraint(s) either; but this is pretty debatable.
00252      *
00253      * XXX: possible future improvement: when being called from ALTER TABLE,
00254      * it would be more efficient to merge this with the outer ALTER TABLE, so
00255      * as to avoid two scans.  But that seems to complicate DefineIndex's API
00256      * unduly.
00257      */
00258     if (cmds)
00259         AlterTableInternal(RelationGetRelid(heapRel), cmds, false);
00260 }
00261 
00262 /*
00263  *      ConstructTupleDescriptor
00264  *
00265  * Build an index tuple descriptor for a new index
00266  */
00267 static TupleDesc
00268 ConstructTupleDescriptor(Relation heapRelation,
00269                          IndexInfo *indexInfo,
00270                          List *indexColNames,
00271                          Oid accessMethodObjectId,
00272                          Oid *collationObjectId,
00273                          Oid *classObjectId)
00274 {
00275     int         numatts = indexInfo->ii_NumIndexAttrs;
00276     ListCell   *colnames_item = list_head(indexColNames);
00277     ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
00278     HeapTuple   amtuple;
00279     Form_pg_am  amform;
00280     TupleDesc   heapTupDesc;
00281     TupleDesc   indexTupDesc;
00282     int         natts;          /* #atts in heap rel --- for error checks */
00283     int         i;
00284 
00285     /* We need access to the index AM's pg_am tuple */
00286     amtuple = SearchSysCache1(AMOID,
00287                               ObjectIdGetDatum(accessMethodObjectId));
00288     if (!HeapTupleIsValid(amtuple))
00289         elog(ERROR, "cache lookup failed for access method %u",
00290              accessMethodObjectId);
00291     amform = (Form_pg_am) GETSTRUCT(amtuple);
00292 
00293     /* ... and to the table's tuple descriptor */
00294     heapTupDesc = RelationGetDescr(heapRelation);
00295     natts = RelationGetForm(heapRelation)->relnatts;
00296 
00297     /*
00298      * allocate the new tuple descriptor
00299      */
00300     indexTupDesc = CreateTemplateTupleDesc(numatts, false);
00301 
00302     /*
00303      * For simple index columns, we copy the pg_attribute row from the parent
00304      * relation and modify it as necessary.  For expressions we have to cons
00305      * up a pg_attribute row the hard way.
00306      */
00307     for (i = 0; i < numatts; i++)
00308     {
00309         AttrNumber  atnum = indexInfo->ii_KeyAttrNumbers[i];
00310         Form_pg_attribute to = indexTupDesc->attrs[i];
00311         HeapTuple   tuple;
00312         Form_pg_type typeTup;
00313         Form_pg_opclass opclassTup;
00314         Oid         keyType;
00315 
00316         if (atnum != 0)
00317         {
00318             /* Simple index column */
00319             Form_pg_attribute from;
00320 
00321             if (atnum < 0)
00322             {
00323                 /*
00324                  * here we are indexing on a system attribute (-1...-n)
00325                  */
00326                 from = SystemAttributeDefinition(atnum,
00327                                            heapRelation->rd_rel->relhasoids);
00328             }
00329             else
00330             {
00331                 /*
00332                  * here we are indexing on a normal attribute (1...n)
00333                  */
00334                 if (atnum > natts)      /* safety check */
00335                     elog(ERROR, "invalid column number %d", atnum);
00336                 from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
00337             }
00338 
00339             /*
00340              * now that we've determined the "from", let's copy the tuple desc
00341              * data...
00342              */
00343             memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);
00344 
00345             /*
00346              * Fix the stuff that should not be the same as the underlying
00347              * attr
00348              */
00349             to->attnum = i + 1;
00350 
00351             to->attstattarget = -1;
00352             to->attcacheoff = -1;
00353             to->attnotnull = false;
00354             to->atthasdef = false;
00355             to->attislocal = true;
00356             to->attinhcount = 0;
00357             to->attcollation = collationObjectId[i];
00358         }
00359         else
00360         {
00361             /* Expressional index */
00362             Node       *indexkey;
00363 
00364             MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
00365 
00366             if (indexpr_item == NULL)   /* shouldn't happen */
00367                 elog(ERROR, "too few entries in indexprs list");
00368             indexkey = (Node *) lfirst(indexpr_item);
00369             indexpr_item = lnext(indexpr_item);
00370 
00371             /*
00372              * Lookup the expression type in pg_type for the type length etc.
00373              */
00374             keyType = exprType(indexkey);
00375             tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
00376             if (!HeapTupleIsValid(tuple))
00377                 elog(ERROR, "cache lookup failed for type %u", keyType);
00378             typeTup = (Form_pg_type) GETSTRUCT(tuple);
00379 
00380             /*
00381              * Assign some of the attributes values. Leave the rest as 0.
00382              */
00383             to->attnum = i + 1;
00384             to->atttypid = keyType;
00385             to->attlen = typeTup->typlen;
00386             to->attbyval = typeTup->typbyval;
00387             to->attstorage = typeTup->typstorage;
00388             to->attalign = typeTup->typalign;
00389             to->attstattarget = -1;
00390             to->attcacheoff = -1;
00391             to->atttypmod = -1;
00392             to->attislocal = true;
00393             to->attcollation = collationObjectId[i];
00394 
00395             ReleaseSysCache(tuple);
00396 
00397             /*
00398              * Make sure the expression yields a type that's safe to store in
00399              * an index.  We need this defense because we have index opclasses
00400              * for pseudo-types such as "record", and the actually stored type
00401              * had better be safe; eg, a named composite type is okay, an
00402              * anonymous record type is not.  The test is the same as for
00403              * whether a table column is of a safe type (which is why we
00404              * needn't check for the non-expression case).
00405              */
00406             CheckAttributeType(NameStr(to->attname),
00407                                to->atttypid, to->attcollation,
00408                                NIL, false);
00409         }
00410 
00411         /*
00412          * We do not yet have the correct relation OID for the index, so just
00413          * set it invalid for now.  InitializeAttributeOids() will fix it
00414          * later.
00415          */
00416         to->attrelid = InvalidOid;
00417 
00418         /*
00419          * Set the attribute name as specified by caller.
00420          */
00421         if (colnames_item == NULL)      /* shouldn't happen */
00422             elog(ERROR, "too few entries in colnames list");
00423         namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
00424         colnames_item = lnext(colnames_item);
00425 
00426         /*
00427          * Check the opclass and index AM to see if either provides a keytype
00428          * (overriding the attribute type).  Opclass takes precedence.
00429          */
00430         tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
00431         if (!HeapTupleIsValid(tuple))
00432             elog(ERROR, "cache lookup failed for opclass %u",
00433                  classObjectId[i]);
00434         opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
00435         if (OidIsValid(opclassTup->opckeytype))
00436             keyType = opclassTup->opckeytype;
00437         else
00438             keyType = amform->amkeytype;
00439         ReleaseSysCache(tuple);
00440 
00441         if (OidIsValid(keyType) && keyType != to->atttypid)
00442         {
00443             /* index value and heap value have different types */
00444             tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
00445             if (!HeapTupleIsValid(tuple))
00446                 elog(ERROR, "cache lookup failed for type %u", keyType);
00447             typeTup = (Form_pg_type) GETSTRUCT(tuple);
00448 
00449             to->atttypid = keyType;
00450             to->atttypmod = -1;
00451             to->attlen = typeTup->typlen;
00452             to->attbyval = typeTup->typbyval;
00453             to->attalign = typeTup->typalign;
00454             to->attstorage = typeTup->typstorage;
00455 
00456             ReleaseSysCache(tuple);
00457         }
00458     }
00459 
00460     ReleaseSysCache(amtuple);
00461 
00462     return indexTupDesc;
00463 }
00464 
00465 /* ----------------------------------------------------------------
00466  *      InitializeAttributeOids
00467  * ----------------------------------------------------------------
00468  */
00469 static void
00470 InitializeAttributeOids(Relation indexRelation,
00471                         int numatts,
00472                         Oid indexoid)
00473 {
00474     TupleDesc   tupleDescriptor;
00475     int         i;
00476 
00477     tupleDescriptor = RelationGetDescr(indexRelation);
00478 
00479     for (i = 0; i < numatts; i += 1)
00480         tupleDescriptor->attrs[i]->attrelid = indexoid;
00481 }
00482 
00483 /* ----------------------------------------------------------------
00484  *      AppendAttributeTuples
00485  * ----------------------------------------------------------------
00486  */
00487 static void
00488 AppendAttributeTuples(Relation indexRelation, int numatts)
00489 {
00490     Relation    pg_attribute;
00491     CatalogIndexState indstate;
00492     TupleDesc   indexTupDesc;
00493     int         i;
00494 
00495     /*
00496      * open the attribute relation and its indexes
00497      */
00498     pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
00499 
00500     indstate = CatalogOpenIndexes(pg_attribute);
00501 
00502     /*
00503      * insert data from new index's tupdesc into pg_attribute
00504      */
00505     indexTupDesc = RelationGetDescr(indexRelation);
00506 
00507     for (i = 0; i < numatts; i++)
00508     {
00509         /*
00510          * There used to be very grotty code here to set these fields, but I
00511          * think it's unnecessary.  They should be set already.
00512          */
00513         Assert(indexTupDesc->attrs[i]->attnum == i + 1);
00514         Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
00515 
00516         InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
00517     }
00518 
00519     CatalogCloseIndexes(indstate);
00520 
00521     heap_close(pg_attribute, RowExclusiveLock);
00522 }
00523 
00524 /* ----------------------------------------------------------------
00525  *      UpdateIndexRelation
00526  *
00527  * Construct and insert a new entry in the pg_index catalog
00528  * ----------------------------------------------------------------
00529  */
00530 static void
00531 UpdateIndexRelation(Oid indexoid,
00532                     Oid heapoid,
00533                     IndexInfo *indexInfo,
00534                     Oid *collationOids,
00535                     Oid *classOids,
00536                     int16 *coloptions,
00537                     bool primary,
00538                     bool isexclusion,
00539                     bool immediate,
00540                     bool isvalid)
00541 {
00542     int2vector *indkey;
00543     oidvector  *indcollation;
00544     oidvector  *indclass;
00545     int2vector *indoption;
00546     Datum       exprsDatum;
00547     Datum       predDatum;
00548     Datum       values[Natts_pg_index];
00549     bool        nulls[Natts_pg_index];
00550     Relation    pg_index;
00551     HeapTuple   tuple;
00552     int         i;
00553 
00554     /*
00555      * Copy the index key, opclass, and indoption info into arrays (should we
00556      * make the caller pass them like this to start with?)
00557      */
00558     indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
00559     for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
00560         indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
00561     indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexAttrs);
00562     indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
00563     indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
00564 
00565     /*
00566      * Convert the index expressions (if any) to a text datum
00567      */
00568     if (indexInfo->ii_Expressions != NIL)
00569     {
00570         char       *exprsString;
00571 
00572         exprsString = nodeToString(indexInfo->ii_Expressions);
00573         exprsDatum = CStringGetTextDatum(exprsString);
00574         pfree(exprsString);
00575     }
00576     else
00577         exprsDatum = (Datum) 0;
00578 
00579     /*
00580      * Convert the index predicate (if any) to a text datum.  Note we convert
00581      * implicit-AND format to normal explicit-AND for storage.
00582      */
00583     if (indexInfo->ii_Predicate != NIL)
00584     {
00585         char       *predString;
00586 
00587         predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
00588         predDatum = CStringGetTextDatum(predString);
00589         pfree(predString);
00590     }
00591     else
00592         predDatum = (Datum) 0;
00593 
00594     /*
00595      * open the system catalog index relation
00596      */
00597     pg_index = heap_open(IndexRelationId, RowExclusiveLock);
00598 
00599     /*
00600      * Build a pg_index tuple
00601      */
00602     MemSet(nulls, false, sizeof(nulls));
00603 
00604     values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
00605     values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
00606     values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
00607     values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
00608     values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
00609     values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
00610     values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
00611     values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
00612     values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
00613     values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
00614     /* we set isvalid and isready the same way */
00615     values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
00616     values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
00617     values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
00618     values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
00619     values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
00620     values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
00621     values[Anum_pg_index_indexprs - 1] = exprsDatum;
00622     if (exprsDatum == (Datum) 0)
00623         nulls[Anum_pg_index_indexprs - 1] = true;
00624     values[Anum_pg_index_indpred - 1] = predDatum;
00625     if (predDatum == (Datum) 0)
00626         nulls[Anum_pg_index_indpred - 1] = true;
00627 
00628     tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
00629 
00630     /*
00631      * insert the tuple into the pg_index catalog
00632      */
00633     simple_heap_insert(pg_index, tuple);
00634 
00635     /* update the indexes on pg_index */
00636     CatalogUpdateIndexes(pg_index, tuple);
00637 
00638     /*
00639      * close the relation and free the tuple
00640      */
00641     heap_close(pg_index, RowExclusiveLock);
00642     heap_freetuple(tuple);
00643 }
00644 
00645 
00646 /*
00647  * index_create
00648  *
00649  * heapRelation: table to build index on (suitably locked by caller)
00650  * indexRelationName: name for the new index relation
00651  * indexRelationId: normally, pass InvalidOid to let this routine
00652  *      generate an OID for the index.  During bootstrap this may be
00653  *      nonzero to specify a preselected OID.
00654  * relFileNode: normally, pass InvalidOid to get new storage.  May be
00655  *      nonzero to attach an existing valid build.
00656  * indexInfo: same info executor uses to insert into the index
00657  * indexColNames: column names to use for index (List of char *)
00658  * accessMethodObjectId: OID of index AM to use
00659  * tableSpaceId: OID of tablespace to use
00660  * collationObjectId: array of collation OIDs, one per index column
00661  * classObjectId: array of index opclass OIDs, one per index column
00662  * coloptions: array of per-index-column indoption settings
00663  * reloptions: AM-specific options
00664  * isprimary: index is a PRIMARY KEY
00665  * isconstraint: index is owned by PRIMARY KEY, UNIQUE, or EXCLUSION constraint
00666  * deferrable: constraint is DEFERRABLE
00667  * initdeferred: constraint is INITIALLY DEFERRED
00668  * allow_system_table_mods: allow table to be a system catalog
00669  * skip_build: true to skip the index_build() step for the moment; caller
00670  *      must do it later (typically via reindex_index())
00671  * concurrent: if true, do not lock the table against writers.  The index
00672  *      will be marked "invalid" and the caller must take additional steps
00673  *      to fix it up.
00674  * is_internal: if true, post creation hook for new index
00675  *
00676  * Returns the OID of the created index.
00677  */
00678 Oid
00679 index_create(Relation heapRelation,
00680              const char *indexRelationName,
00681              Oid indexRelationId,
00682              Oid relFileNode,
00683              IndexInfo *indexInfo,
00684              List *indexColNames,
00685              Oid accessMethodObjectId,
00686              Oid tableSpaceId,
00687              Oid *collationObjectId,
00688              Oid *classObjectId,
00689              int16 *coloptions,
00690              Datum reloptions,
00691              bool isprimary,
00692              bool isconstraint,
00693              bool deferrable,
00694              bool initdeferred,
00695              bool allow_system_table_mods,
00696              bool skip_build,
00697              bool concurrent,
00698              bool is_internal)
00699 {
00700     Oid         heapRelationId = RelationGetRelid(heapRelation);
00701     Relation    pg_class;
00702     Relation    indexRelation;
00703     TupleDesc   indexTupDesc;
00704     bool        shared_relation;
00705     bool        mapped_relation;
00706     bool        is_exclusion;
00707     Oid         namespaceId;
00708     int         i;
00709     char        relpersistence;
00710 
00711     is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
00712 
00713     pg_class = heap_open(RelationRelationId, RowExclusiveLock);
00714 
00715     /*
00716      * The index will be in the same namespace as its parent table, and is
00717      * shared across databases if and only if the parent is.  Likewise, it
00718      * will use the relfilenode map if and only if the parent does; and it
00719      * inherits the parent's relpersistence.
00720      */
00721     namespaceId = RelationGetNamespace(heapRelation);
00722     shared_relation = heapRelation->rd_rel->relisshared;
00723     mapped_relation = RelationIsMapped(heapRelation);
00724     relpersistence = heapRelation->rd_rel->relpersistence;
00725 
00726     /*
00727      * check parameters
00728      */
00729     if (indexInfo->ii_NumIndexAttrs < 1)
00730         elog(ERROR, "must index at least one column");
00731 
00732     if (!allow_system_table_mods &&
00733         IsSystemRelation(heapRelation) &&
00734         IsNormalProcessingMode())
00735         ereport(ERROR,
00736                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00737                  errmsg("user-defined indexes on system catalog tables are not supported")));
00738 
00739     /*
00740      * concurrent index build on a system catalog is unsafe because we tend to
00741      * release locks before committing in catalogs
00742      */
00743     if (concurrent &&
00744         IsSystemRelation(heapRelation))
00745         ereport(ERROR,
00746                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00747                  errmsg("concurrent index creation on system catalog tables is not supported")));
00748 
00749     /*
00750      * This case is currently not supported, but there's no way to ask for it
00751      * in the grammar anyway, so it can't happen.
00752      */
00753     if (concurrent && is_exclusion)
00754         ereport(ERROR,
00755                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00756                  errmsg_internal("concurrent index creation for exclusion constraints is not supported")));
00757 
00758     /*
00759      * We cannot allow indexing a shared relation after initdb (because
00760      * there's no way to make the entry in other databases' pg_class).
00761      */
00762     if (shared_relation && !IsBootstrapProcessingMode())
00763         ereport(ERROR,
00764                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
00765                  errmsg("shared indexes cannot be created after initdb")));
00766 
00767     /*
00768      * Shared relations must be in pg_global, too (last-ditch check)
00769      */
00770     if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
00771         elog(ERROR, "shared relations must be placed in pg_global tablespace");
00772 
00773     if (get_relname_relid(indexRelationName, namespaceId))
00774         ereport(ERROR,
00775                 (errcode(ERRCODE_DUPLICATE_TABLE),
00776                  errmsg("relation \"%s\" already exists",
00777                         indexRelationName)));
00778 
00779     /*
00780      * construct tuple descriptor for index tuples
00781      */
00782     indexTupDesc = ConstructTupleDescriptor(heapRelation,
00783                                             indexInfo,
00784                                             indexColNames,
00785                                             accessMethodObjectId,
00786                                             collationObjectId,
00787                                             classObjectId);
00788 
00789     /*
00790      * Allocate an OID for the index, unless we were told what to use.
00791      *
00792      * The OID will be the relfilenode as well, so make sure it doesn't
00793      * collide with either pg_class OIDs or existing physical files.
00794      */
00795     if (!OidIsValid(indexRelationId))
00796     {
00797         /*
00798          * Use binary-upgrade override for pg_class.oid/relfilenode, if
00799          * supplied.
00800          */
00801         if (IsBinaryUpgrade &&
00802             OidIsValid(binary_upgrade_next_index_pg_class_oid))
00803         {
00804             indexRelationId = binary_upgrade_next_index_pg_class_oid;
00805             binary_upgrade_next_index_pg_class_oid = InvalidOid;
00806         }
00807         else
00808         {
00809             indexRelationId =
00810                 GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
00811         }
00812     }
00813 
00814     /*
00815      * create the index relation's relcache entry and physical disk file. (If
00816      * we fail further down, it's the smgr's responsibility to remove the disk
00817      * file again.)
00818      */
00819     indexRelation = heap_create(indexRelationName,
00820                                 namespaceId,
00821                                 tableSpaceId,
00822                                 indexRelationId,
00823                                 relFileNode,
00824                                 indexTupDesc,
00825                                 RELKIND_INDEX,
00826                                 relpersistence,
00827                                 shared_relation,
00828                                 mapped_relation);
00829 
00830     Assert(indexRelationId == RelationGetRelid(indexRelation));
00831 
00832     /*
00833      * Obtain exclusive lock on it.  Although no other backends can see it
00834      * until we commit, this prevents deadlock-risk complaints from lock
00835      * manager in cases such as CLUSTER.
00836      */
00837     LockRelation(indexRelation, AccessExclusiveLock);
00838 
00839     /*
00840      * Fill in fields of the index's pg_class entry that are not set correctly
00841      * by heap_create.
00842      *
00843      * XXX should have a cleaner way to create cataloged indexes
00844      */
00845     indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
00846     indexRelation->rd_rel->relam = accessMethodObjectId;
00847     indexRelation->rd_rel->relhasoids = false;
00848 
00849     /*
00850      * store index's pg_class entry
00851      */
00852     InsertPgClassTuple(pg_class, indexRelation,
00853                        RelationGetRelid(indexRelation),
00854                        (Datum) 0,
00855                        reloptions);
00856 
00857     /* done with pg_class */
00858     heap_close(pg_class, RowExclusiveLock);
00859 
00860     /*
00861      * now update the object id's of all the attribute tuple forms in the
00862      * index relation's tuple descriptor
00863      */
00864     InitializeAttributeOids(indexRelation,
00865                             indexInfo->ii_NumIndexAttrs,
00866                             indexRelationId);
00867 
00868     /*
00869      * append ATTRIBUTE tuples for the index
00870      */
00871     AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
00872 
00873     /* ----------------
00874      *    update pg_index
00875      *    (append INDEX tuple)
00876      *
00877      *    Note that this stows away a representation of "predicate".
00878      *    (Or, could define a rule to maintain the predicate) --Nels, Feb '92
00879      * ----------------
00880      */
00881     UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
00882                         collationObjectId, classObjectId, coloptions,
00883                         isprimary, is_exclusion,
00884                         !deferrable,
00885                         !concurrent);
00886 
00887     /*
00888      * Register constraint and dependencies for the index.
00889      *
00890      * If the index is from a CONSTRAINT clause, construct a pg_constraint
00891      * entry.  The index will be linked to the constraint, which in turn is
00892      * linked to the table.  If it's not a CONSTRAINT, we need to make a
00893      * dependency directly on the table.
00894      *
00895      * We don't need a dependency on the namespace, because there'll be an
00896      * indirect dependency via our parent table.
00897      *
00898      * During bootstrap we can't register any dependencies, and we don't try
00899      * to make a constraint either.
00900      */
00901     if (!IsBootstrapProcessingMode())
00902     {
00903         ObjectAddress myself,
00904                     referenced;
00905 
00906         myself.classId = RelationRelationId;
00907         myself.objectId = indexRelationId;
00908         myself.objectSubId = 0;
00909 
00910         if (isconstraint)
00911         {
00912             char        constraintType;
00913 
00914             if (isprimary)
00915                 constraintType = CONSTRAINT_PRIMARY;
00916             else if (indexInfo->ii_Unique)
00917                 constraintType = CONSTRAINT_UNIQUE;
00918             else if (is_exclusion)
00919                 constraintType = CONSTRAINT_EXCLUSION;
00920             else
00921             {
00922                 elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
00923                 constraintType = 0;     /* keep compiler quiet */
00924             }
00925 
00926             index_constraint_create(heapRelation,
00927                                     indexRelationId,
00928                                     indexInfo,
00929                                     indexRelationName,
00930                                     constraintType,
00931                                     deferrable,
00932                                     initdeferred,
00933                                     false,      /* already marked primary */
00934                                     false,      /* pg_index entry is OK */
00935                                     false,      /* no old dependencies */
00936                                     allow_system_table_mods,
00937                                     is_internal);
00938         }
00939         else
00940         {
00941             bool        have_simple_col = false;
00942 
00943             /* Create auto dependencies on simply-referenced columns */
00944             for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
00945             {
00946                 if (indexInfo->ii_KeyAttrNumbers[i] != 0)
00947                 {
00948                     referenced.classId = RelationRelationId;
00949                     referenced.objectId = heapRelationId;
00950                     referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
00951 
00952                     recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
00953 
00954                     have_simple_col = true;
00955                 }
00956             }
00957 
00958             /*
00959              * If there are no simply-referenced columns, give the index an
00960              * auto dependency on the whole table.  In most cases, this will
00961              * be redundant, but it might not be if the index expressions and
00962              * predicate contain no Vars or only whole-row Vars.
00963              */
00964             if (!have_simple_col)
00965             {
00966                 referenced.classId = RelationRelationId;
00967                 referenced.objectId = heapRelationId;
00968                 referenced.objectSubId = 0;
00969 
00970                 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
00971             }
00972 
00973             /* Non-constraint indexes can't be deferrable */
00974             Assert(!deferrable);
00975             Assert(!initdeferred);
00976         }
00977 
00978         /* Store dependency on collations */
00979         /* The default collation is pinned, so don't bother recording it */
00980         for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
00981         {
00982             if (OidIsValid(collationObjectId[i]) &&
00983                 collationObjectId[i] != DEFAULT_COLLATION_OID)
00984             {
00985                 referenced.classId = CollationRelationId;
00986                 referenced.objectId = collationObjectId[i];
00987                 referenced.objectSubId = 0;
00988 
00989                 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
00990             }
00991         }
00992 
00993         /* Store dependency on operator classes */
00994         for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
00995         {
00996             referenced.classId = OperatorClassRelationId;
00997             referenced.objectId = classObjectId[i];
00998             referenced.objectSubId = 0;
00999 
01000             recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
01001         }
01002 
01003         /* Store dependencies on anything mentioned in index expressions */
01004         if (indexInfo->ii_Expressions)
01005         {
01006             recordDependencyOnSingleRelExpr(&myself,
01007                                           (Node *) indexInfo->ii_Expressions,
01008                                             heapRelationId,
01009                                             DEPENDENCY_NORMAL,
01010                                             DEPENDENCY_AUTO);
01011         }
01012 
01013         /* Store dependencies on anything mentioned in predicate */
01014         if (indexInfo->ii_Predicate)
01015         {
01016             recordDependencyOnSingleRelExpr(&myself,
01017                                             (Node *) indexInfo->ii_Predicate,
01018                                             heapRelationId,
01019                                             DEPENDENCY_NORMAL,
01020                                             DEPENDENCY_AUTO);
01021         }
01022     }
01023     else
01024     {
01025         /* Bootstrap mode - assert we weren't asked for constraint support */
01026         Assert(!isconstraint);
01027         Assert(!deferrable);
01028         Assert(!initdeferred);
01029     }
01030 
01031     /* Post creation hook for new index */
01032     InvokeObjectPostCreateHookArg(RelationRelationId,
01033                                   indexRelationId, 0, is_internal);
01034 
01035     /*
01036      * Advance the command counter so that we can see the newly-entered
01037      * catalog tuples for the index.
01038      */
01039     CommandCounterIncrement();
01040 
01041     /*
01042      * In bootstrap mode, we have to fill in the index strategy structure with
01043      * information from the catalogs.  If we aren't bootstrapping, then the
01044      * relcache entry has already been rebuilt thanks to sinval update during
01045      * CommandCounterIncrement.
01046      */
01047     if (IsBootstrapProcessingMode())
01048         RelationInitIndexAccessInfo(indexRelation);
01049     else
01050         Assert(indexRelation->rd_indexcxt != NULL);
01051 
01052     /*
01053      * If this is bootstrap (initdb) time, then we don't actually fill in the
01054      * index yet.  We'll be creating more indexes and classes later, so we
01055      * delay filling them in until just before we're done with bootstrapping.
01056      * Similarly, if the caller specified skip_build then filling the index is
01057      * delayed till later (ALTER TABLE can save work in some cases with this).
01058      * Otherwise, we call the AM routine that constructs the index.
01059      */
01060     if (IsBootstrapProcessingMode())
01061     {
01062         index_register(heapRelationId, indexRelationId, indexInfo);
01063     }
01064     else if (skip_build)
01065     {
01066         /*
01067          * Caller is responsible for filling the index later on.  However,
01068          * we'd better make sure that the heap relation is correctly marked as
01069          * having an index.
01070          */
01071         index_update_stats(heapRelation,
01072                            true,
01073                            isprimary,
01074                            InvalidOid,
01075                            -1.0);
01076         /* Make the above update visible */
01077         CommandCounterIncrement();
01078     }
01079     else
01080     {
01081         index_build(heapRelation, indexRelation, indexInfo, isprimary, false);
01082     }
01083 
01084     /*
01085      * Close the index; but we keep the lock that we acquired above until end
01086      * of transaction.  Closing the heap is caller's responsibility.
01087      */
01088     index_close(indexRelation, NoLock);
01089 
01090     return indexRelationId;
01091 }
01092 
01093 /*
01094  * index_constraint_create
01095  *
01096  * Set up a constraint associated with an index: create the pg_constraint entry, make the index internally dependent on it, and (if deferrable) create the recheck trigger
01097  *
01098  * heapRelation: table owning the index (must be suitably locked by caller)
01099  * indexRelationId: OID of the index
01100  * indexInfo: same info executor uses to insert into the index
01101  * constraintName: what it says (generally, should match name of index)
01102  * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
01103  *      CONSTRAINT_EXCLUSION
01104  * deferrable: constraint is DEFERRABLE (a recheck trigger is created in this case)
01105  * initdeferred: constraint is INITIALLY DEFERRED
01106  * mark_as_primary: if true, set flags to mark index as primary key
01107  * update_pgindex: if true, update pg_index row (else caller's done that)
01108  * remove_old_dependencies: if true, remove existing dependencies of index
01109  *      on table's columns
01110  * allow_system_table_mods: allow table to be a system catalog (else that is an error in normal processing mode)
01111  * is_internal: index is constructed due to internal process (passed on to CreateConstraintEntry and the post-alter hook)
01112  */
01113 void
01114 index_constraint_create(Relation heapRelation,
01115                         Oid indexRelationId,
01116                         IndexInfo *indexInfo,
01117                         const char *constraintName,
01118                         char constraintType,
01119                         bool deferrable,
01120                         bool initdeferred,
01121                         bool mark_as_primary,
01122                         bool update_pgindex,
01123                         bool remove_old_dependencies,
01124                         bool allow_system_table_mods,
01125                         bool is_internal)
01126 {
01127     Oid         namespaceId = RelationGetNamespace(heapRelation);    /* the table's namespace; used for the constraint and the trigger's RangeVar */
01128     ObjectAddress myself,
01129                 referenced;
01130     Oid         conOid;    /* result of CreateConstraintEntry; identifies the new constraint below */
01131 
01132     /* constraint creation support doesn't work while bootstrapping */
01133     Assert(!IsBootstrapProcessingMode());
01134 
01135     /* enforce system-table restriction (unless caller explicitly allowed system table mods) */
01136     if (!allow_system_table_mods &&
01137         IsSystemRelation(heapRelation) &&
01138         IsNormalProcessingMode())
01139         ereport(ERROR,
01140                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
01141                  errmsg("user-defined indexes on system catalog tables are not supported")));
01142 
01143     /* primary/unique constraints shouldn't have any expressions; only exclusion constraints may */
01144     if (indexInfo->ii_Expressions &&
01145         constraintType != CONSTRAINT_EXCLUSION)
01146         elog(ERROR, "constraints cannot have index expressions");
01147 
01148     /*
01149      * If we're manufacturing a constraint for a pre-existing index, we need
01150      * to get rid of the existing auto dependencies for the index (the ones
01151      * that index_create() would have made instead of calling this function).
01152      *
01153      * Note: this code would not necessarily do the right thing if the index
01154      * has any expressions or predicate, but we'd never be turning such an
01155      * index into a UNIQUE or PRIMARY KEY constraint.
01156      */
01157     if (remove_old_dependencies)
01158         deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
01159                                         RelationRelationId, DEPENDENCY_AUTO);
01160 
01161     /*
01162      * Construct a pg_constraint entry, keeping its OID for the dependency and trigger below.
01163      */
01164     conOid = CreateConstraintEntry(constraintName,
01165                                    namespaceId,
01166                                    constraintType,
01167                                    deferrable,
01168                                    initdeferred,
01169                                    true,        /* NOTE(review): presumably "constraint is validated"; confirm against CreateConstraintEntry */
01170                                    RelationGetRelid(heapRelation),
01171                                    indexInfo->ii_KeyAttrNumbers,
01172                                    indexInfo->ii_NumIndexAttrs,
01173                                    InvalidOid,  /* no domain */
01174                                    indexRelationId,     /* index OID */
01175                                    InvalidOid,  /* no foreign key */
01176                                    NULL,        /* NOTE(review): this and the next seven args look foreign-key-only (unused here); confirm */
01177                                    NULL,
01178                                    NULL,
01179                                    NULL,
01180                                    0,
01181                                    ' ',
01182                                    ' ',
01183                                    ' ',
01184                                    indexInfo->ii_ExclusionOps,
01185                                    NULL,        /* no check constraint */
01186                                    NULL,
01187                                    NULL,
01188                                    true,        /* islocal */
01189                                    0,   /* inhcount */
01190                                    true,        /* noinherit */
01191                                    is_internal);
01192 
01193     /*
01194      * Register the index as internally dependent on the constraint.
01195      *
01196      * Note that the constraint has a dependency on the table, so we don't
01197      * need (or want) any direct dependency from the index to the table.
01198      */
01199     myself.classId = RelationRelationId;
01200     myself.objectId = indexRelationId;
01201     myself.objectSubId = 0;
01202 
01203     referenced.classId = ConstraintRelationId;
01204     referenced.objectId = conOid;
01205     referenced.objectSubId = 0;
01206 
01207     recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
01208 
01209     /*
01210      * If the constraint is deferrable, create the deferred uniqueness
01211      * checking trigger.  (The trigger will be given an internal dependency on
01212      * the constraint by CreateTrigger.)
01213      */
01214     if (deferrable)
01215     {
01216         RangeVar   *heapRel;    /* names the table by its namespace name and relation name */
01217         CreateTrigStmt *trigger;
01218 
01219         heapRel = makeRangeVar(get_namespace_name(namespaceId),
01220                                pstrdup(RelationGetRelationName(heapRelation)),
01221                                -1);
01222 
01223         trigger = makeNode(CreateTrigStmt);    /* filled in below as a row-level AFTER INSERT/UPDATE constraint trigger */
01224         trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
01225             "PK_ConstraintTrigger" :
01226             "Unique_ConstraintTrigger";    /* used for every constraint type other than PRIMARY */
01227         trigger->relation = heapRel;
01228         trigger->funcname = SystemFuncName("unique_key_recheck");
01229         trigger->args = NIL;
01230         trigger->row = true;
01231         trigger->timing = TRIGGER_TYPE_AFTER;
01232         trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
01233         trigger->columns = NIL;
01234         trigger->whenClause = NULL;
01235         trigger->isconstraint = true;
01236         trigger->deferrable = true;
01237         trigger->initdeferred = initdeferred;
01238         trigger->constrrel = NULL;
01239 
01240         (void) CreateTrigger(trigger, NULL, conOid, indexRelationId, true);    /* result deliberately ignored */
01241     }
01242 
01243     /*
01244      * If needed, mark the table as having a primary key.  We assume it can't
01245      * have been so marked already, so no need to clear the flag in the other
01246      * case.
01247      *
01248      * Note: this might better be done by callers.  We do it here to avoid
01249      * exposing index_update_stats() globally, but that wouldn't be necessary
01250      * if relhaspkey went away.
01251      */
01252     if (mark_as_primary)
01253         index_update_stats(heapRelation,
01254                            true,
01255                            true,
01256                            InvalidOid,
01257                            -1.0);
01258 
01259     /*
01260      * If needed, mark the index as primary and/or deferred in pg_index.
01261      *
01262      * Note: since this is a transactional update, it's unsafe against
01263      * concurrent SnapshotNow scans of pg_index.  When making an existing
01264      * index into a constraint, caller must have a table lock that prevents
01265      * concurrent table updates; if it's less than a full exclusive lock,
01266      * there is a risk that concurrent readers of the table will miss seeing
01267      * this index at all.
01268      */
01269     if (update_pgindex && (mark_as_primary || deferrable))    /* otherwise there is no flag to change */
01270     {
01271         Relation    pg_index;
01272         HeapTuple   indexTuple;
01273         Form_pg_index indexForm;
01274         bool        dirty = false;    /* becomes true only if a flag actually changes */
01275 
01276         pg_index = heap_open(IndexRelationId, RowExclusiveLock);
01277 
01278         indexTuple = SearchSysCacheCopy1(INDEXRELID,    /* a copy we modify in place and free below */
01279                                          ObjectIdGetDatum(indexRelationId));
01280         if (!HeapTupleIsValid(indexTuple))
01281             elog(ERROR, "cache lookup failed for index %u", indexRelationId);
01282         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
01283 
01284         if (mark_as_primary && !indexForm->indisprimary)
01285         {
01286             indexForm->indisprimary = true;
01287             dirty = true;
01288         }
01289 
01290         if (deferrable && indexForm->indimmediate)    /* a deferrable constraint's index must not be marked immediate */
01291         {
01292             indexForm->indimmediate = false;
01293             dirty = true;
01294         }
01295 
01296         if (dirty)    /* write the row, and fire the post-alter hook, only when something changed */
01297         {
01298             simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
01299             CatalogUpdateIndexes(pg_index, indexTuple);
01300 
01301             InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
01302                                          InvalidOid, is_internal);
01303         }
01304 
01305         heap_freetuple(indexTuple);
01306         heap_close(pg_index, RowExclusiveLock);
01307     }
01308 }
01309 
01310 /*
01311  *      index_drop
01312  *
01313  * NOTE: this routine should now only be called through performDeletion(),
01314  * else associated dependencies won't be cleaned up.
01315  */
01316 void
01317 index_drop(Oid indexId, bool concurrent)
01318 {
01319     Oid         heapId;
01320     Relation    userHeapRelation;
01321     Relation    userIndexRelation;
01322     Relation    indexRelation;
01323     HeapTuple   tuple;
01324     bool        hasexprs;
01325     LockRelId   heaprelid,
01326                 indexrelid;
01327     LOCKTAG     heaplocktag;
01328     LOCKMODE    lockmode;
01329     VirtualTransactionId *old_lockholders;
01330 
01331     /*
01332      * To drop an index safely, we must grab exclusive lock on its parent
01333      * table.  Exclusive lock on the index alone is insufficient because
01334      * another backend might be about to execute a query on the parent table.
01335      * If it relies on a previously cached list of index OIDs, then it could
01336      * attempt to access the just-dropped index.  We must therefore take a
01337      * table lock strong enough to prevent all queries on the table from
01338      * proceeding until we commit and send out a shared-cache-inval notice
01339      * that will make them update their index lists.
01340      *
01341      * In the concurrent case we avoid this requirement by disabling index use
01342      * in multiple steps and waiting out any transactions that might be using
01343      * the index, so we don't need exclusive lock on the parent table. Instead
01344      * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
01345      * doing CREATE/DROP INDEX CONCURRENTLY on the same index.  (We will get
01346      * AccessExclusiveLock on the index below, once we're sure nobody else is
01347      * using it.)
01348      */
01349     heapId = IndexGetRelation(indexId, false);
01350     lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock;
01351     userHeapRelation = heap_open(heapId, lockmode);
01352     userIndexRelation = index_open(indexId, lockmode);
01353 
01354     /*
01355      * We might still have open queries using it in our own session, which the
01356      * above locking won't prevent, so test explicitly.
01357      */
01358     CheckTableNotInUse(userIndexRelation, "DROP INDEX");
01359 
01360     /*
01361      * Drop Index Concurrently is more or less the reverse process of Create
01362      * Index Concurrently.
01363      *
01364      * First we unset indisvalid so queries starting afterwards don't use the
01365      * index to answer queries anymore.  We have to keep indisready = true so
01366      * transactions that are still scanning the index can continue to see
01367      * valid index contents.  For instance, if they are using READ COMMITTED
01368      * mode, and another transaction makes changes and commits, they need to
01369      * see those new tuples in the index.
01370      *
01371      * After all transactions that could possibly have used the index for
01372      * queries end, we can unset indisready and indislive, then wait till
01373      * nobody could be touching it anymore.  (Note: we need indislive because
01374      * this state must be distinct from the initial state during CREATE INDEX
01375      * CONCURRENTLY, which has indislive true while indisready and indisvalid
01376      * are false.  That's because in that state, transactions must examine the
01377      * index for HOT-safety decisions, while in this state we don't want them
01378      * to open it at all.)
01379      *
01380      * Since all predicate locks on the index are about to be made invalid, we
01381      * must promote them to predicate locks on the heap.  In the
01382      * non-concurrent case we can just do that now.  In the concurrent case
01383      * it's a bit trickier.  The predicate locks must be moved when there are
01384      * no index scans in progress on the index and no more can subsequently
01385      * start, so that no new predicate locks can be made on the index.  Also,
01386      * they must be moved before heap inserts stop maintaining the index, else
01387      * the conflict with the predicate lock on the index gap could be missed
01388      * before the lock on the heap relation is in place to detect a conflict
01389      * based on the heap tuple insert.
01390      */
01391     if (concurrent)
01392     {
01393         /*
01394          * We must commit our transaction in order to make the first pg_index
01395          * state update visible to other sessions.  If the DROP machinery has
01396          * already performed any other actions (removal of other objects,
01397          * pg_depend entries, etc), the commit would make those actions
01398          * permanent, which would leave us with inconsistent catalog state if
01399          * we fail partway through the following sequence.  Since DROP INDEX
01400          * CONCURRENTLY is restricted to dropping just one index that has no
01401          * dependencies, we should get here before anything's been done ---
01402          * but let's check that to be sure.  We can verify that the current
01403          * transaction has not executed any transactional updates by checking
01404          * that no XID has been assigned.
01405          */
01406         if (GetTopTransactionIdIfAny() != InvalidTransactionId)
01407             ereport(ERROR,
01408                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
01409                      errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
01410 
01411         /*
01412          * Mark index invalid by updating its pg_index entry
01413          */
01414         index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);
01415 
01416         /*
01417          * Invalidate the relcache for the table, so that after this commit
01418          * all sessions will refresh any cached plans that might reference the
01419          * index.
01420          */
01421         CacheInvalidateRelcache(userHeapRelation);
01422 
01423         /* save lockrelid and locktag for below, then close but keep locks */
01424         heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
01425         SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
01426         indexrelid = userIndexRelation->rd_lockInfo.lockRelId;
01427 
01428         heap_close(userHeapRelation, NoLock);
01429         index_close(userIndexRelation, NoLock);
01430 
01431         /*
01432          * We must commit our current transaction so that the indisvalid
01433          * update becomes visible to other transactions; then start another.
01434          * Note that any previously-built data structures are lost in the
01435          * commit.  The only data we keep past here are the relation IDs.
01436          *
01437          * Before committing, get a session-level lock on the table, to ensure
01438          * that neither it nor the index can be dropped before we finish. This
01439          * cannot block, even if someone else is waiting for access, because
01440          * we already have the same lock within our transaction.
01441          */
01442         LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
01443         LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
01444 
01445         PopActiveSnapshot();
01446         CommitTransactionCommand();
01447         StartTransactionCommand();
01448 
01449         /*
01450          * Now we must wait until no running transaction could be using the
01451          * index for a query.  To do this, inquire which xacts currently would
01452          * conflict with AccessExclusiveLock on the table -- ie, which ones
01453          * have a lock of any kind on the table. Then wait for each of these
01454          * xacts to commit or abort. Note we do not need to worry about xacts
01455          * that open the table for reading after this point; they will see the
01456          * index as invalid when they open the relation.
01457          *
01458          * Note: the reason we use actual lock acquisition here, rather than
01459          * just checking the ProcArray and sleeping, is that deadlock is
01460          * possible if one of the transactions in question is blocked trying
01461          * to acquire an exclusive lock on our table.  The lock code will
01462          * detect deadlock and error out properly.
01463          *
01464          * Note: GetLockConflicts() never reports our own xid, hence we need
01465          * not check for that.  Also, prepared xacts are not reported, which
01466          * is fine since they certainly aren't going to do anything more.
01467          */
01468         old_lockholders = GetLockConflicts(&heaplocktag, AccessExclusiveLock);
01469 
01470         while (VirtualTransactionIdIsValid(*old_lockholders))
01471         {
01472             VirtualXactLock(*old_lockholders, true);
01473             old_lockholders++;
01474         }
01475 
01476         /*
01477          * No more predicate locks will be acquired on this index, and we're
01478          * about to stop doing inserts into the index which could show
01479          * conflicts with existing predicate locks, so now is the time to move
01480          * them to the heap relation.
01481          */
01482         userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
01483         userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
01484         TransferPredicateLocksToHeapRelation(userIndexRelation);
01485 
01486         /*
01487          * Now we are sure that nobody uses the index for queries; they just
01488          * might have it open for updating it.  So now we can unset indisready
01489          * and indislive, then wait till nobody could be using it at all
01490          * anymore.
01491          */
01492         index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
01493 
01494         /*
01495          * Invalidate the relcache for the table, so that after this commit
01496          * all sessions will refresh the table's index list.  Forgetting just
01497          * the index's relcache entry is not enough.
01498          */
01499         CacheInvalidateRelcache(userHeapRelation);
01500 
01501         /*
01502          * Close the relations again, though still holding session lock.
01503          */
01504         heap_close(userHeapRelation, NoLock);
01505         index_close(userIndexRelation, NoLock);
01506 
01507         /*
01508          * Again, commit the transaction to make the pg_index update visible
01509          * to other sessions.
01510          */
01511         CommitTransactionCommand();
01512         StartTransactionCommand();
01513 
01514         /*
01515          * Wait till every transaction that saw the old index state has
01516          * finished.  The logic here is the same as above.
01517          */
01518         old_lockholders = GetLockConflicts(&heaplocktag, AccessExclusiveLock);
01519 
01520         while (VirtualTransactionIdIsValid(*old_lockholders))
01521         {
01522             VirtualXactLock(*old_lockholders, true);
01523             old_lockholders++;
01524         }
01525 
01526         /*
01527          * Re-open relations to allow us to complete our actions.
01528          *
01529          * At this point, nothing should be accessing the index, but let's
01530          * leave nothing to chance and grab AccessExclusiveLock on the index
01531          * before the physical deletion.
01532          */
01533         userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
01534         userIndexRelation = index_open(indexId, AccessExclusiveLock);
01535     }
01536     else
01537     {
01538         /* Not concurrent, so just transfer predicate locks and we're good */
01539         TransferPredicateLocksToHeapRelation(userIndexRelation);
01540     }
01541 
01542     /*
01543      * Schedule physical removal of the files
01544      */
01545     RelationDropStorage(userIndexRelation);
01546 
01547     /*
01548      * Close and flush the index's relcache entry, to ensure relcache doesn't
01549      * try to rebuild it while we're deleting catalog entries. We keep the
01550      * lock though.
01551      */
01552     index_close(userIndexRelation, NoLock);
01553 
01554     RelationForgetRelation(indexId);
01555 
01556     /*
01557      * fix INDEX relation, and check for expressional index
01558      */
01559     indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
01560 
01561     tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
01562     if (!HeapTupleIsValid(tuple))
01563         elog(ERROR, "cache lookup failed for index %u", indexId);
01564 
01565     hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
01566 
01567     simple_heap_delete(indexRelation, &tuple->t_self);
01568 
01569     ReleaseSysCache(tuple);
01570     heap_close(indexRelation, RowExclusiveLock);
01571 
01572     /*
01573      * if it has any expression columns, we might have stored statistics about
01574      * them.
01575      */
01576     if (hasexprs)
01577         RemoveStatistics(indexId, 0);
01578 
01579     /*
01580      * fix ATTRIBUTE relation
01581      */
01582     DeleteAttributeTuples(indexId);
01583 
01584     /*
01585      * fix RELATION relation
01586      */
01587     DeleteRelationTuple(indexId);
01588 
01589     /*
01590      * We are presently too lazy to attempt to compute the new correct value
01591      * of relhasindex (the next VACUUM will fix it if necessary). So there is
01592      * no need to update the pg_class tuple for the owning relation. But we
01593      * must send out a shared-cache-inval notice on the owning relation to
01594      * ensure other backends update their relcache lists of indexes.  (In the
01595      * concurrent case, this is redundant but harmless.)
01596      */
01597     CacheInvalidateRelcache(userHeapRelation);
01598 
01599     /*
01600      * Close owning rel, but keep lock
01601      */
01602     heap_close(userHeapRelation, NoLock);
01603 
01604     /*
01605      * Release the session locks before we go.
01606      */
01607     if (concurrent)
01608     {
01609         UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
01610         UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
01611     }
01612 }
01613 
01614 /* ----------------------------------------------------------------
01615  *                      index_build support
01616  * ----------------------------------------------------------------
01617  */
01618 
01619 /* ----------------
01620  *      BuildIndexInfo
01621  *          Construct an IndexInfo record for an open index
01622  *
01623  * IndexInfo stores the information about the index that's needed by
01624  * FormIndexDatum, which is used for both index_build() and later insertion
01625  * of individual index tuples.  Normally we build an IndexInfo for an index
01626  * just once per command, and then use it for (potentially) many tuples.
01627  * ----------------
01628  */
01629 IndexInfo *
01630 BuildIndexInfo(Relation index)
01631 {
01632     IndexInfo  *ii = makeNode(IndexInfo);
01633     Form_pg_index indexStruct = index->rd_index;
01634     int         i;
01635     int         numKeys;
01636 
01637     /* check the number of keys, and copy attr numbers into the IndexInfo */
01638     numKeys = indexStruct->indnatts;
01639     if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
01640         elog(ERROR, "invalid indnatts %d for index %u",
01641              numKeys, RelationGetRelid(index));
01642     ii->ii_NumIndexAttrs = numKeys;
01643     for (i = 0; i < numKeys; i++)
01644         ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
01645 
01646     /* fetch any expressions needed for expressional indexes */
01647     ii->ii_Expressions = RelationGetIndexExpressions(index);
01648     ii->ii_ExpressionsState = NIL;
01649 
01650     /* fetch index predicate if any */
01651     ii->ii_Predicate = RelationGetIndexPredicate(index);
01652     ii->ii_PredicateState = NIL;
01653 
01654     /* fetch exclusion constraint info if any */
01655     if (indexStruct->indisexclusion)
01656     {
01657         RelationGetExclusionInfo(index,
01658                                  &ii->ii_ExclusionOps,
01659                                  &ii->ii_ExclusionProcs,
01660                                  &ii->ii_ExclusionStrats);
01661     }
01662     else
01663     {
01664         ii->ii_ExclusionOps = NULL;
01665         ii->ii_ExclusionProcs = NULL;
01666         ii->ii_ExclusionStrats = NULL;
01667     }
01668 
01669     /* other info */
01670     ii->ii_Unique = indexStruct->indisunique;
01671     ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
01672 
01673     /* initialize index-build state to default */
01674     ii->ii_Concurrent = false;
01675     ii->ii_BrokenHotChain = false;
01676 
01677     return ii;
01678 }
01679 
01680 /* ----------------
01681  *      FormIndexDatum
01682  *          Construct values[] and isnull[] arrays for a new index tuple.
01683  *
01684  *  indexInfo       Info about the index
01685  *  slot            Heap tuple for which we must prepare an index entry
01686  *  estate          executor state for evaluating any index expressions
01687  *  values          Array of index Datums (output area)
01688  *  isnull          Array of is-null indicators (output area)
01689  *
01690  * When there are no index expressions, estate may be NULL.  Otherwise it
01691  * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
01692  * context must point to the heap tuple passed in.
01693  *
01694  * Notice we don't actually call index_form_tuple() here; we just prepare
01695  * its input arrays values[] and isnull[].  This is because the index AM
01696  * may wish to alter the data before storage.
01697  * ----------------
01698  */
01699 void
01700 FormIndexDatum(IndexInfo *indexInfo,
01701                TupleTableSlot *slot,
01702                EState *estate,
01703                Datum *values,
01704                bool *isnull)
01705 {
01706     ListCell   *indexpr_item;
01707     int         i;
01708 
01709     if (indexInfo->ii_Expressions != NIL &&
01710         indexInfo->ii_ExpressionsState == NIL)
01711     {
01712         /* First time through, set up expression evaluation state */
01713         indexInfo->ii_ExpressionsState = (List *)
01714             ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
01715                             estate);
01716         /* Check caller has set up context correctly */
01717         Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
01718     }
01719     indexpr_item = list_head(indexInfo->ii_ExpressionsState);
01720 
01721     for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
01722     {
01723         int         keycol = indexInfo->ii_KeyAttrNumbers[i];
01724         Datum       iDatum;
01725         bool        isNull;
01726 
01727         if (keycol != 0)
01728         {
01729             /*
01730              * Plain index column; get the value we need directly from the
01731              * heap tuple.
01732              */
01733             iDatum = slot_getattr(slot, keycol, &isNull);
01734         }
01735         else
01736         {
01737             /*
01738              * Index expression --- need to evaluate it.
01739              */
01740             if (indexpr_item == NULL)
01741                 elog(ERROR, "wrong number of index expressions");
01742             iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
01743                                                GetPerTupleExprContext(estate),
01744                                                &isNull,
01745                                                NULL);
01746             indexpr_item = lnext(indexpr_item);
01747         }
01748         values[i] = iDatum;
01749         isnull[i] = isNull;
01750     }
01751 
01752     if (indexpr_item != NULL)
01753         elog(ERROR, "wrong number of index expressions");
01754 }
01755 
01756 
01757 /*
01758  * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
01759  *
01760  * This routine updates the pg_class row of either an index or its parent
01761  * relation after CREATE INDEX or REINDEX.  Its rather bizarre API is designed
01762  * to ensure we can do all the necessary work in just one update.
01763  *
01764  * hasindex: set relhasindex to this value
01765  * isprimary: if true, set relhaspkey true; else no change
01766  * reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
01767  *      else no change
01768  * reltuples: if >= 0, set reltuples to this value; else no change
01769  *
01770  * If reltuples >= 0, relpages and relallvisible are also updated (using
01771  * RelationGetNumberOfBlocks() and visibilitymap_count()).
01772  *
01773  * NOTE: an important side-effect of this operation is that an SI invalidation
01774  * message is sent out to all backends --- including me --- causing relcache
01775  * entries to be flushed or updated with the new data.  This must happen even
01776  * if we find that no change is needed in the pg_class row.  When updating
01777  * a heap entry, this ensures that other backends find out about the new
01778  * index.  When updating an index, it's important because some index AMs
01779  * expect a relcache flush to occur after REINDEX.
01780  */
01781 static void
01782 index_update_stats(Relation rel,
01783                    bool hasindex, bool isprimary,
01784                    Oid reltoastidxid, double reltuples)
01785 {
01786     Oid         relid = RelationGetRelid(rel);
01787     Relation    pg_class;
01788     HeapTuple   tuple;
01789     Form_pg_class rd_rel;
01790     bool        dirty;
01791 
01792     /*
01793      * We always update the pg_class row using a non-transactional,
01794      * overwrite-in-place update.  There are several reasons for this:
01795      *
01796      * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
01797      *
01798      * 2. We could be reindexing pg_class itself, in which case we can't move
01799      * its pg_class row because CatalogUpdateIndexes might not know about all
01800      * the indexes yet (see reindex_relation).
01801      *
01802      * 3. Because we execute CREATE INDEX with just share lock on the parent
01803      * rel (to allow concurrent index creations), an ordinary update could
01804      * suffer a tuple-concurrently-updated failure against another CREATE
01805      * INDEX committing at about the same time.  We can avoid that by having
01806      * them both do nontransactional updates (we assume they will both be
01807      * trying to change the pg_class row to the same thing, so it doesn't
01808      * matter which goes first).
01809      *
01810      * 4. Even with just a single CREATE INDEX, there's a risk factor because
01811      * someone else might be trying to open the rel while we commit, and this
01812      * creates a race condition as to whether he will see both or neither of
01813      * the pg_class row versions as valid.  Again, a non-transactional update
01814      * avoids the risk.  It is indeterminate which state of the row the other
01815      * process will see, but it doesn't matter (if he's only taking
01816      * AccessShareLock, then it's not critical that he see relhasindex true).
01817      *
01818      * It is safe to use a non-transactional update even though our
01819      * transaction could still fail before committing.  Setting relhasindex
01820      * true is safe even if there are no indexes (VACUUM will eventually fix
01821      * it), likewise for relhaspkey.  And of course the new relpages and
01822      * reltuples counts are correct regardless.  However, we don't want to
01823      * change relpages (or relallvisible) if the caller isn't providing an
01824      * updated reltuples count, because that would bollix the
01825      * reltuples/relpages ratio which is what's really important.
01826      */
01827 
01828     pg_class = heap_open(RelationRelationId, RowExclusiveLock);
01829 
01830     /*
01831      * Make a copy of the tuple to update.  Normally we use the syscache, but
01832      * we can't rely on that during bootstrap or while reindexing pg_class
01833      * itself.
01834      */
01835     if (IsBootstrapProcessingMode() ||
01836         ReindexIsProcessingHeap(RelationRelationId))
01837     {
01838         /* don't assume syscache will work */
01839         HeapScanDesc pg_class_scan;
01840         ScanKeyData key[1];
01841 
01842         ScanKeyInit(&key[0],
01843                     ObjectIdAttributeNumber,
01844                     BTEqualStrategyNumber, F_OIDEQ,
01845                     ObjectIdGetDatum(relid));
01846 
01847         pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
01848         tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
01849         tuple = heap_copytuple(tuple);
01850         heap_endscan(pg_class_scan);
01851     }
01852     else
01853     {
01854         /* normal case, use syscache */
01855         tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
01856     }
01857 
01858     if (!HeapTupleIsValid(tuple))
01859         elog(ERROR, "could not find tuple for relation %u", relid);
01860     rd_rel = (Form_pg_class) GETSTRUCT(tuple);
01861 
01862     /* Apply required updates, if any, to copied tuple */
01863 
01864     dirty = false;
01865     if (rd_rel->relhasindex != hasindex)
01866     {
01867         rd_rel->relhasindex = hasindex;
01868         dirty = true;
01869     }
01870     if (isprimary)
01871     {
01872         if (!rd_rel->relhaspkey)
01873         {
01874             rd_rel->relhaspkey = true;
01875             dirty = true;
01876         }
01877     }
01878     if (OidIsValid(reltoastidxid))
01879     {
01880         Assert(rd_rel->relkind == RELKIND_TOASTVALUE);
01881         if (rd_rel->reltoastidxid != reltoastidxid)
01882         {
01883             rd_rel->reltoastidxid = reltoastidxid;
01884             dirty = true;
01885         }
01886     }
01887 
01888     if (reltuples >= 0)
01889     {
01890         BlockNumber relpages = RelationGetNumberOfBlocks(rel);
01891         BlockNumber relallvisible;
01892 
01893         if (rd_rel->relkind != RELKIND_INDEX)
01894             relallvisible = visibilitymap_count(rel);
01895         else    /* don't bother for indexes */
01896             relallvisible = 0;
01897 
01898         if (rd_rel->relpages != (int32) relpages)
01899         {
01900             rd_rel->relpages = (int32) relpages;
01901             dirty = true;
01902         }
01903         if (rd_rel->reltuples != (float4) reltuples)
01904         {
01905             rd_rel->reltuples = (float4) reltuples;
01906             dirty = true;
01907         }
01908         if (rd_rel->relallvisible != (int32) relallvisible)
01909         {
01910             rd_rel->relallvisible = (int32) relallvisible;
01911             dirty = true;
01912         }
01913     }
01914 
01915     /*
01916      * If anything changed, write out the tuple
01917      */
01918     if (dirty)
01919     {
01920         heap_inplace_update(pg_class, tuple);
01921         /* the above sends a cache inval message */
01922     }
01923     else
01924     {
01925         /* no need to change tuple, but force relcache inval anyway */
01926         CacheInvalidateRelcacheByTuple(tuple);
01927     }
01928 
01929     heap_freetuple(tuple);
01930 
01931     heap_close(pg_class, RowExclusiveLock);
01932 }
01933 
01934 
01935 /*
01936  * index_build - invoke access-method-specific index build procedure
01937  *
01938  * On entry, the index's catalog entries are valid, and its physical disk
01939  * file has been created but is empty.  We call the AM-specific build
01940  * procedure to fill in the index contents.  We then update the pg_class
01941  * entries of the index and heap relation as needed, using statistics
01942  * returned by ambuild as well as data passed by the caller.
01943  *
01944  * isprimary tells whether to mark the index as a primary-key index.
01945  * isreindex indicates we are recreating a previously-existing index.
01946  *
01947  * Note: when reindexing an existing index, isprimary can be false even if
01948  * the index is a PK; it's already properly marked and need not be re-marked.
01949  *
01950  * Note: before Postgres 8.2, the passed-in heap and index Relations
01951  * were automatically closed by this routine.  This is no longer the case.
01952  * The caller opened 'em, and the caller should close 'em.
01953  */
01954 void
01955 index_build(Relation heapRelation,
01956             Relation indexRelation,
01957             IndexInfo *indexInfo,
01958             bool isprimary,
01959             bool isreindex)
01960 {
01961     RegProcedure procedure;
01962     IndexBuildResult *stats;
01963     Oid         save_userid;
01964     int         save_sec_context;
01965     int         save_nestlevel;
01966 
01967     /*
01968      * sanity checks
01969      */
01970     Assert(RelationIsValid(indexRelation));
01971     Assert(PointerIsValid(indexRelation->rd_am));
01972 
01973     procedure = indexRelation->rd_am->ambuild;
01974     Assert(RegProcedureIsValid(procedure));
01975 
01976     ereport(DEBUG1,
01977             (errmsg("building index \"%s\" on table \"%s\"",
01978                     RelationGetRelationName(indexRelation),
01979                     RelationGetRelationName(heapRelation))));
01980 
01981     /*
01982      * Switch to the table owner's userid, so that any index functions are run
01983      * as that user.  Also lock down security-restricted operations and
01984      * arrange to make GUC variable changes local to this command.
01985      */
01986     GetUserIdAndSecContext(&save_userid, &save_sec_context);
01987     SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
01988                            save_sec_context | SECURITY_RESTRICTED_OPERATION);
01989     save_nestlevel = NewGUCNestLevel();
01990 
01991     /*
01992      * Call the access method's build procedure
01993      */
01994     stats = (IndexBuildResult *)
01995         DatumGetPointer(OidFunctionCall3(procedure,
01996                                          PointerGetDatum(heapRelation),
01997                                          PointerGetDatum(indexRelation),
01998                                          PointerGetDatum(indexInfo)));
01999     Assert(PointerIsValid(stats));
02000 
02001     /*
02002      * If this is an unlogged index, we may need to write out an init fork for
02003      * it -- but we must first check whether one already exists.  If, for
02004      * example, an unlogged relation is truncated in the transaction that
02005      * created it, or truncated twice in a subsequent transaction, the
02006      * relfilenode won't change, and nothing needs to be done here.
02007      */
02008     if (heapRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
02009         !smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
02010     {
02011         RegProcedure ambuildempty = indexRelation->rd_am->ambuildempty;
02012 
02013         RelationOpenSmgr(indexRelation);
02014         smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
02015         OidFunctionCall1(ambuildempty, PointerGetDatum(indexRelation));
02016     }
02017 
02018     /*
02019      * If we found any potentially broken HOT chains, mark the index as not
02020      * being usable until the current transaction is below the event horizon.
02021      * See src/backend/access/heap/README.HOT for discussion.
02022      *
02023      * However, when reindexing an existing index, we should do nothing here.
02024      * Any HOT chains that are broken with respect to the index must predate
02025      * the index's original creation, so there is no need to change the
02026      * index's usability horizon.  Moreover, we *must not* try to change the
02027      * index's pg_index entry while reindexing pg_index itself, and this
02028      * optimization nicely prevents that.
02029      *
02030      * We also need not set indcheckxmin during a concurrent index build,
02031      * because we won't set indisvalid true until all transactions that care
02032      * about the broken HOT chains are gone.
02033      *
02034      * Therefore, this code path can only be taken during non-concurrent
02035      * CREATE INDEX.  Thus the fact that heap_update will set the pg_index
02036      * tuple's xmin doesn't matter, because that tuple was created in the
02037      * current transaction anyway.  That also means we don't need to worry
02038      * about any concurrent readers of the tuple; no other transaction can see
02039      * it yet.
02040      */
02041     if (indexInfo->ii_BrokenHotChain && !isreindex &&
02042         !indexInfo->ii_Concurrent)
02043     {
02044         Oid         indexId = RelationGetRelid(indexRelation);
02045         Relation    pg_index;
02046         HeapTuple   indexTuple;
02047         Form_pg_index indexForm;
02048 
02049         pg_index = heap_open(IndexRelationId, RowExclusiveLock);
02050 
02051         indexTuple = SearchSysCacheCopy1(INDEXRELID,
02052                                          ObjectIdGetDatum(indexId));
02053         if (!HeapTupleIsValid(indexTuple))
02054             elog(ERROR, "cache lookup failed for index %u", indexId);
02055         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
02056 
02057         /* If it's a new index, indcheckxmin shouldn't be set ... */
02058         Assert(!indexForm->indcheckxmin);
02059 
02060         indexForm->indcheckxmin = true;
02061         simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
02062         CatalogUpdateIndexes(pg_index, indexTuple);
02063 
02064         heap_freetuple(indexTuple);
02065         heap_close(pg_index, RowExclusiveLock);
02066     }
02067 
02068     /*
02069      * Update heap and index pg_class rows
02070      */
02071     index_update_stats(heapRelation,
02072                        true,
02073                        isprimary,
02074                        (heapRelation->rd_rel->relkind == RELKIND_TOASTVALUE) ?
02075                        RelationGetRelid(indexRelation) : InvalidOid,
02076                        stats->heap_tuples);
02077 
02078     index_update_stats(indexRelation,
02079                        false,
02080                        false,
02081                        InvalidOid,
02082                        stats->index_tuples);
02083 
02084     /* Make the updated catalog row versions visible */
02085     CommandCounterIncrement();
02086 
02087     /*
02088      * If it's for an exclusion constraint, make a second pass over the heap
02089      * to verify that the constraint is satisfied.  We must not do this until
02090      * the index is fully valid.  (Broken HOT chains shouldn't matter, though;
02091      * see comments for IndexCheckExclusion.)
02092      */
02093     if (indexInfo->ii_ExclusionOps != NULL)
02094         IndexCheckExclusion(heapRelation, indexRelation, indexInfo);
02095 
02096     /* Roll back any GUC changes executed by index functions */
02097     AtEOXact_GUC(false, save_nestlevel);
02098 
02099     /* Restore userid and security context */
02100     SetUserIdAndSecContext(save_userid, save_sec_context);
02101 }
02102 
02103 
02104 /*
02105  * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
02106  *
02107  * This is called back from an access-method-specific index build procedure
02108  * after the AM has done whatever setup it needs.  The parent heap relation
02109  * is scanned to find tuples that should be entered into the index.  Each
02110  * such tuple is passed to the AM's callback routine, which does the right
02111  * things to add it to the new index.  After we return, the AM's index
02112  * build procedure does whatever cleanup it needs.
02113  *
02114  * The total count of heap tuples is returned.  This is for updating pg_class
02115  * statistics.  (It's annoying not to be able to do that here, but we want
02116  * to merge that update with others; see index_update_stats.)  Note that the
02117  * index AM itself must keep track of the number of index tuples; we don't do
02118  * so here because the AM might reject some of the tuples for its own reasons,
02119  * such as being unable to store NULLs.
02120  *
02121  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
02122  * any potentially broken HOT chains.  Currently, we set this if there are
02123  * any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without
02124  * trying very hard to detect whether they're really incompatible with the
02125  * chain tip.
02126  */
02127 double
02128 IndexBuildHeapScan(Relation heapRelation,
02129                    Relation indexRelation,
02130                    IndexInfo *indexInfo,
02131                    bool allow_sync,
02132                    IndexBuildCallback callback,
02133                    void *callback_state)
02134 {
02135     bool        is_system_catalog;
02136     bool        checking_uniqueness;
02137     HeapScanDesc scan;
02138     HeapTuple   heapTuple;
02139     Datum       values[INDEX_MAX_KEYS];
02140     bool        isnull[INDEX_MAX_KEYS];
02141     double      reltuples;
02142     List       *predicate;
02143     TupleTableSlot *slot;
02144     EState     *estate;
02145     ExprContext *econtext;
02146     Snapshot    snapshot;
02147     TransactionId OldestXmin;
02148     BlockNumber root_blkno = InvalidBlockNumber;
02149     OffsetNumber root_offsets[MaxHeapTuplesPerPage];
02150 
02151     /*
02152      * sanity checks
02153      */
02154     Assert(OidIsValid(indexRelation->rd_rel->relam));
02155 
02156     /* Remember if it's a system catalog */
02157     is_system_catalog = IsSystemRelation(heapRelation);
02158 
02159     /* See whether we're verifying uniqueness/exclusion properties */
02160     checking_uniqueness = (indexInfo->ii_Unique ||
02161                            indexInfo->ii_ExclusionOps != NULL);
02162 
02163     /*
02164      * Need an EState for evaluation of index expressions and partial-index
02165      * predicates.  Also a slot to hold the current tuple.
02166      */
02167     estate = CreateExecutorState();
02168     econtext = GetPerTupleExprContext(estate);
02169     slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
02170 
02171     /* Arrange for econtext's scan tuple to be the tuple under test */
02172     econtext->ecxt_scantuple = slot;
02173 
02174     /* Set up execution state for predicate, if any. */
02175     predicate = (List *)
02176         ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
02177                         estate);
02178 
02179     /*
02180      * Prepare for scan of the base relation.  In a normal index build, we use
02181      * SnapshotAny because we must retrieve all tuples and do our own time
02182      * qual checks (because we have to index RECENTLY_DEAD tuples). In a
02183      * concurrent build, we take a regular MVCC snapshot and index whatever's
02184      * live according to that.  During bootstrap we just use SnapshotNow.
02185      */
02186     if (IsBootstrapProcessingMode())
02187     {
02188         snapshot = SnapshotNow;
02189         OldestXmin = InvalidTransactionId;      /* not used */
02190     }
02191     else if (indexInfo->ii_Concurrent)
02192     {
02193         snapshot = RegisterSnapshot(GetTransactionSnapshot());
02194         OldestXmin = InvalidTransactionId;      /* not used */
02195     }
02196     else
02197     {
02198         snapshot = SnapshotAny;
02199         /* okay to ignore lazy VACUUMs here */
02200         OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
02201     }
02202 
02203     scan = heap_beginscan_strat(heapRelation,   /* relation */
02204                                 snapshot,       /* snapshot */
02205                                 0,      /* number of keys */
02206                                 NULL,   /* scan key */
02207                                 true,   /* buffer access strategy OK */
02208                                 allow_sync);    /* syncscan OK? */
02209 
02210     reltuples = 0;
02211 
02212     /*
02213      * Scan all tuples in the base relation.
02214      */
02215     while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
02216     {
02217         bool        tupleIsAlive;
02218 
02219         CHECK_FOR_INTERRUPTS();
02220 
02221         /*
02222          * When dealing with a HOT-chain of updated tuples, we want to index
02223          * the values of the live tuple (if any), but index it under the TID
02224          * of the chain's root tuple.  This approach is necessary to preserve
02225          * the HOT-chain structure in the heap. So we need to be able to find
02226          * the root item offset for every tuple that's in a HOT-chain.  When
02227          * first reaching a new page of the relation, call
02228          * heap_get_root_tuples() to build a map of root item offsets on the
02229          * page.
02230          *
02231          * It might look unsafe to use this information across buffer
02232          * lock/unlock.  However, we hold ShareLock on the table so no
02233          * ordinary insert/update/delete should occur; and we hold pin on the
02234          * buffer continuously while visiting the page, so no pruning
02235          * operation can occur either.
02236          *
02237          * Also, although our opinions about tuple liveness could change while
02238          * we scan the page (due to concurrent transaction commits/aborts),
02239          * the chain root locations won't, so this info doesn't need to be
02240          * rebuilt after waiting for another transaction.
02241          *
02242          * Note the implied assumption that there is no more than one live
02243          * tuple per HOT-chain --- else we could create more than one index
02244          * entry pointing to the same root tuple.
02245          */
02246         if (scan->rs_cblock != root_blkno)
02247         {
02248             Page        page = BufferGetPage(scan->rs_cbuf);
02249 
02250             LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
02251             heap_get_root_tuples(page, root_offsets);
02252             LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
02253 
02254             root_blkno = scan->rs_cblock;
02255         }
02256 
02257         if (snapshot == SnapshotAny)
02258         {
02259             /* do our own time qual check */
02260             bool        indexIt;
02261             TransactionId xwait;
02262 
02263     recheck:
02264 
02265             /*
02266              * We could possibly get away with not locking the buffer here,
02267              * since caller should hold ShareLock on the relation, but let's
02268              * be conservative about it.  (This remark is still correct even
02269              * with HOT-pruning: our pin on the buffer prevents pruning.)
02270              */
02271             LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
02272 
02273             switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin,
02274                                              scan->rs_cbuf))
02275             {
02276                 case HEAPTUPLE_DEAD:
02277                     /* Definitely dead, we can ignore it */
02278                     indexIt = false;
02279                     tupleIsAlive = false;
02280                     break;
02281                 case HEAPTUPLE_LIVE:
02282                     /* Normal case, index and unique-check it */
02283                     indexIt = true;
02284                     tupleIsAlive = true;
02285                     break;
02286                 case HEAPTUPLE_RECENTLY_DEAD:
02287 
02288                     /*
02289                      * If tuple is recently deleted then we must index it
02290                      * anyway to preserve MVCC semantics.  (Pre-existing
02291                      * transactions could try to use the index after we finish
02292                      * building it, and may need to see such tuples.)
02293                      *
02294                      * However, if it was HOT-updated then we must only index
02295                      * the live tuple at the end of the HOT-chain.  Since this
02296                      * breaks semantics for pre-existing snapshots, mark the
02297                      * index as unusable for them.
02298                      */
02299                     if (HeapTupleIsHotUpdated(heapTuple))
02300                     {
02301                         indexIt = false;
02302                         /* mark the index as unsafe for old snapshots */
02303                         indexInfo->ii_BrokenHotChain = true;
02304                     }
02305                     else
02306                         indexIt = true;
02307                     /* In any case, exclude the tuple from unique-checking */
02308                     tupleIsAlive = false;
02309                     break;
02310                 case HEAPTUPLE_INSERT_IN_PROGRESS:
02311 
02312                     /*
02313                      * Since caller should hold ShareLock or better, normally
02314                      * the only way to see this is if it was inserted earlier
02315                      * in our own transaction.  However, it can happen in
02316                      * system catalogs, since we tend to release write lock
02317                      * before commit there.  Give a warning if neither case
02318                      * applies.
02319                      */
02320                     xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
02321                     if (!TransactionIdIsCurrentTransactionId(xwait))
02322                     {
02323                         if (!is_system_catalog)
02324                             elog(WARNING, "concurrent insert in progress within table \"%s\"",
02325                                  RelationGetRelationName(heapRelation));
02326 
02327                         /*
02328                          * If we are performing uniqueness checks, indexing
02329                          * such a tuple could lead to a bogus uniqueness
02330                          * failure.  In that case we wait for the inserting
02331                          * transaction to finish and check again.
02332                          */
02333                         if (checking_uniqueness)
02334                         {
02335                             /*
02336                              * Must drop the lock on the buffer before we wait
02337                              */
02338                             LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
02339                             XactLockTableWait(xwait);
02340                             goto recheck;
02341                         }
02342                     }
02343 
02344                     /*
02345                      * We must index such tuples, since if the index build
02346                      * commits then they're good.
02347                      */
02348                     indexIt = true;
02349                     tupleIsAlive = true;
02350                     break;
02351                 case HEAPTUPLE_DELETE_IN_PROGRESS:
02352 
02353                     /*
02354                      * As with INSERT_IN_PROGRESS case, this is unexpected
02355                      * unless it's our own deletion or a system catalog.
02356                      */
02357                     xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
02358                     if (!TransactionIdIsCurrentTransactionId(xwait))
02359                     {
02360                         if (!is_system_catalog)
02361                             elog(WARNING, "concurrent delete in progress within table \"%s\"",
02362                                  RelationGetRelationName(heapRelation));
02363 
02364                         /*
02365                          * If we are performing uniqueness checks, assuming
02366                          * the tuple is dead could lead to missing a
02367                          * uniqueness violation.  In that case we wait for the
02368                          * deleting transaction to finish and check again.
02369                          *
02370                          * Also, if it's a HOT-updated tuple, we should not
02371                          * index it but rather the live tuple at the end of
02372                          * the HOT-chain.  However, the deleting transaction
02373                          * could abort, possibly leaving this tuple as live
02374                          * after all, in which case it has to be indexed. The
02375                          * only way to know what to do is to wait for the
02376                          * deleting transaction to finish and check again.
02377                          */
02378                         if (checking_uniqueness ||
02379                             HeapTupleIsHotUpdated(heapTuple))
02380                         {
02381                             /*
02382                              * Must drop the lock on the buffer before we wait
02383                              */
02384                             LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
02385                             XactLockTableWait(xwait);
02386                             goto recheck;
02387                         }
02388 
02389                         /*
02390                          * Otherwise index it but don't check for uniqueness,
02391                          * the same as a RECENTLY_DEAD tuple.
02392                          */
02393                         indexIt = true;
02394                     }
02395                     else if (HeapTupleIsHotUpdated(heapTuple))
02396                     {
02397                         /*
02398                          * It's a HOT-updated tuple deleted by our own xact.
02399                          * We can assume the deletion will commit (else the
02400                          * index contents don't matter), so treat the same as
02401                          * RECENTLY_DEAD HOT-updated tuples.
02402                          */
02403                         indexIt = false;
02404                         /* mark the index as unsafe for old snapshots */
02405                         indexInfo->ii_BrokenHotChain = true;
02406                     }
02407                     else
02408                     {
02409                         /*
02410                          * It's a regular tuple deleted by our own xact. Index
02411                          * it but don't check for uniqueness, the same as a
02412                          * RECENTLY_DEAD tuple.
02413                          */
02414                         indexIt = true;
02415                     }
02416                     /* In any case, exclude the tuple from unique-checking */
02417                     tupleIsAlive = false;
02418                     break;
02419                 default:
02420                     elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
02421                     indexIt = tupleIsAlive = false;     /* keep compiler quiet */
02422                     break;
02423             }
02424 
02425             LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
02426 
02427             if (!indexIt)
02428                 continue;
02429         }
02430         else
02431         {
02432             /* heap_getnext did the time qual check */
02433             tupleIsAlive = true;
02434         }
02435 
02436         reltuples += 1;
02437 
02438         MemoryContextReset(econtext->ecxt_per_tuple_memory);
02439 
02440         /* Set up for predicate or expression evaluation */
02441         ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
02442 
02443         /*
02444          * In a partial index, discard tuples that don't satisfy the
02445          * predicate.
02446          */
02447         if (predicate != NIL)
02448         {
02449             if (!ExecQual(predicate, econtext, false))
02450                 continue;
02451         }
02452 
02453         /*
02454          * For the current heap tuple, extract all the attributes we use in
02455          * this index, and note which are null.  This also performs evaluation
02456          * of any expressions needed.
02457          */
02458         FormIndexDatum(indexInfo,
02459                        slot,
02460                        estate,
02461                        values,
02462                        isnull);
02463 
02464         /*
02465          * You'd think we should go ahead and build the index tuple here, but
02466          * some index AMs want to do further processing on the data first.  So
02467          * pass the values[] and isnull[] arrays, instead.
02468          */
02469 
02470         if (HeapTupleIsHeapOnly(heapTuple))
02471         {
02472             /*
02473              * For a heap-only tuple, pretend its TID is that of the root. See
02474              * src/backend/access/heap/README.HOT for discussion.
02475              */
02476             HeapTupleData rootTuple;
02477             OffsetNumber offnum;
02478 
02479             rootTuple = *heapTuple;
02480             offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
02481 
02482             Assert(OffsetNumberIsValid(root_offsets[offnum - 1]));
02483 
02484             ItemPointerSetOffsetNumber(&rootTuple.t_self,
02485                                        root_offsets[offnum - 1]);
02486 
02487             /* Call the AM's callback routine to process the tuple */
02488             callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
02489                      callback_state);
02490         }
02491         else
02492         {
02493             /* Call the AM's callback routine to process the tuple */
02494             callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
02495                      callback_state);
02496         }
02497     }
02498 
02499     heap_endscan(scan);
02500 
02501     /* we can now forget our snapshot, if set */
02502     if (indexInfo->ii_Concurrent)
02503         UnregisterSnapshot(snapshot);
02504 
02505     ExecDropSingleTupleTableSlot(slot);
02506 
02507     FreeExecutorState(estate);
02508 
02509     /* These may have been pointing to the now-gone estate */
02510     indexInfo->ii_ExpressionsState = NIL;
02511     indexInfo->ii_PredicateState = NIL;
02512 
02513     return reltuples;
02514 }
02515 
02516 
02517 /*
02518  * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
02519  *
02520  * When creating an exclusion constraint, we first build the index normally
02521  * and then rescan the heap to check for conflicts.  We assume that we only
02522  * need to validate tuples that are live according to SnapshotNow, and that
02523  * these were correctly indexed even in the presence of broken HOT chains.
02524  * This should be OK since we are holding at least ShareLock on the table,
02525  * meaning there can be no uncommitted updates from other transactions.
02526  * (Note: that wouldn't necessarily work for system catalogs, since many
02527  * operations release write lock early on the system catalogs.)
02528  */
02529 static void
02530 IndexCheckExclusion(Relation heapRelation,
02531                     Relation indexRelation,
02532                     IndexInfo *indexInfo)
02533 {
02534     HeapScanDesc scan;
02535     HeapTuple   heapTuple;
02536     Datum       values[INDEX_MAX_KEYS];
02537     bool        isnull[INDEX_MAX_KEYS];
02538     List       *predicate;
02539     TupleTableSlot *slot;
02540     EState     *estate;
02541     ExprContext *econtext;
02542 
02543     /*
02544      * If we are reindexing the target index, mark it as no longer being
02545      * reindexed, to forestall an Assert in index_beginscan when we try to use
02546      * the index for probes.  This is OK because the index is now fully valid.
02547      */
02548     if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
02549         ResetReindexProcessing();
02550 
02551     /*
02552      * Need an EState for evaluation of index expressions and partial-index
02553      * predicates.  Also a slot to hold the current tuple.
02554      */
02555     estate = CreateExecutorState();
02556     econtext = GetPerTupleExprContext(estate);
02557     slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
02558 
02559     /* Arrange for econtext's scan tuple to be the tuple under test */
02560     econtext->ecxt_scantuple = slot;
02561 
02562     /* Set up execution state for predicate, if any. */
02563     predicate = (List *)
02564         ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
02565                         estate);
02566 
02567     /*
02568      * Scan all live tuples in the base relation.
02569      */
02570     scan = heap_beginscan_strat(heapRelation,   /* relation */
02571                                 SnapshotNow,    /* snapshot */
02572                                 0,      /* number of keys */
02573                                 NULL,   /* scan key */
02574                                 true,   /* buffer access strategy OK */
02575                                 true);  /* syncscan OK */
02576 
02577     while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
02578     {
02579         CHECK_FOR_INTERRUPTS();
02580 
02581         MemoryContextReset(econtext->ecxt_per_tuple_memory);
02582 
02583         /* Set up for predicate or expression evaluation */
02584         ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
02585 
02586         /*
02587          * In a partial index, ignore tuples that don't satisfy the predicate.
02588          */
02589         if (predicate != NIL)
02590         {
02591             if (!ExecQual(predicate, econtext, false))
02592                 continue;
02593         }
02594 
02595         /*
02596          * Extract index column values, including computing expressions.
02597          */
02598         FormIndexDatum(indexInfo,
02599                        slot,
02600                        estate,
02601                        values,
02602                        isnull);
02603 
02604         /*
02605          * Check that this tuple has no conflicts.
02606          */
02607         check_exclusion_constraint(heapRelation,
02608                                    indexRelation, indexInfo,
02609                                    &(heapTuple->t_self), values, isnull,
02610                                    estate, true, false);
02611     }
02612 
02613     heap_endscan(scan);
02614 
02615     ExecDropSingleTupleTableSlot(slot);
02616 
02617     FreeExecutorState(estate);
02618 
02619     /* These may have been pointing to the now-gone estate */
02620     indexInfo->ii_ExpressionsState = NIL;
02621     indexInfo->ii_PredicateState = NIL;
02622 }
02623 
02624 
02625 /*
02626  * validate_index - support code for concurrent index builds
02627  *
02628  * We do a concurrent index build by first inserting the catalog entry for the
02629  * index via index_create(), marking it not indisready and not indisvalid.
02630  * Then we commit our transaction and start a new one, then we wait for all
02631  * transactions that could have been modifying the table to terminate.  Now
02632  * we know that any subsequently-started transactions will see the index and
02633  * honor its constraints on HOT updates; so while existing HOT-chains might
02634  * be broken with respect to the index, no currently live tuple will have an
02635  * incompatible HOT update done to it.  We now build the index normally via
02636  * index_build(), while holding a weak lock that allows concurrent
02637  * insert/update/delete.  Also, we index only tuples that are valid
02638  * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
02639  * build takes care to include recently-dead tuples.  This is OK because
02640  * we won't mark the index valid until all transactions that might be able
02641  * to see those tuples are gone.  The reason for doing that is to avoid
02642  * bogus unique-index failures due to concurrent UPDATEs (we might see
02643  * different versions of the same row as being valid when we pass over them,
02644  * if we used HeapTupleSatisfiesVacuum).  This leaves us with an index that
02645  * does not contain any tuples added to the table while we built the index.
02646  *
02647  * Next, we mark the index "indisready" (but still not "indisvalid") and
02648  * commit the second transaction and start a third.  Again we wait for all
02649  * transactions that could have been modifying the table to terminate.  Now
02650  * we know that any subsequently-started transactions will see the index and
02651  * insert their new tuples into it.  We then take a new reference snapshot
02652  * which is passed to validate_index().  Any tuples that are valid according
02653  * to this snap, but are not in the index, must be added to the index.
02654  * (Any tuples committed live after the snap will be inserted into the
02655  * index by their originating transaction.  Any tuples committed dead before
02656  * the snap need not be indexed, because we will wait out all transactions
02657  * that might care about them before we mark the index valid.)
02658  *
02659  * validate_index() works by first gathering all the TIDs currently in the
02660  * index, using a bulkdelete callback that just stores the TIDs and doesn't
02661  * ever say "delete it".  (This should be faster than a plain indexscan;
02662  * also, not all index AMs support full-index indexscan.)  Then we sort the
02663  * TIDs, and finally scan the table doing a "merge join" against the TID list
02664  * to see which tuples are missing from the index.  Thus we will ensure that
02665  * all tuples valid according to the reference snapshot are in the index.
02666  *
02667  * Building a unique index this way is tricky: we might try to insert a
02668  * tuple that is already dead or is in process of being deleted, and we
02669  * mustn't have a uniqueness failure against an updated version of the same
02670  * row.  We could try to check the tuple to see if it's already dead and tell
02671  * index_insert() not to do the uniqueness check, but that still leaves us
02672  * with a race condition against an in-progress update.  To handle that,
02673  * we expect the index AM to recheck liveness of the to-be-inserted tuple
02674  * before it declares a uniqueness error.
02675  *
02676  * After completing validate_index(), we wait until all transactions that
02677  * were alive at the time of the reference snapshot are gone; this is
02678  * necessary to be sure there are none left with a transaction snapshot
02679  * older than the reference (and hence possibly able to see tuples we did
02680  * not index).  Then we mark the index "indisvalid" and commit.  Subsequent
02681  * transactions will be able to use it for queries.
02682  *
02683  * Doing two full table scans is a brute-force strategy.  We could try to be
02684  * cleverer, eg storing new tuples in a special area of the table (perhaps
02685  * making the table append-only by setting use_fsm).  However that would
02686  * add yet more locking issues.
02687  */
02688 void
02689 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
02690 {
02691     Relation    heapRelation,
02692                 indexRelation;
02693     IndexInfo  *indexInfo;
02694     IndexVacuumInfo ivinfo;
02695     v_i_state   state;
02696     Oid         save_userid;
02697     int         save_sec_context;
02698     int         save_nestlevel;
02699 
02700     /* Open and lock the parent heap relation */
02701     heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
02702     /* And the target index relation */
02703     indexRelation = index_open(indexId, RowExclusiveLock);
02704 
02705     /*
02706      * Fetch info needed for index_insert.  (You might think this should be
02707      * passed in from DefineIndex, but its copy is long gone due to having
02708      * been built in a previous transaction.)
02709      */
02710     indexInfo = BuildIndexInfo(indexRelation);
02711 
02712     /* mark build is concurrent just for consistency */
02713     indexInfo->ii_Concurrent = true;
02714 
02715     /*
02716      * Switch to the table owner's userid, so that any index functions are run
02717      * as that user.  Also lock down security-restricted operations and
02718      * arrange to make GUC variable changes local to this command.
02719      */
02720     GetUserIdAndSecContext(&save_userid, &save_sec_context);
02721     SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
02722                            save_sec_context | SECURITY_RESTRICTED_OPERATION);
02723     save_nestlevel = NewGUCNestLevel();
02724 
02725     /*
02726      * Scan the index and gather up all the TIDs into a tuplesort object.
02727      */
02728     ivinfo.index = indexRelation;
02729     ivinfo.analyze_only = false;
02730     ivinfo.estimated_count = true;
02731     ivinfo.message_level = DEBUG2;
02732     ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
02733     ivinfo.strategy = NULL;
02734 
02735     state.tuplesort = tuplesort_begin_datum(TIDOID, TIDLessOperator,
02736                                             InvalidOid, false,
02737                                             maintenance_work_mem,
02738                                             false);
02739     state.htups = state.itups = state.tups_inserted = 0;
02740 
02741     (void) index_bulk_delete(&ivinfo, NULL,
02742                              validate_index_callback, (void *) &state);
02743 
02744     /* Execute the sort */
02745     tuplesort_performsort(state.tuplesort);
02746 
02747     /*
02748      * Now scan the heap and "merge" it with the index
02749      */
02750     validate_index_heapscan(heapRelation,
02751                             indexRelation,
02752                             indexInfo,
02753                             snapshot,
02754                             &state);
02755 
02756     /* Done with tuplesort object */
02757     tuplesort_end(state.tuplesort);
02758 
02759     elog(DEBUG2,
02760          "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
02761          state.htups, state.itups, state.tups_inserted);
02762 
02763     /* Roll back any GUC changes executed by index functions */
02764     AtEOXact_GUC(false, save_nestlevel);
02765 
02766     /* Restore userid and security context */
02767     SetUserIdAndSecContext(save_userid, save_sec_context);
02768 
02769     /* Close rels, but keep locks */
02770     index_close(indexRelation, NoLock);
02771     heap_close(heapRelation, NoLock);
02772 }
02773 
02774 /*
02775  * validate_index_callback - bulkdelete callback to collect the index TIDs
02776  */
02777 static bool
02778 validate_index_callback(ItemPointer itemptr, void *opaque)
02779 {
02780     v_i_state  *state = (v_i_state *) opaque;
02781 
02782     tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
02783     state->itups += 1;
02784     return false;               /* never actually delete anything */
02785 }
02786 
02787 /*
02788  * validate_index_heapscan - second table scan for concurrent index build
02789  *
02790  * This has much code in common with IndexBuildHeapScan, but it's enough
02791  * different that it seems cleaner to have two routines not one.
02792  */
02793 static void
02794 validate_index_heapscan(Relation heapRelation,
02795                         Relation indexRelation,
02796                         IndexInfo *indexInfo,
02797                         Snapshot snapshot,
02798                         v_i_state *state)
02799 {
02800     HeapScanDesc scan;
02801     HeapTuple   heapTuple;
02802     Datum       values[INDEX_MAX_KEYS];
02803     bool        isnull[INDEX_MAX_KEYS];
02804     List       *predicate;
02805     TupleTableSlot *slot;
02806     EState     *estate;
02807     ExprContext *econtext;
02808     BlockNumber root_blkno = InvalidBlockNumber;
02809     OffsetNumber root_offsets[MaxHeapTuplesPerPage];
02810     bool        in_index[MaxHeapTuplesPerPage];
02811 
02812     /* state variables for the merge */
02813     ItemPointer indexcursor = NULL;
02814     bool        tuplesort_empty = false;
02815 
02816     /*
02817      * sanity checks
02818      */
02819     Assert(OidIsValid(indexRelation->rd_rel->relam));
02820 
02821     /*
02822      * Need an EState for evaluation of index expressions and partial-index
02823      * predicates.  Also a slot to hold the current tuple.
02824      */
02825     estate = CreateExecutorState();
02826     econtext = GetPerTupleExprContext(estate);
02827     slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
02828 
02829     /* Arrange for econtext's scan tuple to be the tuple under test */
02830     econtext->ecxt_scantuple = slot;
02831 
02832     /* Set up execution state for predicate, if any. */
02833     predicate = (List *)
02834         ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
02835                         estate);
02836 
02837     /*
02838      * Prepare for scan of the base relation.  We need just those tuples
02839      * satisfying the passed-in reference snapshot.  We must disable syncscan
02840      * here, because it's critical that we read from block zero forward to
02841      * match the sorted TIDs.
02842      */
02843     scan = heap_beginscan_strat(heapRelation,   /* relation */
02844                                 snapshot,       /* snapshot */
02845                                 0,      /* number of keys */
02846                                 NULL,   /* scan key */
02847                                 true,   /* buffer access strategy OK */
02848                                 false); /* syncscan not OK */
02849 
02850     /*
02851      * Scan all tuples matching the snapshot.
02852      */
02853     while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
02854     {
02855         ItemPointer heapcursor = &heapTuple->t_self;
02856         ItemPointerData rootTuple;
02857         OffsetNumber root_offnum;
02858 
02859         CHECK_FOR_INTERRUPTS();
02860 
02861         state->htups += 1;
02862 
02863         /*
02864          * As commented in IndexBuildHeapScan, we should index heap-only
02865          * tuples under the TIDs of their root tuples; so when we advance onto
02866          * a new heap page, build a map of root item offsets on the page.
02867          *
02868          * This complicates merging against the tuplesort output: we will
02869          * visit the live tuples in order by their offsets, but the root
02870          * offsets that we need to compare against the index contents might be
02871          * ordered differently.  So we might have to "look back" within the
02872          * tuplesort output, but only within the current page.  We handle that
02873          * by keeping a bool array in_index[] showing all the
02874          * already-passed-over tuplesort output TIDs of the current page. We
02875          * clear that array here, when advancing onto a new heap page.
02876          */
02877         if (scan->rs_cblock != root_blkno)
02878         {
02879             Page        page = BufferGetPage(scan->rs_cbuf);
02880 
02881             LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
02882             heap_get_root_tuples(page, root_offsets);
02883             LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
02884 
02885             memset(in_index, 0, sizeof(in_index));
02886 
02887             root_blkno = scan->rs_cblock;
02888         }
02889 
02890         /* Convert actual tuple TID to root TID */
02891         rootTuple = *heapcursor;
02892         root_offnum = ItemPointerGetOffsetNumber(heapcursor);
02893 
02894         if (HeapTupleIsHeapOnly(heapTuple))
02895         {
02896             root_offnum = root_offsets[root_offnum - 1];
02897             Assert(OffsetNumberIsValid(root_offnum));
02898             ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
02899         }
02900 
02901         /*
02902          * "merge" by skipping through the index tuples until we find or pass
02903          * the current root tuple.
02904          */
02905         while (!tuplesort_empty &&
02906                (!indexcursor ||
02907                 ItemPointerCompare(indexcursor, &rootTuple) < 0))
02908         {
02909             Datum       ts_val;
02910             bool        ts_isnull;
02911 
02912             if (indexcursor)
02913             {
02914                 /*
02915                  * Remember index items seen earlier on the current heap page
02916                  */
02917                 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
02918                     in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
02919                 pfree(indexcursor);
02920             }
02921 
02922             tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
02923                                                   &ts_val, &ts_isnull);
02924             Assert(tuplesort_empty || !ts_isnull);
02925             indexcursor = (ItemPointer) DatumGetPointer(ts_val);
02926         }
02927 
02928         /*
02929          * If the tuplesort has overshot *and* we didn't see a match earlier,
02930          * then this tuple is missing from the index, so insert it.
02931          */
02932         if ((tuplesort_empty ||
02933              ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
02934             !in_index[root_offnum - 1])
02935         {
02936             MemoryContextReset(econtext->ecxt_per_tuple_memory);
02937 
02938             /* Set up for predicate or expression evaluation */
02939             ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
02940 
02941             /*
02942              * In a partial index, discard tuples that don't satisfy the
02943              * predicate.
02944              */
02945             if (predicate != NIL)
02946             {
02947                 if (!ExecQual(predicate, econtext, false))
02948                     continue;
02949             }
02950 
02951             /*
02952              * For the current heap tuple, extract all the attributes we use
02953              * in this index, and note which are null.  This also performs
02954              * evaluation of any expressions needed.
02955              */
02956             FormIndexDatum(indexInfo,
02957                            slot,
02958                            estate,
02959                            values,
02960                            isnull);
02961 
02962             /*
02963              * You'd think we should go ahead and build the index tuple here,
02964              * but some index AMs want to do further processing on the data
02965              * first. So pass the values[] and isnull[] arrays, instead.
02966              */
02967 
02968             /*
02969              * If the tuple is already committed dead, you might think we
02970              * could suppress uniqueness checking, but this is no longer true
02971              * in the presence of HOT, because the insert is actually a proxy
02972              * for a uniqueness check on the whole HOT-chain.  That is, the
02973              * tuple we have here could be dead because it was already
02974              * HOT-updated, and if so the updating transaction will not have
02975              * thought it should insert index entries.  The index AM will
02976              * check the whole HOT-chain and correctly detect a conflict if
02977              * there is one.
02978              */
02979 
02980             index_insert(indexRelation,
02981                          values,
02982                          isnull,
02983                          &rootTuple,
02984                          heapRelation,
02985                          indexInfo->ii_Unique ?
02986                          UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
02987 
02988             state->tups_inserted += 1;
02989         }
02990     }
02991 
02992     heap_endscan(scan);
02993 
02994     ExecDropSingleTupleTableSlot(slot);
02995 
02996     FreeExecutorState(estate);
02997 
02998     /* These may have been pointing to the now-gone estate */
02999     indexInfo->ii_ExpressionsState = NIL;
03000     indexInfo->ii_PredicateState = NIL;
03001 }
03002 
03003 
03004 /*
03005  * index_set_state_flags - adjust pg_index state flags
03006  *
03007  * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
03008  * flags that denote the index's state.  We must use an in-place update of
03009  * the pg_index tuple, because we do not have exclusive lock on the parent
03010  * table and so other sessions might concurrently be doing SnapshotNow scans
03011  * of pg_index to identify the table's indexes.  A transactional update would
03012  * risk somebody not seeing the index at all.  Because the update is not
03013  * transactional and will not roll back on error, this must only be used as
03014  * the last step in a transaction that has not made any transactional catalog
03015  * updates!
03016  *
03017  * Note that heap_inplace_update does send a cache inval message for the
03018  * tuple, so other sessions will hear about the update as soon as we commit.
03019  */
03020 void
03021 index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
03022 {
03023     Relation    pg_index;
03024     HeapTuple   indexTuple;
03025     Form_pg_index indexForm;
03026 
03027     /* Assert that current xact hasn't done any transactional updates */
03028     Assert(GetTopTransactionIdIfAny() == InvalidTransactionId);
03029 
03030     /* Open pg_index and fetch a writable copy of the index's tuple */
03031     pg_index = heap_open(IndexRelationId, RowExclusiveLock);
03032 
03033     indexTuple = SearchSysCacheCopy1(INDEXRELID,
03034                                      ObjectIdGetDatum(indexId));
03035     if (!HeapTupleIsValid(indexTuple))
03036         elog(ERROR, "cache lookup failed for index %u", indexId);
03037     indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
03038 
03039     /* Perform the requested state change on the copy */
03040     switch (action)
03041     {
03042         case INDEX_CREATE_SET_READY:
03043             /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
03044             Assert(indexForm->indislive);
03045             Assert(!indexForm->indisready);
03046             Assert(!indexForm->indisvalid);
03047             indexForm->indisready = true;
03048             break;
03049         case INDEX_CREATE_SET_VALID:
03050             /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
03051             Assert(indexForm->indislive);
03052             Assert(indexForm->indisready);
03053             Assert(!indexForm->indisvalid);
03054             indexForm->indisvalid = true;
03055             break;
03056         case INDEX_DROP_CLEAR_VALID:
03057 
03058             /*
03059              * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
03060              *
03061              * If indisready == true we leave it set so the index still gets
03062              * maintained by active transactions.  We only need to ensure that
03063              * indisvalid is false.  (We don't assert that either is initially
03064              * true, though, since we want to be able to retry a DROP INDEX
03065              * CONCURRENTLY that failed partway through.)
03066              *
03067              * Note: the CLUSTER logic assumes that indisclustered cannot be
03068              * set on any invalid index, so clear that flag too.
03069              */
03070             indexForm->indisvalid = false;
03071             indexForm->indisclustered = false;
03072             break;
03073         case INDEX_DROP_SET_DEAD:
03074 
03075             /*
03076              * Clear indisready/indislive during DROP INDEX CONCURRENTLY
03077              *
03078              * We clear both indisready and indislive, because we not only
03079              * want to stop updates, we want to prevent sessions from touching
03080              * the index at all.
03081              */
03082             Assert(!indexForm->indisvalid);
03083             indexForm->indisready = false;
03084             indexForm->indislive = false;
03085             break;
03086     }
03087 
03088     /* ... and write it back in-place */
03089     heap_inplace_update(pg_index, indexTuple);
03090 
03091     heap_close(pg_index, RowExclusiveLock);
03092 }
03093 
03094 
03095 /*
03096  * IndexGetRelation: given an index's relation OID, get the OID of the
03097  * relation it is an index on.  Uses the system cache.
03098  */
03099 Oid
03100 IndexGetRelation(Oid indexId, bool missing_ok)
03101 {
03102     HeapTuple   tuple;
03103     Form_pg_index index;
03104     Oid         result;
03105 
03106     tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
03107     if (!HeapTupleIsValid(tuple))
03108     {
03109         if (missing_ok)
03110             return InvalidOid;
03111         elog(ERROR, "cache lookup failed for index %u", indexId);
03112     }
03113     index = (Form_pg_index) GETSTRUCT(tuple);
03114     Assert(index->indexrelid == indexId);
03115 
03116     result = index->indrelid;
03117     ReleaseSysCache(tuple);
03118     return result;
03119 }
03120 
03121 /*
03122  * reindex_index - This routine is used to recreate a single index
03123  */
03124 void
03125 reindex_index(Oid indexId, bool skip_constraint_checks)
03126 {
03127     Relation    iRel,
03128                 heapRelation;
03129     Oid         heapId;
03130     IndexInfo  *indexInfo;
03131     volatile bool skipped_constraint = false;
03132 
03133     /*
03134      * Open and lock the parent heap relation.  ShareLock is sufficient since
03135      * we only need to be sure no schema or data changes are going on.
03136      */
03137     heapId = IndexGetRelation(indexId, false);
03138     heapRelation = heap_open(heapId, ShareLock);
03139 
03140     /*
03141      * Open the target index relation and get an exclusive lock on it, to
03142      * ensure that no one else is touching this particular index.
03143      */
03144     iRel = index_open(indexId, AccessExclusiveLock);
03145 
03146     /*
03147      * Don't allow reindex on temp tables of other backends ... their local
03148      * buffer manager is not going to cope.
03149      */
03150     if (RELATION_IS_OTHER_TEMP(iRel))
03151         ereport(ERROR,
03152                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
03153                errmsg("cannot reindex temporary tables of other sessions")));
03154 
03155     /*
03156      * Also check for active uses of the index in the current transaction; we
03157      * don't want to reindex underneath an open indexscan.
03158      */
03159     CheckTableNotInUse(iRel, "REINDEX INDEX");
03160 
03161     /*
03162      * All predicate locks on the index are about to be made invalid. Promote
03163      * them to relation locks on the heap.
03164      */
03165     TransferPredicateLocksToHeapRelation(iRel);
03166 
03167     PG_TRY();
03168     {
03169         /* Suppress use of the target index while rebuilding it */
03170         SetReindexProcessing(heapId, indexId);
03171 
03172         /* Fetch info needed for index_build */
03173         indexInfo = BuildIndexInfo(iRel);
03174 
03175         /* If requested, skip checking uniqueness/exclusion constraints */
03176         if (skip_constraint_checks)
03177         {
03178             if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
03179                 skipped_constraint = true;
03180             indexInfo->ii_Unique = false;
03181             indexInfo->ii_ExclusionOps = NULL;
03182             indexInfo->ii_ExclusionProcs = NULL;
03183             indexInfo->ii_ExclusionStrats = NULL;
03184         }
03185 
03186         /* We'll build a new physical relation for the index */
03187         RelationSetNewRelfilenode(iRel, InvalidTransactionId,
03188                                   InvalidMultiXactId);
03189 
03190         /* Initialize the index and rebuild */
03191         /* Note: we do not need to re-establish pkey setting */
03192         index_build(heapRelation, iRel, indexInfo, false, true);
03193     }
03194     PG_CATCH();
03195     {
03196         /* Make sure flag gets cleared on error exit */
03197         ResetReindexProcessing();
03198         PG_RE_THROW();
03199     }
03200     PG_END_TRY();
03201     ResetReindexProcessing();
03202 
03203     /*
03204      * If the index is marked invalid/not-ready/dead (ie, it's from a failed
03205      * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
03206      * and we didn't skip a uniqueness check, we can now mark it valid.  This
03207      * allows REINDEX to be used to clean up in such cases.
03208      *
03209      * We can also reset indcheckxmin, because we have now done a
03210      * non-concurrent index build, *except* in the case where index_build
03211      * found some still-broken HOT chains. If it did, and we don't have to
03212      * change any of the other flags, we just leave indcheckxmin alone (note
03213      * that index_build won't have changed it, because this is a reindex).
03214      * This is okay and desirable because not updating the tuple leaves the
03215      * index's usability horizon (recorded as the tuple's xmin value) the same
03216      * as it was.
03217      *
03218      * But, if the index was invalid/not-ready/dead and there were broken HOT
03219      * chains, we had better force indcheckxmin true, because the normal
03220      * argument that the HOT chains couldn't conflict with the index is
03221      * suspect for an invalid index.  (A conflict is definitely possible if
03222      * the index was dead.  It probably shouldn't happen otherwise, but let's
03223      * be conservative.)  In this case advancing the usability horizon is
03224      * appropriate.
03225      *
03226      * Note that if we have to update the tuple, there is a risk of concurrent
03227      * transactions not seeing it during their SnapshotNow scans of pg_index.
03228      * While not especially desirable, this is safe because no such
03229      * transaction could be trying to update the table (since we have
03230      * ShareLock on it).  The worst case is that someone might transiently
03231      * fail to use the index for a query --- but it was probably unusable
03232      * before anyway, if we are updating the tuple.
03233      *
03234      * Another reason for avoiding unnecessary updates here is that while
03235      * reindexing pg_index itself, we must not try to update tuples in it.
03236      * pg_index's indexes should always have these flags in their clean state,
03237      * so that won't happen.
03238      */
03239     if (!skipped_constraint)
03240     {
03241         Relation    pg_index;
03242         HeapTuple   indexTuple;
03243         Form_pg_index indexForm;
03244         bool        index_bad;
03245 
03246         pg_index = heap_open(IndexRelationId, RowExclusiveLock);
03247 
03248         indexTuple = SearchSysCacheCopy1(INDEXRELID,
03249                                          ObjectIdGetDatum(indexId));
03250         if (!HeapTupleIsValid(indexTuple))
03251             elog(ERROR, "cache lookup failed for index %u", indexId);
03252         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
03253 
03254         index_bad = (!indexForm->indisvalid ||
03255                      !indexForm->indisready ||
03256                      !indexForm->indislive);
03257         if (index_bad ||
03258             (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
03259         {
03260             if (!indexInfo->ii_BrokenHotChain)
03261                 indexForm->indcheckxmin = false;
03262             else if (index_bad)
03263                 indexForm->indcheckxmin = true;
03264             indexForm->indisvalid = true;
03265             indexForm->indisready = true;
03266             indexForm->indislive = true;
03267             simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
03268             CatalogUpdateIndexes(pg_index, indexTuple);
03269 
03270             /*
03271              * Invalidate the relcache for the table, so that after we commit
03272              * all sessions will refresh the table's index list.  This ensures
03273              * that if anyone misses seeing the pg_index row during this
03274              * update, they'll refresh their list before attempting any update
03275              * on the table.
03276              */
03277             CacheInvalidateRelcache(heapRelation);
03278         }
03279 
03280         heap_close(pg_index, RowExclusiveLock);
03281     }
03282 
03283     /* Close rels, but keep locks */
03284     index_close(iRel, NoLock);
03285     heap_close(heapRelation, NoLock);
03286 }
03287 
03288 /*
03289  * reindex_relation - This routine is used to recreate all indexes
03290  * of a relation (and optionally its toast relation too, if any).
03291  *
03292  * "flags" is a bitmask that can include any combination of these bits:
03293  *
03294  * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
03295  *
03296  * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
03297  * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
03298  * indexes are inconsistent with it.  This makes things tricky if the relation
03299  * is a system catalog that we might consult during the reindexing.  To deal
03300  * with that case, we mark all of the indexes as pending rebuild so that they
03301  * won't be trusted until rebuilt.  The caller is required to call us *without*
03302  * having made the rebuilt table visible by doing CommandCounterIncrement;
03303  * we'll do CCI after having collected the index list.  (This way we can still
03304  * use catalog indexes while collecting the list.)
03305  *
03306  * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
03307  * constraint conditions, else don't.  To avoid deadlocks, VACUUM FULL or
03308  * CLUSTER on a system catalog must omit this flag.  REINDEX should be used to
03309  * rebuild an index if constraint inconsistency is suspected.  For optimal
03310  * performance, other callers should include the flag only after transforming
03311  * the data in a manner that risks a change in constraint validity.
03312  *
03313  * Returns true if any indexes were rebuilt (including toast table's index
03314  * when relevant).  Note that a CommandCounterIncrement will occur after each
03315  * index rebuild.
03316  */
03317 bool
03318 reindex_relation(Oid relid, int flags)
03319 {
03320     Relation    rel;
03321     Oid         toast_relid;
03322     List       *indexIds;
03323     bool        is_pg_class;
03324     bool        result;
03325 
03326     /*
03327      * Open and lock the relation.  ShareLock is sufficient since we only need
03328      * to prevent schema and data changes in it.  The lock level used here
03329      * should match ReindexTable().
03330      */
03331     rel = heap_open(relid, ShareLock);
03332 
03333     toast_relid = rel->rd_rel->reltoastrelid;
03334 
03335     /*
03336      * Get the list of index OIDs for this relation.  (We trust to the
03337      * relcache to get this with a sequential scan if ignoring system
03338      * indexes.)
03339      */
03340     indexIds = RelationGetIndexList(rel);
03341 
03342     /*
03343      * reindex_index will attempt to update the pg_class rows for the relation
03344      * and index.  If we are processing pg_class itself, we want to make sure
03345      * that the updates do not try to insert index entries into indexes we
03346      * have not processed yet.  (When we are trying to recover from corrupted
03347      * indexes, that could easily cause a crash.) We can accomplish this
03348      * because CatalogUpdateIndexes will use the relcache's index list to know
03349      * which indexes to update. We just force the index list to be only the
03350      * stuff we've processed.
03351      *
03352      * It is okay to not insert entries into the indexes we have not processed
03353      * yet because all of this is transaction-safe.  If we fail partway
03354      * through, the updated rows are dead and it doesn't matter whether they
03355      * have index entries.  Also, a new pg_class index will be created with a
03356      * correct entry for its own pg_class row because we do
03357      * RelationSetNewRelfilenode() before we do index_build().
03358      *
03359      * Note that we also clear pg_class's rd_oidindex until the loop is done,
03360      * so that that index can't be accessed either.  This means we cannot
03361      * safely generate new relation OIDs while in the loop; shouldn't be a
03362      * problem.
03363      */
03364     is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
03365 
03366     /* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
03367     if (is_pg_class)
03368         (void) RelationGetIndexAttrBitmap(rel, false);
03369 
03370     PG_TRY();
03371     {
03372         List       *doneIndexes;
03373         ListCell   *indexId;
03374 
03375         if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
03376         {
03377             /* Suppress use of all the indexes until they are rebuilt */
03378             SetReindexPending(indexIds);
03379 
03380             /*
03381              * Make the new heap contents visible --- now things might be
03382              * inconsistent!
03383              */
03384             CommandCounterIncrement();
03385         }
03386 
03387         /* Reindex all the indexes. */
03388         doneIndexes = NIL;
03389         foreach(indexId, indexIds)
03390         {
03391             Oid         indexOid = lfirst_oid(indexId);
03392 
03393             if (is_pg_class)
03394                 RelationSetIndexList(rel, doneIndexes, InvalidOid);
03395 
03396             reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS));
03397 
03398             CommandCounterIncrement();
03399 
03400             /* Index should no longer be in the pending list */
03401             Assert(!ReindexIsProcessingIndex(indexOid));
03402 
03403             if (is_pg_class)
03404                 doneIndexes = lappend_oid(doneIndexes, indexOid);
03405         }
03406     }
03407     PG_CATCH();
03408     {
03409         /* Make sure list gets cleared on error exit */
03410         ResetReindexPending();
03411         PG_RE_THROW();
03412     }
03413     PG_END_TRY();
03414     ResetReindexPending();
03415 
03416     if (is_pg_class)
03417         RelationSetIndexList(rel, indexIds, ClassOidIndexId);
03418 
03419     /*
03420      * Close rel, but continue to hold the lock.
03421      */
03422     heap_close(rel, NoLock);
03423 
03424     result = (indexIds != NIL);
03425 
03426     /*
03427      * If the relation has a secondary toast rel, reindex that too while we
03428      * still hold the lock on the master table.
03429      */
03430     if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
03431         result |= reindex_relation(toast_relid, flags);
03432 
03433     return result;
03434 }
03435 
03436 
03437 /* ----------------------------------------------------------------
03438  *      System index reindexing support
03439  *
03440  * When we are busy reindexing a system index, this code provides support
03441  * for preventing catalog lookups from using that index.  We also make use
03442  * of this to catch attempted uses of user indexes during reindexing of
03443  * those indexes.
03444  * ----------------------------------------------------------------
03445  */
03446 
/* OID of the heap passed to SetReindexProcessing; InvalidOid when reset */
03447 static Oid  currentlyReindexedHeap = InvalidOid;
/* OID of the index passed to SetReindexProcessing; InvalidOid when reset */
03448 static Oid  currentlyReindexedIndex = InvalidOid;
/* Copy of the index OID list given to SetReindexPending; NIL when reset */
03449 static List *pendingReindexedIndexes = NIL;
03450 
03451 /*
03452  * ReindexIsProcessingHeap
03453  *      True if heap specified by OID is currently being reindexed.
03454  */
03455 bool
03456 ReindexIsProcessingHeap(Oid heapOid)
03457 {
03458     return heapOid == currentlyReindexedHeap;
03459 }
03460 
03461 /*
03462  * ReindexIsCurrentlyProcessingIndex
03463  *      True if index specified by OID is currently being reindexed.
03464  */
03465 static bool
03466 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
03467 {
03468     return indexOid == currentlyReindexedIndex;
03469 }
03470 
03471 /*
03472  * ReindexIsProcessingIndex
03473  *      True if index specified by OID is currently being reindexed,
03474  *      or should be treated as invalid because it is awaiting reindex.
03475  */
03476 bool
03477 ReindexIsProcessingIndex(Oid indexOid)
03478 {
03479     return indexOid == currentlyReindexedIndex ||
03480         list_member_oid(pendingReindexedIndexes, indexOid);
03481 }
03482 
03483 /*
03484  * SetReindexProcessing
03485  *      Set flag that specified heap/index are being reindexed.
03486  *
03487  * NB: caller must use a PG_TRY block to ensure ResetReindexProcessing is done.
03488  */
03489 static void
03490 SetReindexProcessing(Oid heapOid, Oid indexOid)
03491 {
03492     Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
03493     /* Reindexing is not re-entrant. */
03494     if (OidIsValid(currentlyReindexedHeap))
03495         elog(ERROR, "cannot reindex while reindexing");
03496     currentlyReindexedHeap = heapOid;
03497     currentlyReindexedIndex = indexOid;
03498     /* Index is no longer "pending" reindex. */
03499     RemoveReindexPending(indexOid);
03500 }
03501 
03502 /*
03503  * ResetReindexProcessing
03504  *      Unset reindexing status.
03505  */
03506 static void
03507 ResetReindexProcessing(void)
03508 {
03509     currentlyReindexedHeap = InvalidOid;
03510     currentlyReindexedIndex = InvalidOid;
03511 }
03512 
03513 /*
03514  * SetReindexPending
03515  *      Mark the given indexes as pending reindex.
03516  *
03517  * NB: caller must use a PG_TRY block to ensure ResetReindexPending is done.
03518  * Also, we assume that the current memory context stays valid throughout.
03519  */
03520 static void
03521 SetReindexPending(List *indexes)
03522 {
03523     /* Reindexing is not re-entrant. */
03524     if (pendingReindexedIndexes)
03525         elog(ERROR, "cannot reindex while reindexing");
03526     pendingReindexedIndexes = list_copy(indexes);
03527 }
03528 
03529 /*
03530  * RemoveReindexPending
03531  *      Remove the given index from the pending list.
03532  */
03533 static void
03534 RemoveReindexPending(Oid indexOid)
03535 {
03536     pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
03537                                               indexOid);
03538 }
03539 
03540 /*
03541  * ResetReindexPending
03542  *      Unset reindex-pending status.
03543  */
03544 static void
03545 ResetReindexPending(void)
03546 {
03547     pendingReindexedIndexes = NIL;
03548 }