Header And Logo

PostgreSQL
| The world's most advanced open source database.

btreefuncs.c

Go to the documentation of this file.
00001 /*
00002  * contrib/pageinspect/btreefuncs.c
00003  *
00004  *
00005  * btreefuncs.c
00006  *
00007  * Copyright (c) 2006 Satoshi Nagayasu <[email protected]>
00008  *
00009  * Permission to use, copy, modify, and distribute this software and
00010  * its documentation for any purpose, without fee, and without a
00011  * written agreement is hereby granted, provided that the above
00012  * copyright notice and this paragraph and the following two
00013  * paragraphs appear in all copies.
00014  *
00015  * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
00016  * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
00017  * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
00018  * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
00019  * OF THE POSSIBILITY OF SUCH DAMAGE.
00020  *
00021  * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
00022  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00023  * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
00024  * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
00025  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
00026  */
00027 
00028 #include "postgres.h"
00029 
00030 #include "access/nbtree.h"
00031 #include "catalog/namespace.h"
00032 #include "funcapi.h"
00033 #include "miscadmin.h"
00034 #include "utils/builtins.h"
00035 #include "utils/rel.h"
00036 
00037 
00038 extern Datum bt_metap(PG_FUNCTION_ARGS);
00039 extern Datum bt_page_items(PG_FUNCTION_ARGS);
00040 extern Datum bt_page_stats(PG_FUNCTION_ARGS);
00041 
00042 PG_FUNCTION_INFO_V1(bt_metap);
00043 PG_FUNCTION_INFO_V1(bt_page_items);
00044 PG_FUNCTION_INFO_V1(bt_page_stats);
00045 
/* True when relation "r" has relkind RELKIND_INDEX. */
#define IS_INDEX(r) (RELKIND_INDEX == (r)->rd_rel->relkind)
/* True when relation "r" uses the access method BTREE_AM_OID. */
#define IS_BTREE(r) (BTREE_AM_OID == (r)->rd_rel->relam)
00048 
/*
 * CHECK_PAGE_OFFSET_RANGE(pg, offnum)
 *
 * Raise an ERROR unless offnum lies within
 * [FirstOffsetNumber, PageGetMaxOffsetNumber(pg)].
 *
 * Wrapped in do { ... } while (0) so that the invocation plus its trailing
 * semicolon forms exactly one statement; a bare { ... } block would break
 * an unbraced if/else around the call.  Note that "offnum" is evaluated
 * twice, so do not pass an expression with side effects.
 */
#define CHECK_PAGE_OFFSET_RANGE(pg, offnum) \
    do { \
        if ( !(FirstOffsetNumber <= (offnum) && \
                        (offnum) <= PageGetMaxOffsetNumber(pg)) ) \
             elog(ERROR, "page offset number out of range"); \
    } while (0)
00053 
/*
 * CHECK_RELATION_BLOCK_RANGE(rel, blkno)
 *
 * Raise an ERROR unless blkno is smaller than the relation's current
 * number of blocks.
 *
 * note: BlockNumber is unsigned, hence can't be negative
 *
 * Wrapped in do { ... } while (0) so that the invocation plus its trailing
 * semicolon forms exactly one statement; a bare { ... } block would break
 * an unbraced if/else around the call.
 */
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) \
    do { \
        if ( RelationGetNumberOfBlocks(rel) <= (BlockNumber) (blkno) ) \
             elog(ERROR, "block number out of range"); \
    } while (0)
00058 
/* ------------------------------------------------
 * structure for single btree page statistics
 *
 * Filled in by GetBTPageStatistics() and turned into a result row by
 * bt_page_stats().
 * ------------------------------------------------
 */
typedef struct BTPageStat
{
    uint32      blkno;          /* block number the statistics refer to */
    uint32      live_items;     /* line pointers not marked dead */
    uint32      dead_items;     /* line pointers marked dead */
    uint32      page_size;      /* PageGetPageSize() of the page */
    uint32      max_avail;      /* pd_special minus SizeOfPageHeaderData */
    uint32      free_size;      /* PageGetFreeSpace() of the page */
    uint32      avg_item_size;  /* summed IndexTupleSize / item count, or 0 */
    char        type;           /* 'd' deleted, 'e' ignorable, 'l' leaf,
                                 * 'r' root, 'i' otherwise */

    /* opaque data */
    BlockNumber btpo_prev;      /* copied from the page's special space */
    BlockNumber btpo_next;      /* copied from the page's special space */
    union
    {
        uint32      level;      /* set when type is not 'd' */
        TransactionId xact;     /* set when type is 'd' */
    }           btpo;
    uint16      btpo_flags;     /* copied from the page's special space */
    BTCycleId   btpo_cycleid;   /* copied from the page's special space */
} BTPageStat;
00085 
00086 
00087 /* -------------------------------------------------
00088  * GetBTPageStatistics()
00089  *
00090  * Collect statistics of single b-tree page
00091  * -------------------------------------------------
00092  */
00093 static void
00094 GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
00095 {
00096     Page        page = BufferGetPage(buffer);
00097     PageHeader  phdr = (PageHeader) page;
00098     OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
00099     BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
00100     int         item_size = 0;
00101     int         off;
00102 
00103     stat->blkno = blkno;
00104 
00105     stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);
00106 
00107     stat->dead_items = stat->live_items = 0;
00108 
00109     stat->page_size = PageGetPageSize(page);
00110 
00111     /* page type (flags) */
00112     if (P_ISDELETED(opaque))
00113     {
00114         stat->type = 'd';
00115         stat->btpo.xact = opaque->btpo.xact;
00116         return;
00117     }
00118     else if (P_IGNORE(opaque))
00119         stat->type = 'e';
00120     else if (P_ISLEAF(opaque))
00121         stat->type = 'l';
00122     else if (P_ISROOT(opaque))
00123         stat->type = 'r';
00124     else
00125         stat->type = 'i';
00126 
00127     /* btpage opaque data */
00128     stat->btpo_prev = opaque->btpo_prev;
00129     stat->btpo_next = opaque->btpo_next;
00130     stat->btpo.level = opaque->btpo.level;
00131     stat->btpo_flags = opaque->btpo_flags;
00132     stat->btpo_cycleid = opaque->btpo_cycleid;
00133 
00134     /* count live and dead tuples, and free space */
00135     for (off = FirstOffsetNumber; off <= maxoff; off++)
00136     {
00137         IndexTuple  itup;
00138 
00139         ItemId      id = PageGetItemId(page, off);
00140 
00141         itup = (IndexTuple) PageGetItem(page, id);
00142 
00143         item_size += IndexTupleSize(itup);
00144 
00145         if (!ItemIdIsDead(id))
00146             stat->live_items++;
00147         else
00148             stat->dead_items++;
00149     }
00150     stat->free_size = PageGetFreeSpace(page);
00151 
00152     if ((stat->live_items + stat->dead_items) > 0)
00153         stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);
00154     else
00155         stat->avg_item_size = 0;
00156 }
00157 
00158 /* -----------------------------------------------
00159  * bt_page_stats()
00160  *
00161  * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
00162  * -----------------------------------------------
00163  */
00164 Datum
00165 bt_page_stats(PG_FUNCTION_ARGS)
00166 {
00167     text       *relname = PG_GETARG_TEXT_P(0);
00168     uint32      blkno = PG_GETARG_UINT32(1);
00169     Buffer      buffer;
00170     Relation    rel;
00171     RangeVar   *relrv;
00172     Datum       result;
00173     HeapTuple   tuple;
00174     TupleDesc   tupleDesc;
00175     int         j;
00176     char       *values[11];
00177     BTPageStat  stat;
00178 
00179     if (!superuser())
00180         ereport(ERROR,
00181                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
00182                  (errmsg("must be superuser to use pageinspect functions"))));
00183 
00184     relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
00185     rel = relation_openrv(relrv, AccessShareLock);
00186 
00187     if (!IS_INDEX(rel) || !IS_BTREE(rel))
00188         elog(ERROR, "relation \"%s\" is not a btree index",
00189              RelationGetRelationName(rel));
00190 
00191     /*
00192      * Reject attempts to read non-local temporary relations; we would be
00193      * likely to get wrong data since we have no visibility into the owning
00194      * session's local buffers.
00195      */
00196     if (RELATION_IS_OTHER_TEMP(rel))
00197         ereport(ERROR,
00198                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00199                  errmsg("cannot access temporary tables of other sessions")));
00200 
00201     if (blkno == 0)
00202         elog(ERROR, "block 0 is a meta page");
00203 
00204     CHECK_RELATION_BLOCK_RANGE(rel, blkno);
00205 
00206     buffer = ReadBuffer(rel, blkno);
00207     LockBuffer(buffer, BUFFER_LOCK_SHARE);
00208 
00209     /* keep compiler quiet */
00210     stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
00211     stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
00212 
00213     GetBTPageStatistics(blkno, buffer, &stat);
00214 
00215     UnlockReleaseBuffer(buffer);
00216     relation_close(rel, AccessShareLock);
00217 
00218     /* Build a tuple descriptor for our result type */
00219     if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
00220         elog(ERROR, "return type must be a row type");
00221 
00222     j = 0;
00223     values[j] = palloc(32);
00224     snprintf(values[j++], 32, "%d", stat.blkno);
00225     values[j] = palloc(32);
00226     snprintf(values[j++], 32, "%c", stat.type);
00227     values[j] = palloc(32);
00228     snprintf(values[j++], 32, "%d", stat.live_items);
00229     values[j] = palloc(32);
00230     snprintf(values[j++], 32, "%d", stat.dead_items);
00231     values[j] = palloc(32);
00232     snprintf(values[j++], 32, "%d", stat.avg_item_size);
00233     values[j] = palloc(32);
00234     snprintf(values[j++], 32, "%d", stat.page_size);
00235     values[j] = palloc(32);
00236     snprintf(values[j++], 32, "%d", stat.free_size);
00237     values[j] = palloc(32);
00238     snprintf(values[j++], 32, "%d", stat.btpo_prev);
00239     values[j] = palloc(32);
00240     snprintf(values[j++], 32, "%d", stat.btpo_next);
00241     values[j] = palloc(32);
00242     if (stat.type == 'd')
00243         snprintf(values[j++], 32, "%d", stat.btpo.xact);
00244     else
00245         snprintf(values[j++], 32, "%d", stat.btpo.level);
00246     values[j] = palloc(32);
00247     snprintf(values[j++], 32, "%d", stat.btpo_flags);
00248 
00249     tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
00250                                    values);
00251 
00252     result = HeapTupleGetDatum(tuple);
00253 
00254     PG_RETURN_DATUM(result);
00255 }
00256 
00257 /*-------------------------------------------------------
00258  * bt_page_items()
00259  *
00260  * Get IndexTupleData set in a btree page
00261  *
00262  * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
00263  *-------------------------------------------------------
00264  */
00265 
/*
 * cross-call data structure for SRF
 *
 * bt_page_items() allocates one of these on its first call and stores it in
 * the function call context's user_fctx for the following calls.
 */
struct user_args
{
    Page        page;           /* private BLCKSZ-sized copy of the page */
    OffsetNumber offset;        /* offset of the next item to return */
};
00274 
00275 Datum
00276 bt_page_items(PG_FUNCTION_ARGS)
00277 {
00278     text       *relname = PG_GETARG_TEXT_P(0);
00279     uint32      blkno = PG_GETARG_UINT32(1);
00280     Datum       result;
00281     char       *values[6];
00282     HeapTuple   tuple;
00283     FuncCallContext *fctx;
00284     MemoryContext mctx;
00285     struct user_args *uargs;
00286 
00287     if (!superuser())
00288         ereport(ERROR,
00289                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
00290                  (errmsg("must be superuser to use pageinspect functions"))));
00291 
00292     if (SRF_IS_FIRSTCALL())
00293     {
00294         RangeVar   *relrv;
00295         Relation    rel;
00296         Buffer      buffer;
00297         BTPageOpaque opaque;
00298         TupleDesc   tupleDesc;
00299 
00300         fctx = SRF_FIRSTCALL_INIT();
00301 
00302         relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
00303         rel = relation_openrv(relrv, AccessShareLock);
00304 
00305         if (!IS_INDEX(rel) || !IS_BTREE(rel))
00306             elog(ERROR, "relation \"%s\" is not a btree index",
00307                  RelationGetRelationName(rel));
00308 
00309         /*
00310          * Reject attempts to read non-local temporary relations; we would be
00311          * likely to get wrong data since we have no visibility into the
00312          * owning session's local buffers.
00313          */
00314         if (RELATION_IS_OTHER_TEMP(rel))
00315             ereport(ERROR,
00316                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00317                 errmsg("cannot access temporary tables of other sessions")));
00318 
00319         if (blkno == 0)
00320             elog(ERROR, "block 0 is a meta page");
00321 
00322         CHECK_RELATION_BLOCK_RANGE(rel, blkno);
00323 
00324         buffer = ReadBuffer(rel, blkno);
00325         LockBuffer(buffer, BUFFER_LOCK_SHARE);
00326 
00327         /*
00328          * We copy the page into local storage to avoid holding pin on the
00329          * buffer longer than we must, and possibly failing to release it at
00330          * all if the calling query doesn't fetch all rows.
00331          */
00332         mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
00333 
00334         uargs = palloc(sizeof(struct user_args));
00335 
00336         uargs->page = palloc(BLCKSZ);
00337         memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);
00338 
00339         UnlockReleaseBuffer(buffer);
00340         relation_close(rel, AccessShareLock);
00341 
00342         uargs->offset = FirstOffsetNumber;
00343 
00344         opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);
00345 
00346         if (P_ISDELETED(opaque))
00347             elog(NOTICE, "page is deleted");
00348 
00349         fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
00350 
00351         /* Build a tuple descriptor for our result type */
00352         if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
00353             elog(ERROR, "return type must be a row type");
00354 
00355         fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
00356 
00357         fctx->user_fctx = uargs;
00358 
00359         MemoryContextSwitchTo(mctx);
00360     }
00361 
00362     fctx = SRF_PERCALL_SETUP();
00363     uargs = fctx->user_fctx;
00364 
00365     if (fctx->call_cntr < fctx->max_calls)
00366     {
00367         ItemId      id;
00368         IndexTuple  itup;
00369         int         j;
00370         int         off;
00371         int         dlen;
00372         char       *dump;
00373         char       *ptr;
00374 
00375         id = PageGetItemId(uargs->page, uargs->offset);
00376 
00377         if (!ItemIdIsValid(id))
00378             elog(ERROR, "invalid ItemId");
00379 
00380         itup = (IndexTuple) PageGetItem(uargs->page, id);
00381 
00382         j = 0;
00383         values[j] = palloc(32);
00384         snprintf(values[j++], 32, "%d", uargs->offset);
00385         values[j] = palloc(32);
00386         snprintf(values[j++], 32, "(%u,%u)",
00387                  BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)),
00388                  itup->t_tid.ip_posid);
00389         values[j] = palloc(32);
00390         snprintf(values[j++], 32, "%d", (int) IndexTupleSize(itup));
00391         values[j] = palloc(32);
00392         snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 't' : 'f');
00393         values[j] = palloc(32);
00394         snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
00395 
00396         ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
00397         dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
00398         dump = palloc0(dlen * 3 + 1);
00399         values[j] = dump;
00400         for (off = 0; off < dlen; off++)
00401         {
00402             if (off > 0)
00403                 *dump++ = ' ';
00404             sprintf(dump, "%02x", *(ptr + off) & 0xff);
00405             dump += 2;
00406         }
00407 
00408         tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
00409         result = HeapTupleGetDatum(tuple);
00410 
00411         uargs->offset = uargs->offset + 1;
00412 
00413         SRF_RETURN_NEXT(fctx, result);
00414     }
00415     else
00416     {
00417         pfree(uargs->page);
00418         pfree(uargs);
00419         SRF_RETURN_DONE(fctx);
00420     }
00421 }
00422 
00423 
00424 /* ------------------------------------------------
00425  * bt_metap()
00426  *
00427  * Get a btree's meta-page information
00428  *
00429  * Usage: SELECT * FROM bt_metap('t1_pkey')
00430  * ------------------------------------------------
00431  */
00432 Datum
00433 bt_metap(PG_FUNCTION_ARGS)
00434 {
00435     text       *relname = PG_GETARG_TEXT_P(0);
00436     Datum       result;
00437     Relation    rel;
00438     RangeVar   *relrv;
00439     BTMetaPageData *metad;
00440     TupleDesc   tupleDesc;
00441     int         j;
00442     char       *values[6];
00443     Buffer      buffer;
00444     Page        page;
00445     HeapTuple   tuple;
00446 
00447     if (!superuser())
00448         ereport(ERROR,
00449                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
00450                  (errmsg("must be superuser to use pageinspect functions"))));
00451 
00452     relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
00453     rel = relation_openrv(relrv, AccessShareLock);
00454 
00455     if (!IS_INDEX(rel) || !IS_BTREE(rel))
00456         elog(ERROR, "relation \"%s\" is not a btree index",
00457              RelationGetRelationName(rel));
00458 
00459     /*
00460      * Reject attempts to read non-local temporary relations; we would be
00461      * likely to get wrong data since we have no visibility into the owning
00462      * session's local buffers.
00463      */
00464     if (RELATION_IS_OTHER_TEMP(rel))
00465         ereport(ERROR,
00466                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
00467                  errmsg("cannot access temporary tables of other sessions")));
00468 
00469     buffer = ReadBuffer(rel, 0);
00470     LockBuffer(buffer, BUFFER_LOCK_SHARE);
00471 
00472     page = BufferGetPage(buffer);
00473     metad = BTPageGetMeta(page);
00474 
00475     /* Build a tuple descriptor for our result type */
00476     if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
00477         elog(ERROR, "return type must be a row type");
00478 
00479     j = 0;
00480     values[j] = palloc(32);
00481     snprintf(values[j++], 32, "%d", metad->btm_magic);
00482     values[j] = palloc(32);
00483     snprintf(values[j++], 32, "%d", metad->btm_version);
00484     values[j] = palloc(32);
00485     snprintf(values[j++], 32, "%d", metad->btm_root);
00486     values[j] = palloc(32);
00487     snprintf(values[j++], 32, "%d", metad->btm_level);
00488     values[j] = palloc(32);
00489     snprintf(values[j++], 32, "%d", metad->btm_fastroot);
00490     values[j] = palloc(32);
00491     snprintf(values[j++], 32, "%d", metad->btm_fastlevel);
00492 
00493     tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
00494                                    values);
00495 
00496     result = HeapTupleGetDatum(tuple);
00497 
00498     UnlockReleaseBuffer(buffer);
00499     relation_close(rel, AccessShareLock);
00500 
00501     PG_RETURN_DATUM(result);
00502 }