Header And Logo

PostgreSQL
| The world's most advanced open source database.

pg_stat_statements.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * pg_stat_statements.c
00004  *      Track statement execution times across a whole database cluster.
00005  *
00006  * Execution costs are totalled for each distinct source query, and kept in
00007  * a shared hashtable.  (We track only as many distinct queries as will fit
00008  * in the designated amount of shared memory.)
00009  *
00010  * As of Postgres 9.2, this module normalizes query entries.  Normalization
00011  * is a process whereby similar queries, typically differing only in their
00012  * constants (though the exact rules are somewhat more subtle than that) are
00013  * recognized as equivalent, and are tracked as a single entry.  This is
00014  * particularly useful for non-prepared queries.
00015  *
00016  * Normalization is implemented by fingerprinting queries, selectively
00017  * serializing those fields of each query tree's nodes that are judged to be
00018  * essential to the query.  This is referred to as a query jumble.  This is
00019  * distinct from a regular serialization in that various extraneous
00020  * information is ignored as irrelevant or not essential to the query, such
00021  * as the collations of Vars and, most notably, the values of constants.
00022  *
00023  * This jumble is acquired at the end of parse analysis of each query, and
00024  * a 32-bit hash of it is stored into the query's Query.queryId field.
00025  * The server then copies this value around, making it available in plan
00026  * tree(s) generated from the query.  The executor can then use this value
00027  * to blame query costs on the proper queryId.
00028  *
00029  * Note about locking issues: to create or delete an entry in the shared
00030  * hashtable, one must hold pgss->lock exclusively.  Modifying any field
00031  * in an entry except the counters requires the same.  To look up an entry,
00032  * one must hold the lock shared.  To read or update the counters within
00033  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
00034  * disappear!) and also take the entry's mutex spinlock.
00035  *
00036  *
00037  * Copyright (c) 2008-2013, PostgreSQL Global Development Group
00038  *
00039  * IDENTIFICATION
00040  *    contrib/pg_stat_statements/pg_stat_statements.c
00041  *
00042  *-------------------------------------------------------------------------
00043  */
00044 #include "postgres.h"
00045 
00046 #include <unistd.h>
00047 
00048 #include "access/hash.h"
00049 #include "executor/instrument.h"
00050 #include "funcapi.h"
00051 #include "mb/pg_wchar.h"
00052 #include "miscadmin.h"
00053 #include "parser/analyze.h"
00054 #include "parser/parsetree.h"
00055 #include "parser/scanner.h"
00056 #include "pgstat.h"
00057 #include "storage/fd.h"
00058 #include "storage/ipc.h"
00059 #include "storage/spin.h"
00060 #include "tcop/utility.h"
00061 #include "utils/builtins.h"
00062 
00063 
00064 PG_MODULE_MAGIC;
00065 
00066 /* Location of stats file */
00067 #define PGSS_DUMP_FILE  "global/pg_stat_statements.stat"
00068 
00069 /* This constant defines the magic number in the stats file header */
00070 static const uint32 PGSS_FILE_HEADER = 0x20120328;
00071 
00072 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
00073 #define USAGE_EXEC(duration)    (1.0)
00074 #define USAGE_INIT              (1.0)   /* including initial planning */
00075 #define ASSUMED_MEDIAN_INIT     (10.0)  /* initial assumed median usage */
00076 #define USAGE_DECREASE_FACTOR   (0.99)  /* decreased every entry_dealloc */
00077 #define STICKY_DECREASE_FACTOR  (0.50)  /* factor for sticky entries */
00078 #define USAGE_DEALLOC_PERCENT   5       /* free this % of entries at once */
00079 
00080 #define JUMBLE_SIZE             1024    /* query serialization buffer size */
00081 
00082 /*
00083  * Hashtable key that defines the identity of a hashtable entry.  We separate
00084  * queries by user and by database even if they are otherwise identical.
00085  *
00086  * Presently, the query encoding is fully determined by the source database
00087  * and so we don't really need it to be in the key.  But that might not always
00088  * be true. Anyway it's notationally convenient to pass it as part of the key.
00089  */
00090 typedef struct pgssHashKey
00091 {
00092     Oid         userid;         /* user OID */
00093     Oid         dbid;           /* database OID */
00094     int         encoding;       /* query encoding */
00095     uint32      queryid;        /* query identifier */
00096 } pgssHashKey;
00097 
00098 /*
00099  * The actual stats counters kept within pgssEntry.
00100  */
00101 typedef struct Counters
00102 {
00103     int64       calls;          /* # of times executed */
00104     double      total_time;     /* total execution time, in msec */
00105     int64       rows;           /* total # of retrieved or affected rows */
00106     int64       shared_blks_hit;    /* # of shared buffer hits */
00107     int64       shared_blks_read;       /* # of shared disk blocks read */
00108     int64       shared_blks_dirtied;    /* # of shared disk blocks dirtied */
00109     int64       shared_blks_written;    /* # of shared disk blocks written */
00110     int64       local_blks_hit; /* # of local buffer hits */
00111     int64       local_blks_read;    /* # of local disk blocks read */
00112     int64       local_blks_dirtied;     /* # of local disk blocks dirtied */
00113     int64       local_blks_written;     /* # of local disk blocks written */
00114     int64       temp_blks_read; /* # of temp blocks read */
00115     int64       temp_blks_written;      /* # of temp blocks written */
00116     double      blk_read_time;  /* time spent reading, in msec */
00117     double      blk_write_time; /* time spent writing, in msec */
00118     double      usage;          /* usage factor */
00119 } Counters;
00120 
00121 /*
00122  * Statistics per statement
00123  *
00124  * NB: see the file read/write code before changing field order here.
00125  */
00126 typedef struct pgssEntry
00127 {
00128     pgssHashKey key;            /* hash key of entry - MUST BE FIRST */
00129     Counters    counters;       /* the statistics for this query */
00130     int         query_len;      /* # of valid bytes in query string */
00131     slock_t     mutex;          /* protects the counters only */
00132     char        query[1];       /* VARIABLE LENGTH ARRAY - MUST BE LAST */
00133     /* Note: the allocated length of query[] is actually pgss->query_size */
00134 } pgssEntry;
00135 
00136 /*
00137  * Global shared state
00138  */
00139 typedef struct pgssSharedState
00140 {
00141     LWLockId    lock;           /* protects hashtable search/modification */
00142     int         query_size;     /* max query length in bytes */
00143     double      cur_median_usage;       /* current median usage in hashtable */
00144 } pgssSharedState;
00145 
/*
 * Location and length of one constant, as tracked during normalization
 */
typedef struct pgssLocationLen
{
    /* start offset in query text */
    int         location;

    /* length in bytes, or -1 to ignore */
    int         length;
} pgssLocationLen;
00154 
00155 /*
00156  * Working state for computing a query jumble and producing a normalized
00157  * query string
00158  */
00159 typedef struct pgssJumbleState
00160 {
00161     /* Jumble of current query tree */
00162     unsigned char *jumble;
00163 
00164     /* Number of bytes used in jumble[] */
00165     Size        jumble_len;
00166 
00167     /* Array of locations of constants that should be removed */
00168     pgssLocationLen *clocations;
00169 
00170     /* Allocated length of clocations array */
00171     int         clocations_buf_size;
00172 
00173     /* Current number of valid entries in clocations array */
00174     int         clocations_count;
00175 } pgssJumbleState;
00176 
00177 /*---- Local variables ----*/
00178 
00179 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
00180 static int  nested_level = 0;
00181 
00182 /* Saved hook values in case of unload */
00183 static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
00184 static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
00185 static ExecutorStart_hook_type prev_ExecutorStart = NULL;
00186 static ExecutorRun_hook_type prev_ExecutorRun = NULL;
00187 static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
00188 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
00189 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
00190 
00191 /* Links to shared memory state */
00192 static pgssSharedState *pgss = NULL;
00193 static HTAB *pgss_hash = NULL;
00194 
00195 /*---- GUC variables ----*/
00196 
00197 typedef enum
00198 {
00199     PGSS_TRACK_NONE,            /* track no statements */
00200     PGSS_TRACK_TOP,             /* only top level statements */
00201     PGSS_TRACK_ALL              /* all statements, including nested ones */
00202 }   PGSSTrackLevel;
00203 
00204 static const struct config_enum_entry track_options[] =
00205 {
00206     {"none", PGSS_TRACK_NONE, false},
00207     {"top", PGSS_TRACK_TOP, false},
00208     {"all", PGSS_TRACK_ALL, false},
00209     {NULL, 0, false}
00210 };
00211 
00212 static int  pgss_max;           /* max # statements to track */
00213 static int  pgss_track;         /* tracking level */
00214 static bool pgss_track_utility; /* whether to track utility commands */
00215 static bool pgss_save;          /* whether to save stats across shutdown */
00216 
00217 
/*
 * True when a statement at the current nesting depth should be tracked:
 * always under PGSS_TRACK_ALL, and only at nesting depth zero under
 * PGSS_TRACK_TOP.
 */
#define pgss_enabled() \
    (pgss_track == PGSS_TRACK_ALL || \
     (nested_level == 0 && pgss_track == PGSS_TRACK_TOP))
00221 
00222 /*---- Function declarations ----*/
00223 
00224 void        _PG_init(void);
00225 void        _PG_fini(void);
00226 
00227 Datum       pg_stat_statements_reset(PG_FUNCTION_ARGS);
00228 Datum       pg_stat_statements(PG_FUNCTION_ARGS);
00229 
00230 PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
00231 PG_FUNCTION_INFO_V1(pg_stat_statements);
00232 
00233 static void pgss_shmem_startup(void);
00234 static void pgss_shmem_shutdown(int code, Datum arg);
00235 static void pgss_post_parse_analyze(ParseState *pstate, Query *query);
00236 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
00237 static void pgss_ExecutorRun(QueryDesc *queryDesc,
00238                  ScanDirection direction,
00239                  long count);
00240 static void pgss_ExecutorFinish(QueryDesc *queryDesc);
00241 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
00242 static void pgss_ProcessUtility(Node *parsetree, const char *queryString,
00243                     ProcessUtilityContext context, ParamListInfo params,
00244                     DestReceiver *dest, char *completionTag);
00245 static uint32 pgss_hash_fn(const void *key, Size keysize);
00246 static int  pgss_match_fn(const void *key1, const void *key2, Size keysize);
00247 static uint32 pgss_hash_string(const char *str);
00248 static void pgss_store(const char *query, uint32 queryId,
00249            double total_time, uint64 rows,
00250            const BufferUsage *bufusage,
00251            pgssJumbleState *jstate);
00252 static Size pgss_memsize(void);
00253 static pgssEntry *entry_alloc(pgssHashKey *key, const char *query,
00254             int query_len, bool sticky);
00255 static void entry_dealloc(void);
00256 static void entry_reset(void);
00257 static void AppendJumble(pgssJumbleState *jstate,
00258              const unsigned char *item, Size size);
00259 static void JumbleQuery(pgssJumbleState *jstate, Query *query);
00260 static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
00261 static void JumbleExpr(pgssJumbleState *jstate, Node *node);
00262 static void RecordConstLocation(pgssJumbleState *jstate, int location);
00263 static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
00264                           int *query_len_p, int encoding);
00265 static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
00266 static int  comp_location(const void *a, const void *b);
00267 
00268 
00269 /*
00270  * Module load callback
00271  */
00272 void
00273 _PG_init(void)
00274 {
00275     /*
00276      * In order to create our shared memory area, we have to be loaded via
00277      * shared_preload_libraries.  If not, fall out without hooking into any of
00278      * the main system.  (We don't throw error here because it seems useful to
00279      * allow the pg_stat_statements functions to be created even when the
00280      * module isn't active.  The functions must protect themselves against
00281      * being called then, however.)
00282      */
00283     if (!process_shared_preload_libraries_in_progress)
00284         return;
00285 
00286     /*
00287      * Define (or redefine) custom GUC variables.
00288      */
00289     DefineCustomIntVariable("pg_stat_statements.max",
00290       "Sets the maximum number of statements tracked by pg_stat_statements.",
00291                             NULL,
00292                             &pgss_max,
00293                             1000,
00294                             100,
00295                             INT_MAX,
00296                             PGC_POSTMASTER,
00297                             0,
00298                             NULL,
00299                             NULL,
00300                             NULL);
00301 
00302     DefineCustomEnumVariable("pg_stat_statements.track",
00303                "Selects which statements are tracked by pg_stat_statements.",
00304                              NULL,
00305                              &pgss_track,
00306                              PGSS_TRACK_TOP,
00307                              track_options,
00308                              PGC_SUSET,
00309                              0,
00310                              NULL,
00311                              NULL,
00312                              NULL);
00313 
00314     DefineCustomBoolVariable("pg_stat_statements.track_utility",
00315        "Selects whether utility commands are tracked by pg_stat_statements.",
00316                              NULL,
00317                              &pgss_track_utility,
00318                              true,
00319                              PGC_SUSET,
00320                              0,
00321                              NULL,
00322                              NULL,
00323                              NULL);
00324 
00325     DefineCustomBoolVariable("pg_stat_statements.save",
00326                "Save pg_stat_statements statistics across server shutdowns.",
00327                              NULL,
00328                              &pgss_save,
00329                              true,
00330                              PGC_SIGHUP,
00331                              0,
00332                              NULL,
00333                              NULL,
00334                              NULL);
00335 
00336     EmitWarningsOnPlaceholders("pg_stat_statements");
00337 
00338     /*
00339      * Request additional shared resources.  (These are no-ops if we're not in
00340      * the postmaster process.)  We'll allocate or attach to the shared
00341      * resources in pgss_shmem_startup().
00342      */
00343     RequestAddinShmemSpace(pgss_memsize());
00344     RequestAddinLWLocks(1);
00345 
00346     /*
00347      * Install hooks.
00348      */
00349     prev_shmem_startup_hook = shmem_startup_hook;
00350     shmem_startup_hook = pgss_shmem_startup;
00351     prev_post_parse_analyze_hook = post_parse_analyze_hook;
00352     post_parse_analyze_hook = pgss_post_parse_analyze;
00353     prev_ExecutorStart = ExecutorStart_hook;
00354     ExecutorStart_hook = pgss_ExecutorStart;
00355     prev_ExecutorRun = ExecutorRun_hook;
00356     ExecutorRun_hook = pgss_ExecutorRun;
00357     prev_ExecutorFinish = ExecutorFinish_hook;
00358     ExecutorFinish_hook = pgss_ExecutorFinish;
00359     prev_ExecutorEnd = ExecutorEnd_hook;
00360     ExecutorEnd_hook = pgss_ExecutorEnd;
00361     prev_ProcessUtility = ProcessUtility_hook;
00362     ProcessUtility_hook = pgss_ProcessUtility;
00363 }
00364 
00365 /*
00366  * Module unload callback
00367  */
00368 void
00369 _PG_fini(void)
00370 {
00371     /* Uninstall hooks. */
00372     shmem_startup_hook = prev_shmem_startup_hook;
00373     post_parse_analyze_hook = prev_post_parse_analyze_hook;
00374     ExecutorStart_hook = prev_ExecutorStart;
00375     ExecutorRun_hook = prev_ExecutorRun;
00376     ExecutorFinish_hook = prev_ExecutorFinish;
00377     ExecutorEnd_hook = prev_ExecutorEnd;
00378     ProcessUtility_hook = prev_ProcessUtility;
00379 }
00380 
00381 /*
00382  * shmem_startup hook: allocate or attach to shared memory,
00383  * then load any pre-existing statistics from file.
00384  */
00385 static void
00386 pgss_shmem_startup(void)
00387 {
00388     bool        found;
00389     HASHCTL     info;
00390     FILE       *file;
00391     uint32      header;
00392     int32       num;
00393     int32       i;
00394     int         query_size;
00395     int         buffer_size;
00396     char       *buffer = NULL;
00397 
00398     if (prev_shmem_startup_hook)
00399         prev_shmem_startup_hook();
00400 
00401     /* reset in case this is a restart within the postmaster */
00402     pgss = NULL;
00403     pgss_hash = NULL;
00404 
00405     /*
00406      * Create or attach to the shared memory state, including hash table
00407      */
00408     LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
00409 
00410     pgss = ShmemInitStruct("pg_stat_statements",
00411                            sizeof(pgssSharedState),
00412                            &found);
00413 
00414     if (!found)
00415     {
00416         /* First time through ... */
00417         pgss->lock = LWLockAssign();
00418         pgss->query_size = pgstat_track_activity_query_size;
00419         pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
00420     }
00421 
00422     /* Be sure everyone agrees on the hash table entry size */
00423     query_size = pgss->query_size;
00424 
00425     memset(&info, 0, sizeof(info));
00426     info.keysize = sizeof(pgssHashKey);
00427     info.entrysize = offsetof(pgssEntry, query) +query_size;
00428     info.hash = pgss_hash_fn;
00429     info.match = pgss_match_fn;
00430     pgss_hash = ShmemInitHash("pg_stat_statements hash",
00431                               pgss_max, pgss_max,
00432                               &info,
00433                               HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
00434 
00435     LWLockRelease(AddinShmemInitLock);
00436 
00437     /*
00438      * If we're in the postmaster (or a standalone backend...), set up a shmem
00439      * exit hook to dump the statistics to disk.
00440      */
00441     if (!IsUnderPostmaster)
00442         on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
00443 
00444     /*
00445      * Attempt to load old statistics from the dump file, if this is the first
00446      * time through and we weren't told not to.
00447      */
00448     if (found || !pgss_save)
00449         return;
00450 
00451     /*
00452      * Note: we don't bother with locks here, because there should be no other
00453      * processes running when this code is reached.
00454      */
00455     file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
00456     if (file == NULL)
00457     {
00458         if (errno == ENOENT)
00459             return;             /* ignore not-found error */
00460         goto error;
00461     }
00462 
00463     buffer_size = query_size;
00464     buffer = (char *) palloc(buffer_size);
00465 
00466     if (fread(&header, sizeof(uint32), 1, file) != 1 ||
00467         header != PGSS_FILE_HEADER ||
00468         fread(&num, sizeof(int32), 1, file) != 1)
00469         goto error;
00470 
00471     for (i = 0; i < num; i++)
00472     {
00473         pgssEntry   temp;
00474         pgssEntry  *entry;
00475 
00476         if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1)
00477             goto error;
00478 
00479         /* Encoding is the only field we can easily sanity-check */
00480         if (!PG_VALID_BE_ENCODING(temp.key.encoding))
00481             goto error;
00482 
00483         /* Previous incarnation might have had a larger query_size */
00484         if (temp.query_len >= buffer_size)
00485         {
00486             buffer = (char *) repalloc(buffer, temp.query_len + 1);
00487             buffer_size = temp.query_len + 1;
00488         }
00489 
00490         if (fread(buffer, 1, temp.query_len, file) != temp.query_len)
00491             goto error;
00492         buffer[temp.query_len] = '\0';
00493 
00494         /* Skip loading "sticky" entries */
00495         if (temp.counters.calls == 0)
00496             continue;
00497 
00498         /* Clip to available length if needed */
00499         if (temp.query_len >= query_size)
00500             temp.query_len = pg_encoding_mbcliplen(temp.key.encoding,
00501                                                    buffer,
00502                                                    temp.query_len,
00503                                                    query_size - 1);
00504 
00505         /* make the hashtable entry (discards old entries if too many) */
00506         entry = entry_alloc(&temp.key, buffer, temp.query_len, false);
00507 
00508         /* copy in the actual stats */
00509         entry->counters = temp.counters;
00510     }
00511 
00512     pfree(buffer);
00513     FreeFile(file);
00514 
00515     /*
00516      * Remove the file so it's not included in backups/replication slaves,
00517      * etc. A new file will be written on next shutdown.
00518      */
00519     unlink(PGSS_DUMP_FILE);
00520 
00521     return;
00522 
00523 error:
00524     ereport(LOG,
00525             (errcode_for_file_access(),
00526              errmsg("could not read pg_stat_statement file \"%s\": %m",
00527                     PGSS_DUMP_FILE)));
00528     if (buffer)
00529         pfree(buffer);
00530     if (file)
00531         FreeFile(file);
00532     /* If possible, throw away the bogus file; ignore any error */
00533     unlink(PGSS_DUMP_FILE);
00534 }
00535 
00536 /*
00537  * shmem_shutdown hook: Dump statistics into file.
00538  *
00539  * Note: we don't bother with acquiring lock, because there should be no
00540  * other processes running when this is called.
00541  */
00542 static void
00543 pgss_shmem_shutdown(int code, Datum arg)
00544 {
00545     FILE       *file;
00546     HASH_SEQ_STATUS hash_seq;
00547     int32       num_entries;
00548     pgssEntry  *entry;
00549 
00550     /* Don't try to dump during a crash. */
00551     if (code)
00552         return;
00553 
00554     /* Safety check ... shouldn't get here unless shmem is set up. */
00555     if (!pgss || !pgss_hash)
00556         return;
00557 
00558     /* Don't dump if told not to. */
00559     if (!pgss_save)
00560         return;
00561 
00562     file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
00563     if (file == NULL)
00564         goto error;
00565 
00566     if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
00567         goto error;
00568     num_entries = hash_get_num_entries(pgss_hash);
00569     if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
00570         goto error;
00571 
00572     hash_seq_init(&hash_seq, pgss_hash);
00573     while ((entry = hash_seq_search(&hash_seq)) != NULL)
00574     {
00575         int         len = entry->query_len;
00576 
00577         if (fwrite(entry, offsetof(pgssEntry, mutex), 1, file) != 1 ||
00578             fwrite(entry->query, 1, len, file) != len)
00579             goto error;
00580     }
00581 
00582     if (FreeFile(file))
00583     {
00584         file = NULL;
00585         goto error;
00586     }
00587 
00588     /*
00589      * Rename file into place, so we atomically replace the old one.
00590      */
00591     if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
00592         ereport(LOG,
00593                 (errcode_for_file_access(),
00594                  errmsg("could not rename pg_stat_statement file \"%s\": %m",
00595                         PGSS_DUMP_FILE ".tmp")));
00596 
00597     return;
00598 
00599 error:
00600     ereport(LOG,
00601             (errcode_for_file_access(),
00602              errmsg("could not write pg_stat_statement file \"%s\": %m",
00603                     PGSS_DUMP_FILE ".tmp")));
00604     if (file)
00605         FreeFile(file);
00606     unlink(PGSS_DUMP_FILE ".tmp");
00607 }
00608 
00609 /*
00610  * Post-parse-analysis hook: mark query with a queryId
00611  */
00612 static void
00613 pgss_post_parse_analyze(ParseState *pstate, Query *query)
00614 {
00615     pgssJumbleState jstate;
00616 
00617     /* Assert we didn't do this already */
00618     Assert(query->queryId == 0);
00619 
00620     /* Safety check... */
00621     if (!pgss || !pgss_hash)
00622         return;
00623 
00624     /*
00625      * Utility statements get queryId zero.  We do this even in cases where
00626      * the statement contains an optimizable statement for which a queryId
00627      * could be derived (such as EXPLAIN or DECLARE CURSOR).  For such cases,
00628      * runtime control will first go through ProcessUtility and then the
00629      * executor, and we don't want the executor hooks to do anything, since we
00630      * are already measuring the statement's costs at the utility level.
00631      */
00632     if (query->utilityStmt)
00633     {
00634         query->queryId = 0;
00635         return;
00636     }
00637 
00638     /* Set up workspace for query jumbling */
00639     jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE);
00640     jstate.jumble_len = 0;
00641     jstate.clocations_buf_size = 32;
00642     jstate.clocations = (pgssLocationLen *)
00643         palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
00644     jstate.clocations_count = 0;
00645 
00646     /* Compute query ID and mark the Query node with it */
00647     JumbleQuery(&jstate, query);
00648     query->queryId = hash_any(jstate.jumble, jstate.jumble_len);
00649 
00650     /*
00651      * If we are unlucky enough to get a hash of zero, use 1 instead, to
00652      * prevent confusion with the utility-statement case.
00653      */
00654     if (query->queryId == 0)
00655         query->queryId = 1;
00656 
00657     /*
00658      * If we were able to identify any ignorable constants, we immediately
00659      * create a hash table entry for the query, so that we can record the
00660      * normalized form of the query string.  If there were no such constants,
00661      * the normalized string would be the same as the query text anyway, so
00662      * there's no need for an early entry.
00663      */
00664     if (jstate.clocations_count > 0)
00665         pgss_store(pstate->p_sourcetext,
00666                    query->queryId,
00667                    0,
00668                    0,
00669                    NULL,
00670                    &jstate);
00671 }
00672 
00673 /*
00674  * ExecutorStart hook: start up tracking if needed
00675  */
00676 static void
00677 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
00678 {
00679     if (prev_ExecutorStart)
00680         prev_ExecutorStart(queryDesc, eflags);
00681     else
00682         standard_ExecutorStart(queryDesc, eflags);
00683 
00684     /*
00685      * If query has queryId zero, don't track it.  This prevents double
00686      * counting of optimizable statements that are directly contained in
00687      * utility statements.
00688      */
00689     if (pgss_enabled() && queryDesc->plannedstmt->queryId != 0)
00690     {
00691         /*
00692          * Set up to track total elapsed time in ExecutorRun.  Make sure the
00693          * space is allocated in the per-query context so it will go away at
00694          * ExecutorEnd.
00695          */
00696         if (queryDesc->totaltime == NULL)
00697         {
00698             MemoryContext oldcxt;
00699 
00700             oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
00701             queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
00702             MemoryContextSwitchTo(oldcxt);
00703         }
00704     }
00705 }
00706 
00707 /*
00708  * ExecutorRun hook: all we need do is track nesting depth
00709  */
00710 static void
00711 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
00712 {
00713     nested_level++;
00714     PG_TRY();
00715     {
00716         if (prev_ExecutorRun)
00717             prev_ExecutorRun(queryDesc, direction, count);
00718         else
00719             standard_ExecutorRun(queryDesc, direction, count);
00720         nested_level--;
00721     }
00722     PG_CATCH();
00723     {
00724         nested_level--;
00725         PG_RE_THROW();
00726     }
00727     PG_END_TRY();
00728 }
00729 
00730 /*
00731  * ExecutorFinish hook: all we need do is track nesting depth
00732  */
00733 static void
00734 pgss_ExecutorFinish(QueryDesc *queryDesc)
00735 {
00736     nested_level++;
00737     PG_TRY();
00738     {
00739         if (prev_ExecutorFinish)
00740             prev_ExecutorFinish(queryDesc);
00741         else
00742             standard_ExecutorFinish(queryDesc);
00743         nested_level--;
00744     }
00745     PG_CATCH();
00746     {
00747         nested_level--;
00748         PG_RE_THROW();
00749     }
00750     PG_END_TRY();
00751 }
00752 
00753 /*
00754  * ExecutorEnd hook: store results if needed
00755  */
00756 static void
00757 pgss_ExecutorEnd(QueryDesc *queryDesc)
00758 {
00759     uint32      queryId = queryDesc->plannedstmt->queryId;
00760 
00761     if (queryId != 0 && queryDesc->totaltime && pgss_enabled())
00762     {
00763         /*
00764          * Make sure stats accumulation is done.  (Note: it's okay if several
00765          * levels of hook all do this.)
00766          */
00767         InstrEndLoop(queryDesc->totaltime);
00768 
00769         pgss_store(queryDesc->sourceText,
00770                    queryId,
00771                    queryDesc->totaltime->total * 1000.0,        /* convert to msec */
00772                    queryDesc->estate->es_processed,
00773                    &queryDesc->totaltime->bufusage,
00774                    NULL);
00775     }
00776 
00777     if (prev_ExecutorEnd)
00778         prev_ExecutorEnd(queryDesc);
00779     else
00780         standard_ExecutorEnd(queryDesc);
00781 }
00782 
00783 /*
00784  * ProcessUtility hook
00785  */
00786 static void
00787 pgss_ProcessUtility(Node *parsetree, const char *queryString,
00788                     ProcessUtilityContext context, ParamListInfo params,
00789                     DestReceiver *dest, char *completionTag)
00790 {
00791     /*
00792      * If it's an EXECUTE statement, we don't track it and don't increment the
00793      * nesting level.  This allows the cycles to be charged to the underlying
00794      * PREPARE instead (by the Executor hooks), which is much more useful.
00795      *
00796      * We also don't track execution of PREPARE.  If we did, we would get one
00797      * hash table entry for the PREPARE (with hash calculated from the query
00798      * string), and then a different one with the same query string (but hash
00799      * calculated from the query tree) would be used to accumulate costs of
00800      * ensuing EXECUTEs.  This would be confusing, and inconsistent with other
00801      * cases where planning time is not included at all.
00802      */
00803     if (pgss_track_utility && pgss_enabled() &&
00804         !IsA(parsetree, ExecuteStmt) &&
00805         !IsA(parsetree, PrepareStmt))
00806     {
00807         instr_time  start;
00808         instr_time  duration;
00809         uint64      rows = 0;
00810         BufferUsage bufusage_start,
00811                     bufusage;
00812         uint32      queryId;
00813 
00814         bufusage_start = pgBufferUsage;
00815         INSTR_TIME_SET_CURRENT(start);
00816 
00817         nested_level++;
00818         PG_TRY();
00819         {
00820             if (prev_ProcessUtility)
00821                 prev_ProcessUtility(parsetree, queryString,
00822                                     context, params,
00823                                     dest, completionTag);
00824             else
00825                 standard_ProcessUtility(parsetree, queryString,
00826                                         context, params,
00827                                         dest, completionTag);
00828             nested_level--;
00829         }
00830         PG_CATCH();
00831         {
00832             nested_level--;
00833             PG_RE_THROW();
00834         }
00835         PG_END_TRY();
00836 
00837         INSTR_TIME_SET_CURRENT(duration);
00838         INSTR_TIME_SUBTRACT(duration, start);
00839 
00840         /* parse command tag to retrieve the number of affected rows. */
00841         if (completionTag &&
00842             sscanf(completionTag, "COPY " UINT64_FORMAT, &rows) != 1)
00843             rows = 0;
00844 
00845         /* calc differences of buffer counters. */
00846         bufusage.shared_blks_hit =
00847             pgBufferUsage.shared_blks_hit - bufusage_start.shared_blks_hit;
00848         bufusage.shared_blks_read =
00849             pgBufferUsage.shared_blks_read - bufusage_start.shared_blks_read;
00850         bufusage.shared_blks_dirtied =
00851             pgBufferUsage.shared_blks_dirtied - bufusage_start.shared_blks_dirtied;
00852         bufusage.shared_blks_written =
00853             pgBufferUsage.shared_blks_written - bufusage_start.shared_blks_written;
00854         bufusage.local_blks_hit =
00855             pgBufferUsage.local_blks_hit - bufusage_start.local_blks_hit;
00856         bufusage.local_blks_read =
00857             pgBufferUsage.local_blks_read - bufusage_start.local_blks_read;
00858         bufusage.local_blks_dirtied =
00859             pgBufferUsage.local_blks_dirtied - bufusage_start.local_blks_dirtied;
00860         bufusage.local_blks_written =
00861             pgBufferUsage.local_blks_written - bufusage_start.local_blks_written;
00862         bufusage.temp_blks_read =
00863             pgBufferUsage.temp_blks_read - bufusage_start.temp_blks_read;
00864         bufusage.temp_blks_written =
00865             pgBufferUsage.temp_blks_written - bufusage_start.temp_blks_written;
00866         bufusage.blk_read_time = pgBufferUsage.blk_read_time;
00867         INSTR_TIME_SUBTRACT(bufusage.blk_read_time, bufusage_start.blk_read_time);
00868         bufusage.blk_write_time = pgBufferUsage.blk_write_time;
00869         INSTR_TIME_SUBTRACT(bufusage.blk_write_time, bufusage_start.blk_write_time);
00870 
00871         /* For utility statements, we just hash the query string directly */
00872         queryId = pgss_hash_string(queryString);
00873 
00874         pgss_store(queryString,
00875                    queryId,
00876                    INSTR_TIME_GET_MILLISEC(duration),
00877                    rows,
00878                    &bufusage,
00879                    NULL);
00880     }
00881     else
00882     {
00883         if (prev_ProcessUtility)
00884             prev_ProcessUtility(parsetree, queryString,
00885                                 context, params,
00886                                 dest, completionTag);
00887         else
00888             standard_ProcessUtility(parsetree, queryString,
00889                                     context, params,
00890                                     dest, completionTag);
00891     }
00892 }
00893 
00894 /*
00895  * Calculate hash value for a key
00896  */
00897 static uint32
00898 pgss_hash_fn(const void *key, Size keysize)
00899 {
00900     const pgssHashKey *k = (const pgssHashKey *) key;
00901 
00902     /* we don't bother to include encoding in the hash */
00903     return hash_uint32((uint32) k->userid) ^
00904         hash_uint32((uint32) k->dbid) ^
00905         hash_uint32((uint32) k->queryid);
00906 }
00907 
00908 /*
00909  * Compare two keys - zero means match
00910  */
00911 static int
00912 pgss_match_fn(const void *key1, const void *key2, Size keysize)
00913 {
00914     const pgssHashKey *k1 = (const pgssHashKey *) key1;
00915     const pgssHashKey *k2 = (const pgssHashKey *) key2;
00916 
00917     if (k1->userid == k2->userid &&
00918         k1->dbid == k2->dbid &&
00919         k1->encoding == k2->encoding &&
00920         k1->queryid == k2->queryid)
00921         return 0;
00922     else
00923         return 1;
00924 }
00925 
00926 /*
00927  * Given an arbitrarily long query string, produce a hash for the purposes of
00928  * identifying the query, without normalizing constants.  Used when hashing
00929  * utility statements.
00930  */
00931 static uint32
00932 pgss_hash_string(const char *str)
00933 {
00934     return hash_any((const unsigned char *) str, strlen(str));
00935 }
00936 
00937 /*
00938  * Store some statistics for a statement.
00939  *
00940  * If jstate is not NULL then we're trying to create an entry for which
00941  * we have no statistics as yet; we just want to record the normalized
00942  * query string.  total_time, rows, bufusage are ignored in this case.
00943  */
00944 static void
00945 pgss_store(const char *query, uint32 queryId,
00946            double total_time, uint64 rows,
00947            const BufferUsage *bufusage,
00948            pgssJumbleState *jstate)
00949 {
00950     pgssHashKey key;
00951     pgssEntry  *entry;
00952     char       *norm_query = NULL;
00953 
00954     Assert(query != NULL);
00955 
00956     /* Safety check... */
00957     if (!pgss || !pgss_hash)
00958         return;
00959 
00960     /* Set up key for hashtable search */
00961     key.userid = GetUserId();
00962     key.dbid = MyDatabaseId;
00963     key.encoding = GetDatabaseEncoding();
00964     key.queryid = queryId;
00965 
00966     /* Lookup the hash table entry with shared lock. */
00967     LWLockAcquire(pgss->lock, LW_SHARED);
00968 
00969     entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
00970 
00971     /* Create new entry, if not present */
00972     if (!entry)
00973     {
00974         int         query_len;
00975 
00976         /*
00977          * We'll need exclusive lock to make a new entry.  There is no point
00978          * in holding shared lock while we normalize the string, though.
00979          */
00980         LWLockRelease(pgss->lock);
00981 
00982         query_len = strlen(query);
00983 
00984         if (jstate)
00985         {
00986             /* Normalize the string if enabled */
00987             norm_query = generate_normalized_query(jstate, query,
00988                                                    &query_len,
00989                                                    key.encoding);
00990 
00991             /* Acquire exclusive lock as required by entry_alloc() */
00992             LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
00993 
00994             entry = entry_alloc(&key, norm_query, query_len, true);
00995         }
00996         else
00997         {
00998             /*
00999              * We're just going to store the query string as-is; but we have
01000              * to truncate it if over-length.
01001              */
01002             if (query_len >= pgss->query_size)
01003                 query_len = pg_encoding_mbcliplen(key.encoding,
01004                                                   query,
01005                                                   query_len,
01006                                                   pgss->query_size - 1);
01007 
01008             /* Acquire exclusive lock as required by entry_alloc() */
01009             LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
01010 
01011             entry = entry_alloc(&key, query, query_len, false);
01012         }
01013     }
01014 
01015     /* Increment the counts, except when jstate is not NULL */
01016     if (!jstate)
01017     {
01018         /*
01019          * Grab the spinlock while updating the counters (see comment about
01020          * locking rules at the head of the file)
01021          */
01022         volatile pgssEntry *e = (volatile pgssEntry *) entry;
01023 
01024         SpinLockAcquire(&e->mutex);
01025 
01026         /* "Unstick" entry if it was previously sticky */
01027         if (e->counters.calls == 0)
01028             e->counters.usage = USAGE_INIT;
01029 
01030         e->counters.calls += 1;
01031         e->counters.total_time += total_time;
01032         e->counters.rows += rows;
01033         e->counters.shared_blks_hit += bufusage->shared_blks_hit;
01034         e->counters.shared_blks_read += bufusage->shared_blks_read;
01035         e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
01036         e->counters.shared_blks_written += bufusage->shared_blks_written;
01037         e->counters.local_blks_hit += bufusage->local_blks_hit;
01038         e->counters.local_blks_read += bufusage->local_blks_read;
01039         e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
01040         e->counters.local_blks_written += bufusage->local_blks_written;
01041         e->counters.temp_blks_read += bufusage->temp_blks_read;
01042         e->counters.temp_blks_written += bufusage->temp_blks_written;
01043         e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
01044         e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
01045         e->counters.usage += USAGE_EXEC(total_time);
01046 
01047         SpinLockRelease(&e->mutex);
01048     }
01049 
01050     LWLockRelease(pgss->lock);
01051 
01052     /* We postpone this pfree until we're out of the lock */
01053     if (norm_query)
01054         pfree(norm_query);
01055 }
01056 
01057 /*
01058  * Reset all statement statistics.
01059  */
01060 Datum
01061 pg_stat_statements_reset(PG_FUNCTION_ARGS)
01062 {
01063     if (!pgss || !pgss_hash)
01064         ereport(ERROR,
01065                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
01066                  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
01067     entry_reset();
01068     PG_RETURN_VOID();
01069 }
01070 
01071 #define PG_STAT_STATEMENTS_COLS_V1_0    14
01072 #define PG_STAT_STATEMENTS_COLS         18
01073 
01074 /*
01075  * Retrieve statement statistics.
01076  */
01077 Datum
01078 pg_stat_statements(PG_FUNCTION_ARGS)
01079 {
01080     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
01081     TupleDesc   tupdesc;
01082     Tuplestorestate *tupstore;
01083     MemoryContext per_query_ctx;
01084     MemoryContext oldcontext;
01085     Oid         userid = GetUserId();
01086     bool        is_superuser = superuser();
01087     HASH_SEQ_STATUS hash_seq;
01088     pgssEntry  *entry;
01089     bool        sql_supports_v1_1_counters = true;
01090 
01091     if (!pgss || !pgss_hash)
01092         ereport(ERROR,
01093                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
01094                  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
01095 
01096     /* check to see if caller supports us returning a tuplestore */
01097     if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
01098         ereport(ERROR,
01099                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
01100                  errmsg("set-valued function called in context that cannot accept a set")));
01101     if (!(rsinfo->allowedModes & SFRM_Materialize))
01102         ereport(ERROR,
01103                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
01104                  errmsg("materialize mode required, but it is not " \
01105                         "allowed in this context")));
01106 
01107     /* Build a tuple descriptor for our result type */
01108     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
01109         elog(ERROR, "return type must be a row type");
01110     if (tupdesc->natts == PG_STAT_STATEMENTS_COLS_V1_0)
01111         sql_supports_v1_1_counters = false;
01112 
01113     per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
01114     oldcontext = MemoryContextSwitchTo(per_query_ctx);
01115 
01116     tupstore = tuplestore_begin_heap(true, false, work_mem);
01117     rsinfo->returnMode = SFRM_Materialize;
01118     rsinfo->setResult = tupstore;
01119     rsinfo->setDesc = tupdesc;
01120 
01121     MemoryContextSwitchTo(oldcontext);
01122 
01123     LWLockAcquire(pgss->lock, LW_SHARED);
01124 
01125     hash_seq_init(&hash_seq, pgss_hash);
01126     while ((entry = hash_seq_search(&hash_seq)) != NULL)
01127     {
01128         Datum       values[PG_STAT_STATEMENTS_COLS];
01129         bool        nulls[PG_STAT_STATEMENTS_COLS];
01130         int         i = 0;
01131         Counters    tmp;
01132 
01133         memset(values, 0, sizeof(values));
01134         memset(nulls, 0, sizeof(nulls));
01135 
01136         values[i++] = ObjectIdGetDatum(entry->key.userid);
01137         values[i++] = ObjectIdGetDatum(entry->key.dbid);
01138 
01139         if (is_superuser || entry->key.userid == userid)
01140         {
01141             char       *qstr;
01142 
01143             qstr = (char *)
01144                 pg_do_encoding_conversion((unsigned char *) entry->query,
01145                                           entry->query_len,
01146                                           entry->key.encoding,
01147                                           GetDatabaseEncoding());
01148             values[i++] = CStringGetTextDatum(qstr);
01149             if (qstr != entry->query)
01150                 pfree(qstr);
01151         }
01152         else
01153             values[i++] = CStringGetTextDatum("<insufficient privilege>");
01154 
01155         /* copy counters to a local variable to keep locking time short */
01156         {
01157             volatile pgssEntry *e = (volatile pgssEntry *) entry;
01158 
01159             SpinLockAcquire(&e->mutex);
01160             tmp = e->counters;
01161             SpinLockRelease(&e->mutex);
01162         }
01163 
01164         /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
01165         if (tmp.calls == 0)
01166             continue;
01167 
01168         values[i++] = Int64GetDatumFast(tmp.calls);
01169         values[i++] = Float8GetDatumFast(tmp.total_time);
01170         values[i++] = Int64GetDatumFast(tmp.rows);
01171         values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
01172         values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
01173         if (sql_supports_v1_1_counters)
01174             values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
01175         values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
01176         values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
01177         values[i++] = Int64GetDatumFast(tmp.local_blks_read);
01178         if (sql_supports_v1_1_counters)
01179             values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
01180         values[i++] = Int64GetDatumFast(tmp.local_blks_written);
01181         values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
01182         values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
01183         if (sql_supports_v1_1_counters)
01184         {
01185             values[i++] = Float8GetDatumFast(tmp.blk_read_time);
01186             values[i++] = Float8GetDatumFast(tmp.blk_write_time);
01187         }
01188 
01189         Assert(i == (sql_supports_v1_1_counters ?
01190                      PG_STAT_STATEMENTS_COLS : PG_STAT_STATEMENTS_COLS_V1_0));
01191 
01192         tuplestore_putvalues(tupstore, tupdesc, values, nulls);
01193     }
01194 
01195     LWLockRelease(pgss->lock);
01196 
01197     /* clean up and return the tuplestore */
01198     tuplestore_donestoring(tupstore);
01199 
01200     return (Datum) 0;
01201 }
01202 
01203 /*
01204  * Estimate shared memory space needed.
01205  */
01206 static Size
01207 pgss_memsize(void)
01208 {
01209     Size        size;
01210     Size        entrysize;
01211 
01212     size = MAXALIGN(sizeof(pgssSharedState));
01213     entrysize = offsetof(pgssEntry, query) +pgstat_track_activity_query_size;
01214     size = add_size(size, hash_estimate_size(pgss_max, entrysize));
01215 
01216     return size;
01217 }
01218 
01219 /*
01220  * Allocate a new hashtable entry.
01221  * caller must hold an exclusive lock on pgss->lock
01222  *
01223  * "query" need not be null-terminated; we rely on query_len instead
01224  *
01225  * If "sticky" is true, make the new entry artificially sticky so that it will
01226  * probably still be there when the query finishes execution.  We do this by
01227  * giving it a median usage value rather than the normal value.  (Strictly
01228  * speaking, query strings are normalized on a best effort basis, though it
01229  * would be difficult to demonstrate this even under artificial conditions.)
01230  *
01231  * Note: despite needing exclusive lock, it's not an error for the target
01232  * entry to already exist.  This is because pgss_store releases and
01233  * reacquires lock after failing to find a match; so someone else could
01234  * have made the entry while we waited to get exclusive lock.
01235  */
01236 static pgssEntry *
01237 entry_alloc(pgssHashKey *key, const char *query, int query_len, bool sticky)
01238 {
01239     pgssEntry  *entry;
01240     bool        found;
01241 
01242     /* Make space if needed */
01243     while (hash_get_num_entries(pgss_hash) >= pgss_max)
01244         entry_dealloc();
01245 
01246     /* Find or create an entry with desired hash code */
01247     entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
01248 
01249     if (!found)
01250     {
01251         /* New entry, initialize it */
01252 
01253         /* reset the statistics */
01254         memset(&entry->counters, 0, sizeof(Counters));
01255         /* set the appropriate initial usage count */
01256         entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
01257         /* re-initialize the mutex each time ... we assume no one using it */
01258         SpinLockInit(&entry->mutex);
01259         /* ... and don't forget the query text */
01260         Assert(query_len >= 0 && query_len < pgss->query_size);
01261         entry->query_len = query_len;
01262         memcpy(entry->query, query, query_len);
01263         entry->query[query_len] = '\0';
01264     }
01265 
01266     return entry;
01267 }
01268 
01269 /*
01270  * qsort comparator for sorting into increasing usage order
01271  */
01272 static int
01273 entry_cmp(const void *lhs, const void *rhs)
01274 {
01275     double      l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
01276     double      r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
01277 
01278     if (l_usage < r_usage)
01279         return -1;
01280     else if (l_usage > r_usage)
01281         return +1;
01282     else
01283         return 0;
01284 }
01285 
01286 /*
01287  * Deallocate least used entries.
01288  * Caller must hold an exclusive lock on pgss->lock.
01289  */
01290 static void
01291 entry_dealloc(void)
01292 {
01293     HASH_SEQ_STATUS hash_seq;
01294     pgssEntry **entries;
01295     pgssEntry  *entry;
01296     int         nvictims;
01297     int         i;
01298 
01299     /*
01300      * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
01301      * While we're scanning the table, apply the decay factor to the usage
01302      * values.
01303      */
01304 
01305     entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
01306 
01307     i = 0;
01308     hash_seq_init(&hash_seq, pgss_hash);
01309     while ((entry = hash_seq_search(&hash_seq)) != NULL)
01310     {
01311         entries[i++] = entry;
01312         /* "Sticky" entries get a different usage decay rate. */
01313         if (entry->counters.calls == 0)
01314             entry->counters.usage *= STICKY_DECREASE_FACTOR;
01315         else
01316             entry->counters.usage *= USAGE_DECREASE_FACTOR;
01317     }
01318 
01319     qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
01320 
01321     /* Also, record the (approximate) median usage */
01322     if (i > 0)
01323         pgss->cur_median_usage = entries[i / 2]->counters.usage;
01324 
01325     nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
01326     nvictims = Min(nvictims, i);
01327 
01328     for (i = 0; i < nvictims; i++)
01329     {
01330         hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
01331     }
01332 
01333     pfree(entries);
01334 }
01335 
01336 /*
01337  * Release all entries.
01338  */
01339 static void
01340 entry_reset(void)
01341 {
01342     HASH_SEQ_STATUS hash_seq;
01343     pgssEntry  *entry;
01344 
01345     LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
01346 
01347     hash_seq_init(&hash_seq, pgss_hash);
01348     while ((entry = hash_seq_search(&hash_seq)) != NULL)
01349     {
01350         hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
01351     }
01352 
01353     LWLockRelease(pgss->lock);
01354 }
01355 
01356 /*
01357  * AppendJumble: Append a value that is substantive in a given query to
01358  * the current jumble.
01359  */
01360 static void
01361 AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
01362 {
01363     unsigned char *jumble = jstate->jumble;
01364     Size        jumble_len = jstate->jumble_len;
01365 
01366     /*
01367      * Whenever the jumble buffer is full, we hash the current contents and
01368      * reset the buffer to contain just that hash value, thus relying on the
01369      * hash to summarize everything so far.
01370      */
01371     while (size > 0)
01372     {
01373         Size        part_size;
01374 
01375         if (jumble_len >= JUMBLE_SIZE)
01376         {
01377             uint32      start_hash = hash_any(jumble, JUMBLE_SIZE);
01378 
01379             memcpy(jumble, &start_hash, sizeof(start_hash));
01380             jumble_len = sizeof(start_hash);
01381         }
01382         part_size = Min(size, JUMBLE_SIZE - jumble_len);
01383         memcpy(jumble + jumble_len, item, part_size);
01384         jumble_len += part_size;
01385         item += part_size;
01386         size -= part_size;
01387     }
01388     jstate->jumble_len = jumble_len;
01389 }
01390 
01391 /*
01392  * Wrappers around AppendJumble to encapsulate details of serialization
01393  * of individual local variable elements.
01394  */
01395 #define APP_JUMB(item) \
01396     AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
01397 #define APP_JUMB_STRING(str) \
01398     AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1)
01399 
01400 /*
01401  * JumbleQuery: Selectively serialize the query tree, appending significant
01402  * data to the "query jumble" while ignoring nonsignificant data.
01403  *
01404  * Rule of thumb for what to include is that we should ignore anything not
01405  * semantically significant (such as alias names) as well as anything that can
01406  * be deduced from child nodes (else we'd just be double-hashing that piece
01407  * of information).
01408  */
01409 static void
01410 JumbleQuery(pgssJumbleState *jstate, Query *query)
01411 {
01412     Assert(IsA(query, Query));
01413     Assert(query->utilityStmt == NULL);
01414 
01415     APP_JUMB(query->commandType);
01416     /* resultRelation is usually predictable from commandType */
01417     JumbleExpr(jstate, (Node *) query->cteList);
01418     JumbleRangeTable(jstate, query->rtable);
01419     JumbleExpr(jstate, (Node *) query->jointree);
01420     JumbleExpr(jstate, (Node *) query->targetList);
01421     JumbleExpr(jstate, (Node *) query->returningList);
01422     JumbleExpr(jstate, (Node *) query->groupClause);
01423     JumbleExpr(jstate, query->havingQual);
01424     JumbleExpr(jstate, (Node *) query->windowClause);
01425     JumbleExpr(jstate, (Node *) query->distinctClause);
01426     JumbleExpr(jstate, (Node *) query->sortClause);
01427     JumbleExpr(jstate, query->limitOffset);
01428     JumbleExpr(jstate, query->limitCount);
01429     /* we ignore rowMarks */
01430     JumbleExpr(jstate, query->setOperations);
01431 }
01432 
01433 /*
01434  * Jumble a range table
01435  */
01436 static void
01437 JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
01438 {
01439     ListCell   *lc;
01440 
01441     foreach(lc, rtable)
01442     {
01443         RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
01444 
01445         Assert(IsA(rte, RangeTblEntry));
01446         APP_JUMB(rte->rtekind);
01447         switch (rte->rtekind)
01448         {
01449             case RTE_RELATION:
01450                 APP_JUMB(rte->relid);
01451                 break;
01452             case RTE_SUBQUERY:
01453                 JumbleQuery(jstate, rte->subquery);
01454                 break;
01455             case RTE_JOIN:
01456                 APP_JUMB(rte->jointype);
01457                 break;
01458             case RTE_FUNCTION:
01459                 JumbleExpr(jstate, rte->funcexpr);
01460                 break;
01461             case RTE_VALUES:
01462                 JumbleExpr(jstate, (Node *) rte->values_lists);
01463                 break;
01464             case RTE_CTE:
01465 
01466                 /*
01467                  * Depending on the CTE name here isn't ideal, but it's the
01468                  * only info we have to identify the referenced WITH item.
01469                  */
01470                 APP_JUMB_STRING(rte->ctename);
01471                 APP_JUMB(rte->ctelevelsup);
01472                 break;
01473             default:
01474                 elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
01475                 break;
01476         }
01477     }
01478 }
01479 
01480 /*
01481  * Jumble an expression tree
01482  *
01483  * In general this function should handle all the same node types that
01484  * expression_tree_walker() does, and therefore it's coded to be as parallel
01485  * to that function as possible.  However, since we are only invoked on
01486  * queries immediately post-parse-analysis, we need not handle node types
01487  * that only appear in planning.
01488  *
01489  * Note: the reason we don't simply use expression_tree_walker() is that the
01490  * point of that function is to support tree walkers that don't care about
01491  * most tree node types, but here we care about all types.  We should complain
01492  * about any unrecognized node type.
01493  */
01494 static void
01495 JumbleExpr(pgssJumbleState *jstate, Node *node)
01496 {
01497     ListCell   *temp;
01498 
01499     if (node == NULL)
01500         return;
01501 
01502     /* Guard against stack overflow due to overly complex expressions */
01503     check_stack_depth();
01504 
01505     /*
01506      * We always emit the node's NodeTag, then any additional fields that are
01507      * considered significant, and then we recurse to any child nodes.
01508      */
01509     APP_JUMB(node->type);
01510 
01511     switch (nodeTag(node))
01512     {
01513         case T_Var:
01514             {
01515                 Var        *var = (Var *) node;
01516 
01517                 APP_JUMB(var->varno);
01518                 APP_JUMB(var->varattno);
01519                 APP_JUMB(var->varlevelsup);
01520             }
01521             break;
01522         case T_Const:
01523             {
01524                 Const      *c = (Const *) node;
01525 
01526                 /* We jumble only the constant's type, not its value */
01527                 APP_JUMB(c->consttype);
01528                 /* Also, record its parse location for query normalization */
01529                 RecordConstLocation(jstate, c->location);
01530             }
01531             break;
01532         case T_Param:
01533             {
01534                 Param      *p = (Param *) node;
01535 
01536                 APP_JUMB(p->paramkind);
01537                 APP_JUMB(p->paramid);
01538                 APP_JUMB(p->paramtype);
01539             }
01540             break;
01541         case T_Aggref:
01542             {
01543                 Aggref     *expr = (Aggref *) node;
01544 
01545                 APP_JUMB(expr->aggfnoid);
01546                 JumbleExpr(jstate, (Node *) expr->args);
01547                 JumbleExpr(jstate, (Node *) expr->aggorder);
01548                 JumbleExpr(jstate, (Node *) expr->aggdistinct);
01549             }
01550             break;
01551         case T_WindowFunc:
01552             {
01553                 WindowFunc *expr = (WindowFunc *) node;
01554 
01555                 APP_JUMB(expr->winfnoid);
01556                 APP_JUMB(expr->winref);
01557                 JumbleExpr(jstate, (Node *) expr->args);
01558             }
01559             break;
01560         case T_ArrayRef:
01561             {
01562                 ArrayRef   *aref = (ArrayRef *) node;
01563 
01564                 JumbleExpr(jstate, (Node *) aref->refupperindexpr);
01565                 JumbleExpr(jstate, (Node *) aref->reflowerindexpr);
01566                 JumbleExpr(jstate, (Node *) aref->refexpr);
01567                 JumbleExpr(jstate, (Node *) aref->refassgnexpr);
01568             }
01569             break;
01570         case T_FuncExpr:
01571             {
01572                 FuncExpr   *expr = (FuncExpr *) node;
01573 
01574                 APP_JUMB(expr->funcid);
01575                 JumbleExpr(jstate, (Node *) expr->args);
01576             }
01577             break;
01578         case T_NamedArgExpr:
01579             {
01580                 NamedArgExpr *nae = (NamedArgExpr *) node;
01581 
01582                 APP_JUMB(nae->argnumber);
01583                 JumbleExpr(jstate, (Node *) nae->arg);
01584             }
01585             break;
01586         case T_OpExpr:
01587         case T_DistinctExpr:    /* struct-equivalent to OpExpr */
01588         case T_NullIfExpr:      /* struct-equivalent to OpExpr */
01589             {
01590                 OpExpr     *expr = (OpExpr *) node;
01591 
01592                 APP_JUMB(expr->opno);
01593                 JumbleExpr(jstate, (Node *) expr->args);
01594             }
01595             break;
01596         case T_ScalarArrayOpExpr:
01597             {
01598                 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
01599 
01600                 APP_JUMB(expr->opno);
01601                 APP_JUMB(expr->useOr);
01602                 JumbleExpr(jstate, (Node *) expr->args);
01603             }
01604             break;
01605         case T_BoolExpr:
01606             {
01607                 BoolExpr   *expr = (BoolExpr *) node;
01608 
01609                 APP_JUMB(expr->boolop);
01610                 JumbleExpr(jstate, (Node *) expr->args);
01611             }
01612             break;
01613         case T_SubLink:
01614             {
01615                 SubLink    *sublink = (SubLink *) node;
01616 
01617                 APP_JUMB(sublink->subLinkType);
01618                 JumbleExpr(jstate, (Node *) sublink->testexpr);
01619                 JumbleQuery(jstate, (Query *) sublink->subselect);
01620             }
01621             break;
01622         case T_FieldSelect:
01623             {
01624                 FieldSelect *fs = (FieldSelect *) node;
01625 
01626                 APP_JUMB(fs->fieldnum);
01627                 JumbleExpr(jstate, (Node *) fs->arg);
01628             }
01629             break;
01630         case T_FieldStore:
01631             {
01632                 FieldStore *fstore = (FieldStore *) node;
01633 
01634                 JumbleExpr(jstate, (Node *) fstore->arg);
01635                 JumbleExpr(jstate, (Node *) fstore->newvals);
01636             }
01637             break;
01638         case T_RelabelType:
01639             {
01640                 RelabelType *rt = (RelabelType *) node;
01641 
01642                 APP_JUMB(rt->resulttype);
01643                 JumbleExpr(jstate, (Node *) rt->arg);
01644             }
01645             break;
01646         case T_CoerceViaIO:
01647             {
01648                 CoerceViaIO *cio = (CoerceViaIO *) node;
01649 
01650                 APP_JUMB(cio->resulttype);
01651                 JumbleExpr(jstate, (Node *) cio->arg);
01652             }
01653             break;
01654         case T_ArrayCoerceExpr:
01655             {
01656                 ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
01657 
01658                 APP_JUMB(acexpr->resulttype);
01659                 JumbleExpr(jstate, (Node *) acexpr->arg);
01660             }
01661             break;
01662         case T_ConvertRowtypeExpr:
01663             {
01664                 ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
01665 
01666                 APP_JUMB(crexpr->resulttype);
01667                 JumbleExpr(jstate, (Node *) crexpr->arg);
01668             }
01669             break;
01670         case T_CollateExpr:
01671             {
01672                 CollateExpr *ce = (CollateExpr *) node;
01673 
01674                 APP_JUMB(ce->collOid);
01675                 JumbleExpr(jstate, (Node *) ce->arg);
01676             }
01677             break;
01678         case T_CaseExpr:
01679             {
01680                 CaseExpr   *caseexpr = (CaseExpr *) node;
01681 
01682                 JumbleExpr(jstate, (Node *) caseexpr->arg);
01683                 foreach(temp, caseexpr->args)
01684                 {
01685                     CaseWhen   *when = (CaseWhen *) lfirst(temp);
01686 
01687                     Assert(IsA(when, CaseWhen));
01688                     JumbleExpr(jstate, (Node *) when->expr);
01689                     JumbleExpr(jstate, (Node *) when->result);
01690                 }
01691                 JumbleExpr(jstate, (Node *) caseexpr->defresult);
01692             }
01693             break;
01694         case T_CaseTestExpr:
01695             {
01696                 CaseTestExpr *ct = (CaseTestExpr *) node;
01697 
01698                 APP_JUMB(ct->typeId);
01699             }
01700             break;
01701         case T_ArrayExpr:
01702             JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
01703             break;
01704         case T_RowExpr:
01705             JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
01706             break;
01707         case T_RowCompareExpr:
01708             {
01709                 RowCompareExpr *rcexpr = (RowCompareExpr *) node;
01710 
01711                 APP_JUMB(rcexpr->rctype);
01712                 JumbleExpr(jstate, (Node *) rcexpr->largs);
01713                 JumbleExpr(jstate, (Node *) rcexpr->rargs);
01714             }
01715             break;
01716         case T_CoalesceExpr:
01717             JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
01718             break;
01719         case T_MinMaxExpr:
01720             {
01721                 MinMaxExpr *mmexpr = (MinMaxExpr *) node;
01722 
01723                 APP_JUMB(mmexpr->op);
01724                 JumbleExpr(jstate, (Node *) mmexpr->args);
01725             }
01726             break;
01727         case T_XmlExpr:
01728             {
01729                 XmlExpr    *xexpr = (XmlExpr *) node;
01730 
01731                 APP_JUMB(xexpr->op);
01732                 JumbleExpr(jstate, (Node *) xexpr->named_args);
01733                 JumbleExpr(jstate, (Node *) xexpr->args);
01734             }
01735             break;
01736         case T_NullTest:
01737             {
01738                 NullTest   *nt = (NullTest *) node;
01739 
01740                 APP_JUMB(nt->nulltesttype);
01741                 JumbleExpr(jstate, (Node *) nt->arg);
01742             }
01743             break;
01744         case T_BooleanTest:
01745             {
01746                 BooleanTest *bt = (BooleanTest *) node;
01747 
01748                 APP_JUMB(bt->booltesttype);
01749                 JumbleExpr(jstate, (Node *) bt->arg);
01750             }
01751             break;
01752         case T_CoerceToDomain:
01753             {
01754                 CoerceToDomain *cd = (CoerceToDomain *) node;
01755 
01756                 APP_JUMB(cd->resulttype);
01757                 JumbleExpr(jstate, (Node *) cd->arg);
01758             }
01759             break;
01760         case T_CoerceToDomainValue:
01761             {
01762                 CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
01763 
01764                 APP_JUMB(cdv->typeId);
01765             }
01766             break;
01767         case T_SetToDefault:
01768             {
01769                 SetToDefault *sd = (SetToDefault *) node;
01770 
01771                 APP_JUMB(sd->typeId);
01772             }
01773             break;
01774         case T_CurrentOfExpr:
01775             {
01776                 CurrentOfExpr *ce = (CurrentOfExpr *) node;
01777 
01778                 APP_JUMB(ce->cvarno);
01779                 if (ce->cursor_name)
01780                     APP_JUMB_STRING(ce->cursor_name);
01781                 APP_JUMB(ce->cursor_param);
01782             }
01783             break;
01784         case T_TargetEntry:
01785             {
01786                 TargetEntry *tle = (TargetEntry *) node;
01787 
01788                 APP_JUMB(tle->resno);
01789                 APP_JUMB(tle->ressortgroupref);
01790                 JumbleExpr(jstate, (Node *) tle->expr);
01791             }
01792             break;
01793         case T_RangeTblRef:
01794             {
01795                 RangeTblRef *rtr = (RangeTblRef *) node;
01796 
01797                 APP_JUMB(rtr->rtindex);
01798             }
01799             break;
01800         case T_JoinExpr:
01801             {
01802                 JoinExpr   *join = (JoinExpr *) node;
01803 
01804                 APP_JUMB(join->jointype);
01805                 APP_JUMB(join->isNatural);
01806                 APP_JUMB(join->rtindex);
01807                 JumbleExpr(jstate, join->larg);
01808                 JumbleExpr(jstate, join->rarg);
01809                 JumbleExpr(jstate, join->quals);
01810             }
01811             break;
01812         case T_FromExpr:
01813             {
01814                 FromExpr   *from = (FromExpr *) node;
01815 
01816                 JumbleExpr(jstate, (Node *) from->fromlist);
01817                 JumbleExpr(jstate, from->quals);
01818             }
01819             break;
01820         case T_List:
01821             foreach(temp, (List *) node)
01822             {
01823                 JumbleExpr(jstate, (Node *) lfirst(temp));
01824             }
01825             break;
01826         case T_SortGroupClause:
01827             {
01828                 SortGroupClause *sgc = (SortGroupClause *) node;
01829 
01830                 APP_JUMB(sgc->tleSortGroupRef);
01831                 APP_JUMB(sgc->eqop);
01832                 APP_JUMB(sgc->sortop);
01833                 APP_JUMB(sgc->nulls_first);
01834             }
01835             break;
01836         case T_WindowClause:
01837             {
01838                 WindowClause *wc = (WindowClause *) node;
01839 
01840                 APP_JUMB(wc->winref);
01841                 APP_JUMB(wc->frameOptions);
01842                 JumbleExpr(jstate, (Node *) wc->partitionClause);
01843                 JumbleExpr(jstate, (Node *) wc->orderClause);
01844                 JumbleExpr(jstate, wc->startOffset);
01845                 JumbleExpr(jstate, wc->endOffset);
01846             }
01847             break;
01848         case T_CommonTableExpr:
01849             {
01850                 CommonTableExpr *cte = (CommonTableExpr *) node;
01851 
01852                 /* we store the string name because RTE_CTE RTEs need it */
01853                 APP_JUMB_STRING(cte->ctename);
01854                 JumbleQuery(jstate, (Query *) cte->ctequery);
01855             }
01856             break;
01857         case T_SetOperationStmt:
01858             {
01859                 SetOperationStmt *setop = (SetOperationStmt *) node;
01860 
01861                 APP_JUMB(setop->op);
01862                 APP_JUMB(setop->all);
01863                 JumbleExpr(jstate, setop->larg);
01864                 JumbleExpr(jstate, setop->rarg);
01865             }
01866             break;
01867         default:
01868             /* Only a warning, since we can stumble along anyway */
01869             elog(WARNING, "unrecognized node type: %d",
01870                  (int) nodeTag(node));
01871             break;
01872     }
01873 }
01874 
01875 /*
01876  * Record location of constant within query string of query tree
01877  * that is currently being walked.
01878  */
01879 static void
01880 RecordConstLocation(pgssJumbleState *jstate, int location)
01881 {
01882     /* -1 indicates unknown or undefined location */
01883     if (location >= 0)
01884     {
01885         /* enlarge array if needed */
01886         if (jstate->clocations_count >= jstate->clocations_buf_size)
01887         {
01888             jstate->clocations_buf_size *= 2;
01889             jstate->clocations = (pgssLocationLen *)
01890                 repalloc(jstate->clocations,
01891                          jstate->clocations_buf_size *
01892                          sizeof(pgssLocationLen));
01893         }
01894         jstate->clocations[jstate->clocations_count].location = location;
01895         /* initialize lengths to -1 to simplify fill_in_constant_lengths */
01896         jstate->clocations[jstate->clocations_count].length = -1;
01897         jstate->clocations_count++;
01898     }
01899 }
01900 
01901 /*
01902  * Generate a normalized version of the query string that will be used to
01903  * represent all similar queries.
01904  *
01905  * Note that the normalized representation may well vary depending on
01906  * just which "equivalent" query is used to create the hashtable entry.
01907  * We assume this is OK.
01908  *
01909  * *query_len_p contains the input string length, and is updated with
01910  * the result string length (which cannot be longer) on exit.
01911  *
01912  * Returns a palloc'd string, which is not necessarily null-terminated.
01913  */
01914 static char *
01915 generate_normalized_query(pgssJumbleState *jstate, const char *query,
01916                           int *query_len_p, int encoding)
01917 {
01918     char       *norm_query;
01919     int         query_len = *query_len_p;
01920     int         max_output_len;
01921     int         i,
01922                 len_to_wrt,     /* Length (in bytes) to write */
01923                 quer_loc = 0,   /* Source query byte location */
01924                 n_quer_loc = 0, /* Normalized query byte location */
01925                 last_off = 0,   /* Offset from start for previous tok */
01926                 last_tok_len = 0;       /* Length (in bytes) of that tok */
01927 
01928     /*
01929      * Get constants' lengths (core system only gives us locations).  Note
01930      * this also ensures the items are sorted by location.
01931      */
01932     fill_in_constant_lengths(jstate, query);
01933 
01934     /* Allocate result buffer, ensuring we limit result to allowed size */
01935     max_output_len = Min(query_len, pgss->query_size - 1);
01936     norm_query = palloc(max_output_len);
01937 
01938     for (i = 0; i < jstate->clocations_count; i++)
01939     {
01940         int         off,        /* Offset from start for cur tok */
01941                     tok_len;    /* Length (in bytes) of that tok */
01942 
01943         off = jstate->clocations[i].location;
01944         tok_len = jstate->clocations[i].length;
01945 
01946         if (tok_len < 0)
01947             continue;           /* ignore any duplicates */
01948 
01949         /* Copy next chunk, or as much as will fit */
01950         len_to_wrt = off - last_off;
01951         len_to_wrt -= last_tok_len;
01952         len_to_wrt = Min(len_to_wrt, max_output_len - n_quer_loc);
01953 
01954         Assert(len_to_wrt >= 0);
01955         memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
01956         n_quer_loc += len_to_wrt;
01957 
01958         if (n_quer_loc < max_output_len)
01959             norm_query[n_quer_loc++] = '?';
01960 
01961         quer_loc = off + tok_len;
01962         last_off = off;
01963         last_tok_len = tok_len;
01964 
01965         /* If we run out of space, might as well stop iterating */
01966         if (n_quer_loc >= max_output_len)
01967             break;
01968     }
01969 
01970     /*
01971      * We've copied up until the last ignorable constant.  Copy over the
01972      * remaining bytes of the original query string, or at least as much as
01973      * will fit.
01974      */
01975     len_to_wrt = query_len - quer_loc;
01976     len_to_wrt = Min(len_to_wrt, max_output_len - n_quer_loc);
01977 
01978     Assert(len_to_wrt >= 0);
01979     memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
01980     n_quer_loc += len_to_wrt;
01981 
01982     /*
01983      * If we ran out of space, we need to do an encoding-aware truncation,
01984      * just to make sure we don't have an incomplete character at the end.
01985      */
01986     if (n_quer_loc >= max_output_len)
01987         query_len = pg_encoding_mbcliplen(encoding,
01988                                           norm_query,
01989                                           n_quer_loc,
01990                                           pgss->query_size - 1);
01991     else
01992         query_len = n_quer_loc;
01993 
01994     *query_len_p = query_len;
01995     return norm_query;
01996 }
01997 
01998 /*
01999  * Given a valid SQL string and an array of constant-location records,
02000  * fill in the textual lengths of those constants.
02001  *
02002  * The constants may use any allowed constant syntax, such as float literals,
02003  * bit-strings, single-quoted strings and dollar-quoted strings.  This is
02004  * accomplished by using the public API for the core scanner.
02005  *
02006  * It is the caller's job to ensure that the string is a valid SQL statement
02007  * with constants at the indicated locations.  Since in practice the string
02008  * has already been parsed, and the locations that the caller provides will
02009  * have originated from within the authoritative parser, this should not be
02010  * a problem.
02011  *
02012  * Duplicate constant pointers are possible, and will have their lengths
02013  * marked as '-1', so that they are later ignored.  (Actually, we assume the
02014  * lengths were initialized as -1 to start with, and don't change them here.)
02015  *
02016  * N.B. There is an assumption that a '-' character at a Const location begins
02017  * a negative numeric constant.  This precludes there ever being another
02018  * reason for a constant to start with a '-'.
02019  */
02020 static void
02021 fill_in_constant_lengths(pgssJumbleState *jstate, const char *query)
02022 {
02023     pgssLocationLen *locs;
02024     core_yyscan_t yyscanner;
02025     core_yy_extra_type yyextra;
02026     core_YYSTYPE yylval;
02027     YYLTYPE     yylloc;
02028     int         last_loc = -1;
02029     int         i;
02030 
02031     /*
02032      * Sort the records by location so that we can process them in order while
02033      * scanning the query text.
02034      */
02035     if (jstate->clocations_count > 1)
02036         qsort(jstate->clocations, jstate->clocations_count,
02037               sizeof(pgssLocationLen), comp_location);
02038     locs = jstate->clocations;
02039 
02040     /* initialize the flex scanner --- should match raw_parser() */
02041     yyscanner = scanner_init(query,
02042                              &yyextra,
02043                              ScanKeywords,
02044                              NumScanKeywords);
02045 
02046     /* Search for each constant, in sequence */
02047     for (i = 0; i < jstate->clocations_count; i++)
02048     {
02049         int         loc = locs[i].location;
02050         int         tok;
02051 
02052         Assert(loc >= 0);
02053 
02054         if (loc <= last_loc)
02055             continue;           /* Duplicate constant, ignore */
02056 
02057         /* Lex tokens until we find the desired constant */
02058         for (;;)
02059         {
02060             tok = core_yylex(&yylval, &yylloc, yyscanner);
02061 
02062             /* We should not hit end-of-string, but if we do, behave sanely */
02063             if (tok == 0)
02064                 break;          /* out of inner for-loop */
02065 
02066             /*
02067              * We should find the token position exactly, but if we somehow
02068              * run past it, work with that.
02069              */
02070             if (yylloc >= loc)
02071             {
02072                 if (query[loc] == '-')
02073                 {
02074                     /*
02075                      * It's a negative value - this is the one and only case
02076                      * where we replace more than a single token.
02077                      *
02078                      * Do not compensate for the core system's special-case
02079                      * adjustment of location to that of the leading '-'
02080                      * operator in the event of a negative constant.  It is
02081                      * also useful for our purposes to start from the minus
02082                      * symbol.  In this way, queries like "select * from foo
02083                      * where bar = 1" and "select * from foo where bar = -2"
02084                      * will have identical normalized query strings.
02085                      */
02086                     tok = core_yylex(&yylval, &yylloc, yyscanner);
02087                     if (tok == 0)
02088                         break;  /* out of inner for-loop */
02089                 }
02090 
02091                 /*
02092                  * We now rely on the assumption that flex has placed a zero
02093                  * byte after the text of the current token in scanbuf.
02094                  */
02095                 locs[i].length = strlen(yyextra.scanbuf + loc);
02096                 break;          /* out of inner for-loop */
02097             }
02098         }
02099 
02100         /* If we hit end-of-string, give up, leaving remaining lengths -1 */
02101         if (tok == 0)
02102             break;
02103 
02104         last_loc = loc;
02105     }
02106 
02107     scanner_finish(yyscanner);
02108 }
02109 
02110 /*
02111  * comp_location: comparator for qsorting pgssLocationLen structs by location
02112  */
02113 static int
02114 comp_location(const void *a, const void *b)
02115 {
02116     int         l = ((const pgssLocationLen *) a)->location;
02117     int         r = ((const pgssLocationLen *) b)->location;
02118 
02119     if (l < r)
02120         return -1;
02121     else if (l > r)
02122         return +1;
02123     else
02124         return 0;
02125 }