Header And Logo

PostgreSQL
| The world's most advanced open source database.

ts_utils.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * ts_utils.c
00004  *      various support functions
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  *
00008  *
00009  * IDENTIFICATION
00010  *    src/backend/tsearch/ts_utils.c
00011  *
00012  *-------------------------------------------------------------------------
00013  */
00014 
00015 #include "postgres.h"
00016 
00017 #include <ctype.h>
00018 
00019 #include "miscadmin.h"
00020 #include "tsearch/ts_locale.h"
00021 #include "tsearch/ts_utils.h"
00022 
00023 
00024 /*
00025  * Given the base name and extension of a tsearch config file, return
00026  * its full path name.  The base name is assumed to be user-supplied,
00027  * and is checked to prevent pathname attacks.  The extension is assumed
00028  * to be safe.
00029  *
00030  * The result is a palloc'd string.
00031  */
00032 char *
00033 get_tsearch_config_filename(const char *basename,
00034                             const char *extension)
00035 {
00036     char        sharepath[MAXPGPATH];
00037     char       *result;
00038 
00039     /*
00040      * We limit the basename to contain a-z, 0-9, and underscores.  This may
00041      * be overly restrictive, but we don't want to allow access to anything
00042      * outside the tsearch_data directory, so for instance '/' *must* be
00043      * rejected, and on some platforms '\' and ':' are risky as well. Allowing
00044      * uppercase might result in incompatible behavior between case-sensitive
00045      * and case-insensitive filesystems, and non-ASCII characters create other
00046      * interesting risks, so on the whole a tight policy seems best.
00047      */
00048     if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_") != strlen(basename))
00049         ereport(ERROR,
00050                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00051                  errmsg("invalid text search configuration file name \"%s\"",
00052                         basename)));
00053 
00054     get_share_path(my_exec_path, sharepath);
00055     result = palloc(MAXPGPATH);
00056     snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s",
00057              sharepath, basename, extension);
00058 
00059     return result;
00060 }
00061 
00062 /*
00063  * Reads a stop-word file. Each word is run through 'wordop'
00064  * function, if given.  wordop may either modify the input in-place,
00065  * or palloc a new version.
00066  */
00067 void
00068 readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
00069 {
00070     char      **stop = NULL;
00071 
00072     s->len = 0;
00073     if (fname && *fname)
00074     {
00075         char       *filename = get_tsearch_config_filename(fname, "stop");
00076         tsearch_readline_state trst;
00077         char       *line;
00078         int         reallen = 0;
00079 
00080         if (!tsearch_readline_begin(&trst, filename))
00081             ereport(ERROR,
00082                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
00083                      errmsg("could not open stop-word file \"%s\": %m",
00084                             filename)));
00085 
00086         while ((line = tsearch_readline(&trst)) != NULL)
00087         {
00088             char       *pbuf = line;
00089 
00090             /* Trim trailing space */
00091             while (*pbuf && !t_isspace(pbuf))
00092                 pbuf += pg_mblen(pbuf);
00093             *pbuf = '\0';
00094 
00095             /* Skip empty lines */
00096             if (*line == '\0')
00097             {
00098                 pfree(line);
00099                 continue;
00100             }
00101 
00102             if (s->len >= reallen)
00103             {
00104                 if (reallen == 0)
00105                 {
00106                     reallen = 64;
00107                     stop = (char **) palloc(sizeof(char *) * reallen);
00108                 }
00109                 else
00110                 {
00111                     reallen *= 2;
00112                     stop = (char **) repalloc((void *) stop,
00113                                               sizeof(char *) * reallen);
00114                 }
00115             }
00116 
00117             if (wordop)
00118             {
00119                 stop[s->len] = wordop(line);
00120                 if (stop[s->len] != line)
00121                     pfree(line);
00122             }
00123             else
00124                 stop[s->len] = line;
00125 
00126             (s->len)++;
00127         }
00128 
00129         tsearch_readline_end(&trst);
00130         pfree(filename);
00131     }
00132 
00133     s->stop = stop;
00134 
00135     /* Sort to allow binary searching */
00136     if (s->stop && s->len > 0)
00137         qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp);
00138 }
00139 
00140 bool
00141 searchstoplist(StopList *s, char *key)
00142 {
00143     return (s->stop && s->len > 0 &&
00144             bsearch(&key, s->stop, s->len,
00145                     sizeof(char *), pg_qsort_strcmp)) ? true : false;
00146 }