Header And Logo

PostgreSQL
| The world's most advanced open source database.

regexport.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * regexport.c
00004  *    Functions for exporting info about a regex's NFA
00005  *
00006  * In this implementation, the NFA defines a necessary but not sufficient
00007  * condition for a string to match the regex: that is, there can be strings
00008  * that match the NFA but don't match the full regex, but not vice versa.
00009  * Thus, for example, it is okay for the functions below to ignore lookahead
00010  * constraints, which merely constrain the string some more.
00011  *
00012  * Notice that these functions return info into caller-provided arrays
00013  * rather than doing their own malloc's.  This simplifies the APIs by
00014  * eliminating a class of error conditions, and in the case of colors
00015  * allows the caller to decide how big is too big to bother with.
00016  *
00017  *
00018  * Portions Copyright (c) 2013, PostgreSQL Global Development Group
00019  * Portions Copyright (c) 1998, 1999 Henry Spencer
00020  *
00021  * IDENTIFICATION
00022  *    src/backend/regex/regexport.c
00023  *
00024  *-------------------------------------------------------------------------
00025  */
00026 
00027 #include "regex/regguts.h"
00028 
00029 #include "regex/regexport.h"
00030 
00031 static void scancolormap(struct colormap * cm, int co,
00032              union tree * t, int level, chr partial,
00033              pg_wchar **chars, int *chars_len);
00034 
00035 
00036 /*
00037  * Get total number of NFA states.
00038  */
00039 int
00040 pg_reg_getnumstates(const regex_t *regex)
00041 {
00042     struct cnfa *cnfa;
00043 
00044     assert(regex != NULL && regex->re_magic == REMAGIC);
00045     cnfa = &((struct guts *) regex->re_guts)->search;
00046 
00047     return cnfa->nstates;
00048 }
00049 
00050 /*
00051  * Get initial state of NFA.
00052  */
00053 int
00054 pg_reg_getinitialstate(const regex_t *regex)
00055 {
00056     struct cnfa *cnfa;
00057 
00058     assert(regex != NULL && regex->re_magic == REMAGIC);
00059     cnfa = &((struct guts *) regex->re_guts)->search;
00060 
00061     return cnfa->pre;
00062 }
00063 
00064 /*
00065  * Get final state of NFA.
00066  */
00067 int
00068 pg_reg_getfinalstate(const regex_t *regex)
00069 {
00070     struct cnfa *cnfa;
00071 
00072     assert(regex != NULL && regex->re_magic == REMAGIC);
00073     cnfa = &((struct guts *) regex->re_guts)->search;
00074 
00075     return cnfa->post;
00076 }
00077 
00078 /*
00079  * Get number of outgoing NFA arcs of state number "st".
00080  *
00081  * Note: LACON arcs are ignored, both here and in pg_reg_getoutarcs().
00082  */
00083 int
00084 pg_reg_getnumoutarcs(const regex_t *regex, int st)
00085 {
00086     struct cnfa *cnfa;
00087     struct carc *ca;
00088     int         count;
00089 
00090     assert(regex != NULL && regex->re_magic == REMAGIC);
00091     cnfa = &((struct guts *) regex->re_guts)->search;
00092 
00093     if (st < 0 || st >= cnfa->nstates)
00094         return 0;
00095     count = 0;
00096     for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
00097     {
00098         if (ca->co < cnfa->ncolors)
00099             count++;
00100     }
00101     return count;
00102 }
00103 
00104 /*
00105  * Write array of outgoing NFA arcs of state number "st" into arcs[],
00106  * whose length arcs_len must be at least as long as indicated by
00107  * pg_reg_getnumoutarcs(), else not all arcs will be returned.
00108  */
00109 void
00110 pg_reg_getoutarcs(const regex_t *regex, int st,
00111                   regex_arc_t *arcs, int arcs_len)
00112 {
00113     struct cnfa *cnfa;
00114     struct carc *ca;
00115 
00116     assert(regex != NULL && regex->re_magic == REMAGIC);
00117     cnfa = &((struct guts *) regex->re_guts)->search;
00118 
00119     if (st < 0 || st >= cnfa->nstates || arcs_len <= 0)
00120         return;
00121     for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
00122     {
00123         if (ca->co < cnfa->ncolors)
00124         {
00125             arcs->co = ca->co;
00126             arcs->to = ca->to;
00127             arcs++;
00128             if (--arcs_len == 0)
00129                 break;
00130         }
00131     }
00132 }
00133 
00134 /*
00135  * Get total number of colors.
00136  */
00137 int
00138 pg_reg_getnumcolors(const regex_t *regex)
00139 {
00140     struct colormap *cm;
00141 
00142     assert(regex != NULL && regex->re_magic == REMAGIC);
00143     cm = &((struct guts *) regex->re_guts)->cmap;
00144 
00145     return cm->max + 1;
00146 }
00147 
00148 /*
00149  * Check if color is beginning of line/string.
00150  *
00151  * (We might at some point need to offer more refined handling of pseudocolors,
00152  * but this will do for now.)
00153  */
00154 int
00155 pg_reg_colorisbegin(const regex_t *regex, int co)
00156 {
00157     struct cnfa *cnfa;
00158 
00159     assert(regex != NULL && regex->re_magic == REMAGIC);
00160     cnfa = &((struct guts *) regex->re_guts)->search;
00161 
00162     if (co == cnfa->bos[0] || co == cnfa->bos[1])
00163         return true;
00164     else
00165         return false;
00166 }
00167 
00168 /*
00169  * Check if color is end of line/string.
00170  */
00171 int
00172 pg_reg_colorisend(const regex_t *regex, int co)
00173 {
00174     struct cnfa *cnfa;
00175 
00176     assert(regex != NULL && regex->re_magic == REMAGIC);
00177     cnfa = &((struct guts *) regex->re_guts)->search;
00178 
00179     if (co == cnfa->eos[0] || co == cnfa->eos[1])
00180         return true;
00181     else
00182         return false;
00183 }
00184 
00185 /*
00186  * Get number of member chrs of color number "co".
00187  *
00188  * Note: we return -1 if the color number is invalid, or if it is a special
00189  * color (WHITE or a pseudocolor), or if the number of members is uncertain.
00190  * The latter case cannot arise right now but is specified to allow for future
00191  * improvements (see musings about run-time handling of higher character codes
00192  * in regex/README).  Callers should not try to extract the members if -1 is
00193  * returned.
00194  */
00195 int
00196 pg_reg_getnumcharacters(const regex_t *regex, int co)
00197 {
00198     struct colormap *cm;
00199 
00200     assert(regex != NULL && regex->re_magic == REMAGIC);
00201     cm = &((struct guts *) regex->re_guts)->cmap;
00202 
00203     if (co <= 0 || co > cm->max)    /* we reject 0 which is WHITE */
00204         return -1;
00205     if (cm->cd[co].flags & PSEUDO)      /* also pseudocolors (BOS etc) */
00206         return -1;
00207 
00208     return cm->cd[co].nchrs;
00209 }
00210 
00211 /*
00212  * Write array of member chrs of color number "co" into chars[],
00213  * whose length chars_len must be at least as long as indicated by
00214  * pg_reg_getnumcharacters(), else not all chars will be returned.
00215  *
00216  * Fetching the members of WHITE or a pseudocolor is not supported.
00217  *
00218  * Caution: this is a relatively expensive operation.
00219  */
00220 void
00221 pg_reg_getcharacters(const regex_t *regex, int co,
00222                      pg_wchar *chars, int chars_len)
00223 {
00224     struct colormap *cm;
00225 
00226     assert(regex != NULL && regex->re_magic == REMAGIC);
00227     cm = &((struct guts *) regex->re_guts)->cmap;
00228 
00229     if (co <= 0 || co > cm->max || chars_len <= 0)
00230         return;
00231     if (cm->cd[co].flags & PSEUDO)
00232         return;
00233 
00234     /* Recursively search the colormap tree */
00235     scancolormap(cm, co, cm->tree, 0, 0, &chars, &chars_len);
00236 }
00237 
00238 /*
00239  * Recursively scan the colormap tree to find chrs belonging to color "co".
00240  * See regex/README for info about the tree structure.
00241  *
00242  * t: tree block to scan
00243  * level: level (from 0) of t
00244  * partial: partial chr code for chrs within t
00245  * chars, chars_len: output area
00246  */
00247 static void
00248 scancolormap(struct colormap * cm, int co,
00249              union tree * t, int level, chr partial,
00250              pg_wchar **chars, int *chars_len)
00251 {
00252     int         i;
00253 
00254     if (level < NBYTS - 1)
00255     {
00256         /* non-leaf node */
00257         for (i = 0; i < BYTTAB; i++)
00258         {
00259             /*
00260              * We do not support search for chrs of color 0 (WHITE), so
00261              * all-white subtrees need not be searched.  These can be
00262              * recognized because they are represented by the fill blocks in
00263              * the colormap struct.  This typically allows us to avoid
00264              * scanning large regions of higher-numbered chrs.
00265              */
00266             if (t->tptr[i] == &cm->tree[level + 1])
00267                 continue;
00268 
00269             /* Recursively scan next level down */
00270             scancolormap(cm, co,
00271                          t->tptr[i], level + 1,
00272                          (partial | (chr) i) << BYTBITS,
00273                          chars, chars_len);
00274         }
00275     }
00276     else
00277     {
00278         /* leaf node */
00279         for (i = 0; i < BYTTAB; i++)
00280         {
00281             if (t->tcolor[i] == co)
00282             {
00283                 if (*chars_len > 0)
00284                 {
00285                     **chars = partial | (chr) i;
00286                     (*chars)++;
00287                     (*chars_len)--;
00288                 }
00289             }
00290         }
00291     }
00292 }