Header And Logo

PostgreSQL
| The world's most advanced open source database.

regis.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * regis.c
00004  *      Fast regex subset
00005  *
00006  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00007  *
00008  *
00009  * IDENTIFICATION
00010  *    src/backend/tsearch/regis.c
00011  *
00012  *-------------------------------------------------------------------------
00013  */
00014 
00015 #include "postgres.h"
00016 
00017 #include "tsearch/dicts/regis.h"
00018 #include "tsearch/ts_locale.h"
00019 
/*
 * Scanner states shared by RS_isRegis and RS_compile.
 */
enum
{
    RS_IN_ONEOF = 1,            /* just consumed '[' */
    RS_IN_ONEOF_IN = 2,         /* inside "[...]" after its first letter */
    RS_IN_NONEOF = 3,           /* inside "[^...]" */
    RS_IN_WAIT = 4              /* outside brackets: letter or '[' expected */
};
00024 
00025 
00026 /*
00027  * Test whether a regex is of the subset supported here.
00028  * Keep this in sync with RS_compile!
00029  */
00030 bool
00031 RS_isRegis(const char *str)
00032 {
00033     int         state = RS_IN_WAIT;
00034     const char *c = str;
00035 
00036     while (*c)
00037     {
00038         if (state == RS_IN_WAIT)
00039         {
00040             if (t_isalpha(c))
00041                  /* okay */ ;
00042             else if (t_iseq(c, '['))
00043                 state = RS_IN_ONEOF;
00044             else
00045                 return false;
00046         }
00047         else if (state == RS_IN_ONEOF)
00048         {
00049             if (t_iseq(c, '^'))
00050                 state = RS_IN_NONEOF;
00051             else if (t_isalpha(c))
00052                 state = RS_IN_ONEOF_IN;
00053             else
00054                 return false;
00055         }
00056         else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
00057         {
00058             if (t_isalpha(c))
00059                  /* okay */ ;
00060             else if (t_iseq(c, ']'))
00061                 state = RS_IN_WAIT;
00062             else
00063                 return false;
00064         }
00065         else
00066             elog(ERROR, "internal error in RS_isRegis: state %d", state);
00067         c += pg_mblen(c);
00068     }
00069 
00070     return (state == RS_IN_WAIT);
00071 }
00072 
00073 static RegisNode *
00074 newRegisNode(RegisNode *prev, int len)
00075 {
00076     RegisNode  *ptr;
00077 
00078     ptr = (RegisNode *) palloc0(RNHDRSZ + len + 1);
00079     if (prev)
00080         prev->next = ptr;
00081     return ptr;
00082 }
00083 
00084 void
00085 RS_compile(Regis *r, bool issuffix, const char *str)
00086 {
00087     int         len = strlen(str);
00088     int         state = RS_IN_WAIT;
00089     const char *c = str;
00090     RegisNode  *ptr = NULL;
00091 
00092     memset(r, 0, sizeof(Regis));
00093     r->issuffix = (issuffix) ? 1 : 0;
00094 
00095     while (*c)
00096     {
00097         if (state == RS_IN_WAIT)
00098         {
00099             if (t_isalpha(c))
00100             {
00101                 if (ptr)
00102                     ptr = newRegisNode(ptr, len);
00103                 else
00104                     ptr = r->node = newRegisNode(NULL, len);
00105                 COPYCHAR(ptr->data, c);
00106                 ptr->type = RSF_ONEOF;
00107                 ptr->len = pg_mblen(c);
00108             }
00109             else if (t_iseq(c, '['))
00110             {
00111                 if (ptr)
00112                     ptr = newRegisNode(ptr, len);
00113                 else
00114                     ptr = r->node = newRegisNode(NULL, len);
00115                 ptr->type = RSF_ONEOF;
00116                 state = RS_IN_ONEOF;
00117             }
00118             else    /* shouldn't get here */
00119                 elog(ERROR, "invalid regis pattern: \"%s\"", str);
00120         }
00121         else if (state == RS_IN_ONEOF)
00122         {
00123             if (t_iseq(c, '^'))
00124             {
00125                 ptr->type = RSF_NONEOF;
00126                 state = RS_IN_NONEOF;
00127             }
00128             else if (t_isalpha(c))
00129             {
00130                 COPYCHAR(ptr->data, c);
00131                 ptr->len = pg_mblen(c);
00132                 state = RS_IN_ONEOF_IN;
00133             }
00134             else    /* shouldn't get here */
00135                 elog(ERROR, "invalid regis pattern: \"%s\"", str);
00136         }
00137         else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
00138         {
00139             if (t_isalpha(c))
00140             {
00141                 COPYCHAR(ptr->data + ptr->len, c);
00142                 ptr->len += pg_mblen(c);
00143             }
00144             else if (t_iseq(c, ']'))
00145                 state = RS_IN_WAIT;
00146             else    /* shouldn't get here */
00147                 elog(ERROR, "invalid regis pattern: \"%s\"", str);
00148         }
00149         else
00150             elog(ERROR, "internal error in RS_compile: state %d", state);
00151         c += pg_mblen(c);
00152     }
00153 
00154     if (state != RS_IN_WAIT)    /* shouldn't get here */
00155         elog(ERROR, "invalid regis pattern: \"%s\"", str);
00156 
00157     ptr = r->node;
00158     while (ptr)
00159     {
00160         r->nchar++;
00161         ptr = ptr->next;
00162     }
00163 }
00164 
00165 void
00166 RS_free(Regis *r)
00167 {
00168     RegisNode  *ptr = r->node,
00169                *tmp;
00170 
00171     while (ptr)
00172     {
00173         tmp = ptr->next;
00174         pfree(ptr);
00175         ptr = tmp;
00176     }
00177 
00178     r->node = NULL;
00179 }
00180 
#ifdef USE_WIDE_UPPER_LOWER
/*
 * Report whether the character starting at c occurs in str.
 *
 * Both arguments are walked in units of pg_mblen bytes; a match requires
 * the same byte length and identical bytes.  Only the first character at
 * c is considered.
 */
static bool
mb_strchr(char *str, char *c)
{
    char       *scan = str;
    int         wanted = pg_mblen(c);

    while (*scan)
    {
        int         width = pg_mblen(scan);

        if (width == wanted && memcmp(scan, c, width) == 0)
            return true;

        scan += width;
    }

    return false;
}
#else
/* without USE_WIDE_UPPER_LOWER: look up only the first byte at c */
#define mb_strchr(s,c)  ( (strchr((s),*(c)) == NULL) ? false : true )
#endif
00215 
00216 
00217 bool
00218 RS_execute(Regis *r, char *str)
00219 {
00220     RegisNode  *ptr = r->node;
00221     char       *c = str;
00222     int         len = 0;
00223 
00224     while (*c)
00225     {
00226         len++;
00227         c += pg_mblen(c);
00228     }
00229 
00230     if (len < r->nchar)
00231         return 0;
00232 
00233     c = str;
00234     if (r->issuffix)
00235     {
00236         len -= r->nchar;
00237         while (len-- > 0)
00238             c += pg_mblen(c);
00239     }
00240 
00241 
00242     while (ptr)
00243     {
00244         switch (ptr->type)
00245         {
00246             case RSF_ONEOF:
00247                 if (!mb_strchr((char *) ptr->data, c))
00248                     return false;
00249                 break;
00250             case RSF_NONEOF:
00251                 if (mb_strchr((char *) ptr->data, c))
00252                     return false;
00253                 break;
00254             default:
00255                 elog(ERROR, "unrecognized regis node type: %d", ptr->type);
00256         }
00257         ptr = ptr->next;
00258         c += pg_mblen(c);
00259     }
00260 
00261     return true;
00262 }