Header And Logo

PostgreSQL
| The world's most advanced open source database.

like.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * like.c
00004  *    like expression handling code.
00005  *
00006  *   NOTES
00007  *      A big hack of the regexp.c code!! Contributed by
00008  *      Keith Parks <[email protected]> (7/95).
00009  *
00010  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00011  * Portions Copyright (c) 1994, Regents of the University of California
00012  *
00013  * IDENTIFICATION
00014  *  src/backend/utils/adt/like.c
00015  *
00016  *-------------------------------------------------------------------------
00017  */
00018 #include "postgres.h"
00019 
00020 #include <ctype.h>
00021 
00022 #include "catalog/pg_collation.h"
00023 #include "mb/pg_wchar.h"
00024 #include "utils/builtins.h"
00025 #include "utils/pg_locale.h"
00026 
00027 
/*
 * Integer result codes for the pattern matchers.
 *
 * The fmgr-callable functions in this file only ever compare a matcher's
 * result against LIKE_TRUE; anything else is reported as "no match".
 * NOTE(review): LIKE_FALSE and LIKE_ABORT are not referenced in this file
 * outside these definitions; presumably they are produced and consumed by
 * the matcher code in like_match.c -- confirm there.
 */
#define LIKE_TRUE                       1
#define LIKE_FALSE                      0
#define LIKE_ABORT                      (-1)
00031 
00032 
/*
 * Forward declarations.
 *
 * The SB_/MB_/UTF8_ functions are not written out in this file; each one
 * comes into existence when like_match.c is #included further down with
 * MatchText / do_like_escape #defined to the corresponding name.
 */

/* Single-byte encodings: case-sensitive matcher and escape converter */
static int SB_MatchText(char *t, int tlen, char *p, int plen,
             pg_locale_t locale, bool locale_is_c);
static text *SB_do_like_escape(text *, text *);

/* Generic multibyte encodings: case-sensitive matcher and escape converter */
static int MB_MatchText(char *t, int tlen, char *p, int plen,
             pg_locale_t locale, bool locale_is_c);
static text *MB_do_like_escape(text *, text *);

/* UTF8 databases: matcher built with a faster NextChar */
static int UTF8_MatchText(char *t, int tlen, char *p, int plen,
               pg_locale_t locale, bool locale_is_c);

/* Single-byte encodings: matcher that folds case on the fly (MATCH_LOWER) */
static int SB_IMatchText(char *t, int tlen, char *p, int plen,
              pg_locale_t locale, bool locale_is_c);

/* Dispatchers that select one of the matchers above by database encoding */
static int  GenericMatchText(char *s, int slen, char *p, int plen);
static int  Generic_Text_IC_like(text *str, text *pat, Oid collation);
00049 
/*--------------------
 * wchareq
 *
 * Helper for the multibyte MatchText: decide whether the multibyte
 * characters starting at p1 and p2 are the same character.
 *
 * Returns 1 when both characters occupy the same number of bytes (as
 * reported by pg_mblen) and all of those bytes are equal; returns 0
 * otherwise.
 *--------------------
 */
static inline int
wchareq(char *p1, char *p2)
{
    int         nbytes;

    /* Fast path: characters with different lead bytes can never be equal. */
    if (*p1 != *p2)
        return 0;

    /* Characters of different byte length cannot be equal either. */
    nbytes = pg_mblen(p1);
    if (nbytes != pg_mblen(p2))
        return 0;

    /* Same length: walk both characters byte by byte. */
    for (; nbytes != 0; nbytes--, p1++, p2++)
    {
        if (*p1 != *p2)
            return 0;
    }

    return 1;
}
00076 
00077 /*
00078  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
00079  * comparison of multibyte characters.  It did not work at all, however,
00080  * because it relied on tolower() which has a single-byte API ... and
00081  * towlower() wouldn't be much better since we have no suitably cheap way
00082  * of getting a single character transformed to the system's wchar_t format.
00083  * So now, we just downcase the strings using lower() and apply regular LIKE
00084  * comparison.  This should be revisited when we install better locale support.
00085  */
00086 
00087 /*
00088  * We do handle case-insensitive matching for single-byte encodings using
00089  * fold-on-the-fly processing, however.
00090  */
00091 static char
00092 SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
00093 {
00094     if (locale_is_c)
00095         return pg_ascii_tolower(c);
00096 #ifdef HAVE_LOCALE_T
00097     else if (locale)
00098         return tolower_l(c, locale);
00099 #endif
00100     else
00101         return pg_tolower(c);
00102 }
00103 
00104 
/*
 * like_match.c is #included four times below, each time with a different
 * set of macros, to generate the MB_, SB_, SB_I and UTF8_ matchers declared
 * near the top of this file.
 *
 * NOTE(review): CHAREQ, NextChar, CopyAdvChar, MatchText and do_like_escape
 * are re-#defined between inclusions without an #undef in this file;
 * presumably like_match.c #undefs them at its end -- confirm there.
 */

/* Step one byte forward: advance the pointer, shrink the remaining length. */
#define NextByte(p, plen)   ((p)++, (plen)--)

/*
 * Set up to compile like_match.c for multibyte characters.
 *
 * CHAREQ compares whole multibyte characters via wchareq().
 * NextChar advances by the byte length that pg_mblen() reports for the
 * current character.
 * CopyAdvChar copies one whole character (pg_mblen(src) bytes) from src to
 * dst, advancing both pointers and reducing srclen by that many bytes.
 */
#define CHAREQ(p1, p2) wchareq((p1), (p2))
#define NextChar(p, plen) \
    do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
#define CopyAdvChar(dst, src, srclen) \
    do { int __l = pg_mblen(src); \
         (srclen) -= __l; \
         while (__l-- > 0) \
             *(dst)++ = *(src)++; \
       } while (0)

#define MatchText   MB_MatchText
#define do_like_escape  MB_do_like_escape

#include "like_match.c"

/*
 * Set up to compile like_match.c for single-byte characters.
 *
 * Here a character is exactly one byte, so comparison is a plain byte
 * compare and advancing/copying moves a single byte.
 */
#define CHAREQ(p1, p2) (*(p1) == *(p2))
#define NextChar(p, plen) NextByte((p), (plen))
#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)

#define MatchText   SB_MatchText
#define do_like_escape  SB_do_like_escape

#include "like_match.c"

/*
 * Set up to compile like_match.c for single byte case insensitive matches.
 *
 * MATCH_LOWER folds a byte with SB_lower_char(); it expands to references
 * to "locale" and "locale_is_c", which are parameters of the generated
 * matcher (see the SB_IMatchText prototype).
 * NOTE(review): neither CHAREQ nor do_like_escape is defined for this pass;
 * presumably like_match.c supplies the comparison itself and skips the
 * escape routine when MATCH_LOWER is defined -- confirm there.
 */
#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
#define NextChar(p, plen) NextByte((p), (plen))
#define MatchText SB_IMatchText

#include "like_match.c"

/*
 * Set up to compile like_match.c for UTF8 encoding, using fast NextChar.
 *
 * Instead of calling pg_mblen(), NextChar steps over one byte and then over
 * every following byte of the form 10xxxxxx (a UTF-8 continuation byte),
 * stopping as soon as plen reaches zero.
 * NOTE(review): CHAREQ and do_like_escape are not defined for this pass
 * either -- see the note on the previous set-up.
 */

#define NextChar(p, plen) \
    do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
#define MatchText   UTF8_MatchText

#include "like_match.c"
00147 
00148 /* Generic for all cases not requiring inline case-folding */
00149 static inline int
00150 GenericMatchText(char *s, int slen, char *p, int plen)
00151 {
00152     if (pg_database_encoding_max_length() == 1)
00153         return SB_MatchText(s, slen, p, plen, 0, true);
00154     else if (GetDatabaseEncoding() == PG_UTF8)
00155         return UTF8_MatchText(s, slen, p, plen, 0, true);
00156     else
00157         return MB_MatchText(s, slen, p, plen, 0, true);
00158 }
00159 
00160 static inline int
00161 Generic_Text_IC_like(text *str, text *pat, Oid collation)
00162 {
00163     char       *s,
00164                *p;
00165     int         slen,
00166                 plen;
00167 
00168     /*
00169      * For efficiency reasons, in the single byte case we don't call lower()
00170      * on the pattern and text, but instead call SB_lower_char on each
00171      * character.  In the multi-byte case we don't have much choice :-(
00172      */
00173 
00174     if (pg_database_encoding_max_length() > 1)
00175     {
00176         /* lower's result is never packed, so OK to use old macros here */
00177         pat = DatumGetTextP(DirectFunctionCall1Coll(lower, collation,
00178                                                     PointerGetDatum(pat)));
00179         p = VARDATA(pat);
00180         plen = (VARSIZE(pat) - VARHDRSZ);
00181         str = DatumGetTextP(DirectFunctionCall1Coll(lower, collation,
00182                                                     PointerGetDatum(str)));
00183         s = VARDATA(str);
00184         slen = (VARSIZE(str) - VARHDRSZ);
00185         if (GetDatabaseEncoding() == PG_UTF8)
00186             return UTF8_MatchText(s, slen, p, plen, 0, true);
00187         else
00188             return MB_MatchText(s, slen, p, plen, 0, true);
00189     }
00190     else
00191     {
00192         /*
00193          * Here we need to prepare locale information for SB_lower_char. This
00194          * should match the methods used in str_tolower().
00195          */
00196         pg_locale_t locale = 0;
00197         bool        locale_is_c = false;
00198 
00199         if (lc_ctype_is_c(collation))
00200             locale_is_c = true;
00201         else if (collation != DEFAULT_COLLATION_OID)
00202         {
00203             if (!OidIsValid(collation))
00204             {
00205                 /*
00206                  * This typically means that the parser could not resolve a
00207                  * conflict of implicit collations, so report it that way.
00208                  */
00209                 ereport(ERROR,
00210                         (errcode(ERRCODE_INDETERMINATE_COLLATION),
00211                          errmsg("could not determine which collation to use for ILIKE"),
00212                          errhint("Use the COLLATE clause to set the collation explicitly.")));
00213             }
00214             locale = pg_newlocale_from_collation(collation);
00215         }
00216 
00217         p = VARDATA_ANY(pat);
00218         plen = VARSIZE_ANY_EXHDR(pat);
00219         s = VARDATA_ANY(str);
00220         slen = VARSIZE_ANY_EXHDR(str);
00221         return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
00222     }
00223 }
00224 
00225 /*
00226  *  interface routines called by the function manager
00227  */
00228 
00229 Datum
00230 namelike(PG_FUNCTION_ARGS)
00231 {
00232     Name        str = PG_GETARG_NAME(0);
00233     text       *pat = PG_GETARG_TEXT_PP(1);
00234     bool        result;
00235     char       *s,
00236                *p;
00237     int         slen,
00238                 plen;
00239 
00240     s = NameStr(*str);
00241     slen = strlen(s);
00242     p = VARDATA_ANY(pat);
00243     plen = VARSIZE_ANY_EXHDR(pat);
00244 
00245     result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
00246 
00247     PG_RETURN_BOOL(result);
00248 }
00249 
00250 Datum
00251 namenlike(PG_FUNCTION_ARGS)
00252 {
00253     Name        str = PG_GETARG_NAME(0);
00254     text       *pat = PG_GETARG_TEXT_PP(1);
00255     bool        result;
00256     char       *s,
00257                *p;
00258     int         slen,
00259                 plen;
00260 
00261     s = NameStr(*str);
00262     slen = strlen(s);
00263     p = VARDATA_ANY(pat);
00264     plen = VARSIZE_ANY_EXHDR(pat);
00265 
00266     result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
00267 
00268     PG_RETURN_BOOL(result);
00269 }
00270 
00271 Datum
00272 textlike(PG_FUNCTION_ARGS)
00273 {
00274     text       *str = PG_GETARG_TEXT_PP(0);
00275     text       *pat = PG_GETARG_TEXT_PP(1);
00276     bool        result;
00277     char       *s,
00278                *p;
00279     int         slen,
00280                 plen;
00281 
00282     s = VARDATA_ANY(str);
00283     slen = VARSIZE_ANY_EXHDR(str);
00284     p = VARDATA_ANY(pat);
00285     plen = VARSIZE_ANY_EXHDR(pat);
00286 
00287     result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
00288 
00289     PG_RETURN_BOOL(result);
00290 }
00291 
00292 Datum
00293 textnlike(PG_FUNCTION_ARGS)
00294 {
00295     text       *str = PG_GETARG_TEXT_PP(0);
00296     text       *pat = PG_GETARG_TEXT_PP(1);
00297     bool        result;
00298     char       *s,
00299                *p;
00300     int         slen,
00301                 plen;
00302 
00303     s = VARDATA_ANY(str);
00304     slen = VARSIZE_ANY_EXHDR(str);
00305     p = VARDATA_ANY(pat);
00306     plen = VARSIZE_ANY_EXHDR(pat);
00307 
00308     result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
00309 
00310     PG_RETURN_BOOL(result);
00311 }
00312 
00313 Datum
00314 bytealike(PG_FUNCTION_ARGS)
00315 {
00316     bytea      *str = PG_GETARG_BYTEA_PP(0);
00317     bytea      *pat = PG_GETARG_BYTEA_PP(1);
00318     bool        result;
00319     char       *s,
00320                *p;
00321     int         slen,
00322                 plen;
00323 
00324     s = VARDATA_ANY(str);
00325     slen = VARSIZE_ANY_EXHDR(str);
00326     p = VARDATA_ANY(pat);
00327     plen = VARSIZE_ANY_EXHDR(pat);
00328 
00329     result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
00330 
00331     PG_RETURN_BOOL(result);
00332 }
00333 
00334 Datum
00335 byteanlike(PG_FUNCTION_ARGS)
00336 {
00337     bytea      *str = PG_GETARG_BYTEA_PP(0);
00338     bytea      *pat = PG_GETARG_BYTEA_PP(1);
00339     bool        result;
00340     char       *s,
00341                *p;
00342     int         slen,
00343                 plen;
00344 
00345     s = VARDATA_ANY(str);
00346     slen = VARSIZE_ANY_EXHDR(str);
00347     p = VARDATA_ANY(pat);
00348     plen = VARSIZE_ANY_EXHDR(pat);
00349 
00350     result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
00351 
00352     PG_RETURN_BOOL(result);
00353 }
00354 
00355 /*
00356  * Case-insensitive versions
00357  */
00358 
00359 Datum
00360 nameiclike(PG_FUNCTION_ARGS)
00361 {
00362     Name        str = PG_GETARG_NAME(0);
00363     text       *pat = PG_GETARG_TEXT_PP(1);
00364     bool        result;
00365     text       *strtext;
00366 
00367     strtext = DatumGetTextP(DirectFunctionCall1(name_text,
00368                                                 NameGetDatum(str)));
00369     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
00370 
00371     PG_RETURN_BOOL(result);
00372 }
00373 
00374 Datum
00375 nameicnlike(PG_FUNCTION_ARGS)
00376 {
00377     Name        str = PG_GETARG_NAME(0);
00378     text       *pat = PG_GETARG_TEXT_PP(1);
00379     bool        result;
00380     text       *strtext;
00381 
00382     strtext = DatumGetTextP(DirectFunctionCall1(name_text,
00383                                                 NameGetDatum(str)));
00384     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
00385 
00386     PG_RETURN_BOOL(result);
00387 }
00388 
00389 Datum
00390 texticlike(PG_FUNCTION_ARGS)
00391 {
00392     text       *str = PG_GETARG_TEXT_PP(0);
00393     text       *pat = PG_GETARG_TEXT_PP(1);
00394     bool        result;
00395 
00396     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
00397 
00398     PG_RETURN_BOOL(result);
00399 }
00400 
00401 Datum
00402 texticnlike(PG_FUNCTION_ARGS)
00403 {
00404     text       *str = PG_GETARG_TEXT_PP(0);
00405     text       *pat = PG_GETARG_TEXT_PP(1);
00406     bool        result;
00407 
00408     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
00409 
00410     PG_RETURN_BOOL(result);
00411 }
00412 
00413 /*
00414  * like_escape() --- given a pattern and an ESCAPE string,
00415  * convert the pattern to use Postgres' standard backslash escape convention.
00416  */
00417 Datum
00418 like_escape(PG_FUNCTION_ARGS)
00419 {
00420     text       *pat = PG_GETARG_TEXT_PP(0);
00421     text       *esc = PG_GETARG_TEXT_PP(1);
00422     text       *result;
00423 
00424     if (pg_database_encoding_max_length() == 1)
00425         result = SB_do_like_escape(pat, esc);
00426     else
00427         result = MB_do_like_escape(pat, esc);
00428 
00429     PG_RETURN_TEXT_P(result);
00430 }
00431 
00432 /*
00433  * like_escape_bytea() --- given a pattern and an ESCAPE string,
00434  * convert the pattern to use Postgres' standard backslash escape convention.
00435  */
00436 Datum
00437 like_escape_bytea(PG_FUNCTION_ARGS)
00438 {
00439     bytea      *pat = PG_GETARG_BYTEA_PP(0);
00440     bytea      *esc = PG_GETARG_BYTEA_PP(1);
00441     bytea      *result = SB_do_like_escape((text *) pat, (text *) esc);
00442 
00443     PG_RETURN_BYTEA_P((bytea *) result);
00444 }