Header And Logo

PostgreSQL
| The world's most advanced open source database.

pgstrcasecmp.c

Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * pgstrcasecmp.c
 *     Portable SQL-like case-independent comparisons and conversions.
 *
 * SQL99 specifies Unicode-aware case normalization, which we don't yet
 * have the infrastructure for.  Instead we use tolower() to provide a
 * locale-aware translation.  However, there are some locales where this
 * is not right either (e.g., Turkish may do strange things with 'i' and
 * 'I').  Our current compromise is to use tolower() for characters with
 * the high bit set, and use an ASCII-only downcasing for 7-bit
 * characters.
 *
 * NB: this code should match downcase_truncate_identifier() in scansup.c.
 *
 * We also provide strict ASCII-only case conversion functions, which can
 * be used to implement C/POSIX case folding semantics no matter what the
 * C library thinks the locale is.
 *
 *
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 *
 * src/port/pgstrcasecmp.c
 *
 *-------------------------------------------------------------------------
 */
00027 #include "c.h"
00028 
00029 #include <ctype.h>
00030 
00031 
/*
 * Case-independent comparison of two null-terminated strings.
 *
 * Bytes are compared as unsigned char.  When a pair of bytes differs, each
 * byte is folded to lower case and the pair is compared again: 'A'..'Z' are
 * folded with plain ASCII arithmetic, while bytes for which
 * IS_HIGHBIT_SET() is true are folded with tolower() when isupper() reports
 * them as upper case.
 *
 * Returns 0 if the strings are equal ignoring case; otherwise returns the
 * difference (s1 byte minus s2 byte) of the first pair of folded bytes that
 * differ.
 */
int
pg_strcasecmp(const char *s1, const char *s2)
{
    for (;;)
    {
        unsigned char ch1 = (unsigned char) *s1++;
        unsigned char ch2 = (unsigned char) *s2++;

        /* Identical raw bytes need no folding at all. */
        if (ch1 != ch2)
        {
            if (ch1 >= 'A' && ch1 <= 'Z')
                ch1 = (unsigned char) (ch1 + ('a' - 'A'));
            else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
                ch1 = (unsigned char) tolower(ch1);

            if (ch2 >= 'A' && ch2 <= 'Z')
                ch2 = (unsigned char) (ch2 + ('a' - 'A'));
            else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
                ch2 = (unsigned char) tolower(ch2);

            if (ch1 != ch2)
                return (int) ch1 - (int) ch2;
        }

        /*
         * The two bytes are equal here (possibly only after folding), so
         * testing one of them for the terminator covers both strings.
         */
        if (ch1 == 0)
            break;
    }
    return 0;
}
00063 
/*
 * Case-independent comparison of two not-necessarily-null-terminated strings.
 * At most n bytes will be examined from each string.
 *
 * Bytes are compared as unsigned char.  When a pair of bytes differs, each
 * byte is folded to lower case and the pair is compared again: 'A'..'Z' are
 * folded with plain ASCII arithmetic, while bytes for which
 * IS_HIGHBIT_SET() is true are folded with tolower() when isupper() reports
 * them as upper case.
 *
 * The scan ends early if both strings reach a zero byte at the same
 * position.  Returns 0 if no difference was found within the bytes
 * examined (including when n is 0); otherwise returns the difference
 * (s1 byte minus s2 byte) of the first pair of folded bytes that differ.
 */
int
pg_strncasecmp(const char *s1, const char *s2, size_t n)
{
    while (n-- > 0)
    {
        unsigned char ch1 = (unsigned char) *s1++;
        unsigned char ch2 = (unsigned char) *s2++;

        /* Identical raw bytes need no folding at all. */
        if (ch1 != ch2)
        {
            if (ch1 >= 'A' && ch1 <= 'Z')
                ch1 = (unsigned char) (ch1 + ('a' - 'A'));
            else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
                ch1 = (unsigned char) tolower(ch1);

            if (ch2 >= 'A' && ch2 <= 'Z')
                ch2 = (unsigned char) (ch2 + ('a' - 'A'));
            else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
                ch2 = (unsigned char) tolower(ch2);

            if (ch1 != ch2)
                return (int) ch1 - (int) ch2;
        }

        /*
         * The two bytes are equal here (possibly only after folding), so
         * testing one of them for the terminator covers both strings.
         */
        if (ch1 == 0)
            break;
    }
    return 0;
}
00096 
/*
 * Fold a character to upper case.
 *
 * 'a'..'z' are converted with plain ASCII arithmetic.  A byte for which
 * IS_HIGHBIT_SET() is true is passed through toupper() only when islower()
 * reports it as lower case.  Every other value is returned unchanged.
 *
 * Unlike some versions of toupper(), this is safe to apply to characters
 * that aren't lower case letters.  Note however that the whole thing is
 * a bit bogus for multibyte character sets.
 */
unsigned char
pg_toupper(unsigned char ch)
{
    if (ch >= 'a' && ch <= 'z')
        ch = (unsigned char) (ch - ('a' - 'A'));
    else if (IS_HIGHBIT_SET(ch) && islower(ch))
        ch = (unsigned char) toupper(ch);   /* toupper() returns int */
    return ch;
}
00113 
/*
 * Fold a character to lower case.
 *
 * 'A'..'Z' are converted with plain ASCII arithmetic.  A byte for which
 * IS_HIGHBIT_SET() is true is passed through tolower() only when isupper()
 * reports it as upper case.  Every other value is returned unchanged.
 *
 * Unlike some versions of tolower(), this is safe to apply to characters
 * that aren't upper case letters.  Note however that the whole thing is
 * a bit bogus for multibyte character sets.
 */
unsigned char
pg_tolower(unsigned char ch)
{
    if (ch >= 'A' && ch <= 'Z')
        ch = (unsigned char) (ch + ('a' - 'A'));
    else if (IS_HIGHBIT_SET(ch) && isupper(ch))
        ch = (unsigned char) tolower(ch);   /* tolower() returns int */
    return ch;
}
00130 
/*
 * Fold a character to upper case, following C/POSIX locale rules.
 *
 * Only 'a'..'z' are changed; every other value, including bytes with the
 * high bit set, is returned as-is.  No C library locale function is
 * called, so the result does not depend on the current locale.
 */
unsigned char
pg_ascii_toupper(unsigned char ch)
{
    if (ch >= 'a' && ch <= 'z')
        ch = (unsigned char) (ch - ('a' - 'A'));
    return ch;
}
00141 
/*
 * Fold a character to lower case, following C/POSIX locale rules.
 *
 * Only 'A'..'Z' are changed; every other value, including bytes with the
 * high bit set, is returned as-is.  No C library locale function is
 * called, so the result does not depend on the current locale.
 */
unsigned char
pg_ascii_tolower(unsigned char ch)
{
    if (ch >= 'A' && ch <= 'Z')
        ch = (unsigned char) (ch + ('a' - 'A'));
    return ch;
}
00151 }