00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "catalog/pg_collation.h"
00019 #include "utils/pg_locale.h"
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
/*
 * Strategies for classifying and case-mapping characters in the regex code.
 * pg_set_regex_collation() picks one; the pg_wc_xxx() functions switch on it.
 */
typedef enum
{
	PG_REGEX_LOCALE_C = 0,			/* hard-wired ASCII table, no libc calls */
	PG_REGEX_LOCALE_WIDE = 1,		/* <wctype.h> functions (iswalpha etc.) */
	PG_REGEX_LOCALE_1BYTE = 2,		/* <ctype.h> functions (isalpha etc.) */
	PG_REGEX_LOCALE_WIDE_L = 3,		/* <wctype.h> functions taking a locale_t */
	PG_REGEX_LOCALE_1BYTE_L = 4		/* <ctype.h> functions taking a locale_t */
} PG_Locale_Strategy;
00073
00074 static PG_Locale_Strategy pg_regex_strategy;
00075 static pg_locale_t pg_regex_locale;
00076 static Oid pg_regex_collation;
00077
00078
00079
00080
/*
 * Property bits stored in pg_char_properties[].  Each bit corresponds to one
 * of the <ctype.h> classification predicates; PG_ISALNUM is the union of the
 * digit and alpha bits rather than a separate flag.
 */
#define PG_ISDIGIT	0x01
#define PG_ISALPHA	0x02
#define PG_ISALNUM	(PG_ISDIGIT | PG_ISALPHA)
#define PG_ISUPPER	0x04
#define PG_ISLOWER	0x08
#define PG_ISGRAPH	0x10
#define PG_ISPRINT	0x20
#define PG_ISPUNCT	0x40
#define PG_ISSPACE	0x80

/*
 * Shorthand used only to lay out the table below; undefined again right
 * after it so the names do not leak into the rest of the file.
 */
#define PGCP_S	(PG_ISSPACE)											/* whitespace control */
#define PGCP_B	(PG_ISPRINT | PG_ISSPACE)								/* the space character */
#define PGCP_P	(PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT)					/* punctuation */
#define PGCP_D	(PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT)					/* decimal digit */
#define PGCP_U	(PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT)		/* upper-case letter */
#define PGCP_L	(PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT)		/* lower-case letter */

/*
 * Classification of the 128 ASCII code points, indexed by character code.
 * Used by the pg_wc_isxxx() functions when the strategy is
 * PG_REGEX_LOCALE_C.  Eight entries per row.
 */
static const unsigned char pg_char_properties[128] = {
	/* 0x00 - 0x07 */ 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x08 - 0x0F */ 0, PGCP_S, PGCP_S, PGCP_S, PGCP_S, PGCP_S, 0, 0,
	/* 0x10 - 0x17 */ 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x18 - 0x1F */ 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x27 */ PGCP_B, PGCP_P, PGCP_P, PGCP_P, PGCP_P, PGCP_P, PGCP_P, PGCP_P,
	/* 0x28 - 0x2F */ PGCP_P, PGCP_P, PGCP_P, PGCP_P, PGCP_P, PGCP_P, PGCP_P, PGCP_P,
	/* 0x30 - 0x37 */ PGCP_D, PGCP_D, PGCP_D, PGCP_D, PGCP_D, PGCP_D, PGCP_D, PGCP_D,
	/* 0x38 - 0x3F */ PGCP_D, PGCP_D, PGCP_P, PGCP_P, PGCP_P, PGCP_P, PGCP_P, PGCP_P,
	/* 0x40 - 0x47 */ PGCP_P, PGCP_U, PGCP_U, PGCP_U, PGCP_U, PGCP_U, PGCP_U, PGCP_U,
	/* 0x48 - 0x4F */ PGCP_U, PGCP_U, PGCP_U, PGCP_U, PGCP_U, PGCP_U, PGCP_U, PGCP_U,
	/* 0x50 - 0x57 */ PGCP_U, PGCP_U, PGCP_U, PGCP_U, PGCP_U, PGCP_U, PGCP_U, PGCP_U,
	/* 0x58 - 0x5F */ PGCP_U, PGCP_U, PGCP_U, PGCP_P, PGCP_P, PGCP_P, PGCP_P, PGCP_P,
	/* 0x60 - 0x67 */ PGCP_P, PGCP_L, PGCP_L, PGCP_L, PGCP_L, PGCP_L, PGCP_L, PGCP_L,
	/* 0x68 - 0x6F */ PGCP_L, PGCP_L, PGCP_L, PGCP_L, PGCP_L, PGCP_L, PGCP_L, PGCP_L,
	/* 0x70 - 0x77 */ PGCP_L, PGCP_L, PGCP_L, PGCP_L, PGCP_L, PGCP_L, PGCP_L, PGCP_L,
	/* 0x78 - 0x7F */ PGCP_L, PGCP_L, PGCP_L, PGCP_P, PGCP_P, PGCP_P, PGCP_P, 0
};

#undef PGCP_S
#undef PGCP_B
#undef PGCP_P
#undef PGCP_D
#undef PGCP_U
#undef PGCP_L
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230 void
00231 pg_set_regex_collation(Oid collation)
00232 {
00233 if (lc_ctype_is_c(collation))
00234 {
00235
00236 pg_regex_strategy = PG_REGEX_LOCALE_C;
00237 pg_regex_locale = 0;
00238 pg_regex_collation = C_COLLATION_OID;
00239 }
00240 else
00241 {
00242 if (collation == DEFAULT_COLLATION_OID)
00243 pg_regex_locale = 0;
00244 else if (OidIsValid(collation))
00245 {
00246
00247
00248
00249
00250
00251 pg_regex_locale = pg_newlocale_from_collation(collation);
00252 }
00253 else
00254 {
00255
00256
00257
00258
00259 ereport(ERROR,
00260 (errcode(ERRCODE_INDETERMINATE_COLLATION),
00261 errmsg("could not determine which collation to use for regular expression"),
00262 errhint("Use the COLLATE clause to set the collation explicitly.")));
00263 }
00264
00265 #ifdef USE_WIDE_UPPER_LOWER
00266 if (GetDatabaseEncoding() == PG_UTF8)
00267 {
00268 if (pg_regex_locale)
00269 pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
00270 else
00271 pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
00272 }
00273 else
00274 #endif
00275 {
00276 if (pg_regex_locale)
00277 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
00278 else
00279 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
00280 }
00281
00282 pg_regex_collation = collation;
00283 }
00284 }
00285
00286 static int
00287 pg_wc_isdigit(pg_wchar c)
00288 {
00289 switch (pg_regex_strategy)
00290 {
00291 case PG_REGEX_LOCALE_C:
00292 return (c <= (pg_wchar) 127 &&
00293 (pg_char_properties[c] & PG_ISDIGIT));
00294 case PG_REGEX_LOCALE_WIDE:
00295 #ifdef USE_WIDE_UPPER_LOWER
00296 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00297 return iswdigit((wint_t) c);
00298 #endif
00299
00300 case PG_REGEX_LOCALE_1BYTE:
00301 return (c <= (pg_wchar) UCHAR_MAX &&
00302 isdigit((unsigned char) c));
00303 case PG_REGEX_LOCALE_WIDE_L:
00304 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
00305 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00306 return iswdigit_l((wint_t) c, pg_regex_locale);
00307 #endif
00308
00309 case PG_REGEX_LOCALE_1BYTE_L:
00310 #ifdef HAVE_LOCALE_T
00311 return (c <= (pg_wchar) UCHAR_MAX &&
00312 isdigit_l((unsigned char) c, pg_regex_locale));
00313 #endif
00314 break;
00315 }
00316 return 0;
00317 }
00318
00319 static int
00320 pg_wc_isalpha(pg_wchar c)
00321 {
00322 switch (pg_regex_strategy)
00323 {
00324 case PG_REGEX_LOCALE_C:
00325 return (c <= (pg_wchar) 127 &&
00326 (pg_char_properties[c] & PG_ISALPHA));
00327 case PG_REGEX_LOCALE_WIDE:
00328 #ifdef USE_WIDE_UPPER_LOWER
00329 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00330 return iswalpha((wint_t) c);
00331 #endif
00332
00333 case PG_REGEX_LOCALE_1BYTE:
00334 return (c <= (pg_wchar) UCHAR_MAX &&
00335 isalpha((unsigned char) c));
00336 case PG_REGEX_LOCALE_WIDE_L:
00337 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
00338 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00339 return iswalpha_l((wint_t) c, pg_regex_locale);
00340 #endif
00341
00342 case PG_REGEX_LOCALE_1BYTE_L:
00343 #ifdef HAVE_LOCALE_T
00344 return (c <= (pg_wchar) UCHAR_MAX &&
00345 isalpha_l((unsigned char) c, pg_regex_locale));
00346 #endif
00347 break;
00348 }
00349 return 0;
00350 }
00351
00352 static int
00353 pg_wc_isalnum(pg_wchar c)
00354 {
00355 switch (pg_regex_strategy)
00356 {
00357 case PG_REGEX_LOCALE_C:
00358 return (c <= (pg_wchar) 127 &&
00359 (pg_char_properties[c] & PG_ISALNUM));
00360 case PG_REGEX_LOCALE_WIDE:
00361 #ifdef USE_WIDE_UPPER_LOWER
00362 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00363 return iswalnum((wint_t) c);
00364 #endif
00365
00366 case PG_REGEX_LOCALE_1BYTE:
00367 return (c <= (pg_wchar) UCHAR_MAX &&
00368 isalnum((unsigned char) c));
00369 case PG_REGEX_LOCALE_WIDE_L:
00370 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
00371 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00372 return iswalnum_l((wint_t) c, pg_regex_locale);
00373 #endif
00374
00375 case PG_REGEX_LOCALE_1BYTE_L:
00376 #ifdef HAVE_LOCALE_T
00377 return (c <= (pg_wchar) UCHAR_MAX &&
00378 isalnum_l((unsigned char) c, pg_regex_locale));
00379 #endif
00380 break;
00381 }
00382 return 0;
00383 }
00384
00385 static int
00386 pg_wc_isupper(pg_wchar c)
00387 {
00388 switch (pg_regex_strategy)
00389 {
00390 case PG_REGEX_LOCALE_C:
00391 return (c <= (pg_wchar) 127 &&
00392 (pg_char_properties[c] & PG_ISUPPER));
00393 case PG_REGEX_LOCALE_WIDE:
00394 #ifdef USE_WIDE_UPPER_LOWER
00395 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00396 return iswupper((wint_t) c);
00397 #endif
00398
00399 case PG_REGEX_LOCALE_1BYTE:
00400 return (c <= (pg_wchar) UCHAR_MAX &&
00401 isupper((unsigned char) c));
00402 case PG_REGEX_LOCALE_WIDE_L:
00403 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
00404 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00405 return iswupper_l((wint_t) c, pg_regex_locale);
00406 #endif
00407
00408 case PG_REGEX_LOCALE_1BYTE_L:
00409 #ifdef HAVE_LOCALE_T
00410 return (c <= (pg_wchar) UCHAR_MAX &&
00411 isupper_l((unsigned char) c, pg_regex_locale));
00412 #endif
00413 break;
00414 }
00415 return 0;
00416 }
00417
00418 static int
00419 pg_wc_islower(pg_wchar c)
00420 {
00421 switch (pg_regex_strategy)
00422 {
00423 case PG_REGEX_LOCALE_C:
00424 return (c <= (pg_wchar) 127 &&
00425 (pg_char_properties[c] & PG_ISLOWER));
00426 case PG_REGEX_LOCALE_WIDE:
00427 #ifdef USE_WIDE_UPPER_LOWER
00428 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00429 return iswlower((wint_t) c);
00430 #endif
00431
00432 case PG_REGEX_LOCALE_1BYTE:
00433 return (c <= (pg_wchar) UCHAR_MAX &&
00434 islower((unsigned char) c));
00435 case PG_REGEX_LOCALE_WIDE_L:
00436 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
00437 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00438 return iswlower_l((wint_t) c, pg_regex_locale);
00439 #endif
00440
00441 case PG_REGEX_LOCALE_1BYTE_L:
00442 #ifdef HAVE_LOCALE_T
00443 return (c <= (pg_wchar) UCHAR_MAX &&
00444 islower_l((unsigned char) c, pg_regex_locale));
00445 #endif
00446 break;
00447 }
00448 return 0;
00449 }
00450
00451 static int
00452 pg_wc_isgraph(pg_wchar c)
00453 {
00454 switch (pg_regex_strategy)
00455 {
00456 case PG_REGEX_LOCALE_C:
00457 return (c <= (pg_wchar) 127 &&
00458 (pg_char_properties[c] & PG_ISGRAPH));
00459 case PG_REGEX_LOCALE_WIDE:
00460 #ifdef USE_WIDE_UPPER_LOWER
00461 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00462 return iswgraph((wint_t) c);
00463 #endif
00464
00465 case PG_REGEX_LOCALE_1BYTE:
00466 return (c <= (pg_wchar) UCHAR_MAX &&
00467 isgraph((unsigned char) c));
00468 case PG_REGEX_LOCALE_WIDE_L:
00469 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
00470 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00471 return iswgraph_l((wint_t) c, pg_regex_locale);
00472 #endif
00473
00474 case PG_REGEX_LOCALE_1BYTE_L:
00475 #ifdef HAVE_LOCALE_T
00476 return (c <= (pg_wchar) UCHAR_MAX &&
00477 isgraph_l((unsigned char) c, pg_regex_locale));
00478 #endif
00479 break;
00480 }
00481 return 0;
00482 }
00483
00484 static int
00485 pg_wc_isprint(pg_wchar c)
00486 {
00487 switch (pg_regex_strategy)
00488 {
00489 case PG_REGEX_LOCALE_C:
00490 return (c <= (pg_wchar) 127 &&
00491 (pg_char_properties[c] & PG_ISPRINT));
00492 case PG_REGEX_LOCALE_WIDE:
00493 #ifdef USE_WIDE_UPPER_LOWER
00494 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00495 return iswprint((wint_t) c);
00496 #endif
00497
00498 case PG_REGEX_LOCALE_1BYTE:
00499 return (c <= (pg_wchar) UCHAR_MAX &&
00500 isprint((unsigned char) c));
00501 case PG_REGEX_LOCALE_WIDE_L:
00502 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
00503 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00504 return iswprint_l((wint_t) c, pg_regex_locale);
00505 #endif
00506
00507 case PG_REGEX_LOCALE_1BYTE_L:
00508 #ifdef HAVE_LOCALE_T
00509 return (c <= (pg_wchar) UCHAR_MAX &&
00510 isprint_l((unsigned char) c, pg_regex_locale));
00511 #endif
00512 break;
00513 }
00514 return 0;
00515 }
00516
00517 static int
00518 pg_wc_ispunct(pg_wchar c)
00519 {
00520 switch (pg_regex_strategy)
00521 {
00522 case PG_REGEX_LOCALE_C:
00523 return (c <= (pg_wchar) 127 &&
00524 (pg_char_properties[c] & PG_ISPUNCT));
00525 case PG_REGEX_LOCALE_WIDE:
00526 #ifdef USE_WIDE_UPPER_LOWER
00527 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00528 return iswpunct((wint_t) c);
00529 #endif
00530
00531 case PG_REGEX_LOCALE_1BYTE:
00532 return (c <= (pg_wchar) UCHAR_MAX &&
00533 ispunct((unsigned char) c));
00534 case PG_REGEX_LOCALE_WIDE_L:
00535 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
00536 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00537 return iswpunct_l((wint_t) c, pg_regex_locale);
00538 #endif
00539
00540 case PG_REGEX_LOCALE_1BYTE_L:
00541 #ifdef HAVE_LOCALE_T
00542 return (c <= (pg_wchar) UCHAR_MAX &&
00543 ispunct_l((unsigned char) c, pg_regex_locale));
00544 #endif
00545 break;
00546 }
00547 return 0;
00548 }
00549
00550 static int
00551 pg_wc_isspace(pg_wchar c)
00552 {
00553 switch (pg_regex_strategy)
00554 {
00555 case PG_REGEX_LOCALE_C:
00556 return (c <= (pg_wchar) 127 &&
00557 (pg_char_properties[c] & PG_ISSPACE));
00558 case PG_REGEX_LOCALE_WIDE:
00559 #ifdef USE_WIDE_UPPER_LOWER
00560 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00561 return iswspace((wint_t) c);
00562 #endif
00563
00564 case PG_REGEX_LOCALE_1BYTE:
00565 return (c <= (pg_wchar) UCHAR_MAX &&
00566 isspace((unsigned char) c));
00567 case PG_REGEX_LOCALE_WIDE_L:
00568 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
00569 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00570 return iswspace_l((wint_t) c, pg_regex_locale);
00571 #endif
00572
00573 case PG_REGEX_LOCALE_1BYTE_L:
00574 #ifdef HAVE_LOCALE_T
00575 return (c <= (pg_wchar) UCHAR_MAX &&
00576 isspace_l((unsigned char) c, pg_regex_locale));
00577 #endif
00578 break;
00579 }
00580 return 0;
00581 }
00582
00583 static pg_wchar
00584 pg_wc_toupper(pg_wchar c)
00585 {
00586 switch (pg_regex_strategy)
00587 {
00588 case PG_REGEX_LOCALE_C:
00589 if (c <= (pg_wchar) 127)
00590 return pg_ascii_toupper((unsigned char) c);
00591 return c;
00592 case PG_REGEX_LOCALE_WIDE:
00593
00594 if (c <= (pg_wchar) 127)
00595 return pg_ascii_toupper((unsigned char) c);
00596 #ifdef USE_WIDE_UPPER_LOWER
00597 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00598 return towupper((wint_t) c);
00599 #endif
00600
00601 case PG_REGEX_LOCALE_1BYTE:
00602
00603 if (c <= (pg_wchar) 127)
00604 return pg_ascii_toupper((unsigned char) c);
00605 if (c <= (pg_wchar) UCHAR_MAX)
00606 return toupper((unsigned char) c);
00607 return c;
00608 case PG_REGEX_LOCALE_WIDE_L:
00609 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
00610 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00611 return towupper_l((wint_t) c, pg_regex_locale);
00612 #endif
00613
00614 case PG_REGEX_LOCALE_1BYTE_L:
00615 #ifdef HAVE_LOCALE_T
00616 if (c <= (pg_wchar) UCHAR_MAX)
00617 return toupper_l((unsigned char) c, pg_regex_locale);
00618 #endif
00619 return c;
00620 }
00621 return 0;
00622 }
00623
00624 static pg_wchar
00625 pg_wc_tolower(pg_wchar c)
00626 {
00627 switch (pg_regex_strategy)
00628 {
00629 case PG_REGEX_LOCALE_C:
00630 if (c <= (pg_wchar) 127)
00631 return pg_ascii_tolower((unsigned char) c);
00632 return c;
00633 case PG_REGEX_LOCALE_WIDE:
00634
00635 if (c <= (pg_wchar) 127)
00636 return pg_ascii_tolower((unsigned char) c);
00637 #ifdef USE_WIDE_UPPER_LOWER
00638 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00639 return towlower((wint_t) c);
00640 #endif
00641
00642 case PG_REGEX_LOCALE_1BYTE:
00643
00644 if (c <= (pg_wchar) 127)
00645 return pg_ascii_tolower((unsigned char) c);
00646 if (c <= (pg_wchar) UCHAR_MAX)
00647 return tolower((unsigned char) c);
00648 return c;
00649 case PG_REGEX_LOCALE_WIDE_L:
00650 #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
00651 if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
00652 return towlower_l((wint_t) c, pg_regex_locale);
00653 #endif
00654
00655 case PG_REGEX_LOCALE_1BYTE_L:
00656 #ifdef HAVE_LOCALE_T
00657 if (c <= (pg_wchar) UCHAR_MAX)
00658 return tolower_l((unsigned char) c, pg_regex_locale);
00659 #endif
00660 return c;
00661 }
00662 return 0;
00663 }
00664
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679 typedef int (*pg_wc_probefunc) (pg_wchar c);
00680
00681 typedef struct pg_ctype_cache
00682 {
00683 pg_wc_probefunc probefunc;
00684 Oid collation;
00685 struct cvec cv;
00686 struct pg_ctype_cache *next;
00687 } pg_ctype_cache;
00688
00689 static pg_ctype_cache *pg_ctype_cache_list = NULL;
00690
00691
00692
00693
00694 static bool
00695 store_match(pg_ctype_cache *pcc, pg_wchar chr1, int nchrs)
00696 {
00697 chr *newchrs;
00698
00699 if (nchrs > 1)
00700 {
00701 if (pcc->cv.nranges >= pcc->cv.rangespace)
00702 {
00703 pcc->cv.rangespace *= 2;
00704 newchrs = (chr *) realloc(pcc->cv.ranges,
00705 pcc->cv.rangespace * sizeof(chr) * 2);
00706 if (newchrs == NULL)
00707 return false;
00708 pcc->cv.ranges = newchrs;
00709 }
00710 pcc->cv.ranges[pcc->cv.nranges * 2] = chr1;
00711 pcc->cv.ranges[pcc->cv.nranges * 2 + 1] = chr1 + nchrs - 1;
00712 pcc->cv.nranges++;
00713 }
00714 else
00715 {
00716 assert(nchrs == 1);
00717 if (pcc->cv.nchrs >= pcc->cv.chrspace)
00718 {
00719 pcc->cv.chrspace *= 2;
00720 newchrs = (chr *) realloc(pcc->cv.chrs,
00721 pcc->cv.chrspace * sizeof(chr));
00722 if (newchrs == NULL)
00723 return false;
00724 pcc->cv.chrs = newchrs;
00725 }
00726 pcc->cv.chrs[pcc->cv.nchrs++] = chr1;
00727 }
00728 return true;
00729 }
00730
00731
00732
00733
00734
00735
00736
00737
00738 static struct cvec *
00739 pg_ctype_get_cache(pg_wc_probefunc probefunc)
00740 {
00741 pg_ctype_cache *pcc;
00742 pg_wchar max_chr;
00743 pg_wchar cur_chr;
00744 int nmatches;
00745 chr *newchrs;
00746
00747
00748
00749
00750 for (pcc = pg_ctype_cache_list; pcc != NULL; pcc = pcc->next)
00751 {
00752 if (pcc->probefunc == probefunc &&
00753 pcc->collation == pg_regex_collation)
00754 return &pcc->cv;
00755 }
00756
00757
00758
00759
00760 pcc = (pg_ctype_cache *) malloc(sizeof(pg_ctype_cache));
00761 if (pcc == NULL)
00762 return NULL;
00763 pcc->probefunc = probefunc;
00764 pcc->collation = pg_regex_collation;
00765 pcc->cv.nchrs = 0;
00766 pcc->cv.chrspace = 128;
00767 pcc->cv.chrs = (chr *) malloc(pcc->cv.chrspace * sizeof(chr));
00768 pcc->cv.nranges = 0;
00769 pcc->cv.rangespace = 64;
00770 pcc->cv.ranges = (chr *) malloc(pcc->cv.rangespace * sizeof(chr) * 2);
00771 if (pcc->cv.chrs == NULL || pcc->cv.ranges == NULL)
00772 goto out_of_memory;
00773
00774
00775
00776
00777
00778
00779
00780
00781
00782
00783
00784
00785
00786 switch (pg_regex_strategy)
00787 {
00788 case PG_REGEX_LOCALE_C:
00789 max_chr = (pg_wchar) 127;
00790 break;
00791 case PG_REGEX_LOCALE_WIDE:
00792 case PG_REGEX_LOCALE_WIDE_L:
00793 max_chr = (pg_wchar) 0x7FF;
00794 break;
00795 case PG_REGEX_LOCALE_1BYTE:
00796 case PG_REGEX_LOCALE_1BYTE_L:
00797 max_chr = (pg_wchar) UCHAR_MAX;
00798 break;
00799 default:
00800 max_chr = 0;
00801 break;
00802 }
00803
00804
00805
00806
00807 nmatches = 0;
00808
00809 for (cur_chr = 0; cur_chr <= max_chr; cur_chr++)
00810 {
00811 if ((*probefunc) (cur_chr))
00812 nmatches++;
00813 else if (nmatches > 0)
00814 {
00815 if (!store_match(pcc, cur_chr - nmatches, nmatches))
00816 goto out_of_memory;
00817 nmatches = 0;
00818 }
00819 }
00820
00821 if (nmatches > 0)
00822 if (!store_match(pcc, cur_chr - nmatches, nmatches))
00823 goto out_of_memory;
00824
00825
00826
00827
00828 if (pcc->cv.nchrs == 0)
00829 {
00830 free(pcc->cv.chrs);
00831 pcc->cv.chrs = NULL;
00832 pcc->cv.chrspace = 0;
00833 }
00834 else if (pcc->cv.nchrs < pcc->cv.chrspace)
00835 {
00836 newchrs = (chr *) realloc(pcc->cv.chrs,
00837 pcc->cv.nchrs * sizeof(chr));
00838 if (newchrs == NULL)
00839 goto out_of_memory;
00840 pcc->cv.chrs = newchrs;
00841 pcc->cv.chrspace = pcc->cv.nchrs;
00842 }
00843 if (pcc->cv.nranges == 0)
00844 {
00845 free(pcc->cv.ranges);
00846 pcc->cv.ranges = NULL;
00847 pcc->cv.rangespace = 0;
00848 }
00849 else if (pcc->cv.nranges < pcc->cv.rangespace)
00850 {
00851 newchrs = (chr *) realloc(pcc->cv.ranges,
00852 pcc->cv.nranges * sizeof(chr) * 2);
00853 if (newchrs == NULL)
00854 goto out_of_memory;
00855 pcc->cv.ranges = newchrs;
00856 pcc->cv.rangespace = pcc->cv.nranges;
00857 }
00858
00859
00860
00861
00862 pcc->next = pg_ctype_cache_list;
00863 pg_ctype_cache_list = pcc;
00864
00865 return &pcc->cv;
00866
00867
00868
00869
00870 out_of_memory:
00871 if (pcc->cv.chrs)
00872 free(pcc->cv.chrs);
00873 if (pcc->cv.ranges)
00874 free(pcc->cv.ranges);
00875 free(pcc);
00876
00877 return NULL;
00878 }