00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "postgres.h"
00031
00032 #include "catalog/pg_type.h"
00033 #include "funcapi.h"
00034 #include "regex/regex.h"
00035 #include "utils/array.h"
00036 #include "utils/builtins.h"
00037
/*
 * Fetch text argument number _n, or yield NULL when the call was made with
 * too few arguments for that position to exist.
 */
#define PG_GETARG_TEXT_PP_IF_EXISTS(_n) \
	((_n) < PG_NARGS() ? PG_GETARG_TEXT_PP(_n) : NULL)
00040
00041
00042
/* Outcome of parsing a regexp option string (filled in by parse_re_flags) */
typedef struct pg_re_flags
{
	int			cflags;			/* REG_xxx compile flags for the regex compiler */
	bool		glob;			/* true when the 'g' option was given */
} pg_re_flags;
00048
00049
00050 typedef struct regexp_matches_ctx
00051 {
00052 text *orig_str;
00053 int nmatches;
00054 int npatterns;
00055
00056
00057 int *match_locs;
00058 int next_match;
00059
00060 Datum *elems;
00061 bool *nulls;
00062 } regexp_matches_ctx;
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
/*
 * Capacity of the compiled-regexp cache (re_array).  The #ifndef lets a
 * definition supplied earlier take precedence over this default.
 */
#ifndef MAX_CACHED_RES
#define MAX_CACHED_RES 32
#endif
00092
00093
00094 typedef struct cached_re_str
00095 {
00096 char *cre_pat;
00097 int cre_pat_len;
00098 int cre_flags;
00099 Oid cre_collation;
00100 regex_t cre_re;
00101 } cached_re_str;
00102
00103 static int num_res = 0;
00104 static cached_re_str re_array[MAX_CACHED_RES];
00105
00106
00107
00108 static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
00109 text *flags,
00110 Oid collation,
00111 bool force_glob,
00112 bool use_subpatterns,
00113 bool ignore_degenerate);
00114 static void cleanup_regexp_matches(regexp_matches_ctx *matchctx);
00115 static ArrayType *build_regexp_matches_result(regexp_matches_ctx *matchctx);
00116 static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131 static regex_t *
00132 RE_compile_and_cache(text *text_re, int cflags, Oid collation)
00133 {
00134 int text_re_len = VARSIZE_ANY_EXHDR(text_re);
00135 char *text_re_val = VARDATA_ANY(text_re);
00136 pg_wchar *pattern;
00137 int pattern_len;
00138 int i;
00139 int regcomp_result;
00140 cached_re_str re_temp;
00141 char errMsg[100];
00142
00143
00144
00145
00146
00147
00148 for (i = 0; i < num_res; i++)
00149 {
00150 if (re_array[i].cre_pat_len == text_re_len &&
00151 re_array[i].cre_flags == cflags &&
00152 re_array[i].cre_collation == collation &&
00153 memcmp(re_array[i].cre_pat, text_re_val, text_re_len) == 0)
00154 {
00155
00156
00157
00158 if (i > 0)
00159 {
00160 re_temp = re_array[i];
00161 memmove(&re_array[1], &re_array[0], i * sizeof(cached_re_str));
00162 re_array[0] = re_temp;
00163 }
00164
00165 return &re_array[0].cre_re;
00166 }
00167 }
00168
00169
00170
00171
00172
00173
00174
00175 pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
00176 pattern_len = pg_mb2wchar_with_len(text_re_val,
00177 pattern,
00178 text_re_len);
00179
00180 regcomp_result = pg_regcomp(&re_temp.cre_re,
00181 pattern,
00182 pattern_len,
00183 cflags,
00184 collation);
00185
00186 pfree(pattern);
00187
00188 if (regcomp_result != REG_OKAY)
00189 {
00190
00191 pg_regerror(regcomp_result, &re_temp.cre_re, errMsg, sizeof(errMsg));
00192 ereport(ERROR,
00193 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
00194 errmsg("invalid regular expression: %s", errMsg)));
00195 }
00196
00197
00198
00199
00200
00201
00202
00203 re_temp.cre_pat = malloc(Max(text_re_len, 1));
00204 if (re_temp.cre_pat == NULL)
00205 {
00206 pg_regfree(&re_temp.cre_re);
00207 ereport(ERROR,
00208 (errcode(ERRCODE_OUT_OF_MEMORY),
00209 errmsg("out of memory")));
00210 }
00211 memcpy(re_temp.cre_pat, text_re_val, text_re_len);
00212 re_temp.cre_pat_len = text_re_len;
00213 re_temp.cre_flags = cflags;
00214 re_temp.cre_collation = collation;
00215
00216
00217
00218
00219
00220 if (num_res >= MAX_CACHED_RES)
00221 {
00222 --num_res;
00223 Assert(num_res < MAX_CACHED_RES);
00224 pg_regfree(&re_array[num_res].cre_re);
00225 free(re_array[num_res].cre_pat);
00226 }
00227
00228 if (num_res > 0)
00229 memmove(&re_array[1], &re_array[0], num_res * sizeof(cached_re_str));
00230
00231 re_array[0] = re_temp;
00232 num_res++;
00233
00234 return &re_array[0].cre_re;
00235 }
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251 static bool
00252 RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len,
00253 int start_search, int nmatch, regmatch_t *pmatch)
00254 {
00255 int regexec_result;
00256 char errMsg[100];
00257
00258
00259 regexec_result = pg_regexec(re,
00260 data,
00261 data_len,
00262 start_search,
00263 NULL,
00264 nmatch,
00265 pmatch,
00266 0);
00267
00268 if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
00269 {
00270
00271 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
00272 ereport(ERROR,
00273 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
00274 errmsg("regular expression failed: %s", errMsg)));
00275 }
00276
00277 return (regexec_result == REG_OKAY);
00278 }
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293 static bool
00294 RE_execute(regex_t *re, char *dat, int dat_len,
00295 int nmatch, regmatch_t *pmatch)
00296 {
00297 pg_wchar *data;
00298 int data_len;
00299 bool match;
00300
00301
00302 data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
00303 data_len = pg_mb2wchar_with_len(dat, data, dat_len);
00304
00305
00306 match = RE_wchar_execute(re, data, data_len, 0, nmatch, pmatch);
00307
00308 pfree(data);
00309 return match;
00310 }
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327 static bool
00328 RE_compile_and_execute(text *text_re, char *dat, int dat_len,
00329 int cflags, Oid collation,
00330 int nmatch, regmatch_t *pmatch)
00331 {
00332 regex_t *re;
00333
00334
00335 re = RE_compile_and_cache(text_re, cflags, collation);
00336
00337 return RE_execute(re, dat, dat_len, nmatch, pmatch);
00338 }
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350 static void
00351 parse_re_flags(pg_re_flags *flags, text *opts)
00352 {
00353
00354 flags->cflags = REG_ADVANCED;
00355 flags->glob = false;
00356
00357 if (opts)
00358 {
00359 char *opt_p = VARDATA_ANY(opts);
00360 int opt_len = VARSIZE_ANY_EXHDR(opts);
00361 int i;
00362
00363 for (i = 0; i < opt_len; i++)
00364 {
00365 switch (opt_p[i])
00366 {
00367 case 'g':
00368 flags->glob = true;
00369 break;
00370 case 'b':
00371 flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED | REG_QUOTE);
00372 break;
00373 case 'c':
00374 flags->cflags &= ~REG_ICASE;
00375 break;
00376 case 'e':
00377 flags->cflags |= REG_EXTENDED;
00378 flags->cflags &= ~(REG_ADVANCED | REG_QUOTE);
00379 break;
00380 case 'i':
00381 flags->cflags |= REG_ICASE;
00382 break;
00383 case 'm':
00384 case 'n':
00385 flags->cflags |= REG_NEWLINE;
00386 break;
00387 case 'p':
00388 flags->cflags |= REG_NLSTOP;
00389 flags->cflags &= ~REG_NLANCH;
00390 break;
00391 case 'q':
00392 flags->cflags |= REG_QUOTE;
00393 flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED);
00394 break;
00395 case 's':
00396 flags->cflags &= ~REG_NEWLINE;
00397 break;
00398 case 't':
00399 flags->cflags &= ~REG_EXPANDED;
00400 break;
00401 case 'w':
00402 flags->cflags &= ~REG_NLSTOP;
00403 flags->cflags |= REG_NLANCH;
00404 break;
00405 case 'x':
00406 flags->cflags |= REG_EXPANDED;
00407 break;
00408 default:
00409 ereport(ERROR,
00410 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00411 errmsg("invalid regexp option: \"%c\"",
00412 opt_p[i])));
00413 break;
00414 }
00415 }
00416 }
00417 }
00418
00419
00420
00421
00422
00423
00424 Datum
00425 nameregexeq(PG_FUNCTION_ARGS)
00426 {
00427 Name n = PG_GETARG_NAME(0);
00428 text *p = PG_GETARG_TEXT_PP(1);
00429
00430 PG_RETURN_BOOL(RE_compile_and_execute(p,
00431 NameStr(*n),
00432 strlen(NameStr(*n)),
00433 REG_ADVANCED,
00434 PG_GET_COLLATION(),
00435 0, NULL));
00436 }
00437
00438 Datum
00439 nameregexne(PG_FUNCTION_ARGS)
00440 {
00441 Name n = PG_GETARG_NAME(0);
00442 text *p = PG_GETARG_TEXT_PP(1);
00443
00444 PG_RETURN_BOOL(!RE_compile_and_execute(p,
00445 NameStr(*n),
00446 strlen(NameStr(*n)),
00447 REG_ADVANCED,
00448 PG_GET_COLLATION(),
00449 0, NULL));
00450 }
00451
00452 Datum
00453 textregexeq(PG_FUNCTION_ARGS)
00454 {
00455 text *s = PG_GETARG_TEXT_PP(0);
00456 text *p = PG_GETARG_TEXT_PP(1);
00457
00458 PG_RETURN_BOOL(RE_compile_and_execute(p,
00459 VARDATA_ANY(s),
00460 VARSIZE_ANY_EXHDR(s),
00461 REG_ADVANCED,
00462 PG_GET_COLLATION(),
00463 0, NULL));
00464 }
00465
00466 Datum
00467 textregexne(PG_FUNCTION_ARGS)
00468 {
00469 text *s = PG_GETARG_TEXT_PP(0);
00470 text *p = PG_GETARG_TEXT_PP(1);
00471
00472 PG_RETURN_BOOL(!RE_compile_and_execute(p,
00473 VARDATA_ANY(s),
00474 VARSIZE_ANY_EXHDR(s),
00475 REG_ADVANCED,
00476 PG_GET_COLLATION(),
00477 0, NULL));
00478 }
00479
00480
00481
00482
00483
00484
00485
00486
00487 Datum
00488 nameicregexeq(PG_FUNCTION_ARGS)
00489 {
00490 Name n = PG_GETARG_NAME(0);
00491 text *p = PG_GETARG_TEXT_PP(1);
00492
00493 PG_RETURN_BOOL(RE_compile_and_execute(p,
00494 NameStr(*n),
00495 strlen(NameStr(*n)),
00496 REG_ADVANCED | REG_ICASE,
00497 PG_GET_COLLATION(),
00498 0, NULL));
00499 }
00500
00501 Datum
00502 nameicregexne(PG_FUNCTION_ARGS)
00503 {
00504 Name n = PG_GETARG_NAME(0);
00505 text *p = PG_GETARG_TEXT_PP(1);
00506
00507 PG_RETURN_BOOL(!RE_compile_and_execute(p,
00508 NameStr(*n),
00509 strlen(NameStr(*n)),
00510 REG_ADVANCED | REG_ICASE,
00511 PG_GET_COLLATION(),
00512 0, NULL));
00513 }
00514
00515 Datum
00516 texticregexeq(PG_FUNCTION_ARGS)
00517 {
00518 text *s = PG_GETARG_TEXT_PP(0);
00519 text *p = PG_GETARG_TEXT_PP(1);
00520
00521 PG_RETURN_BOOL(RE_compile_and_execute(p,
00522 VARDATA_ANY(s),
00523 VARSIZE_ANY_EXHDR(s),
00524 REG_ADVANCED | REG_ICASE,
00525 PG_GET_COLLATION(),
00526 0, NULL));
00527 }
00528
00529 Datum
00530 texticregexne(PG_FUNCTION_ARGS)
00531 {
00532 text *s = PG_GETARG_TEXT_PP(0);
00533 text *p = PG_GETARG_TEXT_PP(1);
00534
00535 PG_RETURN_BOOL(!RE_compile_and_execute(p,
00536 VARDATA_ANY(s),
00537 VARSIZE_ANY_EXHDR(s),
00538 REG_ADVANCED | REG_ICASE,
00539 PG_GET_COLLATION(),
00540 0, NULL));
00541 }
00542
00543
00544
00545
00546
00547
00548 Datum
00549 textregexsubstr(PG_FUNCTION_ARGS)
00550 {
00551 text *s = PG_GETARG_TEXT_PP(0);
00552 text *p = PG_GETARG_TEXT_PP(1);
00553 regex_t *re;
00554 regmatch_t pmatch[2];
00555 int so,
00556 eo;
00557
00558
00559 re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
00560
00561
00562
00563
00564
00565
00566
00567 if (!RE_execute(re,
00568 VARDATA_ANY(s), VARSIZE_ANY_EXHDR(s),
00569 2, pmatch))
00570 PG_RETURN_NULL();
00571
00572 if (re->re_nsub > 0)
00573 {
00574
00575 so = pmatch[1].rm_so;
00576 eo = pmatch[1].rm_eo;
00577 }
00578 else
00579 {
00580
00581 so = pmatch[0].rm_so;
00582 eo = pmatch[0].rm_eo;
00583 }
00584
00585
00586
00587
00588
00589
00590
00591 if (so < 0 || eo < 0)
00592 PG_RETURN_NULL();
00593
00594 return DirectFunctionCall3(text_substr,
00595 PointerGetDatum(s),
00596 Int32GetDatum(so + 1),
00597 Int32GetDatum(eo - so));
00598 }
00599
00600
00601
00602
00603
00604
00605
00606
00607 Datum
00608 textregexreplace_noopt(PG_FUNCTION_ARGS)
00609 {
00610 text *s = PG_GETARG_TEXT_PP(0);
00611 text *p = PG_GETARG_TEXT_PP(1);
00612 text *r = PG_GETARG_TEXT_PP(2);
00613 regex_t *re;
00614
00615 re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
00616
00617 PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, false));
00618 }
00619
00620
00621
00622
00623
00624 Datum
00625 textregexreplace(PG_FUNCTION_ARGS)
00626 {
00627 text *s = PG_GETARG_TEXT_PP(0);
00628 text *p = PG_GETARG_TEXT_PP(1);
00629 text *r = PG_GETARG_TEXT_PP(2);
00630 text *opt = PG_GETARG_TEXT_PP(3);
00631 regex_t *re;
00632 pg_re_flags flags;
00633
00634 parse_re_flags(&flags, opt);
00635
00636 re = RE_compile_and_cache(p, flags.cflags, PG_GET_COLLATION());
00637
00638 PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, flags.glob));
00639 }
00640
00641
00642
00643
00644
00645
00646 Datum
00647 similar_escape(PG_FUNCTION_ARGS)
00648 {
00649 text *pat_text;
00650 text *esc_text;
00651 text *result;
00652 char *p,
00653 *e,
00654 *r;
00655 int plen,
00656 elen;
00657 bool afterescape = false;
00658 bool incharclass = false;
00659 int nquotes = 0;
00660
00661
00662 if (PG_ARGISNULL(0))
00663 PG_RETURN_NULL();
00664 pat_text = PG_GETARG_TEXT_PP(0);
00665 p = VARDATA_ANY(pat_text);
00666 plen = VARSIZE_ANY_EXHDR(pat_text);
00667 if (PG_ARGISNULL(1))
00668 {
00669
00670 e = "\\";
00671 elen = 1;
00672 }
00673 else
00674 {
00675 esc_text = PG_GETARG_TEXT_PP(1);
00676 e = VARDATA_ANY(esc_text);
00677 elen = VARSIZE_ANY_EXHDR(esc_text);
00678 if (elen == 0)
00679 e = NULL;
00680 else if (elen != 1)
00681 ereport(ERROR,
00682 (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
00683 errmsg("invalid escape string"),
00684 errhint("Escape string must be empty or one character.")));
00685 }
00686
00687
00688
00689
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704 result = (text *) palloc(VARHDRSZ + 6 + 3 * plen);
00705 r = VARDATA(result);
00706
00707 *r++ = '^';
00708 *r++ = '(';
00709 *r++ = '?';
00710 *r++ = ':';
00711
00712 while (plen > 0)
00713 {
00714 char pchar = *p;
00715
00716 if (afterescape)
00717 {
00718 if (pchar == '"' && !incharclass)
00719 *r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
00720 else
00721 {
00722 *r++ = '\\';
00723 *r++ = pchar;
00724 }
00725 afterescape = false;
00726 }
00727 else if (e && pchar == *e)
00728 {
00729
00730 afterescape = true;
00731 }
00732 else if (incharclass)
00733 {
00734 if (pchar == '\\')
00735 *r++ = '\\';
00736 *r++ = pchar;
00737 if (pchar == ']')
00738 incharclass = false;
00739 }
00740 else if (pchar == '[')
00741 {
00742 *r++ = pchar;
00743 incharclass = true;
00744 }
00745 else if (pchar == '%')
00746 {
00747 *r++ = '.';
00748 *r++ = '*';
00749 }
00750 else if (pchar == '_')
00751 *r++ = '.';
00752 else if (pchar == '(')
00753 {
00754
00755 *r++ = '(';
00756 *r++ = '?';
00757 *r++ = ':';
00758 }
00759 else if (pchar == '\\' || pchar == '.' ||
00760 pchar == '^' || pchar == '$')
00761 {
00762 *r++ = '\\';
00763 *r++ = pchar;
00764 }
00765 else
00766 *r++ = pchar;
00767 p++, plen--;
00768 }
00769
00770 *r++ = ')';
00771 *r++ = '$';
00772
00773 SET_VARSIZE(result, r - ((char *) result));
00774
00775 PG_RETURN_TEXT_P(result);
00776 }
00777
00778
00779
00780
00781
00782 Datum
00783 regexp_matches(PG_FUNCTION_ARGS)
00784 {
00785 FuncCallContext *funcctx;
00786 regexp_matches_ctx *matchctx;
00787
00788 if (SRF_IS_FIRSTCALL())
00789 {
00790 text *pattern = PG_GETARG_TEXT_PP(1);
00791 text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
00792 MemoryContext oldcontext;
00793
00794 funcctx = SRF_FIRSTCALL_INIT();
00795 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
00796
00797
00798 matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
00799 flags,
00800 PG_GET_COLLATION(),
00801 false, true, false);
00802
00803
00804 matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
00805 matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
00806
00807 MemoryContextSwitchTo(oldcontext);
00808 funcctx->user_fctx = (void *) matchctx;
00809 }
00810
00811 funcctx = SRF_PERCALL_SETUP();
00812 matchctx = (regexp_matches_ctx *) funcctx->user_fctx;
00813
00814 if (matchctx->next_match < matchctx->nmatches)
00815 {
00816 ArrayType *result_ary;
00817
00818 result_ary = build_regexp_matches_result(matchctx);
00819 matchctx->next_match++;
00820 SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
00821 }
00822
00823
00824 cleanup_regexp_matches(matchctx);
00825
00826 SRF_RETURN_DONE(funcctx);
00827 }
00828
00829
00830 Datum
00831 regexp_matches_no_flags(PG_FUNCTION_ARGS)
00832 {
00833 return regexp_matches(fcinfo);
00834 }
00835
00836
00837
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848 static regexp_matches_ctx *
00849 setup_regexp_matches(text *orig_str, text *pattern, text *flags,
00850 Oid collation,
00851 bool force_glob, bool use_subpatterns,
00852 bool ignore_degenerate)
00853 {
00854 regexp_matches_ctx *matchctx = palloc0(sizeof(regexp_matches_ctx));
00855 int orig_len;
00856 pg_wchar *wide_str;
00857 int wide_len;
00858 pg_re_flags re_flags;
00859 regex_t *cpattern;
00860 regmatch_t *pmatch;
00861 int pmatch_len;
00862 int array_len;
00863 int array_idx;
00864 int prev_match_end;
00865 int start_search;
00866
00867
00868 matchctx->orig_str = orig_str;
00869
00870
00871 orig_len = VARSIZE_ANY_EXHDR(orig_str);
00872 wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
00873 wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
00874
00875
00876 parse_re_flags(&re_flags, flags);
00877 if (force_glob)
00878 {
00879
00880 if (re_flags.glob)
00881 ereport(ERROR,
00882 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
00883 errmsg("regexp_split does not support the global option")));
00884
00885 re_flags.glob = true;
00886 }
00887
00888
00889 cpattern = RE_compile_and_cache(pattern, re_flags.cflags, collation);
00890
00891
00892 if (use_subpatterns && cpattern->re_nsub > 0)
00893 {
00894 matchctx->npatterns = cpattern->re_nsub;
00895 pmatch_len = cpattern->re_nsub + 1;
00896 }
00897 else
00898 {
00899 use_subpatterns = false;
00900 matchctx->npatterns = 1;
00901 pmatch_len = 1;
00902 }
00903
00904
00905 pmatch = palloc(sizeof(regmatch_t) * pmatch_len);
00906
00907
00908 array_len = re_flags.glob ? 256 : 32;
00909 matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
00910 array_idx = 0;
00911
00912
00913 prev_match_end = 0;
00914 start_search = 0;
00915 while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
00916 pmatch_len, pmatch))
00917 {
00918
00919
00920
00921
00922
00923 if (!ignore_degenerate ||
00924 (pmatch[0].rm_so < wide_len &&
00925 pmatch[0].rm_eo > prev_match_end))
00926 {
00927
00928 while (array_idx + matchctx->npatterns * 2 > array_len)
00929 {
00930 array_len *= 2;
00931 matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
00932 sizeof(int) * array_len);
00933 }
00934
00935
00936 if (use_subpatterns)
00937 {
00938 int i;
00939
00940 for (i = 1; i <= matchctx->npatterns; i++)
00941 {
00942 matchctx->match_locs[array_idx++] = pmatch[i].rm_so;
00943 matchctx->match_locs[array_idx++] = pmatch[i].rm_eo;
00944 }
00945 }
00946 else
00947 {
00948 matchctx->match_locs[array_idx++] = pmatch[0].rm_so;
00949 matchctx->match_locs[array_idx++] = pmatch[0].rm_eo;
00950 }
00951 matchctx->nmatches++;
00952 }
00953 prev_match_end = pmatch[0].rm_eo;
00954
00955
00956 if (!re_flags.glob)
00957 break;
00958
00959
00960
00961
00962
00963
00964
00965 if (start_search < pmatch[0].rm_eo)
00966 start_search = pmatch[0].rm_eo;
00967 else
00968 start_search++;
00969 if (start_search > wide_len)
00970 break;
00971 }
00972
00973
00974 pfree(wide_str);
00975 pfree(pmatch);
00976
00977 return matchctx;
00978 }
00979
00980
00981
00982
00983 static void
00984 cleanup_regexp_matches(regexp_matches_ctx *matchctx)
00985 {
00986 pfree(matchctx->orig_str);
00987 pfree(matchctx->match_locs);
00988 if (matchctx->elems)
00989 pfree(matchctx->elems);
00990 if (matchctx->nulls)
00991 pfree(matchctx->nulls);
00992 pfree(matchctx);
00993 }
00994
00995
00996
00997
00998 static ArrayType *
00999 build_regexp_matches_result(regexp_matches_ctx *matchctx)
01000 {
01001 Datum *elems = matchctx->elems;
01002 bool *nulls = matchctx->nulls;
01003 int dims[1];
01004 int lbs[1];
01005 int loc;
01006 int i;
01007
01008
01009 loc = matchctx->next_match * matchctx->npatterns * 2;
01010 for (i = 0; i < matchctx->npatterns; i++)
01011 {
01012 int so = matchctx->match_locs[loc++];
01013 int eo = matchctx->match_locs[loc++];
01014
01015 if (so < 0 || eo < 0)
01016 {
01017 elems[i] = (Datum) 0;
01018 nulls[i] = true;
01019 }
01020 else
01021 {
01022 elems[i] = DirectFunctionCall3(text_substr,
01023 PointerGetDatum(matchctx->orig_str),
01024 Int32GetDatum(so + 1),
01025 Int32GetDatum(eo - so));
01026 nulls[i] = false;
01027 }
01028 }
01029
01030
01031 dims[0] = matchctx->npatterns;
01032 lbs[0] = 1;
01033
01034 return construct_md_array(elems, nulls, 1, dims, lbs,
01035 TEXTOID, -1, false, 'i');
01036 }
01037
01038
01039
01040
01041
01042
01043 Datum
01044 regexp_split_to_table(PG_FUNCTION_ARGS)
01045 {
01046 FuncCallContext *funcctx;
01047 regexp_matches_ctx *splitctx;
01048
01049 if (SRF_IS_FIRSTCALL())
01050 {
01051 text *pattern = PG_GETARG_TEXT_PP(1);
01052 text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
01053 MemoryContext oldcontext;
01054
01055 funcctx = SRF_FIRSTCALL_INIT();
01056 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
01057
01058
01059 splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
01060 flags,
01061 PG_GET_COLLATION(),
01062 true, false, true);
01063
01064 MemoryContextSwitchTo(oldcontext);
01065 funcctx->user_fctx = (void *) splitctx;
01066 }
01067
01068 funcctx = SRF_PERCALL_SETUP();
01069 splitctx = (regexp_matches_ctx *) funcctx->user_fctx;
01070
01071 if (splitctx->next_match <= splitctx->nmatches)
01072 {
01073 Datum result = build_regexp_split_result(splitctx);
01074
01075 splitctx->next_match++;
01076 SRF_RETURN_NEXT(funcctx, result);
01077 }
01078
01079
01080 cleanup_regexp_matches(splitctx);
01081
01082 SRF_RETURN_DONE(funcctx);
01083 }
01084
01085
01086 Datum
01087 regexp_split_to_table_no_flags(PG_FUNCTION_ARGS)
01088 {
01089 return regexp_split_to_table(fcinfo);
01090 }
01091
01092
01093
01094
01095
01096
01097 Datum
01098 regexp_split_to_array(PG_FUNCTION_ARGS)
01099 {
01100 ArrayBuildState *astate = NULL;
01101 regexp_matches_ctx *splitctx;
01102
01103 splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
01104 PG_GETARG_TEXT_PP(1),
01105 PG_GETARG_TEXT_PP_IF_EXISTS(2),
01106 PG_GET_COLLATION(),
01107 true, false, true);
01108
01109 while (splitctx->next_match <= splitctx->nmatches)
01110 {
01111 astate = accumArrayResult(astate,
01112 build_regexp_split_result(splitctx),
01113 false,
01114 TEXTOID,
01115 CurrentMemoryContext);
01116 splitctx->next_match++;
01117 }
01118
01119
01120
01121
01122
01123
01124
01125 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
01126 }
01127
01128
01129 Datum
01130 regexp_split_to_array_no_flags(PG_FUNCTION_ARGS)
01131 {
01132 return regexp_split_to_array(fcinfo);
01133 }
01134
01135
01136
01137
01138
01139
01140
01141 static Datum
01142 build_regexp_split_result(regexp_matches_ctx *splitctx)
01143 {
01144 int startpos;
01145 int endpos;
01146
01147 if (splitctx->next_match > 0)
01148 startpos = splitctx->match_locs[splitctx->next_match * 2 - 1];
01149 else
01150 startpos = 0;
01151 if (startpos < 0)
01152 elog(ERROR, "invalid match ending position");
01153
01154 if (splitctx->next_match < splitctx->nmatches)
01155 {
01156 endpos = splitctx->match_locs[splitctx->next_match * 2];
01157 if (endpos < startpos)
01158 elog(ERROR, "invalid match starting position");
01159 return DirectFunctionCall3(text_substr,
01160 PointerGetDatum(splitctx->orig_str),
01161 Int32GetDatum(startpos + 1),
01162 Int32GetDatum(endpos - startpos));
01163 }
01164 else
01165 {
01166
01167 return DirectFunctionCall2(text_substr_no_len,
01168 PointerGetDatum(splitctx->orig_str),
01169 Int32GetDatum(startpos + 1));
01170 }
01171 }
01172
01173
01174
01175
01176
01177
01178
01179 char *
01180 regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation,
01181 bool *exact)
01182 {
01183 char *result;
01184 regex_t *re;
01185 int cflags;
01186 int re_result;
01187 pg_wchar *str;
01188 size_t slen;
01189 size_t maxlen;
01190 char errMsg[100];
01191
01192 *exact = false;
01193
01194
01195 cflags = REG_ADVANCED;
01196 if (case_insensitive)
01197 cflags |= REG_ICASE;
01198
01199 re = RE_compile_and_cache(text_re, cflags, collation);
01200
01201
01202 re_result = pg_regprefix(re, &str, &slen);
01203
01204 switch (re_result)
01205 {
01206 case REG_NOMATCH:
01207 return NULL;
01208
01209 case REG_PREFIX:
01210
01211 break;
01212
01213 case REG_EXACT:
01214 *exact = true;
01215
01216 break;
01217
01218 default:
01219
01220 pg_regerror(re_result, re, errMsg, sizeof(errMsg));
01221 ereport(ERROR,
01222 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
01223 errmsg("regular expression failed: %s", errMsg)));
01224 break;
01225 }
01226
01227
01228 maxlen = pg_database_encoding_max_length() * slen + 1;
01229 result = (char *) palloc(maxlen);
01230 slen = pg_wchar2mb_with_len(str, result, slen);
01231 Assert(slen < maxlen);
01232
01233 free(str);
01234
01235 return result;
01236 }