00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178 #include "postgres.h"
00179
00180 #include "trgm.h"
00181
00182 #include "regex/regexport.h"
00183 #include "tsearch/ts_locale.h"
00184 #include "utils/hsearch.h"
00185 #include "utils/memutils.h"
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204 #define MAX_EXPANDED_STATES 128
00205 #define MAX_EXPANDED_ARCS 1024
00206 #define MAX_TRGM_COUNT 256
00207 #define COLOR_COUNT_LIMIT 256
00208
00209
00210 typedef struct
00211 {
00212 char bytes[MAX_MULTIBYTE_CHAR_LEN];
00213 } trgm_mb_char;
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228 typedef struct
00229 {
00230 bool expandable;
00231 bool containsNonWord;
00232 int wordCharsCount;
00233 trgm_mb_char *wordChars;
00234 } TrgmColorInfo;
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252 typedef int TrgmColor;
00253
00254
00255 #define COLOR_UNKNOWN (-1)
00256 #define COLOR_BLANK (-2)
00257
00258 typedef struct
00259 {
00260 TrgmColor colors[2];
00261 } TrgmPrefix;
00262
00263
00264
00265
00266
00267 typedef struct
00268 {
00269 TrgmColor colors[3];
00270 } ColorTrgm;
00271
00272
00273
00274
00275
00276
00277 typedef struct
00278 {
00279 TrgmPrefix prefix;
00280 int nstate;
00281 } TrgmStateKey;
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296 typedef struct TrgmState
00297 {
00298 TrgmStateKey stateKey;
00299 List *arcs;
00300 List *enterKeys;
00301 bool fin;
00302 bool init;
00303 struct TrgmState *parent;
00304 List *children;
00305 int number;
00306 } TrgmState;
00307
00308
00309
00310
00311 typedef struct
00312 {
00313 ColorTrgm ctrgm;
00314 TrgmState *target;
00315 } TrgmArc;
00316
00317
00318
00319
00320
00321
00322 typedef struct
00323 {
00324 TrgmState *source;
00325 TrgmState *target;
00326 } TrgmArcInfo;
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337 typedef struct
00338 {
00339 ColorTrgm ctrgm;
00340 int number;
00341 int count;
00342 bool expanded;
00343 List *arcs;
00344 } ColorTrgmInfo;
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363 typedef struct
00364 {
00365
00366 regex_t *regex;
00367 TrgmColorInfo *colorInfo;
00368 int ncolors;
00369
00370
00371 HTAB *states;
00372 TrgmState *initState;
00373
00374
00375 List *queue;
00376 List *keysQueue;
00377 int arcsCount;
00378 bool overflowed;
00379
00380
00381 ColorTrgmInfo *colorTrgms;
00382 int colorTrgmsCount;
00383 int totalTrgmCount;
00384 } TrgmNFA;
00385
00386
00387
00388
00389 typedef struct
00390 {
00391 int targetState;
00392 int colorTrgm;
00393 } TrgmPackedArc;
00394
00395 typedef struct
00396 {
00397 int arcsCount;
00398 TrgmPackedArc *arcs;
00399 } TrgmPackedState;
00400
00401
00402 struct TrgmPackedGraph
00403 {
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413 int colorTrigramsCount;
00414 int *colorTrigramGroups;
00415
00416
00417
00418
00419
00420 int statesCount;
00421 TrgmPackedState *states;
00422
00423
00424 bool *colorTrigramsActive;
00425 bool *statesActive;
00426 int *statesQueue;
00427 };
00428
00429
00430
00431
00432 typedef struct
00433 {
00434 int sourceState;
00435 int targetState;
00436 int colorTrgm;
00437 } TrgmPackArcInfo;
00438
00439
00440
00441 static TRGM *createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
00442 MemoryContext rcontext);
00443 static void RE_compile(regex_t *regex, text *text_re,
00444 int cflags, Oid collation);
00445 static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA);
00446 static bool convertPgWchar(pg_wchar c, trgm_mb_char *result);
00447 static void transformGraph(TrgmNFA *trgmNFA);
00448 static void processState(TrgmNFA *trgmNFA, TrgmState *state);
00449 static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key);
00450 static void addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key);
00451 static void addArcs(TrgmNFA *trgmNFA, TrgmState *state);
00452 static void addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key,
00453 TrgmColor co, TrgmStateKey *destKey);
00454 static bool validArcLabel(TrgmStateKey *key, TrgmColor co);
00455 static TrgmState *getState(TrgmNFA *trgmNFA, TrgmStateKey *key);
00456 static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2);
00457 static bool selectColorTrigrams(TrgmNFA *trgmNFA);
00458 static TRGM *expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext);
00459 static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3]);
00460 static void mergeStates(TrgmState *state1, TrgmState *state2);
00461 static int colorTrgmInfoCmp(const void *p1, const void *p2);
00462 static int colorTrgmInfoCountCmp(const void *p1, const void *p2);
00463 static TrgmPackedGraph *packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext);
00464 static int packArcInfoCmp(const void *a1, const void *a2);
00465
00466 #ifdef TRGM_REGEXP_DEBUG
00467 static void printSourceNFA(regex_t *regex, TrgmColorInfo *colors, int ncolors);
00468 static void printTrgmNFA(TrgmNFA *trgmNFA);
00469 static void printTrgmColor(StringInfo buf, TrgmColor co);
00470 static void printTrgmPackedGraph(TrgmPackedGraph *packedGraph, TRGM *trigrams);
00471 #endif
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483 TRGM *
00484 createTrgmNFA(text *text_re, Oid collation,
00485 TrgmPackedGraph **graph, MemoryContext rcontext)
00486 {
00487 TRGM *trg;
00488 regex_t regex;
00489 MemoryContext tmpcontext;
00490 MemoryContext oldcontext;
00491
00492
00493
00494
00495
00496
00497
00498 tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
00499 "createTrgmNFA temporary context",
00500 ALLOCSET_DEFAULT_MINSIZE,
00501 ALLOCSET_DEFAULT_INITSIZE,
00502 ALLOCSET_DEFAULT_MAXSIZE);
00503 oldcontext = MemoryContextSwitchTo(tmpcontext);
00504
00505
00506
00507
00508 #ifdef IGNORECASE
00509 RE_compile(®ex, text_re, REG_ADVANCED | REG_ICASE, collation);
00510 #else
00511 RE_compile(®ex, text_re, REG_ADVANCED, collation);
00512 #endif
00513
00514
00515
00516
00517
00518
00519 PG_TRY();
00520 {
00521 trg = createTrgmNFAInternal(®ex, graph, rcontext);
00522 }
00523 PG_CATCH();
00524 {
00525 pg_regfree(®ex);
00526 PG_RE_THROW();
00527 }
00528 PG_END_TRY();
00529
00530 pg_regfree(®ex);
00531
00532
00533 MemoryContextSwitchTo(oldcontext);
00534 MemoryContextDelete(tmpcontext);
00535
00536 return trg;
00537 }
00538
00539
00540
00541
00542 static TRGM *
00543 createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
00544 MemoryContext rcontext)
00545 {
00546 TRGM *trg;
00547 TrgmNFA trgmNFA;
00548
00549 trgmNFA.regex = regex;
00550
00551
00552 getColorInfo(regex, &trgmNFA);
00553
00554 #ifdef TRGM_REGEXP_DEBUG
00555 printSourceNFA(regex, trgmNFA.colorInfo, trgmNFA.ncolors);
00556 #endif
00557
00558
00559
00560
00561 transformGraph(&trgmNFA);
00562
00563 #ifdef TRGM_REGEXP_DEBUG
00564 printTrgmNFA(&trgmNFA);
00565 #endif
00566
00567
00568
00569
00570
00571
00572 if (trgmNFA.initState->fin)
00573 return NULL;
00574
00575
00576
00577
00578 if (!selectColorTrigrams(&trgmNFA))
00579 return NULL;
00580
00581
00582
00583
00584
00585 trg = expandColorTrigrams(&trgmNFA, rcontext);
00586
00587 *graph = packGraph(&trgmNFA, rcontext);
00588
00589 #ifdef TRGM_REGEXP_DEBUG
00590 printTrgmPackedGraph(*graph, trg);
00591 #endif
00592
00593 return trg;
00594 }
00595
00596
00597
00598
00599
00600
00601
00602
00603 bool
00604 trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
00605 {
00606 int i,
00607 j,
00608 k,
00609 queueIn,
00610 queueOut;
00611
00612
00613
00614
00615 memset(graph->colorTrigramsActive, 0,
00616 sizeof(bool) * graph->colorTrigramsCount);
00617 memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);
00618
00619
00620
00621
00622
00623
00624 j = 0;
00625 for (i = 0; i < graph->colorTrigramsCount; i++)
00626 {
00627 int cnt = graph->colorTrigramGroups[i];
00628
00629 for (k = j; k < j + cnt; k++)
00630 {
00631 if (check[k])
00632 {
00633
00634
00635
00636
00637 graph->colorTrigramsActive[i] = true;
00638 break;
00639 }
00640 }
00641 j = j + cnt;
00642 }
00643
00644
00645
00646
00647
00648
00649
00650 graph->statesActive[0] = true;
00651 graph->statesQueue[0] = 0;
00652 queueIn = 0;
00653 queueOut = 1;
00654
00655
00656 while (queueIn < queueOut)
00657 {
00658 int stateno = graph->statesQueue[queueIn++];
00659 TrgmPackedState *state = &graph->states[stateno];
00660 int cnt = state->arcsCount;
00661
00662
00663 for (i = 0; i < cnt; i++)
00664 {
00665 TrgmPackedArc *arc = &state->arcs[i];
00666
00667
00668
00669
00670
00671
00672 if (graph->colorTrigramsActive[arc->colorTrgm])
00673 {
00674 int nextstate = arc->targetState;
00675
00676 if (nextstate == 1)
00677 return true;
00678
00679 if (!graph->statesActive[nextstate])
00680 {
00681 graph->statesActive[nextstate] = true;
00682 graph->statesQueue[queueOut++] = nextstate;
00683 }
00684 }
00685 }
00686 }
00687
00688
00689 return false;
00690 }
00691
00692
00693
00694
00695
00696 static void
00697 RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
00698 {
00699 int text_re_len = VARSIZE_ANY_EXHDR(text_re);
00700 char *text_re_val = VARDATA_ANY(text_re);
00701 pg_wchar *pattern;
00702 int pattern_len;
00703 int regcomp_result;
00704 char errMsg[100];
00705
00706
00707 pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
00708 pattern_len = pg_mb2wchar_with_len(text_re_val,
00709 pattern,
00710 text_re_len);
00711
00712
00713 regcomp_result = pg_regcomp(regex,
00714 pattern,
00715 pattern_len,
00716 cflags,
00717 collation);
00718
00719 pfree(pattern);
00720
00721 if (regcomp_result != REG_OKAY)
00722 {
00723
00724 pg_regerror(regcomp_result, regex, errMsg, sizeof(errMsg));
00725 ereport(ERROR,
00726 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
00727 errmsg("invalid regular expression: %s", errMsg)));
00728 }
00729 }
00730
00731
00732
00733
00734
00735
00736
00737
00738
00739
00740 static void
00741 getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
00742 {
00743 int colorsCount = pg_reg_getnumcolors(regex);
00744 int i;
00745
00746 trgmNFA->ncolors = colorsCount;
00747 trgmNFA->colorInfo = (TrgmColorInfo *)
00748 palloc0(colorsCount * sizeof(TrgmColorInfo));
00749
00750
00751
00752
00753 for (i = 0; i < colorsCount; i++)
00754 {
00755 TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[i];
00756 int charsCount = pg_reg_getnumcharacters(regex, i);
00757 pg_wchar *chars;
00758 int j;
00759
00760 if (charsCount < 0 || charsCount > COLOR_COUNT_LIMIT)
00761 {
00762
00763 colorInfo->expandable = false;
00764 continue;
00765 }
00766
00767 colorInfo->expandable = true;
00768 colorInfo->containsNonWord = false;
00769 colorInfo->wordChars = (trgm_mb_char *)
00770 palloc(sizeof(trgm_mb_char) * charsCount);
00771 colorInfo->wordCharsCount = 0;
00772
00773
00774 chars = (pg_wchar *) palloc(sizeof(pg_wchar) * charsCount);
00775 pg_reg_getcharacters(regex, i, chars, charsCount);
00776
00777
00778
00779
00780
00781
00782
00783 for (j = 0; j < charsCount; j++)
00784 {
00785 trgm_mb_char c;
00786
00787 if (!convertPgWchar(chars[j], &c))
00788 continue;
00789 if (ISWORDCHR(c.bytes))
00790 colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
00791 else
00792 colorInfo->containsNonWord = true;
00793 }
00794
00795 pfree(chars);
00796 }
00797 }
00798
00799
00800
00801
00802
00803 static bool
00804 convertPgWchar(pg_wchar c, trgm_mb_char *result)
00805 {
00806
00807 char s[MAX_MULTIBYTE_CHAR_LEN + 1];
00808
00809
00810
00811
00812
00813
00814 if (c == 0)
00815 return false;
00816
00817
00818 memset(s, 0, sizeof(s));
00819 pg_wchar2mb_with_len(&c, s, 1);
00820
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834 #ifdef IGNORECASE
00835 {
00836 char *lowerCased = lowerstr(s);
00837
00838 if (strcmp(lowerCased, s) != 0)
00839 {
00840 pfree(lowerCased);
00841 return false;
00842 }
00843 pfree(lowerCased);
00844 }
00845 #endif
00846
00847
00848 strncpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
00849 return true;
00850 }
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869 static void
00870 transformGraph(TrgmNFA *trgmNFA)
00871 {
00872 HASHCTL hashCtl;
00873 TrgmStateKey initkey;
00874 TrgmState *initstate;
00875
00876
00877 trgmNFA->queue = NIL;
00878 trgmNFA->keysQueue = NIL;
00879 trgmNFA->arcsCount = 0;
00880 trgmNFA->overflowed = false;
00881
00882
00883 hashCtl.keysize = sizeof(TrgmStateKey);
00884 hashCtl.entrysize = sizeof(TrgmState);
00885 hashCtl.hcxt = CurrentMemoryContext;
00886 hashCtl.hash = tag_hash;
00887 trgmNFA->states = hash_create("Trigram NFA",
00888 1024,
00889 &hashCtl,
00890 HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION);
00891
00892
00893 MemSet(&initkey, 0, sizeof(initkey));
00894 initkey.prefix.colors[0] = COLOR_UNKNOWN;
00895 initkey.prefix.colors[1] = COLOR_UNKNOWN;
00896 initkey.nstate = pg_reg_getinitialstate(trgmNFA->regex);
00897
00898 initstate = getState(trgmNFA, &initkey);
00899 initstate->init = true;
00900 trgmNFA->initState = initstate;
00901
00902
00903
00904
00905
00906 while (trgmNFA->queue != NIL)
00907 {
00908 TrgmState *state = (TrgmState *) linitial(trgmNFA->queue);
00909
00910 trgmNFA->queue = list_delete_first(trgmNFA->queue);
00911
00912
00913
00914
00915
00916 if (trgmNFA->overflowed)
00917 state->fin = true;
00918 else
00919 processState(trgmNFA, state);
00920
00921
00922 if (trgmNFA->arcsCount > MAX_EXPANDED_ARCS ||
00923 hash_get_num_entries(trgmNFA->states) > MAX_EXPANDED_STATES)
00924 trgmNFA->overflowed = true;
00925 }
00926 }
00927
00928
00929
00930
00931 static void
00932 processState(TrgmNFA *trgmNFA, TrgmState *state)
00933 {
00934
00935 trgmNFA->keysQueue = NIL;
00936
00937
00938
00939
00940
00941 addKey(trgmNFA, state, &state->stateKey);
00942 while (trgmNFA->keysQueue != NIL && !state->fin)
00943 {
00944 TrgmStateKey *key = (TrgmStateKey *) linitial(trgmNFA->keysQueue);
00945
00946 trgmNFA->keysQueue = list_delete_first(trgmNFA->keysQueue);
00947 addKey(trgmNFA, state, key);
00948 }
00949
00950
00951
00952
00953
00954 if (!state->fin)
00955 addArcs(trgmNFA, state);
00956 }
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974 static void
00975 addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
00976 {
00977 regex_arc_t *arcs;
00978 TrgmStateKey destKey;
00979 ListCell *cell,
00980 *prev,
00981 *next;
00982 int i,
00983 arcsCount;
00984
00985
00986
00987
00988
00989 MemSet(&destKey, 0, sizeof(destKey));
00990
00991
00992
00993
00994
00995
00996 prev = NULL;
00997 cell = list_head(state->enterKeys);
00998 while (cell)
00999 {
01000 TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
01001
01002 next = lnext(cell);
01003 if (existingKey->nstate == key->nstate)
01004 {
01005 if (prefixContains(&existingKey->prefix, &key->prefix))
01006 {
01007
01008 return;
01009 }
01010 if (prefixContains(&key->prefix, &existingKey->prefix))
01011 {
01012
01013
01014
01015
01016 state->enterKeys = list_delete_cell(state->enterKeys,
01017 cell, prev);
01018 }
01019 else
01020 prev = cell;
01021 }
01022 else
01023 prev = cell;
01024 cell = next;
01025 }
01026
01027
01028 state->enterKeys = lappend(state->enterKeys, key);
01029
01030
01031 if (key->nstate == pg_reg_getfinalstate(trgmNFA->regex))
01032 {
01033 state->fin = true;
01034 return;
01035 }
01036
01037
01038
01039
01040
01041 arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
01042 arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
01043 pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
01044
01045 for (i = 0; i < arcsCount; i++)
01046 {
01047 regex_arc_t *arc = &arcs[i];
01048
01049 if (pg_reg_colorisbegin(trgmNFA->regex, arc->co))
01050 {
01051
01052
01053
01054
01055
01056
01057 destKey.prefix.colors[0] = COLOR_BLANK;
01058 destKey.prefix.colors[1] = COLOR_BLANK;
01059 destKey.nstate = arc->to;
01060
01061
01062 addKeyToQueue(trgmNFA, &destKey);
01063 }
01064 else if (pg_reg_colorisend(trgmNFA->regex, arc->co))
01065 {
01066
01067
01068
01069
01070
01071
01072 destKey.prefix.colors[0] = COLOR_UNKNOWN;
01073 destKey.prefix.colors[1] = COLOR_UNKNOWN;
01074 destKey.nstate = arc->to;
01075
01076
01077 addKeyToQueue(trgmNFA, &destKey);
01078 }
01079 else
01080 {
01081
01082 TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
01083
01084 if (colorInfo->expandable)
01085 {
01086 if (colorInfo->containsNonWord &&
01087 !validArcLabel(key, COLOR_BLANK))
01088 {
01089
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099 destKey.prefix.colors[0] = COLOR_BLANK;
01100 destKey.prefix.colors[1] = COLOR_BLANK;
01101 destKey.nstate = arc->to;
01102 addKeyToQueue(trgmNFA, &destKey);
01103 }
01104
01105 if (colorInfo->wordCharsCount > 0 &&
01106 !validArcLabel(key, arc->co))
01107 {
01108
01109
01110
01111
01112
01113
01114
01115
01116 destKey.prefix.colors[0] = key->prefix.colors[1];
01117 destKey.prefix.colors[1] = arc->co;
01118 destKey.nstate = arc->to;
01119 addKeyToQueue(trgmNFA, &destKey);
01120 }
01121 }
01122 else
01123 {
01124
01125
01126
01127
01128
01129
01130
01131 destKey.prefix.colors[0] = COLOR_UNKNOWN;
01132 destKey.prefix.colors[1] = COLOR_UNKNOWN;
01133 destKey.nstate = arc->to;
01134 addKeyToQueue(trgmNFA, &destKey);
01135 }
01136 }
01137 }
01138
01139 pfree(arcs);
01140 }
01141
01142
01143
01144
01145 static void
01146 addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key)
01147 {
01148 TrgmStateKey *keyCopy = (TrgmStateKey *) palloc(sizeof(TrgmStateKey));
01149
01150 memcpy(keyCopy, key, sizeof(TrgmStateKey));
01151 trgmNFA->keysQueue = lappend(trgmNFA->keysQueue, keyCopy);
01152 }
01153
01154
01155
01156
01157 static void
01158 addArcs(TrgmNFA *trgmNFA, TrgmState *state)
01159 {
01160 TrgmStateKey destKey;
01161 ListCell *cell;
01162 regex_arc_t *arcs;
01163 int arcsCount,
01164 i;
01165
01166
01167
01168
01169
01170 MemSet(&destKey, 0, sizeof(destKey));
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181 foreach(cell, state->enterKeys)
01182 {
01183 TrgmStateKey *key = (TrgmStateKey *) lfirst(cell);
01184
01185 arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
01186 arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
01187 pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
01188
01189 for (i = 0; i < arcsCount; i++)
01190 {
01191 regex_arc_t *arc = &arcs[i];
01192 TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202 if (!colorInfo->expandable)
01203 continue;
01204
01205 if (colorInfo->containsNonWord)
01206 {
01207
01208
01209
01210
01211
01212
01213
01214 destKey.prefix.colors[0] = key->prefix.colors[1];
01215 destKey.prefix.colors[1] = COLOR_BLANK;
01216 destKey.nstate = arc->to;
01217
01218 addArc(trgmNFA, state, key, COLOR_BLANK, &destKey);
01219 }
01220
01221 if (colorInfo->wordCharsCount > 0)
01222 {
01223
01224
01225
01226
01227
01228
01229 destKey.prefix.colors[0] = key->prefix.colors[1];
01230 destKey.prefix.colors[1] = arc->co;
01231 destKey.nstate = arc->to;
01232
01233 addArc(trgmNFA, state, key, arc->co, &destKey);
01234 }
01235 }
01236
01237 pfree(arcs);
01238 }
01239 }
01240
01241
01242
01243
01244
01245
01246
01247
01248
01249 static void
01250 addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key,
01251 TrgmColor co, TrgmStateKey *destKey)
01252 {
01253 TrgmArc *arc;
01254 ListCell *cell;
01255
01256
01257 if (!validArcLabel(key, co))
01258 return;
01259
01260
01261
01262
01263
01264
01265
01266 foreach(cell, state->enterKeys)
01267 {
01268 TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
01269
01270 if (existingKey->nstate == destKey->nstate &&
01271 prefixContains(&existingKey->prefix, &destKey->prefix))
01272 return;
01273 }
01274
01275
01276 arc = (TrgmArc *) palloc(sizeof(TrgmArc));
01277 arc->target = getState(trgmNFA, destKey);
01278 arc->ctrgm.colors[0] = key->prefix.colors[0];
01279 arc->ctrgm.colors[1] = key->prefix.colors[1];
01280 arc->ctrgm.colors[2] = co;
01281
01282 state->arcs = lappend(state->arcs, arc);
01283 trgmNFA->arcsCount++;
01284 }
01285
01286
01287
01288
01289
01290
01291 static bool
01292 validArcLabel(TrgmStateKey *key, TrgmColor co)
01293 {
01294
01295
01296
01297
01298 if (key->prefix.colors[0] == COLOR_UNKNOWN)
01299 return false;
01300
01301
01302 Assert(key->prefix.colors[1] != COLOR_UNKNOWN);
01303
01304 Assert(co != COLOR_UNKNOWN);
01305
01306
01307
01308
01309
01310 if (key->prefix.colors[0] == COLOR_BLANK &&
01311 key->prefix.colors[1] == COLOR_BLANK &&
01312 co == COLOR_BLANK)
01313 return false;
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324 if (key->prefix.colors[0] != COLOR_BLANK &&
01325 key->prefix.colors[1] == COLOR_BLANK)
01326 return false;
01327
01328
01329
01330
01331
01332
01333
01334
01335
01336
01337
01338
01339 return true;
01340 }
01341
01342
01343
01344
01345
01346 static TrgmState *
01347 getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
01348 {
01349 TrgmState *state;
01350 bool found;
01351
01352 state = (TrgmState *) hash_search(trgmNFA->states, key, HASH_ENTER,
01353 &found);
01354 if (!found)
01355 {
01356
01357 state->arcs = NIL;
01358 state->enterKeys = NIL;
01359 state->init = false;
01360 state->fin = false;
01361 state->parent = NULL;
01362 state->children = NIL;
01363 state->number = -1;
01364
01365 trgmNFA->queue = lappend(trgmNFA->queue, state);
01366 }
01367 return state;
01368 }
01369
01370
01371
01372
01373
01374
01375
01376 static bool
01377 prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
01378 {
01379 if (prefix1->colors[1] == COLOR_UNKNOWN)
01380 {
01381
01382 return true;
01383 }
01384 else if (prefix1->colors[0] == COLOR_UNKNOWN)
01385 {
01386
01387
01388
01389
01390 if (prefix1->colors[1] == prefix2->colors[1])
01391 return true;
01392 else
01393 return false;
01394 }
01395 else
01396 {
01397
01398 if (prefix1->colors[0] == prefix2->colors[0] &&
01399 prefix1->colors[1] == prefix2->colors[1])
01400 return true;
01401 else
01402 return false;
01403 }
01404 }
01405
01406
01407
01408
01409
01410
01411
01412
01413
01414
01415
01416
01417
01418 static bool
01419 selectColorTrigrams(TrgmNFA *trgmNFA)
01420 {
01421 HASH_SEQ_STATUS scan_status;
01422 int arcsCount = trgmNFA->arcsCount,
01423 i;
01424 TrgmState *state;
01425 ColorTrgmInfo *colorTrgms;
01426 int64 totalTrgmCount;
01427 int number;
01428
01429
01430 colorTrgms = (ColorTrgmInfo *) palloc(sizeof(ColorTrgmInfo) * arcsCount);
01431 trgmNFA->colorTrgms = colorTrgms;
01432
01433 i = 0;
01434 hash_seq_init(&scan_status, trgmNFA->states);
01435 while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
01436 {
01437 ListCell *cell;
01438
01439 foreach(cell, state->arcs)
01440 {
01441 TrgmArc *arc = (TrgmArc *) lfirst(cell);
01442 TrgmArcInfo *arcInfo = (TrgmArcInfo *) palloc(sizeof(TrgmArcInfo));
01443
01444 arcInfo->source = state;
01445 arcInfo->target = arc->target;
01446 colorTrgms[i].arcs = list_make1(arcInfo);
01447 colorTrgms[i].expanded = true;
01448 colorTrgms[i].ctrgm = arc->ctrgm;
01449 i++;
01450 }
01451 }
01452 Assert(i == arcsCount);
01453
01454
01455 if (arcsCount >= 2)
01456 {
01457 ColorTrgmInfo *p1,
01458 *p2;
01459
01460
01461 qsort(colorTrgms, arcsCount, sizeof(ColorTrgmInfo), colorTrgmInfoCmp);
01462
01463
01464 p2 = colorTrgms;
01465 for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
01466 {
01467 if (colorTrgmInfoCmp(p1, p2) > 0)
01468 {
01469 p2++;
01470 *p2 = *p1;
01471 }
01472 else
01473 {
01474 p2->arcs = list_concat(p2->arcs, p1->arcs);
01475 }
01476 }
01477 trgmNFA->colorTrgmsCount = (p2 - colorTrgms) + 1;
01478 }
01479 else
01480 {
01481 trgmNFA->colorTrgmsCount = arcsCount;
01482 }
01483
01484
01485
01486
01487
01488
01489
01490
01491
01492 totalTrgmCount = 0;
01493 for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
01494 {
01495 ColorTrgmInfo *trgmInfo = &colorTrgms[i];
01496 int j,
01497 count = 1;
01498
01499 for (j = 0; j < 3; j++)
01500 {
01501 TrgmColor c = trgmInfo->ctrgm.colors[j];
01502
01503 if (c != COLOR_BLANK)
01504 count *= trgmNFA->colorInfo[c].wordCharsCount;
01505 }
01506 trgmInfo->count = count;
01507 totalTrgmCount += count;
01508 }
01509
01510
01511 qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
01512 colorTrgmInfoCountCmp);
01513
01514
01515
01516
01517
01518
01519
01520
01521
01522
01523
01524 for (i = 0;
01525 (i < trgmNFA->colorTrgmsCount) && (totalTrgmCount > MAX_TRGM_COUNT);
01526 i++)
01527 {
01528 ColorTrgmInfo *trgmInfo = &colorTrgms[i];
01529 bool canRemove = true;
01530 ListCell *cell;
01531
01532
01533
01534
01535
01536 foreach(cell, trgmInfo->arcs)
01537 {
01538 TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
01539 TrgmState *source = arcInfo->source,
01540 *target = arcInfo->target;
01541
01542
01543 while (source->parent)
01544 source = source->parent;
01545 while (target->parent)
01546 target = target->parent;
01547
01548 if ((source->init || target->init) &&
01549 (source->fin || target->fin))
01550 {
01551 canRemove = false;
01552 break;
01553 }
01554 }
01555 if (!canRemove)
01556 continue;
01557
01558
01559 foreach(cell, trgmInfo->arcs)
01560 {
01561 TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
01562 TrgmState *source = arcInfo->source,
01563 *target = arcInfo->target;
01564
01565 while (source->parent)
01566 source = source->parent;
01567 while (target->parent)
01568 target = target->parent;
01569 if (source != target)
01570 mergeStates(source, target);
01571 }
01572
01573
01574 trgmInfo->expanded = false;
01575 totalTrgmCount -= trgmInfo->count;
01576 }
01577
01578
01579 if (totalTrgmCount > MAX_TRGM_COUNT)
01580 return false;
01581
01582 trgmNFA->totalTrgmCount = (int) totalTrgmCount;
01583
01584
01585
01586
01587
01588 number = 0;
01589 qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
01590 colorTrgmInfoCmp);
01591 for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
01592 {
01593 if (colorTrgms[i].expanded)
01594 {
01595 colorTrgms[i].number = number;
01596 number++;
01597 }
01598 }
01599
01600 return true;
01601 }
01602
01603
01604
01605
01606
01607
01608
01609 static TRGM *
01610 expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext)
01611 {
01612 TRGM *trg;
01613 trgm *p;
01614 int i;
01615 TrgmColorInfo blankColor;
01616 trgm_mb_char blankChar;
01617
01618
01619 memset(blankChar.bytes, 0, sizeof(blankChar.bytes));
01620 blankColor.wordCharsCount = 1;
01621 blankColor.wordChars = &blankChar;
01622
01623
01624 trg = (TRGM *)
01625 MemoryContextAllocZero(rcontext,
01626 TRGMHDRSIZE +
01627 trgmNFA->totalTrgmCount * sizeof(trgm));
01628 trg->flag = ARRKEY;
01629 SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, trgmNFA->totalTrgmCount));
01630 p = GETARR(trg);
01631 for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
01632 {
01633 ColorTrgmInfo *colorTrgm = &trgmNFA->colorTrgms[i];
01634 TrgmColorInfo *c[3];
01635 trgm_mb_char s[3];
01636 int j,
01637 i1,
01638 i2,
01639 i3;
01640
01641
01642 if (!colorTrgm->expanded)
01643 continue;
01644
01645
01646 for (j = 0; j < 3; j++)
01647 {
01648 if (colorTrgm->ctrgm.colors[j] != COLOR_BLANK)
01649 c[j] = &trgmNFA->colorInfo[colorTrgm->ctrgm.colors[j]];
01650 else
01651 c[j] = &blankColor;
01652 }
01653
01654
01655 for (i1 = 0; i1 < c[0]->wordCharsCount; i1++)
01656 {
01657 s[0] = c[0]->wordChars[i1];
01658 for (i2 = 0; i2 < c[1]->wordCharsCount; i2++)
01659 {
01660 s[1] = c[1]->wordChars[i2];
01661 for (i3 = 0; i3 < c[2]->wordCharsCount; i3++)
01662 {
01663 s[2] = c[2]->wordChars[i3];
01664 fillTrgm(p, s);
01665 p++;
01666 }
01667 }
01668 }
01669 }
01670
01671 return trg;
01672 }
01673
01674
01675
01676
01677 static void
01678 fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
01679 {
01680 char str[3 * MAX_MULTIBYTE_CHAR_LEN],
01681 *p;
01682 int i,
01683 j;
01684
01685
01686 p = str;
01687
01688 for (i = 0; i < 3; i++)
01689 {
01690 if (s[i].bytes[0] != 0)
01691 {
01692 for (j = 0; j < MAX_MULTIBYTE_CHAR_LEN && s[i].bytes[j]; j++)
01693 *p++ = s[i].bytes[j];
01694 }
01695 else
01696 {
01697
01698 *p++ = ' ';
01699 }
01700 }
01701
01702
01703 compact_trigram(ptrgm, str, p - str);
01704 }
01705
01706
01707
01708
01709 static void
01710 mergeStates(TrgmState *state1, TrgmState *state2)
01711 {
01712 ListCell *cell;
01713
01714 Assert(state1 != state2);
01715 Assert(!state1->parent);
01716 Assert(!state2->parent);
01717
01718
01719 state1->init |= state2->init;
01720 state1->fin |= state2->fin;
01721
01722
01723 foreach(cell, state2->children)
01724 {
01725 TrgmState *state = (TrgmState *) lfirst(cell);
01726
01727 state->parent = state1;
01728 }
01729 state2->parent = state1;
01730 state1->children = list_concat(state1->children, state2->children);
01731 state1->children = lappend(state1->children, state2);
01732 state2->children = NIL;
01733 }
01734
01735
01736
01737
01738 static int
01739 colorTrgmInfoCmp(const void *p1, const void *p2)
01740 {
01741 const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
01742 const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;
01743
01744 return memcmp(&c1->ctrgm, &c2->ctrgm, sizeof(ColorTrgm));
01745 }
01746
01747
01748
01749
01750
01751 static int
01752 colorTrgmInfoCountCmp(const void *p1, const void *p2)
01753 {
01754 const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
01755 const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;
01756
01757 if (c1->count < c2->count)
01758 return 1;
01759 else if (c1->count == c2->count)
01760 return 0;
01761 else
01762 return -1;
01763 }
01764
01765
01766
01767
01768
01769
01770
01771
01772
01773
01774
01775
01776 static TrgmPackedGraph *
01777 packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
01778 {
01779 int number = 2,
01780 arcIndex,
01781 arcsCount;
01782 HASH_SEQ_STATUS scan_status;
01783 TrgmState *state;
01784 TrgmPackArcInfo *arcs,
01785 *p1,
01786 *p2;
01787 TrgmPackedArc *packedArcs;
01788 TrgmPackedGraph *result;
01789 int i,
01790 j;
01791
01792
01793 hash_seq_init(&scan_status, trgmNFA->states);
01794 while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
01795 {
01796 while (state->parent)
01797 state = state->parent;
01798
01799 if (state->number < 0)
01800 {
01801 if (state->init)
01802 state->number = 0;
01803 else if (state->fin)
01804 state->number = 1;
01805 else
01806 {
01807 state->number = number;
01808 number++;
01809 }
01810 }
01811 }
01812
01813
01814 arcs = (TrgmPackArcInfo *)
01815 palloc(sizeof(TrgmPackArcInfo) * trgmNFA->arcsCount);
01816 arcIndex = 0;
01817 hash_seq_init(&scan_status, trgmNFA->states);
01818 while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
01819 {
01820 TrgmState *source = state;
01821 ListCell *cell;
01822
01823 while (source->parent)
01824 source = source->parent;
01825
01826 foreach(cell, state->arcs)
01827 {
01828 TrgmArc *arc = (TrgmArc *) lfirst(cell);
01829 TrgmState *target = arc->target;
01830
01831 while (target->parent)
01832 target = target->parent;
01833
01834 if (source->number != target->number)
01835 {
01836 ColorTrgmInfo *ctrgm;
01837
01838 ctrgm = (ColorTrgmInfo *) bsearch(&arc->ctrgm,
01839 trgmNFA->colorTrgms,
01840 trgmNFA->colorTrgmsCount,
01841 sizeof(ColorTrgmInfo),
01842 colorTrgmInfoCmp);
01843 Assert(ctrgm != NULL);
01844 Assert(ctrgm->expanded);
01845
01846 arcs[arcIndex].sourceState = source->number;
01847 arcs[arcIndex].targetState = target->number;
01848 arcs[arcIndex].colorTrgm = ctrgm->number;
01849 arcIndex++;
01850 }
01851 }
01852 }
01853
01854
01855 qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
01856
01857
01858
01859 p2 = arcs;
01860 for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
01861 {
01862 if (packArcInfoCmp(p1, p2) > 0)
01863 {
01864 p2++;
01865 *p2 = *p1;
01866 }
01867 }
01868 arcsCount = (p2 - arcs) + 1;
01869
01870
01871 result = (TrgmPackedGraph *)
01872 MemoryContextAlloc(rcontext, sizeof(TrgmPackedGraph));
01873
01874
01875 result->colorTrigramsCount = 0;
01876 for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
01877 {
01878 if (trgmNFA->colorTrgms[i].expanded)
01879 result->colorTrigramsCount++;
01880 }
01881 result->colorTrigramGroups = (int *)
01882 MemoryContextAlloc(rcontext, sizeof(int) * result->colorTrigramsCount);
01883 j = 0;
01884 for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
01885 {
01886 if (trgmNFA->colorTrgms[i].expanded)
01887 {
01888 result->colorTrigramGroups[j] = trgmNFA->colorTrgms[i].count;
01889 j++;
01890 }
01891 }
01892
01893
01894 result->statesCount = number;
01895 result->states = (TrgmPackedState *)
01896 MemoryContextAlloc(rcontext, number * sizeof(TrgmPackedState));
01897 packedArcs = (TrgmPackedArc *)
01898 MemoryContextAlloc(rcontext, arcsCount * sizeof(TrgmPackedArc));
01899 j = 0;
01900 for (i = 0; i < number; i++)
01901 {
01902 int cnt = 0;
01903
01904 result->states[i].arcs = &packedArcs[j];
01905 while (j < arcsCount && arcs[j].sourceState == i)
01906 {
01907 packedArcs[j].targetState = arcs[j].targetState;
01908 packedArcs[j].colorTrgm = arcs[j].colorTrgm;
01909 cnt++;
01910 j++;
01911 }
01912 result->states[i].arcsCount = cnt;
01913 }
01914
01915
01916 result->colorTrigramsActive = (bool *)
01917 MemoryContextAlloc(rcontext, sizeof(bool) * result->colorTrigramsCount);
01918 result->statesActive = (bool *)
01919 MemoryContextAlloc(rcontext, sizeof(bool) * result->statesCount);
01920 result->statesQueue = (int *)
01921 MemoryContextAlloc(rcontext, sizeof(int) * result->statesCount);
01922
01923 return result;
01924 }
01925
01926
01927
01928
01929
01930
01931 static int
01932 packArcInfoCmp(const void *a1, const void *a2)
01933 {
01934 const TrgmPackArcInfo *p1 = (const TrgmPackArcInfo *) a1;
01935 const TrgmPackArcInfo *p2 = (const TrgmPackArcInfo *) a2;
01936
01937 if (p1->sourceState < p2->sourceState)
01938 return -1;
01939 if (p1->sourceState > p2->sourceState)
01940 return 1;
01941 if (p1->colorTrgm < p2->colorTrgm)
01942 return -1;
01943 if (p1->colorTrgm > p2->colorTrgm)
01944 return 1;
01945 if (p1->targetState < p2->targetState)
01946 return -1;
01947 if (p1->targetState > p2->targetState)
01948 return 1;
01949 return 0;
01950 }
01951
01952
01953
01954
01955
01956
01957
01958
01959
01960 #ifdef TRGM_REGEXP_DEBUG
01961
01962
01963
01964
01965 static void
01966 printSourceNFA(regex_t *regex, TrgmColorInfo *colors, int ncolors)
01967 {
01968 StringInfoData buf;
01969 int nstates = pg_reg_getnumstates(regex);
01970 int state;
01971 int i;
01972
01973 initStringInfo(&buf);
01974
01975 appendStringInfo(&buf, "\ndigraph sourceNFA {\n");
01976
01977 for (state = 0; state < nstates; state++)
01978 {
01979 regex_arc_t *arcs;
01980 int i,
01981 arcsCount;
01982
01983 appendStringInfo(&buf, "s%d", state);
01984 if (pg_reg_getfinalstate(regex) == state)
01985 appendStringInfo(&buf, " [shape = doublecircle]");
01986 appendStringInfo(&buf, ";\n");
01987
01988 arcsCount = pg_reg_getnumoutarcs(regex, state);
01989 arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
01990 pg_reg_getoutarcs(regex, state, arcs, arcsCount);
01991
01992 for (i = 0; i < arcsCount; i++)
01993 {
01994 appendStringInfo(&buf, " s%d -> s%d [label = \"%d\"];\n",
01995 state, arcs[i].to, arcs[i].co);
01996 }
01997
01998 pfree(arcs);
01999 }
02000
02001 appendStringInfo(&buf, " node [shape = point ]; initial;\n");
02002 appendStringInfo(&buf, " initial -> s%d;\n",
02003 pg_reg_getinitialstate(regex));
02004
02005
02006 appendStringInfo(&buf, " { rank = sink;\n");
02007 appendStringInfo(&buf, " Colors [shape = none, margin=0, label=<\n");
02008
02009 for (i = 0; i < ncolors; i++)
02010 {
02011 TrgmColorInfo *color = &colors[i];
02012 int j;
02013
02014 appendStringInfo(&buf, "<br/>Color %d: ", i);
02015 if (color->expandable)
02016 {
02017 for (j = 0; j < color->wordCharsCount; j++)
02018 {
02019 char s[MAX_MULTIBYTE_CHAR_LEN + 1];
02020
02021 memcpy(s, color->wordChars[j].bytes, MAX_MULTIBYTE_CHAR_LEN);
02022 s[MAX_MULTIBYTE_CHAR_LEN] = '\0';
02023 appendStringInfo(&buf, "%s", s);
02024 }
02025 }
02026 else
02027 appendStringInfo(&buf, "not expandable");
02028 appendStringInfo(&buf, "\n");
02029 }
02030
02031 appendStringInfo(&buf, " >];\n");
02032 appendStringInfo(&buf, " }\n");
02033 appendStringInfo(&buf, "}\n");
02034
02035 {
02036
02037 FILE *fp = fopen("/tmp/source.dot", "w");
02038
02039 fprintf(fp, "%s", buf.data);
02040 fclose(fp);
02041 }
02042
02043 pfree(buf.data);
02044 }
02045
02046
02047
02048
02049 static void
02050 printTrgmNFA(TrgmNFA *trgmNFA)
02051 {
02052 StringInfoData buf;
02053 HASH_SEQ_STATUS scan_status;
02054 TrgmState *state;
02055 TrgmState *initstate = NULL;
02056
02057 initStringInfo(&buf);
02058
02059 appendStringInfo(&buf, "\ndigraph transformedNFA {\n");
02060
02061 hash_seq_init(&scan_status, trgmNFA->states);
02062 while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
02063 {
02064 ListCell *cell;
02065
02066 appendStringInfo(&buf, "s%p", (void *) state);
02067 if (state->fin)
02068 appendStringInfo(&buf, " [shape = doublecircle]");
02069 if (state->init)
02070 initstate = state;
02071 appendStringInfo(&buf, " [label = \"%d\"]", state->stateKey.nstate);
02072 appendStringInfo(&buf, ";\n");
02073
02074 foreach(cell, state->arcs)
02075 {
02076 TrgmArc *arc = (TrgmArc *) lfirst(cell);
02077
02078 appendStringInfo(&buf, " s%p -> s%p [label = \"",
02079 (void *) state, (void *) arc->target);
02080 printTrgmColor(&buf, arc->ctrgm.colors[0]);
02081 appendStringInfo(&buf, " ");
02082 printTrgmColor(&buf, arc->ctrgm.colors[1]);
02083 appendStringInfo(&buf, " ");
02084 printTrgmColor(&buf, arc->ctrgm.colors[2]);
02085 appendStringInfo(&buf, "\"];\n");
02086 }
02087 }
02088
02089 if (initstate)
02090 {
02091 appendStringInfo(&buf, " node [shape = point ]; initial;\n");
02092 appendStringInfo(&buf, " initial -> s%p;\n", (void *) initstate);
02093 }
02094
02095 appendStringInfo(&buf, "}\n");
02096
02097 {
02098
02099 FILE *fp = fopen("/tmp/transformed.dot", "w");
02100
02101 fprintf(fp, "%s", buf.data);
02102 fclose(fp);
02103 }
02104
02105 pfree(buf.data);
02106 }
02107
02108
02109
02110
02111 static void
02112 printTrgmColor(StringInfo buf, TrgmColor co)
02113 {
02114 if (co == COLOR_UNKNOWN)
02115 appendStringInfo(buf, "u");
02116 else if (co == COLOR_BLANK)
02117 appendStringInfo(buf, "b");
02118 else
02119 appendStringInfo(buf, "%d", (int) co);
02120 }
02121
02122
02123
02124
02125 static void
02126 printTrgmPackedGraph(TrgmPackedGraph *packedGraph, TRGM *trigrams)
02127 {
02128 StringInfoData buf;
02129 trgm *p;
02130 int i;
02131
02132 initStringInfo(&buf);
02133
02134 appendStringInfo(&buf, "\ndigraph packedGraph {\n");
02135
02136 for (i = 0; i < packedGraph->statesCount; i++)
02137 {
02138 TrgmPackedState *state = &packedGraph->states[i];
02139 int j;
02140
02141 appendStringInfo(&buf, " s%d", i);
02142 if (i == 1)
02143 appendStringInfo(&buf, " [shape = doublecircle]");
02144
02145 appendStringInfo(&buf, " [label = <s%d>];\n", i);
02146
02147 for (j = 0; j < state->arcsCount; j++)
02148 {
02149 TrgmPackedArc *arc = &state->arcs[j];
02150
02151 appendStringInfo(&buf, " s%d -> s%d [label = \"trigram %d\"];\n",
02152 i, arc->targetState, arc->colorTrgm);
02153 }
02154 }
02155
02156 appendStringInfo(&buf, " node [shape = point ]; initial;\n");
02157 appendStringInfo(&buf, " initial -> s%d;\n", 0);
02158
02159
02160 appendStringInfo(&buf, " { rank = sink;\n");
02161 appendStringInfo(&buf, " Trigrams [shape = none, margin=0, label=<\n");
02162
02163 p = GETARR(trigrams);
02164 for (i = 0; i < packedGraph->colorTrigramsCount; i++)
02165 {
02166 int count = packedGraph->colorTrigramGroups[i];
02167 int j;
02168
02169 appendStringInfo(&buf, "<br/>Trigram %d: ", i);
02170
02171 for (j = 0; j < count; j++)
02172 {
02173 if (j > 0)
02174 appendStringInfo(&buf, ", ");
02175
02176
02177
02178
02179 appendStringInfo(&buf, "\"%c%c%c\"", (*p)[0], (*p)[1], (*p)[2]);
02180 p++;
02181 }
02182 }
02183
02184 appendStringInfo(&buf, " >];\n");
02185 appendStringInfo(&buf, " }\n");
02186 appendStringInfo(&buf, "}\n");
02187
02188 {
02189
02190 FILE *fp = fopen("/tmp/packed.dot", "w");
02191
02192 fprintf(fp, "%s", buf.data);
02193 fclose(fp);
02194 }
02195
02196 pfree(buf.data);
02197 }
02198
02199 #endif