00001 /*------------------------------------------------------------------------- 00002 * 00003 * test_parser.c 00004 * Simple example of a text search parser 00005 * 00006 * Copyright (c) 2007-2013, PostgreSQL Global Development Group 00007 * 00008 * IDENTIFICATION 00009 * contrib/test_parser/test_parser.c 00010 * 00011 *------------------------------------------------------------------------- 00012 */ 00013 #include "postgres.h" 00014 00015 #include "fmgr.h" 00016 00017 PG_MODULE_MAGIC; 00018 00019 00020 /* 00021 * types 00022 */ 00023 00024 /* self-defined type */ 00025 typedef struct 00026 { 00027 char *buffer; /* text to parse */ 00028 int len; /* length of the text in buffer */ 00029 int pos; /* position of the parser */ 00030 } ParserState; 00031 00032 /* copy-paste from wparser.h of tsearch2 */ 00033 typedef struct 00034 { 00035 int lexid; 00036 char *alias; 00037 char *descr; 00038 } LexDescr; 00039 00040 /* 00041 * prototypes 00042 */ 00043 PG_FUNCTION_INFO_V1(testprs_start); 00044 Datum testprs_start(PG_FUNCTION_ARGS); 00045 00046 PG_FUNCTION_INFO_V1(testprs_getlexeme); 00047 Datum testprs_getlexeme(PG_FUNCTION_ARGS); 00048 00049 PG_FUNCTION_INFO_V1(testprs_end); 00050 Datum testprs_end(PG_FUNCTION_ARGS); 00051 00052 PG_FUNCTION_INFO_V1(testprs_lextype); 00053 Datum testprs_lextype(PG_FUNCTION_ARGS); 00054 00055 /* 00056 * functions 00057 */ 00058 Datum 00059 testprs_start(PG_FUNCTION_ARGS) 00060 { 00061 ParserState *pst = (ParserState *) palloc0(sizeof(ParserState)); 00062 00063 pst->buffer = (char *) PG_GETARG_POINTER(0); 00064 pst->len = PG_GETARG_INT32(1); 00065 pst->pos = 0; 00066 00067 PG_RETURN_POINTER(pst); 00068 } 00069 00070 Datum 00071 testprs_getlexeme(PG_FUNCTION_ARGS) 00072 { 00073 ParserState *pst = (ParserState *) PG_GETARG_POINTER(0); 00074 char **t = (char **) PG_GETARG_POINTER(1); 00075 int *tlen = (int *) PG_GETARG_POINTER(2); 00076 int startpos = pst->pos; 00077 int type; 00078 00079 *t = pst->buffer + pst->pos; 00080 00081 if (pst->pos < pst->len && 00082 (pst->buffer)[pst->pos] == ' ') 00083 { 00084 /* blank type */ 00085 type = 12; 00086 /* go to the next non-space character */ 00087 while (pst->pos < pst->len && 00088 (pst->buffer)[pst->pos] == ' ') 00089 (pst->pos)++; 00090 } 00091 else 00092 { 00093 /* word type */ 00094 type = 3; 00095 /* go to the next space character */ 00096 while (pst->pos < pst->len && 00097 (pst->buffer)[pst->pos] != ' ') 00098 (pst->pos)++; 00099 } 00100 00101 *tlen = pst->pos - startpos; 00102 00103 /* we are finished if (*tlen == 0) */ 00104 if (*tlen == 0) 00105 type = 0; 00106 00107 PG_RETURN_INT32(type); 00108 } 00109 00110 Datum 00111 testprs_end(PG_FUNCTION_ARGS) 00112 { 00113 ParserState *pst = (ParserState *) PG_GETARG_POINTER(0); 00114 00115 pfree(pst); 00116 PG_RETURN_VOID(); 00117 } 00118 00119 Datum 00120 testprs_lextype(PG_FUNCTION_ARGS) 00121 { 00122 /* 00123 * Remarks: - we have to return the blanks for headline reason - we use 00124 * the same lexids like Teodor in the default word parser; in this way we 00125 * can reuse the headline function of the default word parser. 00126 */ 00127 LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2 + 1)); 00128 00129 /* there are only two types in this parser */ 00130 descr[0].lexid = 3; 00131 descr[0].alias = pstrdup("word"); 00132 descr[0].descr = pstrdup("Word"); 00133 descr[1].lexid = 12; 00134 descr[1].alias = pstrdup("blank"); 00135 descr[1].descr = pstrdup("Space symbols"); 00136 descr[2].lexid = 0; 00137 00138 PG_RETURN_POINTER(descr); 00139 }