Header And Logo

PostgreSQL
| The world's most advanced open source database.

test_parser.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * test_parser.c
00004  *    Simple example of a text search parser
00005  *
00006  * Copyright (c) 2007-2013, PostgreSQL Global Development Group
00007  *
00008  * IDENTIFICATION
00009  *    contrib/test_parser/test_parser.c
00010  *
00011  *-------------------------------------------------------------------------
00012  */
00013 #include "postgres.h"
00014 
00015 #include "fmgr.h"
00016 
00017 PG_MODULE_MAGIC;
00018 
00019 
00020 /*
00021  * types
00022  */
00023 
/*
 * Parser state (type private to this module).
 *
 * Created by testprs_start and received as argument 0 by testprs_getlexeme
 * and testprs_end.
 */
typedef struct ParserState
{
    /* text to parse */
    char       *buffer;
    /* length of the text in buffer */
    int         len;
    /* current position of the parser within buffer */
    int         pos;
} ParserState;
00031 
/*
 * Description of one token type, as returned by testprs_lextype.
 *
 * The layout is a copy of the declaration in wparser.h of tsearch2.
 */
typedef struct LexDescr
{
    /* numeric token type; testprs_lextype ends its array with lexid 0 */
    int         lexid;
    /* short name of the token type, e.g. "word" */
    char       *alias;
    /* human-readable description, e.g. "Space symbols" */
    char       *descr;
} LexDescr;
00039 
00040 /*
00041  * prototypes
00042  */
00043 PG_FUNCTION_INFO_V1(testprs_start);
00044 Datum       testprs_start(PG_FUNCTION_ARGS);
00045 
00046 PG_FUNCTION_INFO_V1(testprs_getlexeme);
00047 Datum       testprs_getlexeme(PG_FUNCTION_ARGS);
00048 
00049 PG_FUNCTION_INFO_V1(testprs_end);
00050 Datum       testprs_end(PG_FUNCTION_ARGS);
00051 
00052 PG_FUNCTION_INFO_V1(testprs_lextype);
00053 Datum       testprs_lextype(PG_FUNCTION_ARGS);
00054 
00055 /*
00056  * functions
00057  */
00058 Datum
00059 testprs_start(PG_FUNCTION_ARGS)
00060 {
00061     ParserState *pst = (ParserState *) palloc0(sizeof(ParserState));
00062 
00063     pst->buffer = (char *) PG_GETARG_POINTER(0);
00064     pst->len = PG_GETARG_INT32(1);
00065     pst->pos = 0;
00066 
00067     PG_RETURN_POINTER(pst);
00068 }
00069 
00070 Datum
00071 testprs_getlexeme(PG_FUNCTION_ARGS)
00072 {
00073     ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
00074     char      **t = (char **) PG_GETARG_POINTER(1);
00075     int        *tlen = (int *) PG_GETARG_POINTER(2);
00076     int         startpos = pst->pos;
00077     int         type;
00078 
00079     *t = pst->buffer + pst->pos;
00080 
00081     if (pst->pos < pst->len &&
00082         (pst->buffer)[pst->pos] == ' ')
00083     {
00084         /* blank type */
00085         type = 12;
00086         /* go to the next non-space character */
00087         while (pst->pos < pst->len &&
00088                (pst->buffer)[pst->pos] == ' ')
00089             (pst->pos)++;
00090     }
00091     else
00092     {
00093         /* word type */
00094         type = 3;
00095         /* go to the next space character */
00096         while (pst->pos < pst->len &&
00097                (pst->buffer)[pst->pos] != ' ')
00098             (pst->pos)++;
00099     }
00100 
00101     *tlen = pst->pos - startpos;
00102 
00103     /* we are finished if (*tlen == 0) */
00104     if (*tlen == 0)
00105         type = 0;
00106 
00107     PG_RETURN_INT32(type);
00108 }
00109 
00110 Datum
00111 testprs_end(PG_FUNCTION_ARGS)
00112 {
00113     ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
00114 
00115     pfree(pst);
00116     PG_RETURN_VOID();
00117 }
00118 
00119 Datum
00120 testprs_lextype(PG_FUNCTION_ARGS)
00121 {
00122     /*
00123      * Remarks: - we have to return the blanks for headline reason - we use
00124      * the same lexids like Teodor in the default word parser; in this way we
00125      * can reuse the headline function of the default word parser.
00126      */
00127     LexDescr   *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2 + 1));
00128 
00129     /* there are only two types in this parser */
00130     descr[0].lexid = 3;
00131     descr[0].alias = pstrdup("word");
00132     descr[0].descr = pstrdup("Word");
00133     descr[1].lexid = 12;
00134     descr[1].alias = pstrdup("blank");
00135     descr[1].descr = pstrdup("Space symbols");
00136     descr[2].lexid = 0;
00137 
00138     PG_RETURN_POINTER(descr);
00139 }