Header And Logo

PostgreSQL
| The world's most advanced open source database.

ts_public.h

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * ts_public.h
00004  *    Public interface to various tsearch modules, such as
00005  *    parsers and dictionaries.
00006  *
00007  * Copyright (c) 1998-2013, PostgreSQL Global Development Group
00008  *
00009  * src/include/tsearch/ts_public.h
00010  *
00011  *-------------------------------------------------------------------------
00012  */
00013 #ifndef _PG_TS_PUBLIC_H_
00014 #define _PG_TS_PUBLIC_H_
00015 
00016 #include "tsearch/ts_type.h"
00017 
00018 /*
00019  * Parser's framework
00020  */
00021 
00022 /*
00023  * Return type for the prslextype method of a parser
00024  */
00025 typedef struct
00026 {
00027     int         lexid;      /* NOTE(review): presumably the lexeme type's id -- confirm */
00028     char       *alias;      /* NOTE(review): presumably a short name for the type -- confirm */
00029     char       *descr;      /* NOTE(review): presumably a description string -- confirm */
00030 } LexDescr;
00031 
00032 /*
00033  * Interface to the headline generator
00034  */
00035 typedef struct
00036 {
00037     uint32      selected:1, /* the eight bit-fields here total 32 bits */
00038                 in:1,
00039                 replace:1,
00040                 repeated:1,
00041                 skip:1,
00042                 unused:3,   /* spare bits, as the name says */
00043                 type:8,
00044                 len:16;     /* NOTE(review): presumably length of word -- confirm */
00045     char       *word;
00046     QueryOperand *item;     /* type not declared here; presumably from ts_type.h -- confirm */
00047 } HeadlineWordEntry;
00048 
00049 typedef struct              /* HeadlineWordEntry array plus three strings and their lengths */
00050 {
00051     HeadlineWordEntry *words;
00052     int32       lenwords;   /* NOTE(review): presumably allocated size of words -- confirm */
00053     int32       curwords;   /* NOTE(review): presumably entries of words in use -- confirm */
00054     char       *startsel;
00055     char       *stopsel;
00056     char       *fragdelim;
00057     int16       startsellen;    /* NOTE(review): presumably length of startsel -- confirm */
00058     int16       stopsellen;     /* NOTE(review): presumably length of stopsel -- confirm */
00059     int16       fragdelimlen;   /* NOTE(review): presumably length of fragdelim -- confirm */
00060 } HeadlineParsedText;
00061 
00062 /*
00063  * Commonly useful things for the tsearch subsystem
00064  */
00065 extern char *get_tsearch_config_filename(const char *basename,
00066                             const char *extension);  /* NOTE(review): ownership of the returned string is not visible here */
00067 
00068 /*
00069  * Stopword list management (often useful)
00070  */
00071 typedef struct
00072 {
00073     int         len;        /* NOTE(review): presumably number of entries in stop -- confirm */
00074     char      **stop;       /* NOTE(review): presumably the array of stopword strings -- confirm */
00075 } StopList;
00076 
00077 extern void readstoplist(const char *fname, StopList *s,
00078              char *(*wordop) (const char *));    /* wordop: callback taking a const char * and returning a char * */
00079 extern bool searchstoplist(StopList *s, char *key);  /* NOTE(review): presumably true when key is found in s -- confirm */
00080 
00081 /*
00082  * Interface with dictionaries
00083  */
00084 
00085 /* Return struct for any lexize function */
00086 typedef struct
00087 {
00088     /*----------
00089      * Number of the current variant of a split word.  For example, the
00090      * Norwegian word 'fotballklubber' can be split in two ways: ( fotball, klubb )
00091      * and ( fot, ball, klubb ). So, the dictionary should return:
00092      *
00093      * nvariant    lexeme
00094      *     1       fotball
00095      *     1       klubb
00096      *     2       fot
00097      *     2       ball
00098      *     2       klubb
00099      *
00100      * In general, a TSLexeme will be considered to belong to the same split
00101      * variant as the previous one if they have the same nvariant value.
00102      * The exact values don't matter, only changes from one lexeme to the next.
00103      *----------
00104      */
00105     uint16      nvariant;
00106 
00107     uint16      flags;          /* See flag bits below */
00108 
00109     char       *lexeme;         /* C string */
00110 } TSLexeme;
00111 
00112 /* Flag bits that can appear in TSLexeme.flags; each value is a distinct single bit */
00113 #define TSL_ADDPOS      0x01
00114 #define TSL_PREFIX      0x02
00115 #define TSL_FILTER      0x04
00116 
00117 /*
00118  * Struct for supporting complex dictionaries such as a thesaurus.
00119  * The 4th argument of the dictlexize method is a pointer to this.
00120  */
00121 typedef struct
00122 {
00123     bool        isend;          /* in: signals to lexize_info that the end of
00124                                  * the text has been reached */
00125     bool        getnext;        /* out: dict wants next lexeme */
00126     void       *private_state;  /* internal dict state between calls with
00127                                  * getnext == true */
00128 } DictSubState;
00129 
00130 #endif   /* _PG_TS_PUBLIC_H_ */