Header And Logo

PostgreSQL
| The world's most advanced open source database.

scanner.h

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * scanner.h
00004  *      API for the core scanner (flex machine)
00005  *
00006  * The core scanner is also used by PL/pgsql, so we provide a public API
00007  * for it.  However, the rest of the backend is only expected to use the
00008  * higher-level API provided by parser.h.
00009  *
00010  *
00011  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00012  * Portions Copyright (c) 1994, Regents of the University of California
00013  *
00014  * src/include/parser/scanner.h
00015  *
00016  *-------------------------------------------------------------------------
00017  */
00018 
00019 #ifndef SCANNER_H
00020 #define SCANNER_H
00021 
00022 #include "parser/keywords.h"
00023 
00024 /*
00025  * The scanner returns extra data about scanned tokens in this union type.
00026  * Note that this is a subset of the fields used in YYSTYPE of the bison
00027  * parsers built atop the scanner.
00028  */
00029 typedef union core_YYSTYPE
00030 {
00031     int         ival;           /* for integer literals; value of the <ival> tokens (ICONST, PARAM) */
00032     char       *str;            /* for identifiers and non-integer literals; value of the <str> tokens */
00033     const char *keyword;        /* canonical spelling of keywords */
00034 } core_YYSTYPE;
00035 
00036 /*
00037  * We track token locations in terms of byte offsets from the start of the
00038  * source string, not the column number/line number representation that
00039  * bison uses by default.  Also, to minimize overhead we track only one
00040  * location (usually the first token location) for each construct, not
00041  * the beginning and ending locations as bison does by default.  It's
00042  * therefore sufficient to make YYLTYPE an int.
00043  */
00044 #define YYLTYPE  int     /* a location is a single byte offset into the source string */
00045 
00046 /*
00047  * Another important component of the scanner's API is the token code numbers.
00048  * However, those are not defined in this file, because bison insists on
00049  * defining them for itself.  The token codes used by the core scanner are
00050  * the ASCII characters plus these:
00051  *  %token <str>    IDENT FCONST SCONST BCONST XCONST Op
00052  *  %token <ival>   ICONST PARAM
00053  *  %token          TYPECAST DOT_DOT COLON_EQUALS
00054  * The above token definitions *must* be the first ones declared in any
00055  * bison parser built atop this scanner, so that they will have consistent
00056  * numbers assigned to them (specifically, IDENT = 258 and so on).
00057  */
00058 
00059 /*
00060  * The YY_EXTRA data that a flex scanner allows us to pass around.
00061  * Private state needed by the core scanner goes here.  Note that the actual
00062  * yy_extra struct may be larger and have this as its first component, thus
00063  * allowing the calling parser to keep some fields of its own in YY_EXTRA.
00064  */
00065 typedef struct core_yy_extra_type
00066 {
00067     /*
00068      * The string the scanner is physically scanning.  We keep this mainly so
00069      * that we can cheaply compute the offset of the current token (yytext).
00070      */
00071     char       *scanbuf;
00072     Size        scanbuflen;     /* NOTE(review): presumably the length of scanbuf -- confirm in scan.l */
00073 
00074     /*
00075      * The keyword list to use.
00076      */
00077     const ScanKeyword *keywords;    /* same type as scanner_init()'s keywords argument */
00078     int         num_keywords;   /* NOTE(review): presumably the entry count of keywords -- confirm */
00079 
00080     /*
00081      * literalbuf is used to accumulate literal values when multiple rules are
00082      * needed to parse a single literal.  Call startlit() to reset buffer to
00083      * empty, addlit() to add text.  NOTE: the string in literalbuf is NOT
00084      * necessarily null-terminated, but there always IS room to add a trailing
00085      * null at offset literallen.  We store a null only when we need it.
00086      */
00087     char       *literalbuf;     /* palloc'd expandable buffer */
00088     int         literallen;     /* actual current string length */
00089     int         literalalloc;   /* current allocated buffer size */
00090 
00091     int         xcdepth;        /* depth of nesting in slash-star comments */
00092     char       *dolqstart;      /* current $foo$ quote start string */
00093 
00094     /* first part of UTF16 surrogate pair for Unicode escapes */
00095     int32       utf16_first_part;
00096 
00097     /* state variables for literal-lexing warnings */
00098     bool        warn_on_first_escape;
00099     bool        saw_non_ascii;
00100 } core_yy_extra_type;
00101 
00102 /*
00103  * The type of yyscanner is opaque outside scan.l.
00104  */
00105 typedef void *core_yyscan_t;     /* handle type returned by scanner_init() and taken by the other entry points */
00106 
00107 
00108 /* Entry points in parser/scan.l; each yyscanner argument is the handle type returned by scanner_init() */
00109 extern core_yyscan_t scanner_init(const char *str,
00110              core_yy_extra_type *yyext,      /* caller-supplied core scanner state (may head a larger struct) */
00111              const ScanKeyword *keywords,    /* the keyword list to use */
00112              int num_keywords);
00113 extern void scanner_finish(core_yyscan_t yyscanner);    /* NOTE(review): presumably ends a scanner_init() session -- confirm in scan.l */
00114 extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
00115            core_yyscan_t yyscanner);       /* returns a token code; *lvalp gets its extra data, *llocp its location */
00116 extern int  scanner_errposition(int location, core_yyscan_t yyscanner);    /* NOTE(review): location is presumably a YYLTYPE byte offset -- confirm */
00117 extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner);
00118 
00119 #endif   /* SCANNER_H */
00118 
00119 #endif   /* SCANNER_H */