00001 /*------------------------------------------------------------------------- 00002 * 00003 * scanner.h 00004 * API for the core scanner (flex machine) 00005 * 00006 * The core scanner is also used by PL/pgsql, so we provide a public API 00007 * for it. However, the rest of the backend is only expected to use the 00008 * higher-level API provided by parser.h. 00009 * 00010 * 00011 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group 00012 * Portions Copyright (c) 1994, Regents of the University of California 00013 * 00014 * src/include/parser/scanner.h 00015 * 00016 *------------------------------------------------------------------------- 00017 */ 00018 00019 #ifndef SCANNER_H 00020 #define SCANNER_H 00021 00022 #include "parser/keywords.h" 00023 00024 /* 00025 * The scanner returns extra data about scanned tokens in this union type. 00026 * Note that this is a subset of the fields used in YYSTYPE of the bison 00027 * parsers built atop the scanner. 00028 */ 00029 typedef union core_YYSTYPE 00030 { 00031 int ival; /* for integer literals */ 00032 char *str; /* for identifiers and non-integer literals */ 00033 const char *keyword; /* canonical spelling of keywords */ 00034 } core_YYSTYPE; 00035 00036 /* 00037 * We track token locations in terms of byte offsets from the start of the 00038 * source string, not the column number/line number representation that 00039 * bison uses by default. Also, to minimize overhead we track only one 00040 * location (usually the first token location) for each construct, not 00041 * the beginning and ending locations as bison does by default. It's 00042 * therefore sufficient to make YYLTYPE an int. 00043 */ 00044 #define YYLTYPE int 00045 00046 /* 00047 * Another important component of the scanner's API is the token code numbers. 00048 * However, those are not defined in this file, because bison insists on 00049 * defining them for itself. The token codes used by the core scanner are 00050 * the ASCII characters plus these: 00051 * %token <str> IDENT FCONST SCONST BCONST XCONST Op 00052 * %token <ival> ICONST PARAM 00053 * %token TYPECAST DOT_DOT COLON_EQUALS 00054 * The above token definitions *must* be the first ones declared in any 00055 * bison parser built atop this scanner, so that they will have consistent 00056 * numbers assigned to them (specifically, IDENT = 258 and so on). 00057 */ 00058 00059 /* 00060 * The YY_EXTRA data that a flex scanner allows us to pass around. 00061 * Private state needed by the core scanner goes here. Note that the actual 00062 * yy_extra struct may be larger and have this as its first component, thus 00063 * allowing the calling parser to keep some fields of its own in YY_EXTRA. 00064 */ 00065 typedef struct core_yy_extra_type 00066 { 00067 /* 00068 * The string the scanner is physically scanning. We keep this mainly so 00069 * that we can cheaply compute the offset of the current token (yytext). 00070 */ 00071 char *scanbuf; 00072 Size scanbuflen; 00073 00074 /* 00075 * The keyword list to use. 00076 */ 00077 const ScanKeyword *keywords; 00078 int num_keywords; 00079 00080 /* 00081 * literalbuf is used to accumulate literal values when multiple rules are 00082 * needed to parse a single literal. Call startlit() to reset buffer to 00083 * empty, addlit() to add text. NOTE: the string in literalbuf is NOT 00084 * necessarily null-terminated, but there always IS room to add a trailing 00085 * null at offset literallen. We store a null only when we need it. 00086 */ 00087 char *literalbuf; /* palloc'd expandable buffer */ 00088 int literallen; /* actual current string length */ 00089 int literalalloc; /* current allocated buffer size */ 00090 00091 int xcdepth; /* depth of nesting in slash-star comments */ 00092 char *dolqstart; /* current $foo$ quote start string */ 00093 00094 /* first part of UTF16 surrogate pair for Unicode escapes */ 00095 int32 utf16_first_part; 00096 00097 /* state variables for literal-lexing warnings */ 00098 bool warn_on_first_escape; 00099 bool saw_non_ascii; 00100 } core_yy_extra_type; 00101 00102 /* 00103 * The type of yyscanner is opaque outside scan.l. 00104 */ 00105 typedef void *core_yyscan_t; 00106 00107 00108 /* Entry points in parser/scan.l */ 00109 extern core_yyscan_t scanner_init(const char *str, 00110 core_yy_extra_type *yyext, 00111 const ScanKeyword *keywords, 00112 int num_keywords); 00113 extern void scanner_finish(core_yyscan_t yyscanner); 00114 extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, 00115 core_yyscan_t yyscanner); 00116 extern int scanner_errposition(int location, core_yyscan_t yyscanner); 00117 extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner); 00118 00119 #endif /* SCANNER_H */