Header And Logo

PostgreSQL
| The world's most advanced open source database.

pl_scanner.c

Go to the documentation of this file.
00001 /*-------------------------------------------------------------------------
00002  *
00003  * pl_scanner.c
00004  *    lexical scanning for PL/pgSQL
00005  *
00006  *
00007  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
00008  * Portions Copyright (c) 1994, Regents of the University of California
00009  *
00010  *
00011  * IDENTIFICATION
00012  *    src/pl/plpgsql/src/pl_scanner.c
00013  *
00014  *-------------------------------------------------------------------------
00015  */
00016 #include "plpgsql.h"
00017 
00018 #include "mb/pg_wchar.h"
00019 #include "parser/scanner.h"
00020 
00021 #include "pl_gram.h"            /* must be after parser/scanner.h */
00022 
00023 #define PG_KEYWORD(a,b,c) {a,b,c},
00024 
00025 
00026 /*
 * Klugy flag to tell scanner how to look up identifiers.
 *
 * Within this file the flag is only initialized here and reset to
 * IDENTIFIER_LOOKUP_NORMAL by plpgsql_scanner_init(); the code that
 * actually consults it is not defined in this file.
 */
00027 IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
00028 
00029 /*
00030  * A word about keywords:
00031  *
00032  * We keep reserved and unreserved keywords in separate arrays.  The
00033  * reserved keywords are passed to the core scanner, so they will be
00034  * recognized before (and instead of) any variable name.  Unreserved
00035  * words are checked for separately, after determining that the identifier
00036  * isn't a known variable name.  If plpgsql_IdentifierLookup is DECLARE then
00037  * no variable names will be recognized, so the unreserved words always work.
00038  * (Note in particular that this helps us avoid reserving keywords that are
00039  * only needed in DECLARE sections.)
00040  *
00041  * In certain contexts it is desirable to prefer recognizing an unreserved
00042  * keyword over recognizing a variable name.  Those cases are handled in
00043  * pl_gram.y using tok_is_keyword().
00044  *
00045  * For the most part, the reserved keywords are those that start a PL/pgSQL
00046  * statement (and so would conflict with an assignment to a variable of the
00047  * same name).  We also don't sweat it much about reserving keywords that
00048  * are reserved in the core grammar.  Try to avoid reserving other words.
00049  */
00050 
00051 /*
00052  * Lists of keyword (name, token-value, category) entries.
00053  *
00054  * !!WARNING!!: These lists must be sorted by ASCII name, because binary
00055  *       search is used to locate entries.
00056  *
00057  * Be careful not to put the same word in both lists.  Also be sure that
00058  * pl_gram.y's unreserved_keyword production agrees with the second list.
00059  */
00060 
/*
 * Reserved keywords.  plpgsql_scanner_init() hands this table (and its
 * length) to the core scanner via scanner_init().
 *
 * Entries must stay in ASCII order of the keyword name.  Note that both
 * "elseif" and "elsif" map to the same token code, K_ELSIF.
 */
00061 static const ScanKeyword reserved_keywords[] = {
00062     PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD)
00063     PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
00064     PG_KEYWORD("by", K_BY, RESERVED_KEYWORD)
00065     PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD)
00066     PG_KEYWORD("close", K_CLOSE, RESERVED_KEYWORD)
00067     PG_KEYWORD("collate", K_COLLATE, RESERVED_KEYWORD)
00068     PG_KEYWORD("continue", K_CONTINUE, RESERVED_KEYWORD)
00069     PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD)
00070     PG_KEYWORD("default", K_DEFAULT, RESERVED_KEYWORD)
00071     PG_KEYWORD("diagnostics", K_DIAGNOSTICS, RESERVED_KEYWORD)
00072     PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD)
00073     PG_KEYWORD("elseif", K_ELSIF, RESERVED_KEYWORD)
00074     PG_KEYWORD("elsif", K_ELSIF, RESERVED_KEYWORD)
00075     PG_KEYWORD("end", K_END, RESERVED_KEYWORD)
00076     PG_KEYWORD("exception", K_EXCEPTION, RESERVED_KEYWORD)
00077     PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD)
00078     PG_KEYWORD("exit", K_EXIT, RESERVED_KEYWORD)
00079     PG_KEYWORD("fetch", K_FETCH, RESERVED_KEYWORD)
00080     PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD)
00081     PG_KEYWORD("foreach", K_FOREACH, RESERVED_KEYWORD)
00082     PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD)
00083     PG_KEYWORD("get", K_GET, RESERVED_KEYWORD)
00084     PG_KEYWORD("if", K_IF, RESERVED_KEYWORD)
00085     PG_KEYWORD("in", K_IN, RESERVED_KEYWORD)
00086     PG_KEYWORD("insert", K_INSERT, RESERVED_KEYWORD)
00087     PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD)
00088     PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD)
00089     PG_KEYWORD("move", K_MOVE, RESERVED_KEYWORD)
00090     PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD)
00091     PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD)
00092     PG_KEYWORD("open", K_OPEN, RESERVED_KEYWORD)
00093     PG_KEYWORD("or", K_OR, RESERVED_KEYWORD)
00094     PG_KEYWORD("perform", K_PERFORM, RESERVED_KEYWORD)
00095     PG_KEYWORD("raise", K_RAISE, RESERVED_KEYWORD)
00096     PG_KEYWORD("return", K_RETURN, RESERVED_KEYWORD)
00097     PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD)
00098     PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD)
00099     PG_KEYWORD("to", K_TO, RESERVED_KEYWORD)
00100     PG_KEYWORD("using", K_USING, RESERVED_KEYWORD)
00101     PG_KEYWORD("when", K_WHEN, RESERVED_KEYWORD)
00102     PG_KEYWORD("while", K_WHILE, RESERVED_KEYWORD)
00103 };
00104 
/* Number of entries in reserved_keywords[] */
00105 static const int num_reserved_keywords = lengthof(reserved_keywords);
00106 
/*
 * Unreserved keywords.  plpgsql_yylex() searches this table with
 * ScanKeywordLookup() only after plpgsql_parse_word() has failed to match
 * the identifier to a variable, and only when the identifier was not quoted.
 * plpgsql_token_is_unreserved_keyword() also scans it by token code.
 *
 * Entries must stay in ASCII order of the keyword name.
 */
00107 static const ScanKeyword unreserved_keywords[] = {
00108     PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
00109     PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
00110     PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
00111     PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
00112     PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
00113     PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
00114     PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
00115     PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
00116     PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
00117     PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
00118     PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
00119     PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
00120     PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
00121     PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
00122     PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
00123     PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
00124     PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
00125     PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
00126     PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
00127     PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
00128     PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
00129     PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
00130     PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
00131     PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
00132     PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
00133     PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
00134     PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD)
00135     PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD)
00136     PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
00137     PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
00138     PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
00139     PG_KEYWORD("result_oid", K_RESULT_OID, UNRESERVED_KEYWORD)
00140     PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
00141     PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
00142     PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
00143     PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
00144     PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
00145     PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD)
00146     PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
00147     PG_KEYWORD("stacked", K_STACKED, UNRESERVED_KEYWORD)
00148     PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
00149     PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD)
00150     PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD)
00151     PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD)
00152     PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
00153 };
00154 
/* Number of entries in unreserved_keywords[] */
00155 static const int num_unreserved_keywords = lengthof(unreserved_keywords);
00156 
00157 
00158 /*
 * Auxiliary data about a token (other than the token type).
 *
 * internal_yylex() fills one of these per token; the pushback stack stores
 * them alongside the token code, and plpgsql_yylex() copies the fields of
 * the returned token into plpgsql_yylval, plpgsql_yylloc and plpgsql_yyleng.
 */
00159 typedef struct
00160 {
00161     YYSTYPE     lval;           /* semantic information */
00162     YYLTYPE     lloc;           /* offset in scanbuf */
00163     int         leng;           /* length in bytes */
00164 } TokenAuxData;
00165 
00166 /*
00167  * Scanner working state.  At some point we might wish to fold all this
00168  * into a YY_EXTRA struct.  For the moment, there is no need for plpgsql's
00169  * lexer to be re-entrant, and the notational burden of passing a yyscanner
00170  * pointer around is great enough to not want to do it without need.
00171  */
00172 
00173 /* The stuff the core lexer needs */
00174 static core_yyscan_t yyscanner = NULL;     /* set by plpgsql_scanner_init(), reset to NULL by plpgsql_scanner_finish() */
00175 static core_yy_extra_type core_yy;         /* token locations are applied as offsets into core_yy.scanbuf */
00176 
00177 /* The original input string (set by plpgsql_scanner_init(), NULL again after plpgsql_scanner_finish()) */
00178 static const char *scanorig;
00179 
00180 /* Current token's length (corresponds to plpgsql_yylval and plpgsql_yylloc) */
00181 static int  plpgsql_yyleng;
00182 
00183 /*
 * Token pushback stack.  internal_yylex() pops from it before asking the
 * core lexer for a new token; push_back_token() raises an error rather than
 * store more than MAX_PUSHBACKS entries.
 */
00184 #define MAX_PUSHBACKS 4
00185 
00186 static int  num_pushbacks;                          /* number of valid entries; the top is at index num_pushbacks - 1 */
00187 static int  pushback_token[MAX_PUSHBACKS];          /* token codes */
00188 static TokenAuxData pushback_auxdata[MAX_PUSHBACKS];   /* matching auxiliary data, same indexing */
00189 
00190 /* State for plpgsql_location_to_lineno() */
00191 static const char *cur_line_start;         /* first byte of the current line within scanorig */
00192 static const char *cur_line_end;           /* the newline ending the current line, or NULL if there is none */
00193 static int  cur_line_num;                  /* line number of the current line, counting from 1 */
00194 
00195 /* Internal functions */
00196 static int  internal_yylex(TokenAuxData *auxdata);
00197 static void push_back_token(int token, TokenAuxData *auxdata);
00198 static void location_lineno_init(void);
00199 
00200 
00201 /*
00202  * This is the yylex routine called from the PL/pgSQL grammar.
00203  * It is a wrapper around the core lexer, with the ability to recognize
00204  * PL/pgSQL variables and return them as special T_DATUM tokens.  If a
00205  * word or compound word does not match any variable name, or if matching
00206  * is turned off by plpgsql_IdentifierLookup, it is returned as
00207  * T_WORD or T_CWORD respectively, or as an unreserved keyword if it
00208  * matches one of those.
00209  */
00210 int
00211 plpgsql_yylex(void)
00212 {
00213     int         tok1;
00214     TokenAuxData aux1;
00215     const ScanKeyword *kw;
00216 
00217     tok1 = internal_yylex(&aux1);
00218     if (tok1 == IDENT || tok1 == PARAM)
00219     {
00220         int         tok2;
00221         TokenAuxData aux2;
00222 
00223         tok2 = internal_yylex(&aux2);
00224         if (tok2 == '.')
00225         {
00226             int         tok3;
00227             TokenAuxData aux3;
00228 
00229             tok3 = internal_yylex(&aux3);
00230             if (tok3 == IDENT)
00231             {
00232                 int         tok4;
00233                 TokenAuxData aux4;
00234 
00235                 tok4 = internal_yylex(&aux4);
00236                 if (tok4 == '.')
00237                 {
00238                     int         tok5;
00239                     TokenAuxData aux5;
00240 
00241                     tok5 = internal_yylex(&aux5);
00242                     if (tok5 == IDENT)
00243                     {
00244                         if (plpgsql_parse_tripword(aux1.lval.str,
00245                                                    aux3.lval.str,
00246                                                    aux5.lval.str,
00247                                                    &aux1.lval.wdatum,
00248                                                    &aux1.lval.cword))
00249                             tok1 = T_DATUM;
00250                         else
00251                             tok1 = T_CWORD;
00252                     }
00253                     else
00254                     {
00255                         /* not A.B.C, so just process A.B */
00256                         push_back_token(tok5, &aux5);
00257                         push_back_token(tok4, &aux4);
00258                         if (plpgsql_parse_dblword(aux1.lval.str,
00259                                                   aux3.lval.str,
00260                                                   &aux1.lval.wdatum,
00261                                                   &aux1.lval.cword))
00262                             tok1 = T_DATUM;
00263                         else
00264                             tok1 = T_CWORD;
00265                     }
00266                 }
00267                 else
00268                 {
00269                     /* not A.B.C, so just process A.B */
00270                     push_back_token(tok4, &aux4);
00271                     if (plpgsql_parse_dblword(aux1.lval.str,
00272                                               aux3.lval.str,
00273                                               &aux1.lval.wdatum,
00274                                               &aux1.lval.cword))
00275                         tok1 = T_DATUM;
00276                     else
00277                         tok1 = T_CWORD;
00278                 }
00279             }
00280             else
00281             {
00282                 /* not A.B, so just process A */
00283                 push_back_token(tok3, &aux3);
00284                 push_back_token(tok2, &aux2);
00285                 if (plpgsql_parse_word(aux1.lval.str,
00286                                        core_yy.scanbuf + aux1.lloc,
00287                                        &aux1.lval.wdatum,
00288                                        &aux1.lval.word))
00289                     tok1 = T_DATUM;
00290                 else if (!aux1.lval.word.quoted &&
00291                          (kw = ScanKeywordLookup(aux1.lval.word.ident,
00292                                                  unreserved_keywords,
00293                                                  num_unreserved_keywords)))
00294                 {
00295                     aux1.lval.keyword = kw->name;
00296                     tok1 = kw->value;
00297                 }
00298                 else
00299                     tok1 = T_WORD;
00300             }
00301         }
00302         else
00303         {
00304             /* not A.B, so just process A */
00305             push_back_token(tok2, &aux2);
00306             if (plpgsql_parse_word(aux1.lval.str,
00307                                    core_yy.scanbuf + aux1.lloc,
00308                                    &aux1.lval.wdatum,
00309                                    &aux1.lval.word))
00310                 tok1 = T_DATUM;
00311             else if (!aux1.lval.word.quoted &&
00312                      (kw = ScanKeywordLookup(aux1.lval.word.ident,
00313                                              unreserved_keywords,
00314                                              num_unreserved_keywords)))
00315             {
00316                 aux1.lval.keyword = kw->name;
00317                 tok1 = kw->value;
00318             }
00319             else
00320                 tok1 = T_WORD;
00321         }
00322     }
00323     else
00324     {
00325         /* Not a potential plpgsql variable name, just return the data */
00326     }
00327 
00328     plpgsql_yylval = aux1.lval;
00329     plpgsql_yylloc = aux1.lloc;
00330     plpgsql_yyleng = aux1.leng;
00331     return tok1;
00332 }
00333 
00334 /*
00335  * Internal yylex function.  This wraps the core lexer and adds one feature:
00336  * a token pushback stack.  We also make a couple of trivial single-token
00337  * translations from what the core lexer does to what we want, in particular
00338  * interfacing from the core_YYSTYPE to YYSTYPE union.
00339  */
00340 static int
00341 internal_yylex(TokenAuxData *auxdata)
00342 {
00343     int         token;
00344     const char *yytext;
00345 
00346     if (num_pushbacks > 0)
00347     {
00348         num_pushbacks--;
00349         token = pushback_token[num_pushbacks];
00350         *auxdata = pushback_auxdata[num_pushbacks];
00351     }
00352     else
00353     {
00354         token = core_yylex(&auxdata->lval.core_yystype,
00355                            &auxdata->lloc,
00356                            yyscanner);
00357 
00358         /* remember the length of yytext before it gets changed */
00359         yytext = core_yy.scanbuf + auxdata->lloc;
00360         auxdata->leng = strlen(yytext);
00361 
00362         /* Check for << >> and #, which the core considers operators */
00363         if (token == Op)
00364         {
00365             if (strcmp(auxdata->lval.str, "<<") == 0)
00366                 token = LESS_LESS;
00367             else if (strcmp(auxdata->lval.str, ">>") == 0)
00368                 token = GREATER_GREATER;
00369             else if (strcmp(auxdata->lval.str, "#") == 0)
00370                 token = '#';
00371         }
00372 
00373         /* The core returns PARAM as ival, but we treat it like IDENT */
00374         else if (token == PARAM)
00375         {
00376             auxdata->lval.str = pstrdup(yytext);
00377         }
00378     }
00379 
00380     return token;
00381 }
00382 
00383 /*
00384  * Push back a token to be re-read by next internal_yylex() call.
00385  */
00386 static void
00387 push_back_token(int token, TokenAuxData *auxdata)
00388 {
00389     if (num_pushbacks >= MAX_PUSHBACKS)
00390         elog(ERROR, "too many tokens pushed back");
00391     pushback_token[num_pushbacks] = token;
00392     pushback_auxdata[num_pushbacks] = *auxdata;
00393     num_pushbacks++;
00394 }
00395 
00396 /*
00397  * Push back a single token to be re-read by next plpgsql_yylex() call.
00398  *
00399  * NOTE: this does not cause yylval or yylloc to "back up".  Also, it
00400  * is not a good idea to push back a token code other than what you read.
00401  */
00402 void
00403 plpgsql_push_back_token(int token)
00404 {
00405     TokenAuxData auxdata;
00406 
00407     auxdata.lval = plpgsql_yylval;
00408     auxdata.lloc = plpgsql_yylloc;
00409     auxdata.leng = plpgsql_yyleng;
00410     push_back_token(token, &auxdata);
00411 }
00412 
00413 /*
00414  * Tell whether a token is an unreserved keyword.
00415  *
00416  * (If it is, its lowercased form was returned as the token value, so we
00417  * do not need to offer that data here.)
00418  */
00419 bool
00420 plpgsql_token_is_unreserved_keyword(int token)
00421 {
00422     int         i;
00423 
00424     for (i = 0; i < num_unreserved_keywords; i++)
00425     {
00426         if (unreserved_keywords[i].value == token)
00427             return true;
00428     }
00429     return false;
00430 }
00431 
00432 /*
00433  * Append the function text starting at startlocation and extending to
00434  * (not including) endlocation onto the existing contents of "buf".
00435  */
00436 void
00437 plpgsql_append_source_text(StringInfo buf,
00438                            int startlocation, int endlocation)
00439 {
00440     Assert(startlocation <= endlocation);
00441     appendBinaryStringInfo(buf, scanorig + startlocation,
00442                            endlocation - startlocation);
00443 }
00444 
00445 /*
00446  * Peek one token ahead in the input stream.  Only the token code is
00447  * made available, not any of the auxiliary info such as location.
00448  *
00449  * NB: no variable or unreserved keyword lookup is performed here, they will
00450  * be returned as IDENT. Reserved keywords are resolved as usual.
00451  */
00452 int
00453 plpgsql_peek(void)
00454 {
00455     int         tok1;
00456     TokenAuxData aux1;
00457 
00458     tok1 = internal_yylex(&aux1);
00459     push_back_token(tok1, &aux1);
00460     return tok1;
00461 }
00462 
00463 /*
00464  * Peek two tokens ahead in the input stream. The first token and its
00465  * location in the query are returned in *tok1_p and *tok1_loc, second token
00466  * and its location in *tok2_p and *tok2_loc.
00467  *
00468  * NB: no variable or unreserved keyword lookup is performed here, they will
00469  * be returned as IDENT. Reserved keywords are resolved as usual.
00470  */
00471 void
00472 plpgsql_peek2(int *tok1_p, int *tok2_p, int *tok1_loc, int *tok2_loc)
00473 {
00474     int         tok1,
00475                 tok2;
00476     TokenAuxData aux1,
00477                 aux2;
00478 
00479     tok1 = internal_yylex(&aux1);
00480     tok2 = internal_yylex(&aux2);
00481 
00482     *tok1_p = tok1;
00483     if (tok1_loc)
00484         *tok1_loc = aux1.lloc;
00485     *tok2_p = tok2;
00486     if (tok2_loc)
00487         *tok2_loc = aux2.lloc;
00488 
00489     push_back_token(tok2, &aux2);
00490     push_back_token(tok1, &aux1);
00491 }
00492 
00493 /*
00494  * plpgsql_scanner_errposition
00495  *      Report an error cursor position, if possible.
00496  *
00497  * This is expected to be used within an ereport() call.  The return value
00498  * is a dummy (always 0, in fact).
00499  *
00500  * Note that this can only be used for messages emitted during initial
00501  * parsing of a plpgsql function, since it requires the scanorig string
00502  * to still be available.
00503  */
00504 int
00505 plpgsql_scanner_errposition(int location)
00506 {
00507     int         pos;
00508 
00509     if (location < 0 || scanorig == NULL)
00510         return 0;               /* no-op if location is unknown */
00511 
00512     /* Convert byte offset to character number */
00513     pos = pg_mbstrlen_with_len(scanorig, location) + 1;
00514     /* And pass it to the ereport mechanism */
00515     (void) internalerrposition(pos);
00516     /* Also pass the function body string */
00517     return internalerrquery(scanorig);
00518 }
00519 
00520 /*
00521  * plpgsql_yyerror
00522  *      Report a lexer or grammar error.
00523  *
00524  * The message's cursor position refers to the current token (the one
00525  * last returned by plpgsql_yylex()).
00526  * This is OK for syntax error messages from the Bison parser, because Bison
00527  * parsers report error as soon as the first unparsable token is reached.
00528  * Beware of using yyerror for other purposes, as the cursor position might
00529  * be misleading!
00530  */
00531 void __attribute__((noreturn))
00532 plpgsql_yyerror(const char *message)
00533 {
00534     char       *yytext = core_yy.scanbuf + plpgsql_yylloc;
00535 
00536     if (*yytext == '\0')
00537     {
00538         ereport(ERROR,
00539                 (errcode(ERRCODE_SYNTAX_ERROR),
00540         /* translator: %s is typically the translation of "syntax error" */
00541                  errmsg("%s at end of input", _(message)),
00542                  plpgsql_scanner_errposition(plpgsql_yylloc)));
00543     }
00544     else
00545     {
00546         /*
00547          * If we have done any lookahead then flex will have restored the
00548          * character after the end-of-token.  Zap it again so that we report
00549          * only the single token here.  This modifies scanbuf but we no longer
00550          * care about that.
00551          */
00552         yytext[plpgsql_yyleng] = '\0';
00553 
00554         ereport(ERROR,
00555                 (errcode(ERRCODE_SYNTAX_ERROR),
00556         /* translator: first %s is typically the translation of "syntax error" */
00557                  errmsg("%s at or near \"%s\"", _(message), yytext),
00558                  plpgsql_scanner_errposition(plpgsql_yylloc)));
00559     }
00560 }
00561 
00562 /*
00563  * Given a location (a byte offset in the function source text),
00564  * return a line number.
00565  *
00566  * We expect that this is typically called for a sequence of increasing
00567  * location values, so optimize accordingly by tracking the endpoints
00568  * of the "current" line.
00569  */
00570 int
00571 plpgsql_location_to_lineno(int location)
00572 {
00573     const char *loc;
00574 
00575     if (location < 0 || scanorig == NULL)
00576         return 0;               /* garbage in, garbage out */
00577     loc = scanorig + location;
00578 
00579     /* be correct, but not fast, if input location goes backwards */
00580     if (loc < cur_line_start)
00581         location_lineno_init();
00582 
00583     while (cur_line_end != NULL && loc > cur_line_end)
00584     {
00585         cur_line_start = cur_line_end + 1;
00586         cur_line_num++;
00587         cur_line_end = strchr(cur_line_start, '\n');
00588     }
00589 
00590     return cur_line_num;
00591 }
00592 
00593 /* initialize or reset the state for plpgsql_location_to_lineno */
00594 static void
00595 location_lineno_init(void)
00596 {
00597     cur_line_start = scanorig;
00598     cur_line_num = 1;
00599 
00600     cur_line_end = strchr(cur_line_start, '\n');
00601 }
00602 
00603 /* return the most recently computed lineno */
00604 int
00605 plpgsql_latest_lineno(void)
00606 {
00607     return cur_line_num;
00608 }
00609 
00610 
00611 /*
00612  * Called before any actual parsing is done
00613  *
00614  * Note: the passed "str" must remain valid until plpgsql_scanner_finish().
00615  * Although it is not fed directly to flex, we need the original string
00616  * to cite in error messages.
00617  */
00618 void
00619 plpgsql_scanner_init(const char *str)
00620 {
00621     /* Start up the core scanner */
00622     yyscanner = scanner_init(str, &core_yy,
00623                              reserved_keywords, num_reserved_keywords);
00624 
00625     /*
00626      * scanorig points to the original string, which unlike the scanner's
00627      * scanbuf won't be modified on-the-fly by flex.  Notice that although
00628      * yytext points into scanbuf, we rely on being able to apply locations
00629      * (offsets from string start) to scanorig as well.
00630      */
00631     scanorig = str;
00632 
00633     /* Other setup */
00634     plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
00635 
00636     num_pushbacks = 0;
00637 
00638     location_lineno_init();
00639 }
00640 
00641 /*
00642  * Called after parsing is done to clean up after plpgsql_scanner_init()
00643  */
00644 void
00645 plpgsql_scanner_finish(void)
00646 {
00647     /* release storage */
00648     scanner_finish(yyscanner);
00649     /* avoid leaving any dangling pointers */
00650     yyscanner = NULL;
00651     scanorig = NULL;
00652 }