TrinityCore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
parser.h
Go to the documentation of this file.
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Implements parsing of .proto files to FileDescriptorProtos.
36 
37 #ifndef GOOGLE_PROTOBUF_COMPILER_PARSER_H__
38 #define GOOGLE_PROTOBUF_COMPILER_PARSER_H__
39 
40 #include <map>
41 #include <string>
42 #include <utility>
47 
48 namespace google {
49 namespace protobuf { class Message; }
50 
51 namespace protobuf {
52 namespace compiler {
53 
54 // Defined in this file.
55 class Parser;
56 class SourceLocationTable;
57 
58 // Implements parsing of protocol definitions (such as .proto files).
59 //
60 // Note that most users will be more interested in the Importer class.
61 // Parser is a lower-level class which simply converts a single .proto file
62 // to a FileDescriptorProto. It does not resolve import directives or perform
63 // many other kinds of validation needed to construct a complete
64 // FileDescriptor.
66  public:
67  Parser();
68  ~Parser();
69 
70  // Parse the entire input and construct a FileDescriptorProto representing
71  // it. Returns true if no errors occurred, false otherwise.
72  bool Parse(io::Tokenizer* input, FileDescriptorProto* file);
73 
74  // Optional features:
75 
76  // DEPRECATED: New code should use the SourceCodeInfo embedded in the
77  // FileDescriptorProto.
78  //
79  // Requests that locations of certain definitions be recorded to the given
80  // SourceLocationTable while parsing. This can be used to look up exact line
81  // and column numbers for errors reported by DescriptorPool during validation.
82  // Set to NULL (the default) to discard source location information.
84  source_location_table_ = location_table;
85  }
86 
87  // Requests that errors be recorded to the given ErrorCollector while
88  // parsing. Set to NULL (the default) to discard error messages.
89  void RecordErrorsTo(io::ErrorCollector* error_collector) {
90  error_collector_ = error_collector;  // Only stores the pointer; AddError() forwards to it when it is not NULL.
91  }
92 
93  // Returns the identifier used in the "syntax = " declaration, if one was
94  // seen during the last call to Parse(), or the empty string otherwise.
95  const string& GetSyntaxIdentifier() { return syntax_identifier_; }  // Returns a reference to the member string, not a copy.
96 
97  // If set true, input files will be required to begin with a syntax
98  // identifier. Otherwise, files may omit this. If a syntax identifier
99  // is provided, it must be 'syntax = "proto2";' and must appear at the
100  // top of this file regardless of whether or not it was required.
102  require_syntax_identifier_ = value;
103  }
104 
105  // Call SetStopAfterSyntaxIdentifier(true) to tell the parser to stop
106  // parsing as soon as it has seen the syntax identifier, or lack thereof.
107  // This is useful for quickly identifying the syntax of the file without
108  // parsing the whole thing. If this is enabled, no error will be recorded
109  // if the syntax identifier is something other than "proto2" (since
110  // presumably the caller intends to deal with that), but other kinds of
111  // errors (e.g. parse errors) will still be reported. When this is enabled,
112  // you may pass a NULL FileDescriptorProto to Parse().
114  stop_after_syntax_identifier_ = value;
115  }
116 
117  private:
118  class LocationRecorder;
119 
120  // =================================================================
121  // Error recovery helpers
122 
123  // Consume the rest of the current statement. This consumes tokens
124  // until it sees one of:
125  // ';' Consumes the token and returns.
126  // '{' Consumes the brace then calls SkipRestOfBlock().
127  // '}' Returns without consuming.
128  // EOF Returns (can't consume).
129  // The Parser often calls SkipStatement() after encountering a syntax
130  // error. This allows it to go on parsing the following lines, allowing
131  // it to report more than just one error in the file.
132  void SkipStatement();
133 
134  // Consume the rest of the current block, including nested blocks,
135  // ending after the closing '}' is encountered and consumed, or at EOF.
136  void SkipRestOfBlock();
137 
138  // -----------------------------------------------------------------
139  // Single-token consuming helpers
140  //
141  // These make parsing code more readable.
142 
143  // True if the current token is TYPE_END.
144  inline bool AtEnd();
145 
146  // True if the next token matches the given text.
147  inline bool LookingAt(const char* text);
148  // True if the next token is of the given type.
149  inline bool LookingAtType(io::Tokenizer::TokenType token_type);
150 
151  // If the next token exactly matches the text given, consume it and return
152  // true. Otherwise, return false without logging an error.
153  bool TryConsume(const char* text);
154 
155  // These attempt to read some kind of token from the input. If successful,
156  // they return true. Otherwise they return false and add the given error
157  // to the error list.
158 
159  // Consume a token with the exact text given.
160  bool Consume(const char* text, const char* error);
161  // Same as above, but automatically generates the error "Expected \"text\".",
162  // where "text" is the expected token text.
163  bool Consume(const char* text);
164  // Consume a token of type IDENTIFIER and store its text in "output".
165  bool ConsumeIdentifier(string* output, const char* error);
166  // Consume an integer and store its value in "output".
167  bool ConsumeInteger(int* output, const char* error);
168  // Consume a signed integer and store its value in "output".
169  bool ConsumeSignedInteger(int* output, const char* error);
170  // Consume a 64-bit integer and store its value in "output". If the value
171  // is greater than max_value, an error will be reported.
172  bool ConsumeInteger64(uint64 max_value, uint64* output, const char* error);
173  // Consume a number and store its value in "output". This will accept
174  // tokens of either INTEGER or FLOAT type.
175  bool ConsumeNumber(double* output, const char* error);
176  // Consume a string literal and store its (unescaped) value in "output".
177  bool ConsumeString(string* output, const char* error);
178 
179  // Consume a token representing the end of the statement. Comments between
180  // this token and the next will be harvested for documentation. The given
181  // LocationRecorder should refer to the declaration that was just parsed;
182  // it will be populated with these comments.
183  //
184  // TODO(kenton): The LocationRecorder is const because historically locations
185  // have been passed around by const reference, for no particularly good
186  // reason. We should probably go through and change them all to mutable
187  // pointer to make this more intuitive.
188  bool TryConsumeEndOfDeclaration(const char* text,
189  const LocationRecorder* location);
190  bool ConsumeEndOfDeclaration(const char* text,
191  const LocationRecorder* location);
192 
193  // -----------------------------------------------------------------
194  // Error logging helpers
195 
196  // Invokes error_collector_->AddError(), if error_collector_ is not NULL.
197  void AddError(int line, int column, const string& error);
198 
199  // Invokes error_collector_->AddError() with the line and column number
200  // of the current token.
201  void AddError(const string& error);
202 
203  // Records a location in the SourceCodeInfo.location table (see
204  // descriptor.proto). We use RAII to ensure that the start and end locations
205  // are recorded -- the constructor records the start location and the
206  // destructor records the end location. Since the parser is
207  // recursive-descent, this works out beautifully.
209  public:
210  // Construct the file's "root" location.
211  LocationRecorder(Parser* parser);
212 
213  // Construct a location that represents a declaration nested within the
214  // given parent. E.g. a field's location is nested within the location
215  // for a message type. The parent's path will be copied, so you should
216  // call AddPath() only to add the path components leading from the parent
217  // to the child (as opposed to leading from the root to the child).
218  LocationRecorder(const LocationRecorder& parent);
219 
220  // Convenience constructors that call AddPath() one or two times.
221  LocationRecorder(const LocationRecorder& parent, int path1);
222  LocationRecorder(const LocationRecorder& parent, int path1, int path2);
223 
224  ~LocationRecorder();
225 
226  // Add a path component. See SourceCodeInfo.Location.path in
227  // descriptor.proto.
228  void AddPath(int path_component);
229 
230  // By default the location is considered to start at the current token at
231  // the time the LocationRecorder is created. StartAt() sets the start
232  // location to the given token instead.
233  void StartAt(const io::Tokenizer::Token& token);
234 
235  // Start at the same location as some other LocationRecorder.
236  void StartAt(const LocationRecorder& other);
237 
238  // By default the location is considered to end at the previous token at
239  // the time the LocationRecorder is destroyed. EndAt() sets the end
240  // location to the given token instead.
241  void EndAt(const io::Tokenizer::Token& token);
242 
243  // Records the start point of this location to the SourceLocationTable that
244  // was passed to RecordSourceLocationsTo(), if any. SourceLocationTable
245  // is an older way of keeping track of source locations which is still
246  // used in some places.
247  void RecordLegacyLocation(const Message* descriptor,
249 
250  // Attaches leading and trailing comments to the location. The two strings
251  // will be swapped into place, so after this is called *leading and
252  // *trailing will be empty.
253  //
254  // TODO(kenton): See comment on TryConsumeEndOfDeclaration(), above, for
255  // why this is const.
256  void AttachComments(string* leading, string* trailing) const;
257 
258  private:
261 
262  void Init(const LocationRecorder& parent);
263  };
264 
265  // =================================================================
266  // Parsers for various language constructs
267 
268  // Parses the "syntax = \"proto2\";" line at the top of the file. Returns
269  // false if it failed to parse or if the syntax identifier was not
270  // recognized.
271  bool ParseSyntaxIdentifier();
272 
273  // These methods parse various individual bits of code. They return
274  // false if they completely fail to parse the construct. In this case,
275  // it is probably necessary to skip the rest of the statement to recover.
276  // However, if these methods return true, it does NOT mean that there
277  // were no errors; only that there were no *syntax* errors. For instance,
278  // if a service method is defined using proper syntax but uses a primitive
279  // type as its input or output, ParseMethodField() still returns true
280  // and only reports the error by calling AddError(). In practice, this
281  // makes logic much simpler for the caller.
282 
283  // Parse a top-level message, enum, service, etc.
284  bool ParseTopLevelStatement(FileDescriptorProto* file,
285  const LocationRecorder& root_location);
286 
287  // Parse various high-level language constructs.
288  bool ParseMessageDefinition(DescriptorProto* message,
289  const LocationRecorder& message_location,
290  const FileDescriptorProto* containing_file);
291  bool ParseEnumDefinition(EnumDescriptorProto* enum_type,
292  const LocationRecorder& enum_location,
293  const FileDescriptorProto* containing_file);
294  bool ParseServiceDefinition(ServiceDescriptorProto* service,
295  const LocationRecorder& service_location,
296  const FileDescriptorProto* containing_file);
297  bool ParsePackage(FileDescriptorProto* file,
298  const LocationRecorder& root_location,
299  const FileDescriptorProto* containing_file);
300  bool ParseImport(RepeatedPtrField<string>* dependency,
301  RepeatedField<int32>* public_dependency,
302  RepeatedField<int32>* weak_dependency,
303  const LocationRecorder& root_location,
304  const FileDescriptorProto* containing_file);
305  bool ParseOption(Message* options,
306  const LocationRecorder& options_location,
307  const FileDescriptorProto* containing_file);
308 
309  // These methods parse the contents of a message, enum, or service type and
310  // add them to the given object. They consume the entire block including
311  // the beginning and ending brace.
312  bool ParseMessageBlock(DescriptorProto* message,
313  const LocationRecorder& message_location,
314  const FileDescriptorProto* containing_file);
315  bool ParseEnumBlock(EnumDescriptorProto* enum_type,
316  const LocationRecorder& enum_location,
317  const FileDescriptorProto* containing_file);
318  bool ParseServiceBlock(ServiceDescriptorProto* service,
319  const LocationRecorder& service_location,
320  const FileDescriptorProto* containing_file);
321 
322  // Parse one statement within a message, enum, or service block, including
323  // final semicolon.
324  bool ParseMessageStatement(DescriptorProto* message,
325  const LocationRecorder& message_location,
326  const FileDescriptorProto* containing_file);
327  bool ParseEnumStatement(EnumDescriptorProto* message,
328  const LocationRecorder& enum_location,
329  const FileDescriptorProto* containing_file);
330  bool ParseServiceStatement(ServiceDescriptorProto* message,
331  const LocationRecorder& service_location,
332  const FileDescriptorProto* containing_file);
333 
334  // Parse a field of a message. If the field is a group, its type will be
335  // added to "messages".
336  //
337  // parent_location and location_field_number_for_nested_type are needed when
338  // parsing groups -- we need to generate a nested message type within the
339  // parent and record its location accordingly. Since the parent could be
340  // either a FileDescriptorProto or a DescriptorProto, we must pass in the
341  // correct field number to use.
342  bool ParseMessageField(FieldDescriptorProto* field,
344  const LocationRecorder& parent_location,
345  int location_field_number_for_nested_type,
346  const LocationRecorder& field_location,
347  const FileDescriptorProto* containing_file);
348 
349  // Like ParseMessageField() but expects the label has already been filled in
350  // by the caller.
351  bool ParseMessageFieldNoLabel(FieldDescriptorProto* field,
353  const LocationRecorder& parent_location,
354  int location_field_number_for_nested_type,
355  const LocationRecorder& field_location,
356  const FileDescriptorProto* containing_file);
357 
358  // Parse an "extensions" declaration.
359  bool ParseExtensions(DescriptorProto* message,
360  const LocationRecorder& extensions_location,
361  const FileDescriptorProto* containing_file);
362 
363  // Parse an "extend" declaration. (See also comments for
364  // ParseMessageField().)
365  bool ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
367  const LocationRecorder& parent_location,
368  int location_field_number_for_nested_type,
369  const LocationRecorder& extend_location,
370  const FileDescriptorProto* containing_file);
371 
372  // Parse a "oneof" declaration. The caller is responsible for setting
373  // oneof_decl->label() since it will have had to parse the label before it
374  // knew it was parsing a oneof.
375  bool ParseOneof(OneofDescriptorProto* oneof_decl,
376  DescriptorProto* containing_type,
377  int oneof_index,
378  const LocationRecorder& oneof_location,
379  const LocationRecorder& containing_type_location,
380  const FileDescriptorProto* containing_file);
381 
382  // Parse a single enum value within an enum block.
383  bool ParseEnumConstant(EnumValueDescriptorProto* enum_value,
384  const LocationRecorder& enum_value_location,
385  const FileDescriptorProto* containing_file);
386 
387  // Parse enum constant options, i.e. the list in square brackets at the end
388  // of the enum constant value definition.
389  bool ParseEnumConstantOptions(EnumValueDescriptorProto* value,
390  const LocationRecorder& enum_value_location,
391  const FileDescriptorProto* containing_file);
392 
393  // Parse a single method within a service definition.
394  bool ParseServiceMethod(MethodDescriptorProto* method,
395  const LocationRecorder& method_location,
396  const FileDescriptorProto* containing_file);
397 
398 
399  // Parse options of a single method or stream.
400  bool ParseOptions(const LocationRecorder& parent_location,
401  const FileDescriptorProto* containing_file,
402  const int optionsFieldNumber,
403  Message* mutable_options);
404 
405  // Parse "required", "optional", or "repeated" and fill in "label"
406  // with the value.
407  bool ParseLabel(FieldDescriptorProto::Label* label,
408  const FileDescriptorProto* containing_file);
409 
410  // Parse a type name and fill in "type" (if it is a primitive) or
411  // "type_name" (if it is not) with the type parsed.
412  bool ParseType(FieldDescriptorProto::Type* type,
413  string* type_name);
414  // Parse a user-defined type and fill in "type_name" with the name.
415  // If a primitive type is named, it is treated as an error.
416  bool ParseUserDefinedType(string* type_name);
417 
418  // Parses field options, i.e. the stuff in square brackets at the end
419  // of a field definition. Also parses default value.
420  bool ParseFieldOptions(FieldDescriptorProto* field,
421  const LocationRecorder& field_location,
422  const FileDescriptorProto* containing_file);
423 
424  // Parse the "default" option. This needs special handling because its
425  // type is the field's type.
426  bool ParseDefaultAssignment(FieldDescriptorProto* field,
427  const LocationRecorder& field_location,
428  const FileDescriptorProto* containing_file);
429 
430  enum OptionStyle {  // Syntactic form of an option; passed as the "style" argument of the four-argument ParseOption().
431  OPTION_ASSIGNMENT, // just "name = value"
432  OPTION_STATEMENT // "option name = value;"
433  };
434 
435  // Parse a single option name/value pair, e.g. "ctype = CORD". The name
436  // identifies a field of the given Message, and the value of that field
437  // is set to the parsed value.
438  bool ParseOption(Message* options,
439  const LocationRecorder& options_location,
440  const FileDescriptorProto* containing_file,
441  OptionStyle style);
442 
443  // Parses a single part of a multipart option name. A multipart name consists
444  // of names separated by dots. Each name is either an identifier or a series
445  // of identifiers separated by dots and enclosed in parentheses. E.g.,
446  // "foo.(bar.baz).qux".
447  bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
448  const LocationRecorder& part_location,
449  const FileDescriptorProto* containing_file);
450 
451  // Parses a string surrounded by balanced braces. Strips off the outer
452  // braces and stores the enclosed string in *value.
453  // E.g.,
454  // { foo } *value gets 'foo'
455  // { foo { bar: box } } *value gets 'foo { bar: box }'
456  // {} *value gets ''
457  //
458  // REQUIRES: LookingAt("{")
459  // When finished successfully, we are looking at the first token past
460  // the ending brace.
461  bool ParseUninterpretedBlock(string* value);
462 
463  // =================================================================
464 
473 
474  // Leading doc comments for the next declaration. These are not complete
475  // yet; use ConsumeEndOfDeclaration() to get the complete comments.
477 
479 };
480 
481 // A table mapping (descriptor, ErrorLocation) pairs -- as reported by
482 // DescriptorPool when validating descriptors -- to line and column numbers
483 // within the original source code.
484 //
485 // This is semi-obsolete: FileDescriptorProto.source_code_info now contains
486 // far more complete information about source locations. However, as of this
487 // writing you still need to use SourceLocationTable when integrating with
488 // DescriptorPool.
490  public:
493 
494  // Finds the precise location of the given error and fills in *line and
495  // *column with the line and column numbers. If not found, sets *line to
496  // -1 and *column to 0 (since line = -1 is used to mean "error has no exact
497  // location" in the ErrorCollector interface). Returns true if found, false
498  // otherwise.
499  bool Find(const Message* descriptor,
501  int* line, int* column) const;
502 
503  // Adds a location to the table.
504  void Add(const Message* descriptor,
506  int line, int column);
507 
508  // Clears the contents of the table.
509  void Clear();
510 
511  private:
512  typedef map<
513  pair<const Message*, DescriptorPool::ErrorCollector::ErrorLocation>,  // Key: (descriptor, error location) pair.
514  pair<int, int> > LocationMap;  // Value: presumably (line, column), matching the argument order of Add() -- confirm against the implementation.
515  LocationMap location_map_;  // Storage for the table described in the class comment.
516 };
517 
518 } // namespace compiler
519 } // namespace protobuf
520 
521 } // namespace google
522 #endif // GOOGLE_PROTOBUF_COMPILER_PARSER_H__
SourceLocationTable * source_location_table_
Definition: parser.h:468
Definition: tokenizer.h:57
Definition: descriptor.pb.h:2511
OptionStyle
Definition: parser.h:430
SourceCodeInfo * source_code_info_
Definition: parser.h:467
Definition: descriptor.pb.h:1052
FieldDescriptorProto_Type
Definition: descriptor.pb.h:60
void SetRequireSyntaxIdentifier(bool value)
Definition: parser.h:101
LocationMap location_map_
Definition: parser.h:515
Definition: descriptor.pb.h:2797
#define GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TypeName)
Definition: common.h:89
Definition: tokenizer.h:83
string upcoming_doc_comments_
Definition: parser.h:476
SourceCodeInfo::Location * location_
Definition: parser.h:260
Definition: message.h:165
#define output
Definition: wire_format_lite.h:381
Definition: descriptor.pb.h:1161
Definition: descriptor.pb.h:1267
bool stop_after_syntax_identifier_
Definition: parser.h:471
TokenType
Definition: tokenizer.h:91
bool had_errors_
Definition: parser.h:469
string syntax_identifier_
Definition: parser.h:472
const string & GetSyntaxIdentifier()
Definition: parser.h:95
void RecordSourceLocationsTo(SourceLocationTable *location_table)
Definition: parser.h:83
Definition: descriptor.pb.h:722
#define input
Definition: wire_format_lite.h:242
Definition: descriptor.pb.h:546
Definition: descriptor.pb.h:1376
uint64_t uint64
Definition: common.h:178
Definition: descriptor.pb.h:968
#define LIBPROTOBUF_EXPORT
Definition: common.h:105
io::ErrorCollector * error_collector_
Definition: parser.h:466
SPECIFIC_TYPE * Find(ContainerUnorderedMap< SPECIFIC_TYPE, KEY_TYPE > const &elements, KEY_TYPE const &handle, SPECIFIC_TYPE *)
Definition: TypeContainerFunctions.h:74
map< pair< const Message *, DescriptorPool::ErrorCollector::ErrorLocation >, pair< int, int > > LocationMap
Definition: parser.h:514
io::Tokenizer * input_
Definition: parser.h:465
ErrorLocation
Definition: descriptor.h:1257
Definition: BnetFileGenerator.h:47
const FieldDescriptor value
Definition: descriptor.h:1522
bool require_syntax_identifier_
Definition: parser.h:470
Definition: parser.h:65
Definition: descriptor.pb.h:239
Definition: tokenizer.h:117
FieldDescriptorProto_Label
Definition: descriptor.pb.h:95
void SetStopAfterSyntaxIdentifier(bool value)
Definition: parser.h:113
Definition: descriptor.pb.h:2670
void RecordErrorsTo(io::ErrorCollector *error_collector)
Definition: parser.h:89