LLVM API Documentation
00001 //===--- YAMLParser.h - Simple YAML parser --------------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This is a YAML 1.2 parser. 00011 // 00012 // See http://www.yaml.org/spec/1.2/spec.html for the full standard. 00013 // 00014 // This currently does not implement the following: 00015 // * Multi-line literal folding. 00016 // * Tag resolution. 00017 // * UTF-16. 00018 // * BOMs anywhere other than the first Unicode scalar value in the file. 00019 // 00020 // The most important class here is Stream. This represents a YAML stream with 00021 // 0, 1, or many documents. 00022 // 00023 // SourceMgr sm; 00024 // StringRef input = getInput(); 00025 // yaml::Stream stream(input, sm); 00026 // 00027 // for (yaml::document_iterator di = stream.begin(), de = stream.end(); 00028 // di != de; ++di) { 00029 // yaml::Node *n = di->getRoot(); 00030 // if (n) { 00031 // // Do something with n... 00032 // } else 00033 // break; 00034 // } 00035 // 00036 //===----------------------------------------------------------------------===// 00037 00038 #ifndef LLVM_SUPPORT_YAMLPARSER_H 00039 #define LLVM_SUPPORT_YAMLPARSER_H 00040 00041 #include "llvm/ADT/SmallString.h" 00042 #include "llvm/ADT/StringRef.h" 00043 #include "llvm/Support/Allocator.h" 00044 #include "llvm/Support/MemoryBuffer.h" 00045 #include "llvm/Support/SMLoc.h" 00046 #include <limits> 00047 #include <map> 00048 #include <utility> 00049 00050 namespace llvm { 00051 class SourceMgr; 00052 class raw_ostream; 00053 class Twine; 00054 00055 namespace yaml { 00056 00057 class document_iterator; 00058 class Document; 00059 class Node; 00060 class Scanner; 00061 struct Token; 00062 00063 /// \brief Dump all the tokens in this stream to OS. 00064 /// \returns true if there was an error, false otherwise. 00065 bool dumpTokens(StringRef Input, raw_ostream &); 00066 00067 /// \brief Scans all tokens in input without outputting anything. This is used 00068 /// for benchmarking the tokenizer. 00069 /// \returns true if there was an error, false otherwise. 00070 bool scanTokens(StringRef Input); 00071 00072 /// \brief Escape \a Input for a double quoted scalar. 00073 std::string escape(StringRef Input); 00074 00075 /// \brief This class represents a YAML stream potentially containing multiple 00076 /// documents. 00077 class Stream { 00078 public: 00079 /// \brief This keeps a reference to the string referenced by \p Input. 00080 Stream(StringRef Input, SourceMgr &); 00081 00082 Stream(MemoryBufferRef InputBuffer, SourceMgr &); 00083 ~Stream(); 00084 00085 document_iterator begin(); 00086 document_iterator end(); 00087 void skip(); 00088 bool failed(); 00089 bool validate() { 00090 skip(); 00091 return !failed(); 00092 } 00093 00094 void printError(Node *N, const Twine &Msg); 00095 00096 private: 00097 std::unique_ptr<Scanner> scanner; 00098 std::unique_ptr<Document> CurrentDoc; 00099 00100 friend class Document; 00101 }; 00102 00103 /// \brief Abstract base class for all Nodes. 00104 class Node { 00105 virtual void anchor(); 00106 00107 public: 00108 enum NodeKind { 00109 NK_Null, 00110 NK_Scalar, 00111 NK_KeyValue, 00112 NK_Mapping, 00113 NK_Sequence, 00114 NK_Alias 00115 }; 00116 00117 Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor, 00118 StringRef Tag); 00119 00120 /// \brief Get the value of the anchor attached to this node. If it does not 00121 /// have one, getAnchor().size() will be 0. 00122 StringRef getAnchor() const { return Anchor; } 00123 00124 /// \brief Get the tag as it was written in the document. This does not 00125 /// perform tag resolution. 00126 StringRef getRawTag() const { return Tag; } 00127 00128 /// \brief Get the verbatium tag for a given Node. This performs tag resoluton 00129 /// and substitution. 00130 std::string getVerbatimTag() const; 00131 00132 SMRange getSourceRange() const { return SourceRange; } 00133 void setSourceRange(SMRange SR) { SourceRange = SR; } 00134 00135 // These functions forward to Document and Scanner. 00136 Token &peekNext(); 00137 Token getNext(); 00138 Node *parseBlockNode(); 00139 BumpPtrAllocator &getAllocator(); 00140 void setError(const Twine &Message, Token &Location) const; 00141 bool failed() const; 00142 00143 virtual void skip() {} 00144 00145 unsigned int getType() const { return TypeID; } 00146 00147 void *operator new(size_t Size, BumpPtrAllocator &Alloc, 00148 size_t Alignment = 16) throw() { 00149 return Alloc.Allocate(Size, Alignment); 00150 } 00151 00152 void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t Size) throw() { 00153 Alloc.Deallocate(Ptr, Size); 00154 } 00155 00156 protected: 00157 std::unique_ptr<Document> &Doc; 00158 SMRange SourceRange; 00159 00160 void operator delete(void *) throw() {} 00161 00162 virtual ~Node() {} 00163 00164 private: 00165 unsigned int TypeID; 00166 StringRef Anchor; 00167 /// \brief The tag as typed in the document. 00168 StringRef Tag; 00169 }; 00170 00171 /// \brief A null value. 00172 /// 00173 /// Example: 00174 /// !!null null 00175 class NullNode : public Node { 00176 void anchor() override; 00177 00178 public: 00179 NullNode(std::unique_ptr<Document> &D) 00180 : Node(NK_Null, D, StringRef(), StringRef()) {} 00181 00182 static inline bool classof(const Node *N) { return N->getType() == NK_Null; } 00183 }; 00184 00185 /// \brief A scalar node is an opaque datum that can be presented as a 00186 /// series of zero or more Unicode scalar values. 00187 /// 00188 /// Example: 00189 /// Adena 00190 class ScalarNode : public Node { 00191 void anchor() override; 00192 00193 public: 00194 ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 00195 StringRef Val) 00196 : Node(NK_Scalar, D, Anchor, Tag), Value(Val) { 00197 SMLoc Start = SMLoc::getFromPointer(Val.begin()); 00198 SMLoc End = SMLoc::getFromPointer(Val.end()); 00199 SourceRange = SMRange(Start, End); 00200 } 00201 00202 // Return Value without any escaping or folding or other fun YAML stuff. This 00203 // is the exact bytes that are contained in the file (after conversion to 00204 // utf8). 00205 StringRef getRawValue() const { return Value; } 00206 00207 /// \brief Gets the value of this node as a StringRef. 00208 /// 00209 /// \param Storage is used to store the content of the returned StringRef iff 00210 /// it requires any modification from how it appeared in the source. 00211 /// This happens with escaped characters and multi-line literals. 00212 StringRef getValue(SmallVectorImpl<char> &Storage) const; 00213 00214 static inline bool classof(const Node *N) { 00215 return N->getType() == NK_Scalar; 00216 } 00217 00218 private: 00219 StringRef Value; 00220 00221 StringRef unescapeDoubleQuoted(StringRef UnquotedValue, 00222 StringRef::size_type Start, 00223 SmallVectorImpl<char> &Storage) const; 00224 }; 00225 00226 /// \brief A key and value pair. While not technically a Node under the YAML 00227 /// representation graph, it is easier to treat them this way. 00228 /// 00229 /// TODO: Consider making this not a child of Node. 00230 /// 00231 /// Example: 00232 /// Section: .text 00233 class KeyValueNode : public Node { 00234 void anchor() override; 00235 00236 public: 00237 KeyValueNode(std::unique_ptr<Document> &D) 00238 : Node(NK_KeyValue, D, StringRef(), StringRef()), Key(nullptr), 00239 Value(nullptr) {} 00240 00241 /// \brief Parse and return the key. 00242 /// 00243 /// This may be called multiple times. 00244 /// 00245 /// \returns The key, or nullptr if failed() == true. 00246 Node *getKey(); 00247 00248 /// \brief Parse and return the value. 00249 /// 00250 /// This may be called multiple times. 00251 /// 00252 /// \returns The value, or nullptr if failed() == true. 00253 Node *getValue(); 00254 00255 void skip() override { 00256 getKey()->skip(); 00257 getValue()->skip(); 00258 } 00259 00260 static inline bool classof(const Node *N) { 00261 return N->getType() == NK_KeyValue; 00262 } 00263 00264 private: 00265 Node *Key; 00266 Node *Value; 00267 }; 00268 00269 /// \brief This is an iterator abstraction over YAML collections shared by both 00270 /// sequences and maps. 00271 /// 00272 /// BaseT must have a ValueT* member named CurrentEntry and a member function 00273 /// increment() which must set CurrentEntry to 0 to create an end iterator. 00274 template <class BaseT, class ValueT> 00275 class basic_collection_iterator 00276 : public std::iterator<std::forward_iterator_tag, ValueT> { 00277 public: 00278 basic_collection_iterator() : Base(nullptr) {} 00279 basic_collection_iterator(BaseT *B) : Base(B) {} 00280 00281 ValueT *operator->() const { 00282 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 00283 return Base->CurrentEntry; 00284 } 00285 00286 ValueT &operator*() const { 00287 assert(Base && Base->CurrentEntry && 00288 "Attempted to dereference end iterator!"); 00289 return *Base->CurrentEntry; 00290 } 00291 00292 operator ValueT *() const { 00293 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 00294 return Base->CurrentEntry; 00295 } 00296 00297 bool operator!=(const basic_collection_iterator &Other) const { 00298 if (Base != Other.Base) 00299 return true; 00300 return (Base && Other.Base) && 00301 Base->CurrentEntry != Other.Base->CurrentEntry; 00302 } 00303 00304 basic_collection_iterator &operator++() { 00305 assert(Base && "Attempted to advance iterator past end!"); 00306 Base->increment(); 00307 // Create an end iterator. 00308 if (!Base->CurrentEntry) 00309 Base = nullptr; 00310 return *this; 00311 } 00312 00313 private: 00314 BaseT *Base; 00315 }; 00316 00317 // The following two templates are used for both MappingNode and Sequence Node. 00318 template <class CollectionType> 00319 typename CollectionType::iterator begin(CollectionType &C) { 00320 assert(C.IsAtBeginning && "You may only iterate over a collection once!"); 00321 C.IsAtBeginning = false; 00322 typename CollectionType::iterator ret(&C); 00323 ++ret; 00324 return ret; 00325 } 00326 00327 template <class CollectionType> void skip(CollectionType &C) { 00328 // TODO: support skipping from the middle of a parsed collection ;/ 00329 assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); 00330 if (C.IsAtBeginning) 00331 for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e; 00332 ++i) 00333 i->skip(); 00334 } 00335 00336 /// \brief Represents a YAML map created from either a block map for a flow map. 00337 /// 00338 /// This parses the YAML stream as increment() is called. 00339 /// 00340 /// Example: 00341 /// Name: _main 00342 /// Scope: Global 00343 class MappingNode : public Node { 00344 void anchor() override; 00345 00346 public: 00347 enum MappingType { 00348 MT_Block, 00349 MT_Flow, 00350 MT_Inline ///< An inline mapping node is used for "[key: value]". 00351 }; 00352 00353 MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 00354 MappingType MT) 00355 : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true), 00356 IsAtEnd(false), CurrentEntry(nullptr) {} 00357 00358 friend class basic_collection_iterator<MappingNode, KeyValueNode>; 00359 typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator; 00360 template <class T> friend typename T::iterator yaml::begin(T &); 00361 template <class T> friend void yaml::skip(T &); 00362 00363 iterator begin() { return yaml::begin(*this); } 00364 00365 iterator end() { return iterator(); } 00366 00367 void skip() override { yaml::skip(*this); } 00368 00369 static inline bool classof(const Node *N) { 00370 return N->getType() == NK_Mapping; 00371 } 00372 00373 private: 00374 MappingType Type; 00375 bool IsAtBeginning; 00376 bool IsAtEnd; 00377 KeyValueNode *CurrentEntry; 00378 00379 void increment(); 00380 }; 00381 00382 /// \brief Represents a YAML sequence created from either a block sequence for a 00383 /// flow sequence. 00384 /// 00385 /// This parses the YAML stream as increment() is called. 00386 /// 00387 /// Example: 00388 /// - Hello 00389 /// - World 00390 class SequenceNode : public Node { 00391 void anchor() override; 00392 00393 public: 00394 enum SequenceType { 00395 ST_Block, 00396 ST_Flow, 00397 // Use for: 00398 // 00399 // key: 00400 // - val1 00401 // - val2 00402 // 00403 // As a BlockMappingEntry and BlockEnd are not created in this case. 00404 ST_Indentless 00405 }; 00406 00407 SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 00408 SequenceType ST) 00409 : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true), 00410 IsAtEnd(false), 00411 WasPreviousTokenFlowEntry(true), // Start with an imaginary ','. 00412 CurrentEntry(nullptr) {} 00413 00414 friend class basic_collection_iterator<SequenceNode, Node>; 00415 typedef basic_collection_iterator<SequenceNode, Node> iterator; 00416 template <class T> friend typename T::iterator yaml::begin(T &); 00417 template <class T> friend void yaml::skip(T &); 00418 00419 void increment(); 00420 00421 iterator begin() { return yaml::begin(*this); } 00422 00423 iterator end() { return iterator(); } 00424 00425 void skip() override { yaml::skip(*this); } 00426 00427 static inline bool classof(const Node *N) { 00428 return N->getType() == NK_Sequence; 00429 } 00430 00431 private: 00432 SequenceType SeqType; 00433 bool IsAtBeginning; 00434 bool IsAtEnd; 00435 bool WasPreviousTokenFlowEntry; 00436 Node *CurrentEntry; 00437 }; 00438 00439 /// \brief Represents an alias to a Node with an anchor. 00440 /// 00441 /// Example: 00442 /// *AnchorName 00443 class AliasNode : public Node { 00444 void anchor() override; 00445 00446 public: 00447 AliasNode(std::unique_ptr<Document> &D, StringRef Val) 00448 : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {} 00449 00450 StringRef getName() const { return Name; } 00451 Node *getTarget(); 00452 00453 static inline bool classof(const Node *N) { return N->getType() == NK_Alias; } 00454 00455 private: 00456 StringRef Name; 00457 }; 00458 00459 /// \brief A YAML Stream is a sequence of Documents. A document contains a root 00460 /// node. 00461 class Document { 00462 public: 00463 /// \brief Root for parsing a node. Returns a single node. 00464 Node *parseBlockNode(); 00465 00466 Document(Stream &ParentStream); 00467 00468 /// \brief Finish parsing the current document and return true if there are 00469 /// more. Return false otherwise. 00470 bool skip(); 00471 00472 /// \brief Parse and return the root level node. 00473 Node *getRoot() { 00474 if (Root) 00475 return Root; 00476 return Root = parseBlockNode(); 00477 } 00478 00479 const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; } 00480 00481 private: 00482 friend class Node; 00483 friend class document_iterator; 00484 00485 /// \brief Stream to read tokens from. 00486 Stream &stream; 00487 00488 /// \brief Used to allocate nodes to. All are destroyed without calling their 00489 /// destructor when the document is destroyed. 00490 BumpPtrAllocator NodeAllocator; 00491 00492 /// \brief The root node. Used to support skipping a partially parsed 00493 /// document. 00494 Node *Root; 00495 00496 /// \brief Maps tag prefixes to their expansion. 00497 std::map<StringRef, StringRef> TagMap; 00498 00499 Token &peekNext(); 00500 Token getNext(); 00501 void setError(const Twine &Message, Token &Location) const; 00502 bool failed() const; 00503 00504 /// \brief Parse %BLAH directives and return true if any were encountered. 00505 bool parseDirectives(); 00506 00507 /// \brief Parse %YAML 00508 void parseYAMLDirective(); 00509 00510 /// \brief Parse %TAG 00511 void parseTAGDirective(); 00512 00513 /// \brief Consume the next token and error if it is not \a TK. 00514 bool expectToken(int TK); 00515 }; 00516 00517 /// \brief Iterator abstraction for Documents over a Stream. 00518 class document_iterator { 00519 public: 00520 document_iterator() : Doc(nullptr) {} 00521 document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {} 00522 00523 bool operator==(const document_iterator &Other) { 00524 if (isAtEnd() || Other.isAtEnd()) 00525 return isAtEnd() && Other.isAtEnd(); 00526 00527 return Doc == Other.Doc; 00528 } 00529 bool operator!=(const document_iterator &Other) { return !(*this == Other); } 00530 00531 document_iterator operator++() { 00532 assert(Doc && "incrementing iterator past the end."); 00533 if (!(*Doc)->skip()) { 00534 Doc->reset(nullptr); 00535 } else { 00536 Stream &S = (*Doc)->stream; 00537 Doc->reset(new Document(S)); 00538 } 00539 return *this; 00540 } 00541 00542 Document &operator*() { return *Doc->get(); } 00543 00544 std::unique_ptr<Document> &operator->() { return *Doc; } 00545 00546 private: 00547 bool isAtEnd() const { return !Doc || !*Doc; } 00548 00549 std::unique_ptr<Document> *Doc; 00550 }; 00551 00552 } // End namespace yaml. 00553 00554 } // End namespace llvm. 00555 00556 #endif