clang API Documentation

ASTMatchers/Dynamic/Parser.h
Go to the documentation of this file.
00001 //===--- Parser.h - Matcher expression parser -----*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 ///
00010 /// \file
00011 /// \brief Simple matcher expression parser.
00012 ///
00013 /// The parser understands matcher expressions of the form:
00014 ///   MatcherName(Arg0, Arg1, ..., ArgN)
00015 /// as well as simple literals such as strings and unsigned integers.
00016 /// The parser does not know how to process the matchers. It delegates this task
00017 /// to a Sema object received as an argument.
00018 ///
00019 /// \code
00020 /// Grammar for the expressions supported:
00021 /// <Expression>        := <Literal> | <NamedValue> | <MatcherExpression>
00022 /// <Literal>           := <StringLiteral> | <Unsigned>
00023 /// <StringLiteral>     := "quoted string"
00024 /// <Unsigned>          := [0-9]+
00025 /// <NamedValue>        := <Identifier>
00026 /// <MatcherExpression> := <Identifier>(<ArgumentList>) |
00027 ///                        <Identifier>(<ArgumentList>).bind(<StringLiteral>)
00028 /// <Identifier>        := [a-zA-Z]+
00029 /// <ArgumentList>      := <Expression> | <Expression>,<ArgumentList>
00030 /// \endcode
00031 ///
00032 //===----------------------------------------------------------------------===//
00033 
00034 #ifndef LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
00035 #define LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
00036 
00037 #include "clang/ASTMatchers/Dynamic/Diagnostics.h"
00038 #include "clang/ASTMatchers/Dynamic/Registry.h"
00039 #include "clang/ASTMatchers/Dynamic/VariantValue.h"
00040 #include "clang/Basic/LLVM.h"
00041 #include "llvm/ADT/ArrayRef.h"
00042 #include "llvm/ADT/Optional.h"
00043 #include "llvm/ADT/StringRef.h"
00044 
00045 namespace clang {
00046 namespace ast_matchers {
00047 namespace dynamic {
00048 
00049 /// \brief Matcher expression parser.
00050 class Parser {
00051 public:
00052   /// \brief Interface to connect the parser with the registry and more.
00053   ///
00054   /// The parser uses the Sema instance passed into
00055   /// parseMatcherExpression() to handle all matcher tokens. The simplest
00056   /// processor implementation would simply call into the registry to create
00057   /// the matchers.
00058   /// However, a more complex processor might decide to intercept the matcher
00059   /// creation and do some extra work. For example, it could apply some
00060   /// transformation to the matcher by adding some id() nodes, or could detect
00061   /// specific matcher nodes for more efficient lookup.
00062   class Sema {
00063   public:
00064     virtual ~Sema();
00065 
00066     /// \brief Process a matcher expression.
00067     ///
00068     /// All the arguments passed here have already been processed.
00069     ///
00070     /// \param Ctor A matcher constructor looked up by lookupMatcherCtor.
00071     ///
00072     /// \param NameRange The location of the name in the matcher source.
00073     ///   Useful for error reporting.
00074     ///
00075     /// \param BindID The ID to use to bind the matcher, or a null \c StringRef
00076     ///   if no ID is specified.
00077     ///
00078     /// \param Args The argument list for the matcher.
00079     ///
00080     /// \return The matcher objects constructed by the processor, or a null
00081     ///   matcher if an error occurred. In that case, \c Error will contain a
00082     ///   description of the error.
00083     virtual VariantMatcher actOnMatcherExpression(MatcherCtor Ctor,
00084                                                   const SourceRange &NameRange,
00085                                                   StringRef BindID,
00086                                                   ArrayRef<ParserValue> Args,
00087                                                   Diagnostics *Error) = 0;
00088 
00089     /// \brief Look up a matcher by name.
00090     ///
00091     /// \param MatcherName The matcher name found by the parser.
00092     ///
00093     /// \return The matcher constructor, or Optional<MatcherCtor>() if not
00094     /// found.
00095     virtual llvm::Optional<MatcherCtor>
00096     lookupMatcherCtor(StringRef MatcherName) = 0;
00097 
00098     /// \brief Compute the list of completion types for \p Context.
00099     ///
00100     /// Each element of \p Context represents a matcher invocation, going from
00101     /// outermost to innermost. Elements are pairs consisting of a reference to
00102     /// the matcher constructor and the index of the next element in the
00103     /// argument list of that matcher (or for the last element, the index of
00104     /// the completion point in the argument list). An empty list requests
00105     /// completion for the root matcher.
00106     virtual std::vector<ArgKind> getAcceptedCompletionTypes(
00107         llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context);
00108 
00109     /// \brief Compute the list of completions that match any of
00110     /// \p AcceptedTypes.
00111     ///
00112     /// \param AcceptedTypes All types accepted for this completion.
00113     ///
00114     /// \return All completions for the specified types.
00115     /// Completions should be valid when used in \c lookupMatcherCtor().
00116     /// The matcher constructed from the return of \c lookupMatcherCtor()
00117     /// should be convertible to some type in \p AcceptedTypes.
00118     virtual std::vector<MatcherCompletion>
00119     getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes);
00120   };
00121 
00122   /// \brief Sema implementation that uses the matcher registry to process the
00123   ///   tokens.
00124   class RegistrySema : public Parser::Sema {
00125    public:
00126     virtual ~RegistrySema();
00127 
00128     llvm::Optional<MatcherCtor>
00129     lookupMatcherCtor(StringRef MatcherName) override;
00130 
00131     VariantMatcher actOnMatcherExpression(MatcherCtor Ctor,
00132                                           const SourceRange &NameRange,
00133                                           StringRef BindID,
00134                                           ArrayRef<ParserValue> Args,
00135                                           Diagnostics *Error) override;
00136 
00137     std::vector<ArgKind> getAcceptedCompletionTypes(
00138         llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) override;
00139 
00140     std::vector<MatcherCompletion>
00141     getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) override;
00142   };
00143 
00144   typedef llvm::StringMap<VariantValue> NamedValueMap;
00145 
00146   /// \brief Parse a matcher expression.
00147   ///
00148   /// \param MatcherCode The matcher expression to parse.
00149   ///
00150   /// \param S The Sema instance that will help the parser
00151   ///   construct the matchers. If null, it uses the default registry.
00152   ///
00153   /// \param NamedValues A map of precomputed named values.  This provides
00154   ///   the dictionary for the <NamedValue> rule of the grammar.
00155   ///   If null, it is ignored.
00156   ///
00157   /// \return The matcher object constructed by the processor, or an empty
00158   ///   Optional if an error occurred. In that case, \c Error will contain a
00159   ///   description of the error.
00160   ///   The caller takes ownership of the DynTypedMatcher object returned.
00161   static llvm::Optional<DynTypedMatcher>
00162   parseMatcherExpression(StringRef MatcherCode, Sema *S,
00163                          const NamedValueMap *NamedValues,
00164                          Diagnostics *Error);
00165   static llvm::Optional<DynTypedMatcher>
00166   parseMatcherExpression(StringRef MatcherCode, Sema *S,
00167                          Diagnostics *Error) {
00168     return parseMatcherExpression(MatcherCode, S, nullptr, Error);
00169   }
00170   static llvm::Optional<DynTypedMatcher>
00171   parseMatcherExpression(StringRef MatcherCode, Diagnostics *Error) {
00172     return parseMatcherExpression(MatcherCode, nullptr, Error);
00173   }
00174 
00175   /// \brief Parse an expression.
00176   ///
00177   /// Parses any expression supported by this parser. In general, the
00178   /// \c parseMatcherExpression function is a better approach to get a matcher
00179   /// object.
00180   ///
00181   /// \param S The Sema instance that will help the parser
00182   ///   construct the matchers. If null, it uses the default registry.
00183   ///
00184   /// \param NamedValues A map of precomputed named values.  This provides
00185   ///   the dictionary for the <NamedValue> rule of the grammar.
00186   ///   If null, it is ignored.
00187   static bool parseExpression(StringRef Code, Sema *S,
00188                               const NamedValueMap *NamedValues,
00189                               VariantValue *Value, Diagnostics *Error);
00190   static bool parseExpression(StringRef Code, Sema *S,
00191                               VariantValue *Value, Diagnostics *Error) {
00192     return parseExpression(Code, S, nullptr, Value, Error);
00193   }
00194   static bool parseExpression(StringRef Code, VariantValue *Value,
00195                               Diagnostics *Error) {
00196     return parseExpression(Code, nullptr, Value, Error);
00197   }
00198 
00199   /// \brief Complete an expression at the given offset.
00200   ///
00201   /// \param S The Sema instance that will help the parser
00202   ///   construct the matchers. If null, it uses the default registry.
00203   ///
00204   /// \param NamedValues A map of precomputed named values.  This provides
00205   ///   the dictionary for the <NamedValue> rule of the grammar.
00206   ///   If null, it is ignored.
00207   ///
00208   /// \return The list of completions, which may be empty if there are no
00209   /// available completions or if an error occurred.
00210   static std::vector<MatcherCompletion>
00211   completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S,
00212                      const NamedValueMap *NamedValues);
00213   static std::vector<MatcherCompletion>
00214   completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S) {
00215     return completeExpression(Code, CompletionOffset, S, nullptr);
00216   }
00217   static std::vector<MatcherCompletion>
00218   completeExpression(StringRef Code, unsigned CompletionOffset) {
00219     return completeExpression(Code, CompletionOffset, nullptr);
00220   }
00221 
00222 private:
00223   class CodeTokenizer;
00224   struct ScopedContextEntry;
00225   struct TokenInfo;
00226 
00227   Parser(CodeTokenizer *Tokenizer, Sema *S,
00228          const NamedValueMap *NamedValues,
00229          Diagnostics *Error);
00230 
00231   bool parseExpressionImpl(VariantValue *Value);
00232   bool parseMatcherExpressionImpl(const TokenInfo &NameToken,
00233                                   VariantValue *Value);
00234   bool parseIdentifierPrefixImpl(VariantValue *Value);
00235 
00236   void addCompletion(const TokenInfo &CompToken,
00237                      const MatcherCompletion &Completion);
00238   void addExpressionCompletions();
00239 
00240   std::vector<MatcherCompletion>
00241   getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes);
00242 
00243   CodeTokenizer *const Tokenizer;
00244   Sema *const S;
00245   const NamedValueMap *const NamedValues;
00246   Diagnostics *const Error;
00247 
00248   typedef std::vector<std::pair<MatcherCtor, unsigned> > ContextStackTy;
00249   ContextStackTy ContextStack;
00250   std::vector<MatcherCompletion> Completions;
00251 };
00252 
00253 }  // namespace dynamic
00254 }  // namespace ast_matchers
00255 }  // namespace clang
00256 
00257 #endif  // LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H