LLVM API Documentation

LLParser.h
Go to the documentation of this file.
00001 //===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 //  This file defines the parser class for .ll files.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #ifndef LLVM_LIB_ASMPARSER_LLPARSER_H
00015 #define LLVM_LIB_ASMPARSER_LLPARSER_H
00016 
00017 #include "LLLexer.h"
00018 #include "llvm/ADT/DenseMap.h"
00019 #include "llvm/ADT/StringMap.h"
00020 #include "llvm/IR/Attributes.h"
00021 #include "llvm/IR/Instructions.h"
00022 #include "llvm/IR/Module.h"
00023 #include "llvm/IR/Operator.h"
00024 #include "llvm/IR/Type.h"
00025 #include "llvm/IR/ValueHandle.h"
00026 #include <map>
00027 
00028 namespace llvm {
00029   class Module;
00030   class OpaqueType;
00031   class Function;
00032   class Value;
00033   class BasicBlock;
00034   class Instruction;
00035   class Constant;
00036   class GlobalValue;
00037   class Comdat;
00038   class MDString;
00039   class MDNode;
00040   class StructType;
00041 
00042   /// ValID - Represents a reference of a definition of some sort with no type.
00043   /// There are several cases where we have to parse the value but where the
00044   /// type can depend on later context.  This may either be a numeric reference
00045   /// or a symbolic (%var) reference.  This is just a discriminated union.
00046   struct ValID {
00047     enum {
00048       t_LocalID, t_GlobalID,      // ID in UIntVal.
00049       t_LocalName, t_GlobalName,  // Name in StrVal.
00050       t_APSInt, t_APFloat,        // Value in APSIntVal/APFloatVal.
00051       t_Null, t_Undef, t_Zero,    // No value.
00052       t_EmptyArray,               // No value:  []
00053       t_Constant,                 // Value in ConstantVal.
00054       t_InlineAsm,                // Value in StrVal/StrVal2/UIntVal.
00055       t_MDNode,                   // Value in MDNodeVal.
00056       t_MDString,                 // Value in MDStringVal.
00057       t_ConstantStruct,           // Value in ConstantStructElts.
00058       t_PackedConstantStruct      // Value in ConstantStructElts.
00059     } Kind;
00060 
00061     LLLexer::LocTy Loc;
00062     unsigned UIntVal;
00063     std::string StrVal, StrVal2;
00064     APSInt APSIntVal;
00065     APFloat APFloatVal;
00066     Constant *ConstantVal;
00067     MDNode *MDNodeVal;
00068     MDString *MDStringVal;
00069     Constant **ConstantStructElts;
00070 
00071     ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
00072     ~ValID() {
00073       if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
00074         delete [] ConstantStructElts;
00075     }
00076 
00077     bool operator<(const ValID &RHS) const {
00078       if (Kind == t_LocalID || Kind == t_GlobalID)
00079         return UIntVal < RHS.UIntVal;
00080       assert((Kind == t_LocalName || Kind == t_GlobalName ||
00081               Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
00082              "Ordering not defined for this ValID kind yet");
00083       return StrVal < RHS.StrVal;
00084     }
00085   };
00086 
00087   class LLParser {
00088   public:
00089     typedef LLLexer::LocTy LocTy;
00090   private:
00091     LLVMContext &Context;
00092     LLLexer Lex;
00093     Module *M;
00094 
00095     // Instruction metadata resolution.  Each instruction can have a list of
00096     // MDRef info associated with them.
00097     //
00098     // The simpler approach of just creating temporary MDNodes and then calling
00099     // RAUW on them when the definition is processed doesn't work because some
00100     // instruction metadata kinds, such as dbg, get stored in the IR in an
00101     // "optimized" format which doesn't participate in the normal value use
00102     // lists. This means that RAUW doesn't work, even on temporary MDNodes
00103     // which otherwise support RAUW. Instead, we defer resolving MDNode
00104     // references until the definitions have been processed.
00105     struct MDRef {
00106       SMLoc Loc;
00107       unsigned MDKind, MDSlot;
00108     };
00109     DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
00110 
00111     SmallVector<Instruction*, 64> InstsWithTBAATag;
00112 
00113     // Type resolution handling data structures.  The location is set when we
00114     // have processed a use of the type but not a definition yet.
00115     StringMap<std::pair<Type*, LocTy> > NamedTypes;
00116     std::vector<std::pair<Type*, LocTy> > NumberedTypes;
00117 
00118     std::vector<TrackingVH<MDNode> > NumberedMetadata;
00119     std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
00120 
00121     // Global Value reference information.
00122     std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
00123     std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
00124     std::vector<GlobalValue*> NumberedVals;
00125 
00126     // Comdat forward reference information.
00127     std::map<std::string, LocTy> ForwardRefComdats;
00128 
00129     // References to blockaddress.  The key is the function ValID, the value is
00130     // a list of references to blocks in that function.
00131     std::map<ValID, std::map<ValID, GlobalValue *>> ForwardRefBlockAddresses;
00132     class PerFunctionState;
00133     /// Reference to per-function state to allow basic blocks to be
00134     /// forward-referenced by blockaddress instructions within the same
00135     /// function.
00136     PerFunctionState *BlockAddressPFS;
00137 
00138     // Attribute builder reference information.
00139     std::map<Value*, std::vector<unsigned> > ForwardRefAttrGroups;
00140     std::map<unsigned, AttrBuilder> NumberedAttrBuilders;
00141 
00142   public:
00143     LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *m)
00144         : Context(m->getContext()), Lex(F, SM, Err, m->getContext()), M(m),
00145           BlockAddressPFS(nullptr) {}
00146     bool Run();
00147 
00148     LLVMContext &getContext() { return Context; }
00149 
00150   private:
00151 
00152     bool Error(LocTy L, const Twine &Msg) const {
00153       return Lex.Error(L, Msg);
00154     }
00155     bool TokError(const Twine &Msg) const {
00156       return Error(Lex.getLoc(), Msg);
00157     }
00158 
00159     /// GetGlobalVal - Get a value with the specified name or ID, creating a
00160     /// forward reference record if needed.  This can return null if the value
00161     /// exists but does not have the right type.
00162     GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
00163     GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
00164 
00165     /// Get a Comdat with the specified name, creating a forward reference
00166     /// record if needed.
00167     Comdat *getComdat(const std::string &N, LocTy Loc);
00168 
00169     // Helper Routines.
00170     bool ParseToken(lltok::Kind T, const char *ErrMsg);
00171     bool EatIfPresent(lltok::Kind T) {
00172       if (Lex.getKind() != T) return false;
00173       Lex.Lex();
00174       return true;
00175     }
00176 
00177     FastMathFlags EatFastMathFlagsIfPresent() {
00178       FastMathFlags FMF;
00179       while (true)
00180         switch (Lex.getKind()) {
00181         case lltok::kw_fast: FMF.setUnsafeAlgebra();   Lex.Lex(); continue;
00182         case lltok::kw_nnan: FMF.setNoNaNs();          Lex.Lex(); continue;
00183         case lltok::kw_ninf: FMF.setNoInfs();          Lex.Lex(); continue;
00184         case lltok::kw_nsz:  FMF.setNoSignedZeros();   Lex.Lex(); continue;
00185         case lltok::kw_arcp: FMF.setAllowReciprocal(); Lex.Lex(); continue;
00186         default: return FMF;
00187         }
00188       return FMF;
00189     }
00190 
00191     bool ParseOptionalToken(lltok::Kind T, bool &Present,
00192                             LocTy *Loc = nullptr) {
00193       if (Lex.getKind() != T) {
00194         Present = false;
00195       } else {
00196         if (Loc)
00197           *Loc = Lex.getLoc();
00198         Lex.Lex();
00199         Present = true;
00200       }
00201       return false;
00202     }
00203     bool ParseStringConstant(std::string &Result);
00204     bool ParseUInt32(unsigned &Val);
00205     bool ParseUInt32(unsigned &Val, LocTy &Loc) {
00206       Loc = Lex.getLoc();
00207       return ParseUInt32(Val);
00208     }
00209     bool ParseUInt64(uint64_t &Val);
00210     bool ParseUInt64(uint64_t &Val, LocTy &Loc) {
00211       Loc = Lex.getLoc();
00212       return ParseUInt64(Val);
00213     }
00214 
00215     bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
00216     bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
00217     bool parseOptionalUnnamedAddr(bool &UnnamedAddr) {
00218       return ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr);
00219     }
00220     bool ParseOptionalAddrSpace(unsigned &AddrSpace);
00221     bool ParseOptionalParamAttrs(AttrBuilder &B);
00222     bool ParseOptionalReturnAttrs(AttrBuilder &B);
00223     bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
00224     bool ParseOptionalLinkage(unsigned &Linkage) {
00225       bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
00226     }
00227     bool ParseOptionalVisibility(unsigned &Visibility);
00228     bool ParseOptionalDLLStorageClass(unsigned &DLLStorageClass);
00229     bool ParseOptionalCallingConv(unsigned &CC);
00230     bool ParseOptionalAlignment(unsigned &Alignment);
00231     bool ParseOptionalDereferenceableBytes(uint64_t &Bytes);
00232     bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
00233                                AtomicOrdering &Ordering);
00234     bool ParseOrdering(AtomicOrdering &Ordering);
00235     bool ParseOptionalStackAlignment(unsigned &Alignment);
00236     bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
00237     bool ParseOptionalCommaInAlloca(bool &IsInAlloca);
00238     bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
00239     bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
00240       bool AteExtraComma;
00241       if (ParseIndexList(Indices, AteExtraComma)) return true;
00242       if (AteExtraComma)
00243         return TokError("expected index");
00244       return false;
00245     }
00246 
00247     // Top-Level Entities
00248     bool ParseTopLevelEntities();
00249     bool ValidateEndOfModule();
00250     bool ParseTargetDefinition();
00251     bool ParseModuleAsm();
00252     bool ParseDepLibs();        // FIXME: Remove in 4.0.
00253     bool ParseUnnamedType();
00254     bool ParseNamedType();
00255     bool ParseDeclare();
00256     bool ParseDefine();
00257 
00258     bool ParseGlobalType(bool &IsConstant);
00259     bool ParseUnnamedGlobal();
00260     bool ParseNamedGlobal();
00261     bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
00262                      bool HasLinkage, unsigned Visibility,
00263                      unsigned DLLStorageClass,
00264                      GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr);
00265     bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Linkage,
00266                     unsigned Visibility, unsigned DLLStorageClass,
00267                     GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr);
00268     bool parseComdat();
00269     bool ParseStandaloneMetadata();
00270     bool ParseNamedMetadata();
00271     bool ParseMDString(MDString *&Result);
00272     bool ParseMDNodeID(MDNode *&Result);
00273     bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
00274     bool ParseUnnamedAttrGrp();
00275     bool ParseFnAttributeValuePairs(AttrBuilder &B,
00276                                     std::vector<unsigned> &FwdRefAttrGrps,
00277                                     bool inAttrGrp, LocTy &BuiltinLoc);
00278 
00279     // Type Parsing.
00280     bool ParseType(Type *&Result, bool AllowVoid = false);
00281     bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
00282       Loc = Lex.getLoc();
00283       return ParseType(Result, AllowVoid);
00284     }
00285     bool ParseAnonStructType(Type *&Result, bool Packed);
00286     bool ParseStructBody(SmallVectorImpl<Type*> &Body);
00287     bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
00288                                std::pair<Type*, LocTy> &Entry,
00289                                Type *&ResultTy);
00290 
00291     bool ParseArrayVectorType(Type *&Result, bool isVector);
00292     bool ParseFunctionType(Type *&Result);
00293 
00294     // Function Semantic Analysis.
00295     class PerFunctionState {
00296       LLParser &P;
00297       Function &F;
00298       std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
00299       std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
00300       std::vector<Value*> NumberedVals;
00301 
00302       /// FunctionNumber - If this is an unnamed function, this is the slot
00303       /// number of it, otherwise it is -1.
00304       int FunctionNumber;
00305     public:
00306       PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
00307       ~PerFunctionState();
00308 
00309       Function &getFunction() const { return F; }
00310 
00311       bool FinishFunction();
00312 
00313       /// GetVal - Get a value with the specified name or ID, creating a
00314       /// forward reference record if needed.  This can return null if the value
00315       /// exists but does not have the right type.
00316       Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc);
00317       Value *GetVal(unsigned ID, Type *Ty, LocTy Loc);
00318 
00319       /// SetInstName - After an instruction is parsed and inserted into its
00320       /// basic block, this installs its name.
00321       bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
00322                        Instruction *Inst);
00323 
00324       /// GetBB - Get a basic block with the specified name or ID, creating a
00325       /// forward reference record if needed.  This can return null if the value
00326       /// is not a BasicBlock.
00327       BasicBlock *GetBB(const std::string &Name, LocTy Loc);
00328       BasicBlock *GetBB(unsigned ID, LocTy Loc);
00329 
00330       /// DefineBB - Define the specified basic block, which is either named or
00331       /// unnamed.  If there is an error, this returns null otherwise it returns
00332       /// the block being defined.
00333       BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
00334 
00335       bool resolveForwardRefBlockAddresses();
00336     };
00337 
00338     bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
00339                              PerFunctionState *PFS);
00340 
00341     bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
00342     bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
00343       return ParseValue(Ty, V, &PFS);
00344     }
00345     bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
00346                     PerFunctionState &PFS) {
00347       Loc = Lex.getLoc();
00348       return ParseValue(Ty, V, &PFS);
00349     }
00350 
00351     bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
00352     bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
00353       return ParseTypeAndValue(V, &PFS);
00354     }
00355     bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
00356       Loc = Lex.getLoc();
00357       return ParseTypeAndValue(V, PFS);
00358     }
00359     bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
00360                                 PerFunctionState &PFS);
00361     bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
00362       LocTy Loc;
00363       return ParseTypeAndBasicBlock(BB, Loc, PFS);
00364     }
00365 
00366 
00367     struct ParamInfo {
00368       LocTy Loc;
00369       Value *V;
00370       AttributeSet Attrs;
00371       ParamInfo(LocTy loc, Value *v, AttributeSet attrs)
00372         : Loc(loc), V(v), Attrs(attrs) {}
00373     };
00374     bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
00375                             PerFunctionState &PFS,
00376                             bool IsMustTailCall = false,
00377                             bool InVarArgsFunc = false);
00378 
00379     // Constant Parsing.
00380     bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr);
00381     bool ParseGlobalValue(Type *Ty, Constant *&V);
00382     bool ParseGlobalTypeAndValue(Constant *&V);
00383     bool ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts);
00384     bool parseOptionalComdat(Comdat *&C);
00385     bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
00386     bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
00387     bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
00388     bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
00389 
00390     // Function Parsing.
00391     struct ArgInfo {
00392       LocTy Loc;
00393       Type *Ty;
00394       AttributeSet Attrs;
00395       std::string Name;
00396       ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N)
00397         : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
00398     };
00399     bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
00400     bool ParseFunctionHeader(Function *&Fn, bool isDefine);
00401     bool ParseFunctionBody(Function &Fn);
00402     bool ParseBasicBlock(PerFunctionState &PFS);
00403 
00404     enum TailCallType { TCT_None, TCT_Tail, TCT_MustTail };
00405 
00406     // Instruction Parsing.  Each instruction parsing routine can return with a
00407     // normal result, an error result, or return having eaten an extra comma.
00408     enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
00409     int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
00410                          PerFunctionState &PFS);
00411     bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
00412 
00413     bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
00414     bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
00415     bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
00416     bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
00417     bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
00418     bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
00419 
00420     bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
00421                          unsigned OperandType);
00422     bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
00423     bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
00424     bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
00425     bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
00426     bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
00427     bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
00428     bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
00429     bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
00430     int ParsePHI(Instruction *&I, PerFunctionState &PFS);
00431     bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
00432     bool ParseCall(Instruction *&I, PerFunctionState &PFS,
00433                    CallInst::TailCallKind IsTail);
00434     int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
00435     int ParseLoad(Instruction *&I, PerFunctionState &PFS);
00436     int ParseStore(Instruction *&I, PerFunctionState &PFS);
00437     int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
00438     int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
00439     int ParseFence(Instruction *&I, PerFunctionState &PFS);
00440     int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
00441     int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
00442     int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
00443 
00444     // Use-list order directives.
00445     bool ParseUseListOrder(PerFunctionState *PFS = nullptr);
00446     bool ParseUseListOrderBB();
00447     bool ParseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes);
00448     bool sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes, SMLoc Loc);
00449   };
00450 } // End llvm namespace
00451 
00452 #endif