LLVM API Documentation

BitstreamReader.h
Go to the documentation of this file.
00001 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This header defines the BitstreamReader class.  This class can be used to
00011 // read an arbitrary bitstream, regardless of its contents.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #ifndef LLVM_BITCODE_BITSTREAMREADER_H
00016 #define LLVM_BITCODE_BITSTREAMREADER_H
00017 
00018 #include "llvm/Bitcode/BitCodes.h"
00019 #include "llvm/Support/Endian.h"
00020 #include "llvm/Support/StreamableMemoryObject.h"
00021 #include <climits>
00022 #include <string>
00023 #include <vector>
00024 
00025 namespace llvm {
00026 
00027   class Deserializer;
00028 
00029 /// BitstreamReader - This class is used to read from an LLVM bitcode stream,
00030 /// maintaining information that is global to decoding the entire file.  While
00031 /// a file is being read, multiple cursors can be independently advanced or
00032 /// skipped around within the file.  These are represented by the
00033 /// BitstreamCursor class.
00034 class BitstreamReader {
00035 public:
00036   /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
00037   /// These describe abbreviations that all blocks of the specified ID inherit.
00038   struct BlockInfo {
00039     unsigned BlockID;
00040     std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
00041     std::string Name;
00042 
00043     std::vector<std::pair<unsigned, std::string> > RecordNames;
00044   };
00045 private:
00046   std::unique_ptr<StreamableMemoryObject> BitcodeBytes;
00047 
00048   std::vector<BlockInfo> BlockInfoRecords;
00049 
00050   /// IgnoreBlockInfoNames - This is set to true if we don't care about the
00051   /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
00052   /// uses this.
00053   bool IgnoreBlockInfoNames;
00054 
00055   BitstreamReader(const BitstreamReader&) LLVM_DELETED_FUNCTION;
00056   void operator=(const BitstreamReader&) LLVM_DELETED_FUNCTION;
00057 public:
00058   BitstreamReader() : IgnoreBlockInfoNames(true) {
00059   }
00060 
00061   BitstreamReader(const unsigned char *Start, const unsigned char *End)
00062       : IgnoreBlockInfoNames(true) {
00063     init(Start, End);
00064   }
00065 
00066   BitstreamReader(StreamableMemoryObject *bytes) : IgnoreBlockInfoNames(true) {
00067     BitcodeBytes.reset(bytes);
00068   }
00069 
00070   BitstreamReader(BitstreamReader &&Other) {
00071     *this = std::move(Other);
00072   }
00073 
00074   BitstreamReader &operator=(BitstreamReader &&Other) {
00075     BitcodeBytes = std::move(Other.BitcodeBytes);
00076     // Explicitly swap block info, so that nothing gets destroyed twice.
00077     std::swap(BlockInfoRecords, Other.BlockInfoRecords);
00078     IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames;
00079     return *this;
00080   }
00081 
00082   void init(const unsigned char *Start, const unsigned char *End) {
00083     assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
00084     BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
00085   }
00086 
00087   StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
00088 
00089   /// CollectBlockInfoNames - This is called by clients that want block/record
00090   /// name information.
00091   void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
00092   bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
00093 
00094   //===--------------------------------------------------------------------===//
00095   // Block Manipulation
00096   //===--------------------------------------------------------------------===//
00097 
00098   /// hasBlockInfoRecords - Return true if we've already read and processed the
00099   /// block info block for this Bitstream.  We only process it for the first
00100   /// cursor that walks over it.
00101   bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
00102 
00103   /// getBlockInfo - If there is block info for the specified ID, return it,
00104   /// otherwise return null.
00105   const BlockInfo *getBlockInfo(unsigned BlockID) const {
00106     // Common case, the most recent entry matches BlockID.
00107     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
00108       return &BlockInfoRecords.back();
00109 
00110     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
00111          i != e; ++i)
00112       if (BlockInfoRecords[i].BlockID == BlockID)
00113         return &BlockInfoRecords[i];
00114     return nullptr;
00115   }
00116 
00117   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
00118     if (const BlockInfo *BI = getBlockInfo(BlockID))
00119       return *const_cast<BlockInfo*>(BI);
00120 
00121     // Otherwise, add a new record.
00122     BlockInfoRecords.push_back(BlockInfo());
00123     BlockInfoRecords.back().BlockID = BlockID;
00124     return BlockInfoRecords.back();
00125   }
00126 
00127   /// Takes block info from the other bitstream reader.
00128   ///
00129   /// This is a "take" operation because BlockInfo records are non-trivial, and
00130   /// indeed rather expensive.
00131   void takeBlockInfo(BitstreamReader &&Other) {
00132     assert(!hasBlockInfoRecords());
00133     BlockInfoRecords = std::move(Other.BlockInfoRecords);
00134   }
00135 };
00136 
00137 
00138 /// BitstreamEntry - When advancing through a bitstream cursor, each advance can
00139 /// discover a few different kinds of entries:
00140 ///   Error    - Malformed bitcode was found.
00141 ///   EndBlock - We've reached the end of the current block, (or the end of the
00142 ///              file, which is treated like a series of EndBlock records.
00143 ///   SubBlock - This is the start of a new subblock of a specific ID.
00144 ///   Record   - This is a record with a specific AbbrevID.
00145 ///
00146 struct BitstreamEntry {
00147   enum {
00148     Error,
00149     EndBlock,
00150     SubBlock,
00151     Record
00152   } Kind;
00153 
00154   unsigned ID;
00155 
00156   static BitstreamEntry getError() {
00157     BitstreamEntry E; E.Kind = Error; return E;
00158   }
00159   static BitstreamEntry getEndBlock() {
00160     BitstreamEntry E; E.Kind = EndBlock; return E;
00161   }
00162   static BitstreamEntry getSubBlock(unsigned ID) {
00163     BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
00164   }
00165   static BitstreamEntry getRecord(unsigned AbbrevID) {
00166     BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
00167   }
00168 };
00169 
00170 /// BitstreamCursor - This represents a position within a bitcode file.  There
00171 /// may be multiple independent cursors reading within one bitstream, each
00172 /// maintaining their own local state.
00173 ///
00174 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
00175 /// be passed by value.
00176 class BitstreamCursor {
00177   friend class Deserializer;
00178   BitstreamReader *BitStream;
00179   size_t NextChar;
00180 
00181 
00182   /// CurWord/word_t - This is the current data we have pulled from the stream
00183   /// but have not returned to the client.  This is specifically and
00184   /// intentionally defined to follow the word size of the host machine for
00185   /// efficiency.  We use word_t in places that are aware of this to make it
00186   /// perfectly explicit what is going on.
00187   typedef uint32_t word_t;
00188   word_t CurWord;
00189 
00190   /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
00191   /// is always from [0...31/63] inclusive (depending on word size).
00192   unsigned BitsInCurWord;
00193 
00194   // CurCodeSize - This is the declared size of code values used for the current
00195   // block, in bits.
00196   unsigned CurCodeSize;
00197 
00198   /// CurAbbrevs - Abbrevs installed at in this block.
00199   std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
00200 
00201   struct Block {
00202     unsigned PrevCodeSize;
00203     std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
00204     explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
00205   };
00206 
00207   /// BlockScope - This tracks the codesize of parent blocks.
00208   SmallVector<Block, 8> BlockScope;
00209 
00210 
00211 public:
00212   BitstreamCursor() : BitStream(nullptr), NextChar(0) {}
00213 
00214   explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
00215     NextChar = 0;
00216     CurWord = 0;
00217     BitsInCurWord = 0;
00218     CurCodeSize = 2;
00219   }
00220 
00221   void init(BitstreamReader &R) {
00222     freeState();
00223 
00224     BitStream = &R;
00225     NextChar = 0;
00226     CurWord = 0;
00227     BitsInCurWord = 0;
00228     CurCodeSize = 2;
00229   }
00230 
00231   void freeState();
00232 
00233   bool isEndPos(size_t pos) {
00234     return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
00235   }
00236 
00237   bool canSkipToPos(size_t pos) const {
00238     // pos can be skipped to if it is a valid address or one byte past the end.
00239     return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
00240         static_cast<uint64_t>(pos - 1));
00241   }
00242 
00243   uint32_t getWord(size_t pos) {
00244     uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
00245     BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf);
00246     return *reinterpret_cast<support::ulittle32_t *>(buf);
00247   }
00248 
00249   bool AtEndOfStream() {
00250     return BitsInCurWord == 0 && isEndPos(NextChar);
00251   }
00252 
00253   /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
00254   unsigned getAbbrevIDWidth() const { return CurCodeSize; }
00255 
00256   /// GetCurrentBitNo - Return the bit # of the bit we are reading.
00257   uint64_t GetCurrentBitNo() const {
00258     return NextChar*CHAR_BIT - BitsInCurWord;
00259   }
00260 
00261   BitstreamReader *getBitStreamReader() {
00262     return BitStream;
00263   }
00264   const BitstreamReader *getBitStreamReader() const {
00265     return BitStream;
00266   }
00267 
00268   /// Flags that modify the behavior of advance().
00269   enum {
00270     /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does
00271     /// not automatically pop the block scope when the end of a block is
00272     /// reached.
00273     AF_DontPopBlockAtEnd = 1,
00274 
00275     /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are
00276     /// returned just like normal records.
00277     AF_DontAutoprocessAbbrevs = 2
00278   };
00279 
00280   /// advance - Advance the current bitstream, returning the next entry in the
00281   /// stream.
00282   BitstreamEntry advance(unsigned Flags = 0) {
00283     while (1) {
00284       unsigned Code = ReadCode();
00285       if (Code == bitc::END_BLOCK) {
00286         // Pop the end of the block unless Flags tells us not to.
00287         if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
00288           return BitstreamEntry::getError();
00289         return BitstreamEntry::getEndBlock();
00290       }
00291 
00292       if (Code == bitc::ENTER_SUBBLOCK)
00293         return BitstreamEntry::getSubBlock(ReadSubBlockID());
00294 
00295       if (Code == bitc::DEFINE_ABBREV &&
00296           !(Flags & AF_DontAutoprocessAbbrevs)) {
00297         // We read and accumulate abbrev's, the client can't do anything with
00298         // them anyway.
00299         ReadAbbrevRecord();
00300         continue;
00301       }
00302 
00303       return BitstreamEntry::getRecord(Code);
00304     }
00305   }
00306 
00307   /// advanceSkippingSubblocks - This is a convenience function for clients that
00308   /// don't expect any subblocks.  This just skips over them automatically.
00309   BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
00310     while (1) {
00311       // If we found a normal entry, return it.
00312       BitstreamEntry Entry = advance(Flags);
00313       if (Entry.Kind != BitstreamEntry::SubBlock)
00314         return Entry;
00315 
00316       // If we found a sub-block, just skip over it and check the next entry.
00317       if (SkipBlock())
00318         return BitstreamEntry::getError();
00319     }
00320   }
00321 
00322   /// JumpToBit - Reset the stream to the specified bit number.
00323   void JumpToBit(uint64_t BitNo) {
00324     uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1);
00325     unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
00326     assert(canSkipToPos(ByteNo) && "Invalid location");
00327 
00328     // Move the cursor to the right word.
00329     NextChar = ByteNo;
00330     BitsInCurWord = 0;
00331     CurWord = 0;
00332 
00333     // Skip over any bits that are already consumed.
00334     if (WordBitNo) {
00335       if (sizeof(word_t) > 4)
00336         Read64(WordBitNo);
00337       else
00338         Read(WordBitNo);
00339     }
00340   }
00341 
00342 
00343   uint32_t Read(unsigned NumBits) {
00344     assert(NumBits && NumBits <= 32 &&
00345            "Cannot return zero or more than 32 bits!");
00346 
00347     // If the field is fully contained by CurWord, return it quickly.
00348     if (BitsInCurWord >= NumBits) {
00349       uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits));
00350       CurWord >>= NumBits;
00351       BitsInCurWord -= NumBits;
00352       return R;
00353     }
00354 
00355     // If we run out of data, stop at the end of the stream.
00356     if (isEndPos(NextChar)) {
00357       CurWord = 0;
00358       BitsInCurWord = 0;
00359       return 0;
00360     }
00361 
00362     uint32_t R = uint32_t(CurWord);
00363 
00364     // Read the next word from the stream.
00365     uint8_t Array[sizeof(word_t)] = {0};
00366 
00367     BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), Array);
00368 
00369     // Handle big-endian byte-swapping if necessary.
00370     support::detail::packed_endian_specific_integral
00371       <word_t, support::little, support::unaligned> EndianValue;
00372     memcpy(&EndianValue, Array, sizeof(Array));
00373 
00374     CurWord = EndianValue;
00375 
00376     NextChar += sizeof(word_t);
00377 
00378     // Extract NumBits-BitsInCurWord from what we just read.
00379     unsigned BitsLeft = NumBits-BitsInCurWord;
00380 
00381     // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive.
00382     R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft)))
00383                     << BitsInCurWord);
00384 
00385     // BitsLeft bits have just been used up from CurWord.  BitsLeft is in the
00386     // range [1..32]/[1..64] so be careful how we shift.
00387     if (BitsLeft != sizeof(word_t)*8)
00388       CurWord >>= BitsLeft;
00389     else
00390       CurWord = 0;
00391     BitsInCurWord = sizeof(word_t)*8-BitsLeft;
00392     return R;
00393   }
00394 
00395   uint64_t Read64(unsigned NumBits) {
00396     if (NumBits <= 32) return Read(NumBits);
00397 
00398     uint64_t V = Read(32);
00399     return V | (uint64_t)Read(NumBits-32) << 32;
00400   }
00401 
00402   uint32_t ReadVBR(unsigned NumBits) {
00403     uint32_t Piece = Read(NumBits);
00404     if ((Piece & (1U << (NumBits-1))) == 0)
00405       return Piece;
00406 
00407     uint32_t Result = 0;
00408     unsigned NextBit = 0;
00409     while (1) {
00410       Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
00411 
00412       if ((Piece & (1U << (NumBits-1))) == 0)
00413         return Result;
00414 
00415       NextBit += NumBits-1;
00416       Piece = Read(NumBits);
00417     }
00418   }
00419 
00420   // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size.  The
00421   // chunk size of the VBR must still be <= 32 bits though.
00422   uint64_t ReadVBR64(unsigned NumBits) {
00423     uint32_t Piece = Read(NumBits);
00424     if ((Piece & (1U << (NumBits-1))) == 0)
00425       return uint64_t(Piece);
00426 
00427     uint64_t Result = 0;
00428     unsigned NextBit = 0;
00429     while (1) {
00430       Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
00431 
00432       if ((Piece & (1U << (NumBits-1))) == 0)
00433         return Result;
00434 
00435       NextBit += NumBits-1;
00436       Piece = Read(NumBits);
00437     }
00438   }
00439 
00440 private:
00441   void SkipToFourByteBoundary() {
00442     // If word_t is 64-bits and if we've read less than 32 bits, just dump
00443     // the bits we have up to the next 32-bit boundary.
00444     if (sizeof(word_t) > 4 &&
00445         BitsInCurWord >= 32) {
00446       CurWord >>= BitsInCurWord-32;
00447       BitsInCurWord = 32;
00448       return;
00449     }
00450 
00451     BitsInCurWord = 0;
00452     CurWord = 0;
00453   }
00454 public:
00455 
00456   unsigned ReadCode() {
00457     return Read(CurCodeSize);
00458   }
00459 
00460 
00461   // Block header:
00462   //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
00463 
00464   /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
00465   /// the block.
00466   unsigned ReadSubBlockID() {
00467     return ReadVBR(bitc::BlockIDWidth);
00468   }
00469 
00470   /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
00471   /// over the body of this block.  If the block record is malformed, return
00472   /// true.
00473   bool SkipBlock() {
00474     // Read and ignore the codelen value.  Since we are skipping this block, we
00475     // don't care what code widths are used inside of it.
00476     ReadVBR(bitc::CodeLenWidth);
00477     SkipToFourByteBoundary();
00478     unsigned NumFourBytes = Read(bitc::BlockSizeWidth);
00479 
00480     // Check that the block wasn't partially defined, and that the offset isn't
00481     // bogus.
00482     size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
00483     if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
00484       return true;
00485 
00486     JumpToBit(SkipTo);
00487     return false;
00488   }
00489 
00490   /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
00491   /// the block, and return true if the block has an error.
00492   bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
00493 
00494   bool ReadBlockEnd() {
00495     if (BlockScope.empty()) return true;
00496 
00497     // Block tail:
00498     //    [END_BLOCK, <align4bytes>]
00499     SkipToFourByteBoundary();
00500 
00501     popBlockScope();
00502     return false;
00503   }
00504 
00505 private:
00506 
00507   void popBlockScope() {
00508     CurCodeSize = BlockScope.back().PrevCodeSize;
00509 
00510     CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
00511     BlockScope.pop_back();
00512   }
00513 
00514   //===--------------------------------------------------------------------===//
00515   // Record Processing
00516   //===--------------------------------------------------------------------===//
00517 
00518 private:
00519   void readAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
00520                               SmallVectorImpl<uint64_t> &Vals);
00521   void readAbbreviatedField(const BitCodeAbbrevOp &Op,
00522                             SmallVectorImpl<uint64_t> &Vals);
00523   void skipAbbreviatedField(const BitCodeAbbrevOp &Op);
00524 
00525 public:
00526 
00527   /// getAbbrev - Return the abbreviation for the specified AbbrevId.
00528   const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
00529     unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
00530     assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
00531     return CurAbbrevs[AbbrevNo].get();
00532   }
00533 
00534   /// skipRecord - Read the current record and discard it.
00535   void skipRecord(unsigned AbbrevID);
00536 
00537   unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
00538                       StringRef *Blob = nullptr);
00539 
00540   //===--------------------------------------------------------------------===//
00541   // Abbrev Processing
00542   //===--------------------------------------------------------------------===//
00543   void ReadAbbrevRecord();
00544 
00545   bool ReadBlockInfoBlock();
00546 };
00547 
00548 } // End llvm namespace
00549 
00550 #endif