LLVM API Documentation

BitstreamWriter.h
Go to the documentation of this file.
00001 //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This header defines the BitstreamWriter class.  This class can be used to
00011 // write an arbitrary bitstream, regardless of its contents.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #ifndef LLVM_BITCODE_BITSTREAMWRITER_H
00016 #define LLVM_BITCODE_BITSTREAMWRITER_H
00017 
00018 #include "llvm/ADT/SmallVector.h"
00019 #include "llvm/ADT/StringRef.h"
00020 #include "llvm/Bitcode/BitCodes.h"
00021 #include <vector>
00022 
00023 namespace llvm {
00024 
00025 class BitstreamWriter {
00026   SmallVectorImpl<char> &Out;
00027 
00028   /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
00029   unsigned CurBit;
00030 
00031   /// CurValue - The current value.  Only bits < CurBit are valid.
00032   uint32_t CurValue;
00033 
00034   /// CurCodeSize - This is the declared size of code values used for the
00035   /// current block, in bits.
00036   unsigned CurCodeSize;
00037 
00038   /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
00039   /// selected BLOCK ID.
00040   unsigned BlockInfoCurBID;
00041 
00042   /// CurAbbrevs - Abbrevs installed at in this block.
00043   std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
00044 
00045   struct Block {
00046     unsigned PrevCodeSize;
00047     unsigned StartSizeWord;
00048     std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
00049     Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
00050   };
00051 
00052   /// BlockScope - This tracks the current blocks that we have entered.
00053   std::vector<Block> BlockScope;
00054 
00055   /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
00056   /// These describe abbreviations that all blocks of the specified ID inherit.
00057   struct BlockInfo {
00058     unsigned BlockID;
00059     std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
00060   };
00061   std::vector<BlockInfo> BlockInfoRecords;
00062 
00063   // BackpatchWord - Backpatch a 32-bit word in the output with the specified
00064   // value.
00065   void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
00066     Out[ByteNo++] = (unsigned char)(NewWord >>  0);
00067     Out[ByteNo++] = (unsigned char)(NewWord >>  8);
00068     Out[ByteNo++] = (unsigned char)(NewWord >> 16);
00069     Out[ByteNo  ] = (unsigned char)(NewWord >> 24);
00070   }
00071 
00072   void WriteByte(unsigned char Value) {
00073     Out.push_back(Value);
00074   }
00075 
00076   void WriteWord(unsigned Value) {
00077     unsigned char Bytes[4] = {
00078       (unsigned char)(Value >>  0),
00079       (unsigned char)(Value >>  8),
00080       (unsigned char)(Value >> 16),
00081       (unsigned char)(Value >> 24) };
00082     Out.append(&Bytes[0], &Bytes[4]);
00083   }
00084 
00085   unsigned GetBufferOffset() const {
00086     return Out.size();
00087   }
00088 
00089   unsigned GetWordIndex() const {
00090     unsigned Offset = GetBufferOffset();
00091     assert((Offset & 3) == 0 && "Not 32-bit aligned");
00092     return Offset / 4;
00093   }
00094 
00095 public:
00096   explicit BitstreamWriter(SmallVectorImpl<char> &O)
00097     : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
00098 
00099   ~BitstreamWriter() {
00100     assert(CurBit == 0 && "Unflushed data remaining");
00101     assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance");
00102   }
00103 
00104   /// \brief Retrieve the current position in the stream, in bits.
00105   uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; }
00106 
00107   //===--------------------------------------------------------------------===//
00108   // Basic Primitives for emitting bits to the stream.
00109   //===--------------------------------------------------------------------===//
00110 
00111   void Emit(uint32_t Val, unsigned NumBits) {
00112     assert(NumBits && NumBits <= 32 && "Invalid value size!");
00113     assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
00114     CurValue |= Val << CurBit;
00115     if (CurBit + NumBits < 32) {
00116       CurBit += NumBits;
00117       return;
00118     }
00119 
00120     // Add the current word.
00121     WriteWord(CurValue);
00122 
00123     if (CurBit)
00124       CurValue = Val >> (32-CurBit);
00125     else
00126       CurValue = 0;
00127     CurBit = (CurBit+NumBits) & 31;
00128   }
00129 
00130   void Emit64(uint64_t Val, unsigned NumBits) {
00131     if (NumBits <= 32)
00132       Emit((uint32_t)Val, NumBits);
00133     else {
00134       Emit((uint32_t)Val, 32);
00135       Emit((uint32_t)(Val >> 32), NumBits-32);
00136     }
00137   }
00138 
00139   void FlushToWord() {
00140     if (CurBit) {
00141       WriteWord(CurValue);
00142       CurBit = 0;
00143       CurValue = 0;
00144     }
00145   }
00146 
00147   void EmitVBR(uint32_t Val, unsigned NumBits) {
00148     assert(NumBits <= 32 && "Too many bits to emit!");
00149     uint32_t Threshold = 1U << (NumBits-1);
00150 
00151     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
00152     while (Val >= Threshold) {
00153       Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits);
00154       Val >>= NumBits-1;
00155     }
00156 
00157     Emit(Val, NumBits);
00158   }
00159 
00160   void EmitVBR64(uint64_t Val, unsigned NumBits) {
00161     assert(NumBits <= 32 && "Too many bits to emit!");
00162     if ((uint32_t)Val == Val)
00163       return EmitVBR((uint32_t)Val, NumBits);
00164 
00165     uint32_t Threshold = 1U << (NumBits-1);
00166 
00167     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
00168     while (Val >= Threshold) {
00169       Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) |
00170            (1 << (NumBits-1)), NumBits);
00171       Val >>= NumBits-1;
00172     }
00173 
00174     Emit((uint32_t)Val, NumBits);
00175   }
00176 
00177   /// EmitCode - Emit the specified code.
00178   void EmitCode(unsigned Val) {
00179     Emit(Val, CurCodeSize);
00180   }
00181 
00182   //===--------------------------------------------------------------------===//
00183   // Block Manipulation
00184   //===--------------------------------------------------------------------===//
00185 
00186   /// getBlockInfo - If there is block info for the specified ID, return it,
00187   /// otherwise return null.
00188   BlockInfo *getBlockInfo(unsigned BlockID) {
00189     // Common case, the most recent entry matches BlockID.
00190     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
00191       return &BlockInfoRecords.back();
00192 
00193     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
00194          i != e; ++i)
00195       if (BlockInfoRecords[i].BlockID == BlockID)
00196         return &BlockInfoRecords[i];
00197     return nullptr;
00198   }
00199 
00200   void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
00201     // Block header:
00202     //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
00203     EmitCode(bitc::ENTER_SUBBLOCK);
00204     EmitVBR(BlockID, bitc::BlockIDWidth);
00205     EmitVBR(CodeLen, bitc::CodeLenWidth);
00206     FlushToWord();
00207 
00208     unsigned BlockSizeWordIndex = GetWordIndex();
00209     unsigned OldCodeSize = CurCodeSize;
00210 
00211     // Emit a placeholder, which will be replaced when the block is popped.
00212     Emit(0, bitc::BlockSizeWidth);
00213 
00214     CurCodeSize = CodeLen;
00215 
00216     // Push the outer block's abbrev set onto the stack, start out with an
00217     // empty abbrev set.
00218     BlockScope.push_back(Block(OldCodeSize, BlockSizeWordIndex));
00219     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
00220 
00221     // If there is a blockinfo for this BlockID, add all the predefined abbrevs
00222     // to the abbrev list.
00223     if (BlockInfo *Info = getBlockInfo(BlockID)) {
00224       CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
00225                         Info->Abbrevs.end());
00226     }
00227   }
00228 
00229   void ExitBlock() {
00230     assert(!BlockScope.empty() && "Block scope imbalance!");
00231     const Block &B = BlockScope.back();
00232 
00233     // Block tail:
00234     //    [END_BLOCK, <align4bytes>]
00235     EmitCode(bitc::END_BLOCK);
00236     FlushToWord();
00237 
00238     // Compute the size of the block, in words, not counting the size field.
00239     unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1;
00240     unsigned ByteNo = B.StartSizeWord*4;
00241 
00242     // Update the block size field in the header of this sub-block.
00243     BackpatchWord(ByteNo, SizeInWords);
00244 
00245     // Restore the inner block's code size and abbrev table.
00246     CurCodeSize = B.PrevCodeSize;
00247     CurAbbrevs = std::move(B.PrevAbbrevs);
00248     BlockScope.pop_back();
00249   }
00250 
00251   //===--------------------------------------------------------------------===//
00252   // Record Emission
00253   //===--------------------------------------------------------------------===//
00254 
00255 private:
00256   /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev
00257   /// record.  This is a no-op, since the abbrev specifies the literal to use.
00258   template<typename uintty>
00259   void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) {
00260     assert(Op.isLiteral() && "Not a literal");
00261     // If the abbrev specifies the literal value to use, don't emit
00262     // anything.
00263     assert(V == Op.getLiteralValue() &&
00264            "Invalid abbrev for record!");
00265   }
00266 
00267   /// EmitAbbreviatedField - Emit a single scalar field value with the specified
00268   /// encoding.
00269   template<typename uintty>
00270   void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) {
00271     assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
00272 
00273     // Encode the value as we are commanded.
00274     switch (Op.getEncoding()) {
00275     default: llvm_unreachable("Unknown encoding!");
00276     case BitCodeAbbrevOp::Fixed:
00277       if (Op.getEncodingData())
00278         Emit((unsigned)V, (unsigned)Op.getEncodingData());
00279       break;
00280     case BitCodeAbbrevOp::VBR:
00281       if (Op.getEncodingData())
00282         EmitVBR64(V, (unsigned)Op.getEncodingData());
00283       break;
00284     case BitCodeAbbrevOp::Char6:
00285       Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6);
00286       break;
00287     }
00288   }
00289 
00290   /// EmitRecordWithAbbrevImpl - This is the core implementation of the record
00291   /// emission code.  If BlobData is non-null, then it specifies an array of
00292   /// data that should be emitted as part of the Blob or Array operand that is
00293   /// known to exist at the end of the record.
00294   template<typename uintty>
00295   void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
00296                                 StringRef Blob) {
00297     const char *BlobData = Blob.data();
00298     unsigned BlobLen = (unsigned) Blob.size();
00299     unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
00300     assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
00301     const BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo].get();
00302 
00303     EmitCode(Abbrev);
00304 
00305     unsigned RecordIdx = 0;
00306     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
00307          i != e; ++i) {
00308       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
00309       if (Op.isLiteral()) {
00310         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
00311         EmitAbbreviatedLiteral(Op, Vals[RecordIdx]);
00312         ++RecordIdx;
00313       } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
00314         // Array case.
00315         assert(i+2 == e && "array op not second to last?");
00316         const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
00317 
00318         // If this record has blob data, emit it, otherwise we must have record
00319         // entries to encode this way.
00320         if (BlobData) {
00321           assert(RecordIdx == Vals.size() &&
00322                  "Blob data and record entries specified for array!");
00323           // Emit a vbr6 to indicate the number of elements present.
00324           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
00325 
00326           // Emit each field.
00327           for (unsigned i = 0; i != BlobLen; ++i)
00328             EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]);
00329 
00330           // Know that blob data is consumed for assertion below.
00331           BlobData = nullptr;
00332         } else {
00333           // Emit a vbr6 to indicate the number of elements present.
00334           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
00335 
00336           // Emit each field.
00337           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx)
00338             EmitAbbreviatedField(EltEnc, Vals[RecordIdx]);
00339         }
00340       } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
00341         // If this record has blob data, emit it, otherwise we must have record
00342         // entries to encode this way.
00343 
00344         // Emit a vbr6 to indicate the number of elements present.
00345         if (BlobData) {
00346           EmitVBR(static_cast<uint32_t>(BlobLen), 6);
00347           assert(RecordIdx == Vals.size() &&
00348                  "Blob data and record entries specified for blob operand!");
00349         } else {
00350           EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
00351         }
00352 
00353         // Flush to a 32-bit alignment boundary.
00354         FlushToWord();
00355 
00356         // Emit each field as a literal byte.
00357         if (BlobData) {
00358           for (unsigned i = 0; i != BlobLen; ++i)
00359             WriteByte((unsigned char)BlobData[i]);
00360 
00361           // Know that blob data is consumed for assertion below.
00362           BlobData = nullptr;
00363         } else {
00364           for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) {
00365             assert(isUInt<8>(Vals[RecordIdx]) &&
00366                    "Value too large to emit as blob");
00367             WriteByte((unsigned char)Vals[RecordIdx]);
00368           }
00369         }
00370 
00371         // Align end to 32-bits.
00372         while (GetBufferOffset() & 3)
00373           WriteByte(0);
00374       } else {  // Single scalar field.
00375         assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
00376         EmitAbbreviatedField(Op, Vals[RecordIdx]);
00377         ++RecordIdx;
00378       }
00379     }
00380     assert(RecordIdx == Vals.size() && "Not all record operands emitted!");
00381     assert(BlobData == nullptr &&
00382            "Blob data specified for record that doesn't use it!");
00383   }
00384 
00385 public:
00386 
00387   /// EmitRecord - Emit the specified record to the stream, using an abbrev if
00388   /// we have one to compress the output.
00389   template<typename uintty>
00390   void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals,
00391                   unsigned Abbrev = 0) {
00392     if (!Abbrev) {
00393       // If we don't have an abbrev to use, emit this in its fully unabbreviated
00394       // form.
00395       EmitCode(bitc::UNABBREV_RECORD);
00396       EmitVBR(Code, 6);
00397       EmitVBR(static_cast<uint32_t>(Vals.size()), 6);
00398       for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i)
00399         EmitVBR64(Vals[i], 6);
00400       return;
00401     }
00402 
00403     // Insert the code into Vals to treat it uniformly.
00404     Vals.insert(Vals.begin(), Code);
00405 
00406     EmitRecordWithAbbrev(Abbrev, Vals);
00407   }
00408 
00409   /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
00410   /// Unlike EmitRecord, the code for the record should be included in Vals as
00411   /// the first entry.
00412   template<typename uintty>
00413   void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) {
00414     EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef());
00415   }
00416 
00417   /// EmitRecordWithBlob - Emit the specified record to the stream, using an
00418   /// abbrev that includes a blob at the end.  The blob data to emit is
00419   /// specified by the pointer and length specified at the end.  In contrast to
00420   /// EmitRecord, this routine expects that the first entry in Vals is the code
00421   /// of the record.
00422   template<typename uintty>
00423   void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
00424                           StringRef Blob) {
00425     EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob);
00426   }
00427   template<typename uintty>
00428   void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
00429                           const char *BlobData, unsigned BlobLen) {
00430     return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen));
00431   }
00432 
00433   /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records
00434   /// that end with an array.
00435   template<typename uintty>
00436   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
00437                           StringRef Array) {
00438     EmitRecordWithAbbrevImpl(Abbrev, Vals, Array);
00439   }
00440   template<typename uintty>
00441   void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
00442                           const char *ArrayData, unsigned ArrayLen) {
00443     return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData,
00444                                                             ArrayLen));
00445   }
00446 
00447   //===--------------------------------------------------------------------===//
00448   // Abbrev Emission
00449   //===--------------------------------------------------------------------===//
00450 
00451 private:
00452   // Emit the abbreviation as a DEFINE_ABBREV record.
00453   void EncodeAbbrev(BitCodeAbbrev *Abbv) {
00454     EmitCode(bitc::DEFINE_ABBREV);
00455     EmitVBR(Abbv->getNumOperandInfos(), 5);
00456     for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
00457          i != e; ++i) {
00458       const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
00459       Emit(Op.isLiteral(), 1);
00460       if (Op.isLiteral()) {
00461         EmitVBR64(Op.getLiteralValue(), 8);
00462       } else {
00463         Emit(Op.getEncoding(), 3);
00464         if (Op.hasEncodingData())
00465           EmitVBR64(Op.getEncodingData(), 5);
00466       }
00467     }
00468   }
00469 public:
00470 
00471   /// EmitAbbrev - This emits an abbreviation to the stream.  Note that this
00472   /// method takes ownership of the specified abbrev.
00473   unsigned EmitAbbrev(BitCodeAbbrev *Abbv) {
00474     // Emit the abbreviation as a record.
00475     EncodeAbbrev(Abbv);
00476     CurAbbrevs.push_back(Abbv);
00477     return static_cast<unsigned>(CurAbbrevs.size())-1 +
00478       bitc::FIRST_APPLICATION_ABBREV;
00479   }
00480 
00481   //===--------------------------------------------------------------------===//
00482   // BlockInfo Block Emission
00483   //===--------------------------------------------------------------------===//
00484 
00485   /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
00486   void EnterBlockInfoBlock(unsigned CodeWidth) {
00487     EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth);
00488     BlockInfoCurBID = ~0U;
00489   }
00490 private:
00491   /// SwitchToBlockID - If we aren't already talking about the specified block
00492   /// ID, emit a BLOCKINFO_CODE_SETBID record.
00493   void SwitchToBlockID(unsigned BlockID) {
00494     if (BlockInfoCurBID == BlockID) return;
00495     SmallVector<unsigned, 2> V;
00496     V.push_back(BlockID);
00497     EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V);
00498     BlockInfoCurBID = BlockID;
00499   }
00500 
00501   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
00502     if (BlockInfo *BI = getBlockInfo(BlockID))
00503       return *BI;
00504 
00505     // Otherwise, add a new record.
00506     BlockInfoRecords.push_back(BlockInfo());
00507     BlockInfoRecords.back().BlockID = BlockID;
00508     return BlockInfoRecords.back();
00509   }
00510 
00511 public:
00512 
00513   /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
00514   /// BlockID.
00515   unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) {
00516     SwitchToBlockID(BlockID);
00517     EncodeAbbrev(Abbv);
00518 
00519     // Add the abbrev to the specified block record.
00520     BlockInfo &Info = getOrCreateBlockInfo(BlockID);
00521     Info.Abbrevs.push_back(Abbv);
00522 
00523     return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV;
00524   }
00525 };
00526 
00527 
00528 } // End llvm namespace
00529 
00530 #endif