LLVM API Documentation
00001 //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This header defines the BitstreamWriter class. This class can be used to 00011 // write an arbitrary bitstream, regardless of its contents. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #ifndef LLVM_BITCODE_BITSTREAMWRITER_H 00016 #define LLVM_BITCODE_BITSTREAMWRITER_H 00017 00018 #include "llvm/ADT/SmallVector.h" 00019 #include "llvm/ADT/StringRef.h" 00020 #include "llvm/Bitcode/BitCodes.h" 00021 #include <vector> 00022 00023 namespace llvm { 00024 00025 class BitstreamWriter { 00026 SmallVectorImpl<char> &Out; 00027 00028 /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use. 00029 unsigned CurBit; 00030 00031 /// CurValue - The current value. Only bits < CurBit are valid. 00032 uint32_t CurValue; 00033 00034 /// CurCodeSize - This is the declared size of code values used for the 00035 /// current block, in bits. 00036 unsigned CurCodeSize; 00037 00038 /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently 00039 /// selected BLOCK ID. 00040 unsigned BlockInfoCurBID; 00041 00042 /// CurAbbrevs - Abbrevs installed at in this block. 00043 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs; 00044 00045 struct Block { 00046 unsigned PrevCodeSize; 00047 unsigned StartSizeWord; 00048 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs; 00049 Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {} 00050 }; 00051 00052 /// BlockScope - This tracks the current blocks that we have entered. 00053 std::vector<Block> BlockScope; 00054 00055 /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. 00056 /// These describe abbreviations that all blocks of the specified ID inherit. 00057 struct BlockInfo { 00058 unsigned BlockID; 00059 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs; 00060 }; 00061 std::vector<BlockInfo> BlockInfoRecords; 00062 00063 // BackpatchWord - Backpatch a 32-bit word in the output with the specified 00064 // value. 00065 void BackpatchWord(unsigned ByteNo, unsigned NewWord) { 00066 Out[ByteNo++] = (unsigned char)(NewWord >> 0); 00067 Out[ByteNo++] = (unsigned char)(NewWord >> 8); 00068 Out[ByteNo++] = (unsigned char)(NewWord >> 16); 00069 Out[ByteNo ] = (unsigned char)(NewWord >> 24); 00070 } 00071 00072 void WriteByte(unsigned char Value) { 00073 Out.push_back(Value); 00074 } 00075 00076 void WriteWord(unsigned Value) { 00077 unsigned char Bytes[4] = { 00078 (unsigned char)(Value >> 0), 00079 (unsigned char)(Value >> 8), 00080 (unsigned char)(Value >> 16), 00081 (unsigned char)(Value >> 24) }; 00082 Out.append(&Bytes[0], &Bytes[4]); 00083 } 00084 00085 unsigned GetBufferOffset() const { 00086 return Out.size(); 00087 } 00088 00089 unsigned GetWordIndex() const { 00090 unsigned Offset = GetBufferOffset(); 00091 assert((Offset & 3) == 0 && "Not 32-bit aligned"); 00092 return Offset / 4; 00093 } 00094 00095 public: 00096 explicit BitstreamWriter(SmallVectorImpl<char> &O) 00097 : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {} 00098 00099 ~BitstreamWriter() { 00100 assert(CurBit == 0 && "Unflushed data remaining"); 00101 assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance"); 00102 } 00103 00104 /// \brief Retrieve the current position in the stream, in bits. 00105 uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; } 00106 00107 //===--------------------------------------------------------------------===// 00108 // Basic Primitives for emitting bits to the stream. 00109 //===--------------------------------------------------------------------===// 00110 00111 void Emit(uint32_t Val, unsigned NumBits) { 00112 assert(NumBits && NumBits <= 32 && "Invalid value size!"); 00113 assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); 00114 CurValue |= Val << CurBit; 00115 if (CurBit + NumBits < 32) { 00116 CurBit += NumBits; 00117 return; 00118 } 00119 00120 // Add the current word. 00121 WriteWord(CurValue); 00122 00123 if (CurBit) 00124 CurValue = Val >> (32-CurBit); 00125 else 00126 CurValue = 0; 00127 CurBit = (CurBit+NumBits) & 31; 00128 } 00129 00130 void Emit64(uint64_t Val, unsigned NumBits) { 00131 if (NumBits <= 32) 00132 Emit((uint32_t)Val, NumBits); 00133 else { 00134 Emit((uint32_t)Val, 32); 00135 Emit((uint32_t)(Val >> 32), NumBits-32); 00136 } 00137 } 00138 00139 void FlushToWord() { 00140 if (CurBit) { 00141 WriteWord(CurValue); 00142 CurBit = 0; 00143 CurValue = 0; 00144 } 00145 } 00146 00147 void EmitVBR(uint32_t Val, unsigned NumBits) { 00148 assert(NumBits <= 32 && "Too many bits to emit!"); 00149 uint32_t Threshold = 1U << (NumBits-1); 00150 00151 // Emit the bits with VBR encoding, NumBits-1 bits at a time. 00152 while (Val >= Threshold) { 00153 Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits); 00154 Val >>= NumBits-1; 00155 } 00156 00157 Emit(Val, NumBits); 00158 } 00159 00160 void EmitVBR64(uint64_t Val, unsigned NumBits) { 00161 assert(NumBits <= 32 && "Too many bits to emit!"); 00162 if ((uint32_t)Val == Val) 00163 return EmitVBR((uint32_t)Val, NumBits); 00164 00165 uint32_t Threshold = 1U << (NumBits-1); 00166 00167 // Emit the bits with VBR encoding, NumBits-1 bits at a time. 00168 while (Val >= Threshold) { 00169 Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) | 00170 (1 << (NumBits-1)), NumBits); 00171 Val >>= NumBits-1; 00172 } 00173 00174 Emit((uint32_t)Val, NumBits); 00175 } 00176 00177 /// EmitCode - Emit the specified code. 00178 void EmitCode(unsigned Val) { 00179 Emit(Val, CurCodeSize); 00180 } 00181 00182 //===--------------------------------------------------------------------===// 00183 // Block Manipulation 00184 //===--------------------------------------------------------------------===// 00185 00186 /// getBlockInfo - If there is block info for the specified ID, return it, 00187 /// otherwise return null. 00188 BlockInfo *getBlockInfo(unsigned BlockID) { 00189 // Common case, the most recent entry matches BlockID. 00190 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 00191 return &BlockInfoRecords.back(); 00192 00193 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 00194 i != e; ++i) 00195 if (BlockInfoRecords[i].BlockID == BlockID) 00196 return &BlockInfoRecords[i]; 00197 return nullptr; 00198 } 00199 00200 void EnterSubblock(unsigned BlockID, unsigned CodeLen) { 00201 // Block header: 00202 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 00203 EmitCode(bitc::ENTER_SUBBLOCK); 00204 EmitVBR(BlockID, bitc::BlockIDWidth); 00205 EmitVBR(CodeLen, bitc::CodeLenWidth); 00206 FlushToWord(); 00207 00208 unsigned BlockSizeWordIndex = GetWordIndex(); 00209 unsigned OldCodeSize = CurCodeSize; 00210 00211 // Emit a placeholder, which will be replaced when the block is popped. 00212 Emit(0, bitc::BlockSizeWidth); 00213 00214 CurCodeSize = CodeLen; 00215 00216 // Push the outer block's abbrev set onto the stack, start out with an 00217 // empty abbrev set. 00218 BlockScope.push_back(Block(OldCodeSize, BlockSizeWordIndex)); 00219 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 00220 00221 // If there is a blockinfo for this BlockID, add all the predefined abbrevs 00222 // to the abbrev list. 00223 if (BlockInfo *Info = getBlockInfo(BlockID)) { 00224 CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(), 00225 Info->Abbrevs.end()); 00226 } 00227 } 00228 00229 void ExitBlock() { 00230 assert(!BlockScope.empty() && "Block scope imbalance!"); 00231 const Block &B = BlockScope.back(); 00232 00233 // Block tail: 00234 // [END_BLOCK, <align4bytes>] 00235 EmitCode(bitc::END_BLOCK); 00236 FlushToWord(); 00237 00238 // Compute the size of the block, in words, not counting the size field. 00239 unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1; 00240 unsigned ByteNo = B.StartSizeWord*4; 00241 00242 // Update the block size field in the header of this sub-block. 00243 BackpatchWord(ByteNo, SizeInWords); 00244 00245 // Restore the inner block's code size and abbrev table. 00246 CurCodeSize = B.PrevCodeSize; 00247 CurAbbrevs = std::move(B.PrevAbbrevs); 00248 BlockScope.pop_back(); 00249 } 00250 00251 //===--------------------------------------------------------------------===// 00252 // Record Emission 00253 //===--------------------------------------------------------------------===// 00254 00255 private: 00256 /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev 00257 /// record. This is a no-op, since the abbrev specifies the literal to use. 00258 template<typename uintty> 00259 void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) { 00260 assert(Op.isLiteral() && "Not a literal"); 00261 // If the abbrev specifies the literal value to use, don't emit 00262 // anything. 00263 assert(V == Op.getLiteralValue() && 00264 "Invalid abbrev for record!"); 00265 } 00266 00267 /// EmitAbbreviatedField - Emit a single scalar field value with the specified 00268 /// encoding. 00269 template<typename uintty> 00270 void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) { 00271 assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!"); 00272 00273 // Encode the value as we are commanded. 00274 switch (Op.getEncoding()) { 00275 default: llvm_unreachable("Unknown encoding!"); 00276 case BitCodeAbbrevOp::Fixed: 00277 if (Op.getEncodingData()) 00278 Emit((unsigned)V, (unsigned)Op.getEncodingData()); 00279 break; 00280 case BitCodeAbbrevOp::VBR: 00281 if (Op.getEncodingData()) 00282 EmitVBR64(V, (unsigned)Op.getEncodingData()); 00283 break; 00284 case BitCodeAbbrevOp::Char6: 00285 Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6); 00286 break; 00287 } 00288 } 00289 00290 /// EmitRecordWithAbbrevImpl - This is the core implementation of the record 00291 /// emission code. If BlobData is non-null, then it specifies an array of 00292 /// data that should be emitted as part of the Blob or Array operand that is 00293 /// known to exist at the end of the record. 00294 template<typename uintty> 00295 void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 00296 StringRef Blob) { 00297 const char *BlobData = Blob.data(); 00298 unsigned BlobLen = (unsigned) Blob.size(); 00299 unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV; 00300 assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); 00301 const BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo].get(); 00302 00303 EmitCode(Abbrev); 00304 00305 unsigned RecordIdx = 0; 00306 for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); 00307 i != e; ++i) { 00308 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 00309 if (Op.isLiteral()) { 00310 assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); 00311 EmitAbbreviatedLiteral(Op, Vals[RecordIdx]); 00312 ++RecordIdx; 00313 } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 00314 // Array case. 00315 assert(i+2 == e && "array op not second to last?"); 00316 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 00317 00318 // If this record has blob data, emit it, otherwise we must have record 00319 // entries to encode this way. 00320 if (BlobData) { 00321 assert(RecordIdx == Vals.size() && 00322 "Blob data and record entries specified for array!"); 00323 // Emit a vbr6 to indicate the number of elements present. 00324 EmitVBR(static_cast<uint32_t>(BlobLen), 6); 00325 00326 // Emit each field. 00327 for (unsigned i = 0; i != BlobLen; ++i) 00328 EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]); 00329 00330 // Know that blob data is consumed for assertion below. 00331 BlobData = nullptr; 00332 } else { 00333 // Emit a vbr6 to indicate the number of elements present. 00334 EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); 00335 00336 // Emit each field. 00337 for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) 00338 EmitAbbreviatedField(EltEnc, Vals[RecordIdx]); 00339 } 00340 } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) { 00341 // If this record has blob data, emit it, otherwise we must have record 00342 // entries to encode this way. 00343 00344 // Emit a vbr6 to indicate the number of elements present. 00345 if (BlobData) { 00346 EmitVBR(static_cast<uint32_t>(BlobLen), 6); 00347 assert(RecordIdx == Vals.size() && 00348 "Blob data and record entries specified for blob operand!"); 00349 } else { 00350 EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); 00351 } 00352 00353 // Flush to a 32-bit alignment boundary. 00354 FlushToWord(); 00355 00356 // Emit each field as a literal byte. 00357 if (BlobData) { 00358 for (unsigned i = 0; i != BlobLen; ++i) 00359 WriteByte((unsigned char)BlobData[i]); 00360 00361 // Know that blob data is consumed for assertion below. 00362 BlobData = nullptr; 00363 } else { 00364 for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) { 00365 assert(isUInt<8>(Vals[RecordIdx]) && 00366 "Value too large to emit as blob"); 00367 WriteByte((unsigned char)Vals[RecordIdx]); 00368 } 00369 } 00370 00371 // Align end to 32-bits. 00372 while (GetBufferOffset() & 3) 00373 WriteByte(0); 00374 } else { // Single scalar field. 00375 assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); 00376 EmitAbbreviatedField(Op, Vals[RecordIdx]); 00377 ++RecordIdx; 00378 } 00379 } 00380 assert(RecordIdx == Vals.size() && "Not all record operands emitted!"); 00381 assert(BlobData == nullptr && 00382 "Blob data specified for record that doesn't use it!"); 00383 } 00384 00385 public: 00386 00387 /// EmitRecord - Emit the specified record to the stream, using an abbrev if 00388 /// we have one to compress the output. 00389 template<typename uintty> 00390 void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals, 00391 unsigned Abbrev = 0) { 00392 if (!Abbrev) { 00393 // If we don't have an abbrev to use, emit this in its fully unabbreviated 00394 // form. 00395 EmitCode(bitc::UNABBREV_RECORD); 00396 EmitVBR(Code, 6); 00397 EmitVBR(static_cast<uint32_t>(Vals.size()), 6); 00398 for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i) 00399 EmitVBR64(Vals[i], 6); 00400 return; 00401 } 00402 00403 // Insert the code into Vals to treat it uniformly. 00404 Vals.insert(Vals.begin(), Code); 00405 00406 EmitRecordWithAbbrev(Abbrev, Vals); 00407 } 00408 00409 /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation. 00410 /// Unlike EmitRecord, the code for the record should be included in Vals as 00411 /// the first entry. 00412 template<typename uintty> 00413 void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) { 00414 EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef()); 00415 } 00416 00417 /// EmitRecordWithBlob - Emit the specified record to the stream, using an 00418 /// abbrev that includes a blob at the end. The blob data to emit is 00419 /// specified by the pointer and length specified at the end. In contrast to 00420 /// EmitRecord, this routine expects that the first entry in Vals is the code 00421 /// of the record. 00422 template<typename uintty> 00423 void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 00424 StringRef Blob) { 00425 EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob); 00426 } 00427 template<typename uintty> 00428 void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 00429 const char *BlobData, unsigned BlobLen) { 00430 return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen)); 00431 } 00432 00433 /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records 00434 /// that end with an array. 00435 template<typename uintty> 00436 void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 00437 StringRef Array) { 00438 EmitRecordWithAbbrevImpl(Abbrev, Vals, Array); 00439 } 00440 template<typename uintty> 00441 void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 00442 const char *ArrayData, unsigned ArrayLen) { 00443 return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, 00444 ArrayLen)); 00445 } 00446 00447 //===--------------------------------------------------------------------===// 00448 // Abbrev Emission 00449 //===--------------------------------------------------------------------===// 00450 00451 private: 00452 // Emit the abbreviation as a DEFINE_ABBREV record. 00453 void EncodeAbbrev(BitCodeAbbrev *Abbv) { 00454 EmitCode(bitc::DEFINE_ABBREV); 00455 EmitVBR(Abbv->getNumOperandInfos(), 5); 00456 for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); 00457 i != e; ++i) { 00458 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 00459 Emit(Op.isLiteral(), 1); 00460 if (Op.isLiteral()) { 00461 EmitVBR64(Op.getLiteralValue(), 8); 00462 } else { 00463 Emit(Op.getEncoding(), 3); 00464 if (Op.hasEncodingData()) 00465 EmitVBR64(Op.getEncodingData(), 5); 00466 } 00467 } 00468 } 00469 public: 00470 00471 /// EmitAbbrev - This emits an abbreviation to the stream. Note that this 00472 /// method takes ownership of the specified abbrev. 00473 unsigned EmitAbbrev(BitCodeAbbrev *Abbv) { 00474 // Emit the abbreviation as a record. 00475 EncodeAbbrev(Abbv); 00476 CurAbbrevs.push_back(Abbv); 00477 return static_cast<unsigned>(CurAbbrevs.size())-1 + 00478 bitc::FIRST_APPLICATION_ABBREV; 00479 } 00480 00481 //===--------------------------------------------------------------------===// 00482 // BlockInfo Block Emission 00483 //===--------------------------------------------------------------------===// 00484 00485 /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK. 00486 void EnterBlockInfoBlock(unsigned CodeWidth) { 00487 EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth); 00488 BlockInfoCurBID = ~0U; 00489 } 00490 private: 00491 /// SwitchToBlockID - If we aren't already talking about the specified block 00492 /// ID, emit a BLOCKINFO_CODE_SETBID record. 00493 void SwitchToBlockID(unsigned BlockID) { 00494 if (BlockInfoCurBID == BlockID) return; 00495 SmallVector<unsigned, 2> V; 00496 V.push_back(BlockID); 00497 EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V); 00498 BlockInfoCurBID = BlockID; 00499 } 00500 00501 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 00502 if (BlockInfo *BI = getBlockInfo(BlockID)) 00503 return *BI; 00504 00505 // Otherwise, add a new record. 00506 BlockInfoRecords.push_back(BlockInfo()); 00507 BlockInfoRecords.back().BlockID = BlockID; 00508 return BlockInfoRecords.back(); 00509 } 00510 00511 public: 00512 00513 /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified 00514 /// BlockID. 00515 unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) { 00516 SwitchToBlockID(BlockID); 00517 EncodeAbbrev(Abbv); 00518 00519 // Add the abbrev to the specified block record. 00520 BlockInfo &Info = getOrCreateBlockInfo(BlockID); 00521 Info.Abbrevs.push_back(Abbv); 00522 00523 return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV; 00524 } 00525 }; 00526 00527 00528 } // End llvm namespace 00529 00530 #endif