LLVM API Documentation

DwarfAccelTable.h
Go to the documentation of this file.
00001 //==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains support for writing dwarf accelerator tables.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
00015 #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
00016 
00017 #include "DIE.h"
00018 #include "llvm/ADT/ArrayRef.h"
00019 #include "llvm/ADT/StringMap.h"
00020 #include "llvm/MC/MCSymbol.h"
00021 #include "llvm/Support/Compiler.h"
00022 #include "llvm/Support/DataTypes.h"
00023 #include "llvm/Support/Debug.h"
00024 #include "llvm/Support/Dwarf.h"
00025 #include "llvm/Support/ErrorHandling.h"
00026 #include "llvm/Support/Format.h"
00027 #include "llvm/Support/FormattedStream.h"
00028 #include <vector>
00029 
00030 // The dwarf accelerator tables are an indirect hash table optimized
00031 // for null lookup rather than access to known data. They are output into
00032 // an on-disk format that looks like this:
00033 //
00034 // .-------------.
00035 // |  HEADER     |
00036 // |-------------|
00037 // |  BUCKETS    |
00038 // |-------------|
00039 // |  HASHES     |
00040 // |-------------|
00041 // |  OFFSETS    |
00042 // |-------------|
00043 // |  DATA       |
00044 // `-------------'
00045 //
00046 // where the header contains a magic number, version, type of hash function,
00047 // the number of buckets, total number of hashes, and room for a special
00048 // struct of data and the length of that struct.
00049 //
00050 // The buckets contain an index (e.g. 6) into the hashes array. The hashes
00051 // section contains all of the 32-bit hash values in contiguous memory, and
00052 // the offsets contain the offset into the data area for the particular
00053 // hash.
00054 //
00055 // For a lookup example, we could hash a function name and take it modulo the
00056 // number of buckets giving us our bucket. From there we take the bucket value
00057 // as an index into the hashes table and look at each successive hash as long
00058 // as the hash value is still the same modulo result (bucket value) as earlier.
00059 // If we have a match we look at that same entry in the offsets table and
00060 // grab the offset in the data for our final match.
00061 
00062 namespace llvm {
00063 
00064 class AsmPrinter;
00065 class DwarfFile;
00066 
00067 class DwarfAccelTable {
00068 
00069   static uint32_t HashDJB(StringRef Str) {
00070     uint32_t h = 5381;
00071     for (unsigned i = 0, e = Str.size(); i != e; ++i)
00072       h = ((h << 5) + h) + Str[i];
00073     return h;
00074   }
00075 
00076   // Helper function to compute the number of buckets needed based on
00077   // the number of unique hashes.
00078   void ComputeBucketCount(void);
00079 
00080   struct TableHeader {
00081     uint32_t magic;           // 'HASH' magic value to allow endian detection
00082     uint16_t version;         // Version number.
00083     uint16_t hash_function;   // The hash function enumeration that was used.
00084     uint32_t bucket_count;    // The number of buckets in this hash table.
00085     uint32_t hashes_count;    // The total number of unique hash values
00086                               // and hash data offsets in this table.
00087     uint32_t header_data_len; // The bytes to skip to get to the hash
00088                               // indexes (buckets) for correct alignment.
00089     // Also written to disk is the implementation specific header data.
00090 
00091     static const uint32_t MagicHash = 0x48415348;
00092 
00093     TableHeader(uint32_t data_len)
00094         : magic(MagicHash), version(1),
00095           hash_function(dwarf::DW_hash_function_djb), bucket_count(0),
00096           hashes_count(0), header_data_len(data_len) {}
00097 
00098 #ifndef NDEBUG
00099     void print(raw_ostream &O) {
00100       O << "Magic: " << format("0x%x", magic) << "\n"
00101         << "Version: " << version << "\n"
00102         << "Hash Function: " << hash_function << "\n"
00103         << "Bucket Count: " << bucket_count << "\n"
00104         << "Header Data Length: " << header_data_len << "\n";
00105     }
00106     void dump() { print(dbgs()); }
00107 #endif
00108   };
00109 
00110 public:
00111   // The HeaderData describes the form of each set of data. In general this
00112   // is as a list of atoms (atom_count) where each atom contains a type
00113   // (AtomType type) of data, and an encoding form (form). In the case of
00114   // data that is referenced via DW_FORM_ref_* the die_offset_base is
00115   // used to describe the offset for all forms in the list of atoms.
00116   // This also serves as a public interface of sorts.
00117   // When written to disk this will have the form:
00118   //
00119   // uint32_t die_offset_base
00120   // uint32_t atom_count
00121   // atom_count Atoms
00122 
00123   // Make these public so that they can be used as a general interface to
00124   // the class.
00125   struct Atom {
00126     uint16_t type; // enum AtomType
00127     uint16_t form; // DWARF DW_FORM_ defines
00128 
00129     LLVM_CONSTEXPR Atom(uint16_t type, uint16_t form)
00130         : type(type), form(form) {}
00131 #ifndef NDEBUG
00132     void print(raw_ostream &O) {
00133       O << "Type: " << dwarf::AtomTypeString(type) << "\n"
00134         << "Form: " << dwarf::FormEncodingString(form) << "\n";
00135     }
00136     void dump() { print(dbgs()); }
00137 #endif
00138   };
00139 
00140 private:
00141   struct TableHeaderData {
00142     uint32_t die_offset_base;
00143     SmallVector<Atom, 3> Atoms;
00144 
00145     TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
00146         : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {}
00147 
00148 #ifndef NDEBUG
00149     void print(raw_ostream &O) {
00150       O << "die_offset_base: " << die_offset_base << "\n";
00151       for (size_t i = 0; i < Atoms.size(); i++)
00152         Atoms[i].print(O);
00153     }
00154     void dump() { print(dbgs()); }
00155 #endif
00156   };
00157 
00158   // The data itself consists of a str_offset, a count of the DIEs in the
00159   // hash and the offsets to the DIEs themselves.
00160   // On disk each data section is ended with a 0 KeyType as the end of the
00161   // hash chain.
00162   // On output this looks like:
00163   // uint32_t str_offset
00164   // uint32_t hash_data_count
00165   // HashData[hash_data_count]
00166 public:
00167   struct HashDataContents {
00168     const DIE *Die;   // Offsets
00169     char Flags; // Specific flags to output
00170 
00171     HashDataContents(const DIE *D, char Flags) : Die(D), Flags(Flags) {}
00172 #ifndef NDEBUG
00173     void print(raw_ostream &O) const {
00174       O << "  Offset: " << Die->getOffset() << "\n";
00175       O << "  Tag: " << dwarf::TagString(Die->getTag()) << "\n";
00176       O << "  Flags: " << Flags << "\n";
00177     }
00178 #endif
00179   };
00180 
00181 private:
00182   // String Data
00183   struct DataArray {
00184     MCSymbol *StrSym;
00185     std::vector<HashDataContents *> Values;
00186     DataArray() : StrSym(nullptr) {}
00187   };
00188   friend struct HashData;
00189   struct HashData {
00190     StringRef Str;
00191     uint32_t HashValue;
00192     MCSymbol *Sym;
00193     DwarfAccelTable::DataArray &Data; // offsets
00194     HashData(StringRef S, DwarfAccelTable::DataArray &Data)
00195         : Str(S), Data(Data) {
00196       HashValue = DwarfAccelTable::HashDJB(S);
00197     }
00198 #ifndef NDEBUG
00199     void print(raw_ostream &O) {
00200       O << "Name: " << Str << "\n";
00201       O << "  Hash Value: " << format("0x%x", HashValue) << "\n";
00202       O << "  Symbol: ";
00203       if (Sym)
00204         Sym->print(O);
00205       else
00206         O << "<none>";
00207       O << "\n";
00208       for (HashDataContents *C : Data.Values) {
00209         O << "  Offset: " << C->Die->getOffset() << "\n";
00210         O << "  Tag: " << dwarf::TagString(C->Die->getTag()) << "\n";
00211         O << "  Flags: " << C->Flags << "\n";
00212       }
00213     }
00214     void dump() { print(dbgs()); }
00215 #endif
00216   };
00217 
00218   DwarfAccelTable(const DwarfAccelTable &) LLVM_DELETED_FUNCTION;
00219   void operator=(const DwarfAccelTable &) LLVM_DELETED_FUNCTION;
00220 
00221   // Internal Functions
00222   void EmitHeader(AsmPrinter *);
00223   void EmitBuckets(AsmPrinter *);
00224   void EmitHashes(AsmPrinter *);
00225   void EmitOffsets(AsmPrinter *, MCSymbol *);
00226   void EmitData(AsmPrinter *, DwarfFile *D, MCSymbol *StrSym);
00227 
00228   // Allocator for HashData and HashDataContents.
00229   BumpPtrAllocator Allocator;
00230 
00231   // Output Variables
00232   TableHeader Header;
00233   TableHeaderData HeaderData;
00234   std::vector<HashData *> Data;
00235 
00236   typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries;
00237   StringEntries Entries;
00238 
00239   // Buckets/Hashes/Offsets
00240   typedef std::vector<HashData *> HashList;
00241   typedef std::vector<HashList> BucketList;
00242   BucketList Buckets;
00243   HashList Hashes;
00244 
00245   // Public Implementation
00246 public:
00247   DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
00248   void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die,
00249                char Flags = 0);
00250   void FinalizeTable(AsmPrinter *, StringRef);
00251   void Emit(AsmPrinter *, MCSymbol *, DwarfFile *, MCSymbol *StrSym);
00252 #ifndef NDEBUG
00253   void print(raw_ostream &O);
00254   void dump() { print(dbgs()); }
00255 #endif
00256 };
00257 }
00258 #endif