LLVM API Documentation
00001 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 00010 #include "llvm/MC/MCMachObjectWriter.h" 00011 #include "llvm/ADT/StringMap.h" 00012 #include "llvm/ADT/Twine.h" 00013 #include "llvm/MC/MCAsmBackend.h" 00014 #include "llvm/MC/MCAsmLayout.h" 00015 #include "llvm/MC/MCAssembler.h" 00016 #include "llvm/MC/MCExpr.h" 00017 #include "llvm/MC/MCFixupKindInfo.h" 00018 #include "llvm/MC/MCMachOSymbolFlags.h" 00019 #include "llvm/MC/MCObjectWriter.h" 00020 #include "llvm/MC/MCSectionMachO.h" 00021 #include "llvm/MC/MCSymbol.h" 00022 #include "llvm/MC/MCValue.h" 00023 #include "llvm/Support/Debug.h" 00024 #include "llvm/Support/ErrorHandling.h" 00025 #include "llvm/Support/MachO.h" 00026 #include <vector> 00027 using namespace llvm; 00028 00029 #define DEBUG_TYPE "mc" 00030 00031 void MachObjectWriter::reset() { 00032 Relocations.clear(); 00033 IndirectSymBase.clear(); 00034 StringTable.clear(); 00035 LocalSymbolData.clear(); 00036 ExternalSymbolData.clear(); 00037 UndefinedSymbolData.clear(); 00038 MCObjectWriter::reset(); 00039 } 00040 00041 bool MachObjectWriter:: 00042 doesSymbolRequireExternRelocation(const MCSymbolData *SD) { 00043 // Undefined symbols are always extern. 00044 if (SD->Symbol->isUndefined()) 00045 return true; 00046 00047 // References to weak definitions require external relocation entries; the 00048 // definition may not always be the one in the same object file. 00049 if (SD->getFlags() & SF_WeakDefinition) 00050 return true; 00051 00052 // Otherwise, we can use an internal relocation. 
00053 return false; 00054 } 00055 00056 bool MachObjectWriter:: 00057 MachSymbolData::operator<(const MachSymbolData &RHS) const { 00058 return SymbolData->getSymbol().getName() < 00059 RHS.SymbolData->getSymbol().getName(); 00060 } 00061 00062 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { 00063 const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( 00064 (MCFixupKind) Kind); 00065 00066 return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; 00067 } 00068 00069 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment, 00070 const MCAsmLayout &Layout) const { 00071 return getSectionAddress(Fragment->getParent()) + 00072 Layout.getFragmentOffset(Fragment); 00073 } 00074 00075 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD, 00076 const MCAsmLayout &Layout) const { 00077 const MCSymbol &S = SD->getSymbol(); 00078 00079 // If this is a variable, then recursively evaluate now. 00080 if (S.isVariable()) { 00081 if (const MCConstantExpr *C = 00082 dyn_cast<const MCConstantExpr>(S.getVariableValue())) 00083 return C->getValue(); 00084 00085 00086 MCValue Target; 00087 if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout, nullptr)) 00088 report_fatal_error("unable to evaluate offset for variable '" + 00089 S.getName() + "'"); 00090 00091 // Verify that any used symbols are defined. 
00092 if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) 00093 report_fatal_error("unable to evaluate offset to undefined symbol '" + 00094 Target.getSymA()->getSymbol().getName() + "'"); 00095 if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) 00096 report_fatal_error("unable to evaluate offset to undefined symbol '" + 00097 Target.getSymB()->getSymbol().getName() + "'"); 00098 00099 uint64_t Address = Target.getConstant(); 00100 if (Target.getSymA()) 00101 Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( 00102 Target.getSymA()->getSymbol()), Layout); 00103 if (Target.getSymB()) 00104 Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( 00105 Target.getSymB()->getSymbol()), Layout); 00106 return Address; 00107 } 00108 00109 return getSectionAddress(SD->getFragment()->getParent()) + 00110 Layout.getSymbolOffset(SD); 00111 } 00112 00113 uint64_t MachObjectWriter::getPaddingSize(const MCSectionData *SD, 00114 const MCAsmLayout &Layout) const { 00115 uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD); 00116 unsigned Next = SD->getLayoutOrder() + 1; 00117 if (Next >= Layout.getSectionOrder().size()) 00118 return 0; 00119 00120 const MCSectionData &NextSD = *Layout.getSectionOrder()[Next]; 00121 if (NextSD.getSection().isVirtualSection()) 00122 return 0; 00123 return OffsetToAlignment(EndAddr, NextSD.getAlignment()); 00124 } 00125 00126 void MachObjectWriter::WriteHeader(unsigned NumLoadCommands, 00127 unsigned LoadCommandsSize, 00128 bool SubsectionsViaSymbols) { 00129 uint32_t Flags = 0; 00130 00131 if (SubsectionsViaSymbols) 00132 Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS; 00133 00134 // struct mach_header (28 bytes) or 00135 // struct mach_header_64 (32 bytes) 00136 00137 uint64_t Start = OS.tell(); 00138 (void) Start; 00139 00140 Write32(is64Bit() ? 
MachO::MH_MAGIC_64 : MachO::MH_MAGIC); 00141 00142 Write32(TargetObjectWriter->getCPUType()); 00143 Write32(TargetObjectWriter->getCPUSubtype()); 00144 00145 Write32(MachO::MH_OBJECT); 00146 Write32(NumLoadCommands); 00147 Write32(LoadCommandsSize); 00148 Write32(Flags); 00149 if (is64Bit()) 00150 Write32(0); // reserved 00151 00152 assert(OS.tell() - Start == 00153 (is64Bit()?sizeof(MachO::mach_header_64): sizeof(MachO::mach_header))); 00154 } 00155 00156 /// WriteSegmentLoadCommand - Write a segment load command. 00157 /// 00158 /// \param NumSections The number of sections in this segment. 00159 /// \param SectionDataSize The total size of the sections. 00160 void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections, 00161 uint64_t VMSize, 00162 uint64_t SectionDataStartOffset, 00163 uint64_t SectionDataSize) { 00164 // struct segment_command (56 bytes) or 00165 // struct segment_command_64 (72 bytes) 00166 00167 uint64_t Start = OS.tell(); 00168 (void) Start; 00169 00170 unsigned SegmentLoadCommandSize = 00171 is64Bit() ? sizeof(MachO::segment_command_64): 00172 sizeof(MachO::segment_command); 00173 Write32(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT); 00174 Write32(SegmentLoadCommandSize + 00175 NumSections * (is64Bit() ? 
sizeof(MachO::section_64) : 00176 sizeof(MachO::section))); 00177 00178 WriteBytes("", 16); 00179 if (is64Bit()) { 00180 Write64(0); // vmaddr 00181 Write64(VMSize); // vmsize 00182 Write64(SectionDataStartOffset); // file offset 00183 Write64(SectionDataSize); // file size 00184 } else { 00185 Write32(0); // vmaddr 00186 Write32(VMSize); // vmsize 00187 Write32(SectionDataStartOffset); // file offset 00188 Write32(SectionDataSize); // file size 00189 } 00190 // maxprot 00191 Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 00192 // initprot 00193 Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 00194 Write32(NumSections); 00195 Write32(0); // flags 00196 00197 assert(OS.tell() - Start == SegmentLoadCommandSize); 00198 } 00199 00200 void MachObjectWriter::WriteSection(const MCAssembler &Asm, 00201 const MCAsmLayout &Layout, 00202 const MCSectionData &SD, 00203 uint64_t FileOffset, 00204 uint64_t RelocationsStart, 00205 unsigned NumRelocations) { 00206 uint64_t SectionSize = Layout.getSectionAddressSize(&SD); 00207 00208 // The offset is unused for virtual sections. 
00209 if (SD.getSection().isVirtualSection()) { 00210 assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); 00211 FileOffset = 0; 00212 } 00213 00214 // struct section (68 bytes) or 00215 // struct section_64 (80 bytes) 00216 00217 uint64_t Start = OS.tell(); 00218 (void) Start; 00219 00220 const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection()); 00221 WriteBytes(Section.getSectionName(), 16); 00222 WriteBytes(Section.getSegmentName(), 16); 00223 if (is64Bit()) { 00224 Write64(getSectionAddress(&SD)); // address 00225 Write64(SectionSize); // size 00226 } else { 00227 Write32(getSectionAddress(&SD)); // address 00228 Write32(SectionSize); // size 00229 } 00230 Write32(FileOffset); 00231 00232 unsigned Flags = Section.getTypeAndAttributes(); 00233 if (SD.hasInstructions()) 00234 Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS; 00235 00236 assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); 00237 Write32(Log2_32(SD.getAlignment())); 00238 Write32(NumRelocations ? RelocationsStart : 0); 00239 Write32(NumRelocations); 00240 Write32(Flags); 00241 Write32(IndirectSymBase.lookup(&SD)); // reserved1 00242 Write32(Section.getStubSize()); // reserved2 00243 if (is64Bit()) 00244 Write32(0); // reserved3 00245 00246 assert(OS.tell() - Start == (is64Bit() ? 
sizeof(MachO::section_64) : 00247 sizeof(MachO::section))); 00248 } 00249 00250 void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset, 00251 uint32_t NumSymbols, 00252 uint32_t StringTableOffset, 00253 uint32_t StringTableSize) { 00254 // struct symtab_command (24 bytes) 00255 00256 uint64_t Start = OS.tell(); 00257 (void) Start; 00258 00259 Write32(MachO::LC_SYMTAB); 00260 Write32(sizeof(MachO::symtab_command)); 00261 Write32(SymbolOffset); 00262 Write32(NumSymbols); 00263 Write32(StringTableOffset); 00264 Write32(StringTableSize); 00265 00266 assert(OS.tell() - Start == sizeof(MachO::symtab_command)); 00267 } 00268 00269 void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, 00270 uint32_t NumLocalSymbols, 00271 uint32_t FirstExternalSymbol, 00272 uint32_t NumExternalSymbols, 00273 uint32_t FirstUndefinedSymbol, 00274 uint32_t NumUndefinedSymbols, 00275 uint32_t IndirectSymbolOffset, 00276 uint32_t NumIndirectSymbols) { 00277 // struct dysymtab_command (80 bytes) 00278 00279 uint64_t Start = OS.tell(); 00280 (void) Start; 00281 00282 Write32(MachO::LC_DYSYMTAB); 00283 Write32(sizeof(MachO::dysymtab_command)); 00284 Write32(FirstLocalSymbol); 00285 Write32(NumLocalSymbols); 00286 Write32(FirstExternalSymbol); 00287 Write32(NumExternalSymbols); 00288 Write32(FirstUndefinedSymbol); 00289 Write32(NumUndefinedSymbols); 00290 Write32(0); // tocoff 00291 Write32(0); // ntoc 00292 Write32(0); // modtaboff 00293 Write32(0); // nmodtab 00294 Write32(0); // extrefsymoff 00295 Write32(0); // nextrefsyms 00296 Write32(IndirectSymbolOffset); 00297 Write32(NumIndirectSymbols); 00298 Write32(0); // extreloff 00299 Write32(0); // nextrel 00300 Write32(0); // locreloff 00301 Write32(0); // nlocrel 00302 00303 assert(OS.tell() - Start == sizeof(MachO::dysymtab_command)); 00304 } 00305 00306 MachObjectWriter::MachSymbolData * 00307 MachObjectWriter::findSymbolData(const MCSymbol &Sym) { 00308 for (auto &Entry : LocalSymbolData) 00309 if 
(&Entry.SymbolData->getSymbol() == &Sym) 00310 return &Entry; 00311 00312 for (auto &Entry : ExternalSymbolData) 00313 if (&Entry.SymbolData->getSymbol() == &Sym) 00314 return &Entry; 00315 00316 for (auto &Entry : UndefinedSymbolData) 00317 if (&Entry.SymbolData->getSymbol() == &Sym) 00318 return &Entry; 00319 00320 return nullptr; 00321 } 00322 00323 void MachObjectWriter::WriteNlist(MachSymbolData &MSD, 00324 const MCAsmLayout &Layout) { 00325 MCSymbolData &Data = *MSD.SymbolData; 00326 const MCSymbol *Symbol = &Data.getSymbol(); 00327 const MCSymbol *AliasedSymbol = &Symbol->AliasedSymbol(); 00328 uint8_t SectionIndex = MSD.SectionIndex; 00329 uint8_t Type = 0; 00330 uint16_t Flags = Data.getFlags(); 00331 uint64_t Address = 0; 00332 bool IsAlias = Symbol != AliasedSymbol; 00333 00334 MachSymbolData *AliaseeInfo; 00335 if (IsAlias) { 00336 AliaseeInfo = findSymbolData(*AliasedSymbol); 00337 if (AliaseeInfo) 00338 SectionIndex = AliaseeInfo->SectionIndex; 00339 Symbol = AliasedSymbol; 00340 } 00341 00342 // Set the N_TYPE bits. See <mach-o/nlist.h>. 00343 // 00344 // FIXME: Are the prebound or indirect fields possible here? 00345 if (IsAlias && Symbol->isUndefined()) 00346 Type = MachO::N_INDR; 00347 else if (Symbol->isUndefined()) 00348 Type = MachO::N_UNDF; 00349 else if (Symbol->isAbsolute()) 00350 Type = MachO::N_ABS; 00351 else 00352 Type = MachO::N_SECT; 00353 00354 // FIXME: Set STAB bits. 00355 00356 if (Data.isPrivateExtern()) 00357 Type |= MachO::N_PEXT; 00358 00359 // Set external bit. 00360 if (Data.isExternal() || (!IsAlias && Symbol->isUndefined())) 00361 Type |= MachO::N_EXT; 00362 00363 // Compute the symbol address. 00364 if (IsAlias && Symbol->isUndefined()) 00365 Address = AliaseeInfo->StringIndex; 00366 else if (Symbol->isDefined()) 00367 Address = getSymbolAddress(&Data, Layout); 00368 else if (Data.isCommon()) { 00369 // Common symbols are encoded with the size in the address 00370 // field, and their alignment in the flags. 
00371 Address = Data.getCommonSize(); 00372 00373 // Common alignment is packed into the 'desc' bits. 00374 if (unsigned Align = Data.getCommonAlignment()) { 00375 unsigned Log2Size = Log2_32(Align); 00376 assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); 00377 if (Log2Size > 15) 00378 report_fatal_error("invalid 'common' alignment '" + 00379 Twine(Align) + "' for '" + Symbol->getName() + "'", 00380 false); 00381 // FIXME: Keep this mask with the SymbolFlags enumeration. 00382 Flags = (Flags & 0xF0FF) | (Log2Size << 8); 00383 } 00384 } 00385 00386 if (Layout.getAssembler().isThumbFunc(Symbol)) 00387 Flags |= SF_ThumbFunc; 00388 00389 // struct nlist (12 bytes) 00390 00391 Write32(MSD.StringIndex); 00392 Write8(Type); 00393 Write8(SectionIndex); 00394 00395 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' 00396 // value. 00397 Write16(Flags); 00398 if (is64Bit()) 00399 Write64(Address); 00400 else 00401 Write32(Address); 00402 } 00403 00404 void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type, 00405 uint32_t DataOffset, 00406 uint32_t DataSize) { 00407 uint64_t Start = OS.tell(); 00408 (void) Start; 00409 00410 Write32(Type); 00411 Write32(sizeof(MachO::linkedit_data_command)); 00412 Write32(DataOffset); 00413 Write32(DataSize); 00414 00415 assert(OS.tell() - Start == sizeof(MachO::linkedit_data_command)); 00416 } 00417 00418 static unsigned ComputeLinkerOptionsLoadCommandSize( 00419 const std::vector<std::string> &Options, bool is64Bit) 00420 { 00421 unsigned Size = sizeof(MachO::linker_options_command); 00422 for (unsigned i = 0, e = Options.size(); i != e; ++i) 00423 Size += Options[i].size() + 1; 00424 return RoundUpToAlignment(Size, is64Bit ? 
8 : 4); 00425 } 00426 00427 void MachObjectWriter::WriteLinkerOptionsLoadCommand( 00428 const std::vector<std::string> &Options) 00429 { 00430 unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit()); 00431 uint64_t Start = OS.tell(); 00432 (void) Start; 00433 00434 Write32(MachO::LC_LINKER_OPTIONS); 00435 Write32(Size); 00436 Write32(Options.size()); 00437 uint64_t BytesWritten = sizeof(MachO::linker_options_command); 00438 for (unsigned i = 0, e = Options.size(); i != e; ++i) { 00439 // Write each string, including the null byte. 00440 const std::string &Option = Options[i]; 00441 WriteBytes(Option.c_str(), Option.size() + 1); 00442 BytesWritten += Option.size() + 1; 00443 } 00444 00445 // Pad to a multiple of the pointer size. 00446 WriteBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4)); 00447 00448 assert(OS.tell() - Start == Size); 00449 } 00450 00451 00452 void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, 00453 const MCAsmLayout &Layout, 00454 const MCFragment *Fragment, 00455 const MCFixup &Fixup, 00456 MCValue Target, 00457 bool &IsPCRel, 00458 uint64_t &FixedValue) { 00459 TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup, 00460 Target, FixedValue); 00461 } 00462 00463 void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { 00464 // This is the point where 'as' creates actual symbols for indirect symbols 00465 // (in the following two passes). It would be easier for us to do this sooner 00466 // when we see the attribute, but that makes getting the order in the symbol 00467 // table much more complicated than it is worth. 00468 // 00469 // FIXME: Revisit this when the dust settles. 00470 00471 // Report errors for use of .indirect_symbol not in a symbol pointer section 00472 // or stub section. 
00473 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 00474 ie = Asm.indirect_symbol_end(); it != ie; ++it) { 00475 const MCSectionMachO &Section = 00476 cast<MCSectionMachO>(it->SectionData->getSection()); 00477 00478 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && 00479 Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && 00480 Section.getType() != MachO::S_SYMBOL_STUBS) { 00481 MCSymbol &Symbol = *it->Symbol; 00482 report_fatal_error("indirect symbol '" + Symbol.getName() + 00483 "' not in a symbol pointer or stub section"); 00484 } 00485 } 00486 00487 // Bind non-lazy symbol pointers first. 00488 unsigned IndirectIndex = 0; 00489 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 00490 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { 00491 const MCSectionMachO &Section = 00492 cast<MCSectionMachO>(it->SectionData->getSection()); 00493 00494 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS) 00495 continue; 00496 00497 // Initialize the section indirect symbol base, if necessary. 00498 IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex)); 00499 00500 Asm.getOrCreateSymbolData(*it->Symbol); 00501 } 00502 00503 // Then lazy symbol pointers and symbol stubs. 00504 IndirectIndex = 0; 00505 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 00506 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { 00507 const MCSectionMachO &Section = 00508 cast<MCSectionMachO>(it->SectionData->getSection()); 00509 00510 if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && 00511 Section.getType() != MachO::S_SYMBOL_STUBS) 00512 continue; 00513 00514 // Initialize the section indirect symbol base, if necessary. 00515 IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex)); 00516 00517 // Set the symbol type to undefined lazy, but only on construction. 00518 // 00519 // FIXME: Do not hardcode. 
00520 bool Created; 00521 MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); 00522 if (Created) 00523 Entry.setFlags(Entry.getFlags() | 0x0001); 00524 } 00525 } 00526 00527 /// ComputeSymbolTable - Compute the symbol table data 00528 /// 00529 /// \param StringTable [out] - The string table data. 00530 /// \param StringIndexMap [out] - Map from symbol names to offsets in the 00531 /// string table. 00532 void MachObjectWriter:: 00533 ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, 00534 std::vector<MachSymbolData> &LocalSymbolData, 00535 std::vector<MachSymbolData> &ExternalSymbolData, 00536 std::vector<MachSymbolData> &UndefinedSymbolData) { 00537 // Build section lookup table. 00538 DenseMap<const MCSection*, uint8_t> SectionIndexMap; 00539 unsigned Index = 1; 00540 for (MCAssembler::iterator it = Asm.begin(), 00541 ie = Asm.end(); it != ie; ++it, ++Index) 00542 SectionIndexMap[&it->getSection()] = Index; 00543 assert(Index <= 256 && "Too many sections!"); 00544 00545 // Index 0 is always the empty string. 00546 StringMap<uint64_t> StringIndexMap; 00547 StringTable += '\x00'; 00548 00549 // Build the symbol arrays and the string table, but only for non-local 00550 // symbols. 00551 // 00552 // The particular order that we collect the symbols and create the string 00553 // table, then sort the symbols is chosen to match 'as'. Even though it 00554 // doesn't matter for correctness, this is important for letting us diff .o 00555 // files. 00556 for (MCSymbolData &SD : Asm.symbols()) { 00557 const MCSymbol &Symbol = SD.getSymbol(); 00558 00559 // Ignore non-linker visible symbols. 
00560 if (!Asm.isSymbolLinkerVisible(SD.getSymbol())) 00561 continue; 00562 00563 if (!SD.isExternal() && !Symbol.isUndefined()) 00564 continue; 00565 00566 uint64_t &Entry = StringIndexMap[Symbol.getName()]; 00567 if (!Entry) { 00568 Entry = StringTable.size(); 00569 StringTable += Symbol.getName(); 00570 StringTable += '\x00'; 00571 } 00572 00573 MachSymbolData MSD; 00574 MSD.SymbolData = &SD; 00575 MSD.StringIndex = Entry; 00576 00577 if (Symbol.isUndefined()) { 00578 MSD.SectionIndex = 0; 00579 UndefinedSymbolData.push_back(MSD); 00580 } else if (Symbol.isAbsolute()) { 00581 MSD.SectionIndex = 0; 00582 ExternalSymbolData.push_back(MSD); 00583 } else { 00584 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 00585 assert(MSD.SectionIndex && "Invalid section index!"); 00586 ExternalSymbolData.push_back(MSD); 00587 } 00588 } 00589 00590 // Now add the data for local symbols. 00591 for (MCSymbolData &SD : Asm.symbols()) { 00592 const MCSymbol &Symbol = SD.getSymbol(); 00593 00594 // Ignore non-linker visible symbols. 00595 if (!Asm.isSymbolLinkerVisible(SD.getSymbol())) 00596 continue; 00597 00598 if (SD.isExternal() || Symbol.isUndefined()) 00599 continue; 00600 00601 uint64_t &Entry = StringIndexMap[Symbol.getName()]; 00602 if (!Entry) { 00603 Entry = StringTable.size(); 00604 StringTable += Symbol.getName(); 00605 StringTable += '\x00'; 00606 } 00607 00608 MachSymbolData MSD; 00609 MSD.SymbolData = &SD; 00610 MSD.StringIndex = Entry; 00611 00612 if (Symbol.isAbsolute()) { 00613 MSD.SectionIndex = 0; 00614 LocalSymbolData.push_back(MSD); 00615 } else { 00616 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 00617 assert(MSD.SectionIndex && "Invalid section index!"); 00618 LocalSymbolData.push_back(MSD); 00619 } 00620 } 00621 00622 // External and undefined symbols are required to be in lexicographic order. 
00623 std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); 00624 std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); 00625 00626 // Set the symbol indices. 00627 Index = 0; 00628 for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) 00629 LocalSymbolData[i].SymbolData->setIndex(Index++); 00630 for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) 00631 ExternalSymbolData[i].SymbolData->setIndex(Index++); 00632 for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) 00633 UndefinedSymbolData[i].SymbolData->setIndex(Index++); 00634 00635 // The string table is padded to a multiple of 4. 00636 while (StringTable.size() % 4) 00637 StringTable += '\x00'; 00638 } 00639 00640 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm, 00641 const MCAsmLayout &Layout) { 00642 uint64_t StartAddress = 0; 00643 const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder(); 00644 for (int i = 0, n = Order.size(); i != n ; ++i) { 00645 const MCSectionData *SD = Order[i]; 00646 StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); 00647 SectionAddress[SD] = StartAddress; 00648 StartAddress += Layout.getSectionAddressSize(SD); 00649 00650 // Explicitly pad the section to match the alignment requirements of the 00651 // following one. This is for 'gas' compatibility, it shouldn't 00652 /// strictly be necessary. 00653 StartAddress += getPaddingSize(SD, Layout); 00654 } 00655 } 00656 00657 void MachObjectWriter::markAbsoluteVariableSymbols(MCAssembler &Asm, 00658 const MCAsmLayout &Layout) { 00659 for (MCSymbolData &SD : Asm.symbols()) { 00660 if (!SD.getSymbol().isVariable()) 00661 continue; 00662 00663 // Is the variable is a symbol difference (SA - SB + C) expression, 00664 // and neither symbol is external, mark the variable as absolute. 
00665 const MCExpr *Expr = SD.getSymbol().getVariableValue(); 00666 MCValue Value; 00667 if (Expr->EvaluateAsRelocatable(Value, &Layout, nullptr)) { 00668 if (Value.getSymA() && Value.getSymB()) 00669 const_cast<MCSymbol*>(&SD.getSymbol())->setAbsolute(); 00670 } 00671 } 00672 } 00673 00674 void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, 00675 const MCAsmLayout &Layout) { 00676 computeSectionAddresses(Asm, Layout); 00677 00678 // Create symbol data for any indirect symbols. 00679 BindIndirectSymbols(Asm); 00680 00681 // Mark symbol difference expressions in variables (from .set or = directives) 00682 // as absolute. 00683 markAbsoluteVariableSymbols(Asm, Layout); 00684 00685 // Compute symbol table information and bind symbol indices. 00686 ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, 00687 UndefinedSymbolData); 00688 } 00689 00690 bool MachObjectWriter:: 00691 IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, 00692 const MCSymbolData &DataA, 00693 const MCFragment &FB, 00694 bool InSet, 00695 bool IsPCRel) const { 00696 if (InSet) 00697 return true; 00698 00699 // The effective address is 00700 // addr(atom(A)) + offset(A) 00701 // - addr(atom(B)) - offset(B) 00702 // and the offsets are not relocatable, so the fixup is fully resolved when 00703 // addr(atom(A)) - addr(atom(B)) == 0. 00704 const MCSymbolData *A_Base = nullptr, *B_Base = nullptr; 00705 00706 const MCSymbol &SA = DataA.getSymbol().AliasedSymbol(); 00707 const MCSection &SecA = SA.getSection(); 00708 const MCSection &SecB = FB.getParent()->getSection(); 00709 00710 if (IsPCRel) { 00711 // The simple (Darwin, except on x86_64) way of dealing with this was to 00712 // assume that any reference to a temporary symbol *must* be a temporary 00713 // symbol in the same atom, unless the sections differ. Therefore, any PCrel 00714 // relocation to a temporary symbol (in the same section) is fully 00715 // resolved. 
This also works in conjunction with absolutized .set, which 00716 // requires the compiler to use .set to absolutize the differences between 00717 // symbols which the compiler knows to be assembly time constants, so we 00718 // don't need to worry about considering symbol differences fully resolved. 00719 // 00720 // If the file isn't using sub-sections-via-symbols, we can make the 00721 // same assumptions about any symbol that we normally make about 00722 // assembler locals. 00723 00724 bool hasReliableSymbolDifference = isX86_64(); 00725 if (!hasReliableSymbolDifference) { 00726 if (!SA.isInSection() || &SecA != &SecB || 00727 (!SA.isTemporary() && 00728 FB.getAtom() != Asm.getSymbolData(SA).getFragment()->getAtom() && 00729 Asm.getSubsectionsViaSymbols())) 00730 return false; 00731 return true; 00732 } 00733 // For Darwin x86_64, there is one special case when the reference IsPCRel. 00734 // If the fragment with the reference does not have a base symbol but meets 00735 // the simple way of dealing with this, in that it is a temporary symbol in 00736 // the same atom then it is assumed to be fully resolved. This is needed so 00737 // a relocation entry is not created and so the static linker does not 00738 // mess up the reference later. 00739 else if(!FB.getAtom() && 00740 SA.isTemporary() && SA.isInSection() && &SecA == &SecB){ 00741 return true; 00742 } 00743 } else { 00744 if (!TargetObjectWriter->useAggressiveSymbolFolding()) 00745 return false; 00746 } 00747 00748 const MCFragment *FA = Asm.getSymbolData(SA).getFragment(); 00749 00750 // Bail if the symbol has no fragment. 00751 if (!FA) 00752 return false; 00753 00754 A_Base = FA->getAtom(); 00755 if (!A_Base) 00756 return false; 00757 00758 B_Base = FB.getAtom(); 00759 if (!B_Base) 00760 return false; 00761 00762 // If the atoms are the same, they are guaranteed to have the same address. 00763 if (A_Base == B_Base) 00764 return true; 00765 00766 // Otherwise, we can't prove this is fully resolved. 
00767 return false; 00768 } 00769 00770 void MachObjectWriter::WriteObject(MCAssembler &Asm, 00771 const MCAsmLayout &Layout) { 00772 unsigned NumSections = Asm.size(); 00773 const MCAssembler::VersionMinInfoType &VersionInfo = 00774 Layout.getAssembler().getVersionMinInfo(); 00775 00776 // The section data starts after the header, the segment load command (and 00777 // section headers) and the symbol table. 00778 unsigned NumLoadCommands = 1; 00779 uint64_t LoadCommandsSize = is64Bit() ? 00780 sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64): 00781 sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section); 00782 00783 // Add the deployment target version info load command size, if used. 00784 if (VersionInfo.Major != 0) { 00785 ++NumLoadCommands; 00786 LoadCommandsSize += sizeof(MachO::version_min_command); 00787 } 00788 00789 // Add the data-in-code load command size, if used. 00790 unsigned NumDataRegions = Asm.getDataRegions().size(); 00791 if (NumDataRegions) { 00792 ++NumLoadCommands; 00793 LoadCommandsSize += sizeof(MachO::linkedit_data_command); 00794 } 00795 00796 // Add the loh load command size, if used. 00797 uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout); 00798 uint64_t LOHSize = RoundUpToAlignment(LOHRawSize, is64Bit() ? 8 : 4); 00799 if (LOHSize) { 00800 ++NumLoadCommands; 00801 LoadCommandsSize += sizeof(MachO::linkedit_data_command); 00802 } 00803 00804 // Add the symbol table load command sizes, if used. 00805 unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + 00806 UndefinedSymbolData.size(); 00807 if (NumSymbols) { 00808 NumLoadCommands += 2; 00809 LoadCommandsSize += (sizeof(MachO::symtab_command) + 00810 sizeof(MachO::dysymtab_command)); 00811 } 00812 00813 // Add the linker option load commands sizes. 
00814 const std::vector<std::vector<std::string> > &LinkerOptions = 00815 Asm.getLinkerOptions(); 00816 for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) { 00817 ++NumLoadCommands; 00818 LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(LinkerOptions[i], 00819 is64Bit()); 00820 } 00821 00822 // Compute the total size of the section data, as well as its file size and vm 00823 // size. 00824 uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) : 00825 sizeof(MachO::mach_header)) + LoadCommandsSize; 00826 uint64_t SectionDataSize = 0; 00827 uint64_t SectionDataFileSize = 0; 00828 uint64_t VMSize = 0; 00829 for (MCAssembler::const_iterator it = Asm.begin(), 00830 ie = Asm.end(); it != ie; ++it) { 00831 const MCSectionData &SD = *it; 00832 uint64_t Address = getSectionAddress(&SD); 00833 uint64_t Size = Layout.getSectionAddressSize(&SD); 00834 uint64_t FileSize = Layout.getSectionFileSize(&SD); 00835 FileSize += getPaddingSize(&SD, Layout); 00836 00837 VMSize = std::max(VMSize, Address + Size); 00838 00839 if (SD.getSection().isVirtualSection()) 00840 continue; 00841 00842 SectionDataSize = std::max(SectionDataSize, Address + Size); 00843 SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); 00844 } 00845 00846 // The section data is padded to 4 bytes. 00847 // 00848 // FIXME: Is this machine dependent? 00849 unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); 00850 SectionDataFileSize += SectionDataPadding; 00851 00852 // Write the prolog, starting with the header and load command... 00853 WriteHeader(NumLoadCommands, LoadCommandsSize, 00854 Asm.getSubsectionsViaSymbols()); 00855 WriteSegmentLoadCommand(NumSections, VMSize, 00856 SectionDataStart, SectionDataSize); 00857 00858 // ... and then the section headers. 
00859 uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; 00860 for (MCAssembler::const_iterator it = Asm.begin(), 00861 ie = Asm.end(); it != ie; ++it) { 00862 std::vector<MachO::any_relocation_info> &Relocs = Relocations[it]; 00863 unsigned NumRelocs = Relocs.size(); 00864 uint64_t SectionStart = SectionDataStart + getSectionAddress(it); 00865 WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); 00866 RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info); 00867 } 00868 00869 // Write out the deployment target information, if it's available. 00870 if (VersionInfo.Major != 0) { 00871 assert(VersionInfo.Update < 256 && "unencodable update target version"); 00872 assert(VersionInfo.Minor < 256 && "unencodable minor target version"); 00873 assert(VersionInfo.Major < 65536 && "unencodable major target version"); 00874 uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) | 00875 (VersionInfo.Major << 16); 00876 Write32(VersionInfo.Kind == MCVM_OSXVersionMin ? MachO::LC_VERSION_MIN_MACOSX : 00877 MachO::LC_VERSION_MIN_IPHONEOS); 00878 Write32(sizeof(MachO::version_min_command)); 00879 Write32(EncodedVersion); 00880 Write32(0); // reserved. 00881 } 00882 00883 // Write the data-in-code load command, if used. 00884 uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8; 00885 if (NumDataRegions) { 00886 uint64_t DataRegionsOffset = RelocTableEnd; 00887 uint64_t DataRegionsSize = NumDataRegions * 8; 00888 WriteLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset, 00889 DataRegionsSize); 00890 } 00891 00892 // Write the loh load command, if used. 00893 uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize; 00894 if (LOHSize) 00895 WriteLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT, 00896 DataInCodeTableEnd, LOHSize); 00897 00898 // Write the symbol table load command, if used. 
00899 if (NumSymbols) { 00900 unsigned FirstLocalSymbol = 0; 00901 unsigned NumLocalSymbols = LocalSymbolData.size(); 00902 unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; 00903 unsigned NumExternalSymbols = ExternalSymbolData.size(); 00904 unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; 00905 unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); 00906 unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); 00907 unsigned NumSymTabSymbols = 00908 NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; 00909 uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; 00910 uint64_t IndirectSymbolOffset = 0; 00911 00912 // If used, the indirect symbols are written after the section data. 00913 if (NumIndirectSymbols) 00914 IndirectSymbolOffset = LOHTableEnd; 00915 00916 // The symbol table is written after the indirect symbol data. 00917 uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize; 00918 00919 // The string table is written after symbol table. 00920 uint64_t StringTableOffset = 00921 SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? 00922 sizeof(MachO::nlist_64) : 00923 sizeof(MachO::nlist)); 00924 WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, 00925 StringTableOffset, StringTable.size()); 00926 00927 WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, 00928 FirstExternalSymbol, NumExternalSymbols, 00929 FirstUndefinedSymbol, NumUndefinedSymbols, 00930 IndirectSymbolOffset, NumIndirectSymbols); 00931 } 00932 00933 // Write the linker options load commands. 00934 for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) { 00935 WriteLinkerOptionsLoadCommand(LinkerOptions[i]); 00936 } 00937 00938 // Write the actual section data. 
00939 for (MCAssembler::const_iterator it = Asm.begin(), 00940 ie = Asm.end(); it != ie; ++it) { 00941 Asm.writeSectionData(it, Layout); 00942 00943 uint64_t Pad = getPaddingSize(it, Layout); 00944 for (unsigned int i = 0; i < Pad; ++i) 00945 Write8(0); 00946 } 00947 00948 // Write the extra padding. 00949 WriteZeros(SectionDataPadding); 00950 00951 // Write the relocation entries. 00952 for (MCAssembler::const_iterator it = Asm.begin(), 00953 ie = Asm.end(); it != ie; ++it) { 00954 // Write the section relocation entries, in reverse order to match 'as' 00955 // (approximately, the exact algorithm is more complicated than this). 00956 std::vector<MachO::any_relocation_info> &Relocs = Relocations[it]; 00957 for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { 00958 Write32(Relocs[e - i - 1].r_word0); 00959 Write32(Relocs[e - i - 1].r_word1); 00960 } 00961 } 00962 00963 // Write out the data-in-code region payload, if there is one. 00964 for (MCAssembler::const_data_region_iterator 00965 it = Asm.data_region_begin(), ie = Asm.data_region_end(); 00966 it != ie; ++it) { 00967 const DataRegionData *Data = &(*it); 00968 uint64_t Start = 00969 getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->Start), 00970 Layout); 00971 uint64_t End = 00972 getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->End), 00973 Layout); 00974 DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind 00975 << " start: " << Start << "(" << Data->Start->getName() << ")" 00976 << " end: " << End << "(" << Data->End->getName() << ")" 00977 << " size: " << End - Start 00978 << "\n"); 00979 Write32(Start); 00980 Write16(End - Start); 00981 Write16(Data->Kind); 00982 } 00983 00984 // Write out the loh commands, if there is one. 00985 if (LOHSize) { 00986 #ifndef NDEBUG 00987 unsigned Start = OS.tell(); 00988 #endif 00989 Asm.getLOHContainer().Emit(*this, Layout); 00990 // Pad to a multiple of the pointer size. 00991 WriteBytes("", OffsetToAlignment(LOHRawSize, is64Bit() ? 
8 : 4)); 00992 assert(OS.tell() - Start == LOHSize); 00993 } 00994 00995 // Write the symbol table data, if used. 00996 if (NumSymbols) { 00997 // Write the indirect symbol entries. 00998 for (MCAssembler::const_indirect_symbol_iterator 00999 it = Asm.indirect_symbol_begin(), 01000 ie = Asm.indirect_symbol_end(); it != ie; ++it) { 01001 // Indirect symbols in the non-lazy symbol pointer section have some 01002 // special handling. 01003 const MCSectionMachO &Section = 01004 static_cast<const MCSectionMachO&>(it->SectionData->getSection()); 01005 if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) { 01006 // If this symbol is defined and internal, mark it as such. 01007 if (it->Symbol->isDefined() && 01008 !Asm.getSymbolData(*it->Symbol).isExternal()) { 01009 uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL; 01010 if (it->Symbol->isAbsolute()) 01011 Flags |= MachO::INDIRECT_SYMBOL_ABS; 01012 Write32(Flags); 01013 continue; 01014 } 01015 } 01016 01017 Write32(Asm.getSymbolData(*it->Symbol).getIndex()); 01018 } 01019 01020 // FIXME: Check that offsets match computed ones. 01021 01022 // Write the symbol table entries. 01023 for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) 01024 WriteNlist(LocalSymbolData[i], Layout); 01025 for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) 01026 WriteNlist(ExternalSymbolData[i], Layout); 01027 for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) 01028 WriteNlist(UndefinedSymbolData[i], Layout); 01029 01030 // Write the string table. 01031 OS << StringTable.str(); 01032 } 01033 } 01034 /// Factory for the Mach-O object writer: constructs a MachObjectWriter from MOTW, OS and IsLittleEndian and returns it as an MCObjectWriter pointer. The object is created with a plain `new`; who is responsible for deleting it is not visible in this part of the file. 01035 MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW, 01036 raw_ostream &OS, 01037 bool IsLittleEndian) { 01038 return new MachObjectWriter(MOTW, OS, IsLittleEndian); 01039 }