// LLVM API Documentation
00001 //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // Top-level implementation for the NVPTX target. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "NVPTXTargetMachine.h" 00015 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 00016 #include "NVPTX.h" 00017 #include "NVPTXAllocaHoisting.h" 00018 #include "NVPTXLowerAggrCopies.h" 00019 #include "llvm/Analysis/Passes.h" 00020 #include "llvm/CodeGen/AsmPrinter.h" 00021 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 00022 #include "llvm/CodeGen/MachineModuleInfo.h" 00023 #include "llvm/CodeGen/Passes.h" 00024 #include "llvm/IR/DataLayout.h" 00025 #include "llvm/IR/IRPrintingPasses.h" 00026 #include "llvm/IR/Verifier.h" 00027 #include "llvm/MC/MCAsmInfo.h" 00028 #include "llvm/MC/MCInstrInfo.h" 00029 #include "llvm/MC/MCStreamer.h" 00030 #include "llvm/MC/MCSubtargetInfo.h" 00031 #include "llvm/PassManager.h" 00032 #include "llvm/Support/CommandLine.h" 00033 #include "llvm/Support/Debug.h" 00034 #include "llvm/Support/FormattedStream.h" 00035 #include "llvm/Support/TargetRegistry.h" 00036 #include "llvm/Support/raw_ostream.h" 00037 #include "llvm/Target/TargetInstrInfo.h" 00038 #include "llvm/Target/TargetLowering.h" 00039 #include "llvm/Target/TargetLoweringObjectFile.h" 00040 #include "llvm/Target/TargetMachine.h" 00041 #include "llvm/Target/TargetOptions.h" 00042 #include "llvm/Target/TargetRegisterInfo.h" 00043 #include "llvm/Target/TargetSubtargetInfo.h" 00044 #include "llvm/Transforms/Scalar.h" 00045 00046 using namespace llvm; 00047 00048 namespace llvm { 00049 void initializeNVVMReflectPass(PassRegistry&); 00050 void 
initializeGenericToNVVMPass(PassRegistry&); 00051 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); 00052 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); 00053 } 00054 00055 extern "C" void LLVMInitializeNVPTXTarget() { 00056 // Register the target. 00057 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); 00058 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); 00059 00060 // FIXME: This pass is really intended to be invoked during IR optimization, 00061 // but it's very NVPTX-specific. 00062 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); 00063 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); 00064 initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); 00065 initializeNVPTXFavorNonGenericAddrSpacesPass( 00066 *PassRegistry::getPassRegistry()); 00067 } 00068 00069 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, 00070 StringRef CPU, StringRef FS, 00071 const TargetOptions &Options, 00072 Reloc::Model RM, CodeModel::Model CM, 00073 CodeGenOpt::Level OL, bool is64bit) 00074 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), 00075 Subtarget(TT, CPU, FS, *this, is64bit) { 00076 initAsmInfo(); 00077 } 00078 00079 void NVPTXTargetMachine32::anchor() {} 00080 00081 NVPTXTargetMachine32::NVPTXTargetMachine32( 00082 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 00083 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 00084 CodeGenOpt::Level OL) 00085 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 00086 00087 void NVPTXTargetMachine64::anchor() {} 00088 00089 NVPTXTargetMachine64::NVPTXTargetMachine64( 00090 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 00091 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 00092 CodeGenOpt::Level OL) 00093 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 00094 00095 namespace { 00096 class NVPTXPassConfig 
: public TargetPassConfig { 00097 public: 00098 NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) 00099 : TargetPassConfig(TM, PM) {} 00100 00101 NVPTXTargetMachine &getNVPTXTargetMachine() const { 00102 return getTM<NVPTXTargetMachine>(); 00103 } 00104 00105 void addIRPasses() override; 00106 bool addInstSelector() override; 00107 bool addPreRegAlloc() override; 00108 bool addPostRegAlloc() override; 00109 void addMachineSSAOptimization() override; 00110 00111 FunctionPass *createTargetRegisterAllocator(bool) override; 00112 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 00113 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 00114 }; 00115 } // end anonymous namespace 00116 00117 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 00118 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); 00119 return PassConfig; 00120 } 00121 00122 void NVPTXPassConfig::addIRPasses() { 00123 // The following passes are known to not play well with virtual regs hanging 00124 // around after register allocation (which in our case, is *all* registers). 00125 // We explicitly disable them here. We do, however, need some functionality 00126 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the 00127 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 00128 disablePass(&PrologEpilogCodeInserterID); 00129 disablePass(&MachineCopyPropagationID); 00130 disablePass(&BranchFolderPassID); 00131 disablePass(&TailDuplicateID); 00132 00133 addPass(createNVPTXImageOptimizerPass()); 00134 TargetPassConfig::addIRPasses(); 00135 addPass(createNVPTXAssignValidGlobalNamesPass()); 00136 addPass(createGenericToNVVMPass()); 00137 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); 00138 addPass(createSeparateConstOffsetFromGEPPass()); 00139 // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used 00140 // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. 
GVN generates 00141 // significantly better code than EarlyCSE for some of our benchmarks. 00142 if (getOptLevel() == CodeGenOpt::Aggressive) 00143 addPass(createGVNPass()); 00144 else 00145 addPass(createEarlyCSEPass()); 00146 // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave 00147 // some dead code. We could remove dead code in an ad-hoc manner, but that 00148 // requires manual work and might be error-prone. 00149 // 00150 // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, 00151 // and leave them unused. 00152 // 00153 // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the 00154 // old index and some of its intermediate results may become unused. 00155 addPass(createDeadCodeEliminationPass()); 00156 } 00157 00158 bool NVPTXPassConfig::addInstSelector() { 00159 const NVPTXSubtarget &ST = 00160 getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); 00161 00162 addPass(createLowerAggrCopies()); 00163 addPass(createAllocaHoisting()); 00164 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); 00165 00166 if (!ST.hasImageHandles()) 00167 addPass(createNVPTXReplaceImageHandlesPass()); 00168 00169 return false; 00170 } 00171 00172 bool NVPTXPassConfig::addPreRegAlloc() { return false; } 00173 bool NVPTXPassConfig::addPostRegAlloc() { 00174 addPass(createNVPTXPrologEpilogPass()); 00175 return false; 00176 } 00177 00178 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 00179 return nullptr; // No reg alloc 00180 } 00181 00182 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 00183 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 00184 addPass(&PHIEliminationID); 00185 addPass(&TwoAddressInstructionPassID); 00186 } 00187 00188 void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 00189 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 00190 00191 addPass(&ProcessImplicitDefsID); 00192 addPass(&LiveVariablesID); 00193 
addPass(&MachineLoopInfoID); 00194 addPass(&PHIEliminationID); 00195 00196 addPass(&TwoAddressInstructionPassID); 00197 addPass(&RegisterCoalescerID); 00198 00199 // PreRA instruction scheduling. 00200 if (addPass(&MachineSchedulerID)) 00201 printAndVerify("After Machine Scheduling"); 00202 00203 00204 addPass(&StackSlotColoringID); 00205 00206 // FIXME: Needs physical registers 00207 //addPass(&PostRAMachineLICMID); 00208 00209 printAndVerify("After StackSlotColoring"); 00210 } 00211 00212 void NVPTXPassConfig::addMachineSSAOptimization() { 00213 // Pre-ra tail duplication. 00214 if (addPass(&EarlyTailDuplicateID)) 00215 printAndVerify("After Pre-RegAlloc TailDuplicate"); 00216 00217 // Optimize PHIs before DCE: removing dead PHI cycles may make more 00218 // instructions dead. 00219 addPass(&OptimizePHIsID); 00220 00221 // This pass merges large allocas. StackSlotColoring is a different pass 00222 // which merges spill slots. 00223 addPass(&StackColoringID); 00224 00225 // If the target requests it, assign local variables to stack slots relative 00226 // to one another and simplify frame index references where possible. 00227 addPass(&LocalStackSlotAllocationID); 00228 00229 // With optimization, dead code should already be eliminated. However 00230 // there is one known exception: lowered code for arguments that are only 00231 // used by tail calls, where the tail calls reuse the incoming stack 00232 // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). 00233 addPass(&DeadMachineInstructionElimID); 00234 printAndVerify("After codegen DCE pass"); 00235 00236 // Allow targets to insert passes that improve instruction level parallelism, 00237 // like if-conversion. Such passes will typically need dominator trees and 00238 // loop info, just like LICM and CSE below. 
00239 if (addILPOpts()) 00240 printAndVerify("After ILP optimizations"); 00241 00242 addPass(&MachineLICMID); 00243 addPass(&MachineCSEID); 00244 00245 addPass(&MachineSinkingID); 00246 printAndVerify("After Machine LICM, CSE and Sinking passes"); 00247 00248 addPass(&PeepholeOptimizerID); 00249 printAndVerify("After codegen peephole optimization pass"); 00250 }