LLVM API Documentation

NVPTXTargetMachine.cpp
Go to the documentation of this file.
00001 //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // Top-level implementation for the NVPTX target.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "NVPTXTargetMachine.h"
00015 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
00016 #include "NVPTX.h"
00017 #include "NVPTXAllocaHoisting.h"
00018 #include "NVPTXLowerAggrCopies.h"
00019 #include "llvm/Analysis/Passes.h"
00020 #include "llvm/CodeGen/AsmPrinter.h"
00021 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
00022 #include "llvm/CodeGen/MachineModuleInfo.h"
00023 #include "llvm/CodeGen/Passes.h"
00024 #include "llvm/IR/DataLayout.h"
00025 #include "llvm/IR/IRPrintingPasses.h"
00026 #include "llvm/IR/Verifier.h"
00027 #include "llvm/MC/MCAsmInfo.h"
00028 #include "llvm/MC/MCInstrInfo.h"
00029 #include "llvm/MC/MCStreamer.h"
00030 #include "llvm/MC/MCSubtargetInfo.h"
00031 #include "llvm/PassManager.h"
00032 #include "llvm/Support/CommandLine.h"
00033 #include "llvm/Support/Debug.h"
00034 #include "llvm/Support/FormattedStream.h"
00035 #include "llvm/Support/TargetRegistry.h"
00036 #include "llvm/Support/raw_ostream.h"
00037 #include "llvm/Target/TargetInstrInfo.h"
00038 #include "llvm/Target/TargetLowering.h"
00039 #include "llvm/Target/TargetLoweringObjectFile.h"
00040 #include "llvm/Target/TargetMachine.h"
00041 #include "llvm/Target/TargetOptions.h"
00042 #include "llvm/Target/TargetRegisterInfo.h"
00043 #include "llvm/Target/TargetSubtargetInfo.h"
00044 #include "llvm/Transforms/Scalar.h"
00045 
00046 using namespace llvm;
00047 
00048 namespace llvm {
00049 void initializeNVVMReflectPass(PassRegistry&);
00050 void initializeGenericToNVVMPass(PassRegistry&);
00051 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
00052 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
00053 }
00054 
00055 extern "C" void LLVMInitializeNVPTXTarget() {
00056   // Register the target.
00057   RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
00058   RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
00059 
00060   // FIXME: This pass is really intended to be invoked during IR optimization,
00061   // but it's very NVPTX-specific.
00062   initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
00063   initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
00064   initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
00065   initializeNVPTXFavorNonGenericAddrSpacesPass(
00066     *PassRegistry::getPassRegistry());
00067 }
00068 
00069 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
00070                                        StringRef CPU, StringRef FS,
00071                                        const TargetOptions &Options,
00072                                        Reloc::Model RM, CodeModel::Model CM,
00073                                        CodeGenOpt::Level OL, bool is64bit)
00074     : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
00075       Subtarget(TT, CPU, FS, *this, is64bit) {
00076   initAsmInfo();
00077 }
00078 
00079 void NVPTXTargetMachine32::anchor() {}
00080 
00081 NVPTXTargetMachine32::NVPTXTargetMachine32(
00082     const Target &T, StringRef TT, StringRef CPU, StringRef FS,
00083     const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
00084     CodeGenOpt::Level OL)
00085     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
00086 
00087 void NVPTXTargetMachine64::anchor() {}
00088 
00089 NVPTXTargetMachine64::NVPTXTargetMachine64(
00090     const Target &T, StringRef TT, StringRef CPU, StringRef FS,
00091     const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
00092     CodeGenOpt::Level OL)
00093     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
00094 
00095 namespace {
00096 class NVPTXPassConfig : public TargetPassConfig {
00097 public:
00098   NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
00099       : TargetPassConfig(TM, PM) {}
00100 
00101   NVPTXTargetMachine &getNVPTXTargetMachine() const {
00102     return getTM<NVPTXTargetMachine>();
00103   }
00104 
00105   void addIRPasses() override;
00106   bool addInstSelector() override;
00107   bool addPreRegAlloc() override;
00108   bool addPostRegAlloc() override;
00109   void addMachineSSAOptimization() override;
00110 
00111   FunctionPass *createTargetRegisterAllocator(bool) override;
00112   void addFastRegAlloc(FunctionPass *RegAllocPass) override;
00113   void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
00114 };
00115 } // end anonymous namespace
00116 
00117 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
00118   NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
00119   return PassConfig;
00120 }
00121 
00122 void NVPTXPassConfig::addIRPasses() {
00123   // The following passes are known to not play well with virtual regs hanging
00124   // around after register allocation (which in our case, is *all* registers).
00125   // We explicitly disable them here.  We do, however, need some functionality
00126   // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
00127   // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
00128   disablePass(&PrologEpilogCodeInserterID);
00129   disablePass(&MachineCopyPropagationID);
00130   disablePass(&BranchFolderPassID);
00131   disablePass(&TailDuplicateID);
00132 
00133   addPass(createNVPTXImageOptimizerPass());
00134   TargetPassConfig::addIRPasses();
00135   addPass(createNVPTXAssignValidGlobalNamesPass());
00136   addPass(createGenericToNVVMPass());
00137   addPass(createNVPTXFavorNonGenericAddrSpacesPass());
00138   addPass(createSeparateConstOffsetFromGEPPass());
00139   // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used
00140   // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates
00141   // significantly better code than EarlyCSE for some of our benchmarks.
00142   if (getOptLevel() == CodeGenOpt::Aggressive)
00143     addPass(createGVNPass());
00144   else
00145     addPass(createEarlyCSEPass());
00146   // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave
00147   // some dead code.  We could remove dead code in an ad-hoc manner, but that
00148   // requires manual work and might be error-prone.
00149   //
00150   // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts,
00151   // and leave them unused.
00152   //
00153   // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the
00154   // old index and some of its intermediate results may become unused.
00155   addPass(createDeadCodeEliminationPass());
00156 }
00157 
00158 bool NVPTXPassConfig::addInstSelector() {
00159   const NVPTXSubtarget &ST =
00160     getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
00161 
00162   addPass(createLowerAggrCopies());
00163   addPass(createAllocaHoisting());
00164   addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
00165 
00166   if (!ST.hasImageHandles())
00167     addPass(createNVPTXReplaceImageHandlesPass());
00168 
00169   return false;
00170 }
00171 
00172 bool NVPTXPassConfig::addPreRegAlloc() { return false; }
00173 bool NVPTXPassConfig::addPostRegAlloc() {
00174   addPass(createNVPTXPrologEpilogPass());
00175   return false;
00176 }
00177 
00178 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
00179   return nullptr; // No reg alloc
00180 }
00181 
00182 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
00183   assert(!RegAllocPass && "NVPTX uses no regalloc!");
00184   addPass(&PHIEliminationID);
00185   addPass(&TwoAddressInstructionPassID);
00186 }
00187 
00188 void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
00189   assert(!RegAllocPass && "NVPTX uses no regalloc!");
00190 
00191   addPass(&ProcessImplicitDefsID);
00192   addPass(&LiveVariablesID);
00193   addPass(&MachineLoopInfoID);
00194   addPass(&PHIEliminationID);
00195 
00196   addPass(&TwoAddressInstructionPassID);
00197   addPass(&RegisterCoalescerID);
00198 
00199   // PreRA instruction scheduling.
00200   if (addPass(&MachineSchedulerID))
00201     printAndVerify("After Machine Scheduling");
00202 
00203 
00204   addPass(&StackSlotColoringID);
00205 
00206   // FIXME: Needs physical registers
00207   //addPass(&PostRAMachineLICMID);
00208 
00209   printAndVerify("After StackSlotColoring");
00210 }
00211 
00212 void NVPTXPassConfig::addMachineSSAOptimization() {
00213   // Pre-ra tail duplication.
00214   if (addPass(&EarlyTailDuplicateID))
00215     printAndVerify("After Pre-RegAlloc TailDuplicate");
00216 
00217   // Optimize PHIs before DCE: removing dead PHI cycles may make more
00218   // instructions dead.
00219   addPass(&OptimizePHIsID);
00220 
00221   // This pass merges large allocas. StackSlotColoring is a different pass
00222   // which merges spill slots.
00223   addPass(&StackColoringID);
00224 
00225   // If the target requests it, assign local variables to stack slots relative
00226   // to one another and simplify frame index references where possible.
00227   addPass(&LocalStackSlotAllocationID);
00228 
00229   // With optimization, dead code should already be eliminated. However
00230   // there is one known exception: lowered code for arguments that are only
00231   // used by tail calls, where the tail calls reuse the incoming stack
00232   // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
00233   addPass(&DeadMachineInstructionElimID);
00234   printAndVerify("After codegen DCE pass");
00235 
00236   // Allow targets to insert passes that improve instruction level parallelism,
00237   // like if-conversion. Such passes will typically need dominator trees and
00238   // loop info, just like LICM and CSE below.
00239   if (addILPOpts())
00240     printAndVerify("After ILP optimizations");
00241 
00242   addPass(&MachineLICMID);
00243   addPass(&MachineCSEID);
00244 
00245   addPass(&MachineSinkingID);
00246   printAndVerify("After Machine LICM, CSE and Sinking passes");
00247 
00248   addPass(&PeepholeOptimizerID);
00249   printAndVerify("After codegen peephole optimization pass");
00250 }