// LLVM API Documentation
00001 //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass identifies floating point stores that should not be combined into 00011 // store pairs. Later we may do the same for floating point loads. 00012 // ===---------------------------------------------------------------------===// 00013 00014 #include "AArch64InstrInfo.h" 00015 #include "llvm/CodeGen/MachineFunction.h" 00016 #include "llvm/CodeGen/MachineFunctionPass.h" 00017 #include "llvm/CodeGen/MachineInstr.h" 00018 #include "llvm/CodeGen/MachineTraceMetrics.h" 00019 #include "llvm/CodeGen/TargetSchedule.h" 00020 #include "llvm/Support/Debug.h" 00021 #include "llvm/Support/raw_ostream.h" 00022 #include "llvm/Target/TargetInstrInfo.h" 00023 00024 using namespace llvm; 00025 00026 #define DEBUG_TYPE "aarch64-stp-suppress" 00027 00028 namespace { 00029 class AArch64StorePairSuppress : public MachineFunctionPass { 00030 const AArch64InstrInfo *TII; 00031 const TargetRegisterInfo *TRI; 00032 const MachineRegisterInfo *MRI; 00033 MachineFunction *MF; 00034 TargetSchedModel SchedModel; 00035 MachineTraceMetrics *Traces; 00036 MachineTraceMetrics::Ensemble *MinInstr; 00037 00038 public: 00039 static char ID; 00040 AArch64StorePairSuppress() : MachineFunctionPass(ID) {} 00041 00042 const char *getPassName() const override { 00043 return "AArch64 Store Pair Suppression"; 00044 } 00045 00046 bool runOnMachineFunction(MachineFunction &F) override; 00047 00048 private: 00049 bool shouldAddSTPToBlock(const MachineBasicBlock *BB); 00050 00051 bool isNarrowFPStore(const MachineInstr &MI); 00052 00053 void getAnalysisUsage(AnalysisUsage &AU) const override { 00054 AU.setPreservesCFG(); 00055 
AU.addRequired<MachineTraceMetrics>(); 00056 AU.addPreserved<MachineTraceMetrics>(); 00057 MachineFunctionPass::getAnalysisUsage(AU); 00058 } 00059 }; 00060 char AArch64StorePairSuppress::ID = 0; 00061 } // anonymous 00062 00063 FunctionPass *llvm::createAArch64StorePairSuppressPass() { 00064 return new AArch64StorePairSuppress(); 00065 } 00066 00067 /// Return true if an STP can be added to this block without increasing the 00068 /// critical resource height. STP is good to form in Ld/St limited blocks and 00069 /// bad to form in float-point limited blocks. This is true independent of the 00070 /// critical path. If the critical path is longer than the resource height, the 00071 /// extra vector ops can limit physreg renaming. Otherwise, it could simply 00072 /// oversaturate the vector units. 00073 bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { 00074 if (!MinInstr) 00075 MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); 00076 00077 MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB); 00078 unsigned ResLength = BBTrace.getResourceLength(); 00079 00080 // Get the machine model's scheduling class for STPQi. 00081 // Bypass TargetSchedule's SchedClass resolution since we only have an opcode. 00082 unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass(); 00083 const MCSchedClassDesc *SCDesc = 00084 SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); 00085 00086 // If a subtarget does not define resources for STPQi, bail here. 00087 if (SCDesc->isValid() && !SCDesc->isVariant()) { 00088 unsigned ResLenWithSTP = BBTrace.getResourceLength(None, SCDesc); 00089 if (ResLenWithSTP > ResLength) { 00090 DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber() 00091 << " resources " << ResLength << " -> " << ResLenWithSTP 00092 << "\n"); 00093 return false; 00094 } 00095 } 00096 return true; 00097 } 00098 00099 /// Return true if this is a floating-point store smaller than the V reg. 
On 00100 /// cyclone, these require a vector shuffle before storing a pair. 00101 /// Ideally we would call getMatchingPairOpcode() and have the machine model 00102 /// tell us if it's profitable with no cpu knowledge here. 00103 /// 00104 /// FIXME: We plan to develop a decent Target abstraction for simple loads and 00105 /// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer. 00106 bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { 00107 switch (MI.getOpcode()) { 00108 default: 00109 return false; 00110 case AArch64::STRSui: 00111 case AArch64::STRDui: 00112 case AArch64::STURSi: 00113 case AArch64::STURDi: 00114 return true; 00115 } 00116 } 00117 00118 bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { 00119 MF = &mf; 00120 TII = 00121 static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo()); 00122 TRI = MF->getSubtarget().getRegisterInfo(); 00123 MRI = &MF->getRegInfo(); 00124 const TargetSubtargetInfo &ST = 00125 MF->getTarget().getSubtarget<TargetSubtargetInfo>(); 00126 SchedModel.init(ST.getSchedModel(), &ST, TII); 00127 00128 Traces = &getAnalysis<MachineTraceMetrics>(); 00129 MinInstr = nullptr; 00130 00131 DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n'); 00132 00133 if (!SchedModel.hasInstrSchedModel()) { 00134 DEBUG(dbgs() << " Skipping pass: no machine model present.\n"); 00135 return false; 00136 } 00137 00138 // Check for a sequence of stores to the same base address. We don't need to 00139 // precisely determine whether a store pair can be formed. But we do want to 00140 // filter out most situations where we can't form store pairs to avoid 00141 // computing trace metrics in those cases. 
00142 for (auto &MBB : *MF) { 00143 bool SuppressSTP = false; 00144 unsigned PrevBaseReg = 0; 00145 for (auto &MI : MBB) { 00146 if (!isNarrowFPStore(MI)) 00147 continue; 00148 unsigned BaseReg; 00149 unsigned Offset; 00150 if (TII->getLdStBaseRegImmOfs(&MI, BaseReg, Offset, TRI)) { 00151 if (PrevBaseReg == BaseReg) { 00152 // If this block can take STPs, skip ahead to the next block. 00153 if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) 00154 break; 00155 // Otherwise, continue unpairing the stores in this block. 00156 DEBUG(dbgs() << "Unpairing store " << MI << "\n"); 00157 SuppressSTP = true; 00158 TII->suppressLdStPair(&MI); 00159 } 00160 PrevBaseReg = BaseReg; 00161 } else 00162 PrevBaseReg = 0; 00163 } 00164 } 00165 // This pass just sets some internal MachineMemOperand flags. It can't really 00166 // invalidate anything. 00167 return false; 00168 }