LLVM API Documentation
00001 //===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 /// \file 00011 /// \brief Insert wait instructions for memory reads and writes. 00012 /// 00013 /// Memory reads and writes are issued asynchronously, so we need to insert 00014 /// S_WAITCNT instructions when we want to access any of their results or 00015 /// overwrite any register that's used asynchronously. 00016 // 00017 //===----------------------------------------------------------------------===// 00018 00019 #include "AMDGPU.h" 00020 #include "AMDGPUSubtarget.h" 00021 #include "SIInstrInfo.h" 00022 #include "SIMachineFunctionInfo.h" 00023 #include "llvm/CodeGen/MachineFunction.h" 00024 #include "llvm/CodeGen/MachineFunctionPass.h" 00025 #include "llvm/CodeGen/MachineInstrBuilder.h" 00026 #include "llvm/CodeGen/MachineRegisterInfo.h" 00027 00028 using namespace llvm; 00029 00030 namespace { 00031 00032 /// \brief One variable for each of the hardware counters 00033 typedef union { 00034 struct { 00035 unsigned VM; 00036 unsigned EXP; 00037 unsigned LGKM; 00038 } Named; 00039 unsigned Array[3]; 00040 00041 } Counters; 00042 00043 typedef Counters RegCounters[512]; 00044 typedef std::pair<unsigned, unsigned> RegInterval; 00045 00046 class SIInsertWaits : public MachineFunctionPass { 00047 00048 private: 00049 static char ID; 00050 const SIInstrInfo *TII; 00051 const SIRegisterInfo *TRI; 00052 const MachineRegisterInfo *MRI; 00053 00054 /// \brief Constant hardware limits 00055 static const Counters WaitCounts; 00056 00057 /// \brief Constant zero value 00058 static const Counters ZeroCounts; 00059 00060 /// \brief Counter values we have already waited on. 00061 Counters WaitedOn; 00062 00063 /// \brief Counter values for last instruction issued. 00064 Counters LastIssued; 00065 00066 /// \brief Registers used by async instructions. 00067 RegCounters UsedRegs; 00068 00069 /// \brief Registers defined by async instructions. 00070 RegCounters DefinedRegs; 00071 00072 /// \brief Different export instruction types seen since last wait. 00073 unsigned ExpInstrTypesSeen; 00074 00075 /// \brief Get increment/decrement amount for this instruction. 00076 Counters getHwCounts(MachineInstr &MI); 00077 00078 /// \brief Is operand relevant for async execution? 00079 bool isOpRelevant(MachineOperand &Op); 00080 00081 /// \brief Get register interval an operand affects. 00082 RegInterval getRegInterval(MachineOperand &Op); 00083 00084 /// \brief Handle instructions async components 00085 void pushInstruction(MachineInstr &MI); 00086 00087 /// \brief Insert the actual wait instruction 00088 bool insertWait(MachineBasicBlock &MBB, 00089 MachineBasicBlock::iterator I, 00090 const Counters &Counts); 00091 00092 /// \brief Do we need def2def checks? 00093 bool unorderedDefines(MachineInstr &MI); 00094 00095 /// \brief Resolve all operand dependencies to counter requirements 00096 Counters handleOperands(MachineInstr &MI); 00097 00098 public: 00099 SIInsertWaits(TargetMachine &tm) : 00100 MachineFunctionPass(ID), 00101 TII(nullptr), 00102 TRI(nullptr), 00103 ExpInstrTypesSeen(0) { } 00104 00105 bool runOnMachineFunction(MachineFunction &MF) override; 00106 00107 const char *getPassName() const override { 00108 return "SI insert wait instructions"; 00109 } 00110 00111 }; 00112 00113 } // End anonymous namespace 00114 00115 char SIInsertWaits::ID = 0; 00116 00117 const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } }; 00118 const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } }; 00119 00120 FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) { 00121 return new SIInsertWaits(tm); 00122 } 00123 00124 Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { 00125 00126 uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; 00127 Counters Result; 00128 00129 Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); 00130 00131 // Only consider stores or EXP for EXP_CNT 00132 Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT && 00133 (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore())); 00134 00135 // LGKM may uses larger values 00136 if (TSFlags & SIInstrFlags::LGKM_CNT) { 00137 00138 if (TII->isSMRD(MI.getOpcode())) { 00139 00140 MachineOperand &Op = MI.getOperand(0); 00141 assert(Op.isReg() && "First LGKM operand must be a register!"); 00142 00143 unsigned Reg = Op.getReg(); 00144 unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); 00145 Result.Named.LGKM = Size > 4 ? 2 : 1; 00146 00147 } else { 00148 // DS 00149 Result.Named.LGKM = 1; 00150 } 00151 00152 } else { 00153 Result.Named.LGKM = 0; 00154 } 00155 00156 return Result; 00157 } 00158 00159 bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { 00160 00161 // Constants are always irrelevant 00162 if (!Op.isReg()) 00163 return false; 00164 00165 // Defines are always relevant 00166 if (Op.isDef()) 00167 return true; 00168 00169 // For exports all registers are relevant 00170 MachineInstr &MI = *Op.getParent(); 00171 if (MI.getOpcode() == AMDGPU::EXP) 00172 return true; 00173 00174 // For stores the stored value is also relevant 00175 if (!MI.getDesc().mayStore()) 00176 return false; 00177 00178 for (MachineInstr::mop_iterator I = MI.operands_begin(), 00179 E = MI.operands_end(); I != E; ++I) { 00180 00181 if (I->isReg() && I->isUse()) 00182 return Op.isIdenticalTo(*I); 00183 } 00184 00185 return false; 00186 } 00187 00188 RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) { 00189 00190 if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) 00191 return std::make_pair(0, 0); 00192 00193 unsigned Reg = Op.getReg(); 00194 unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); 00195 00196 assert(Size >= 4); 00197 00198 RegInterval Result; 00199 Result.first = TRI->getEncodingValue(Reg); 00200 Result.second = Result.first + Size / 4; 00201 00202 return Result; 00203 } 00204 00205 void SIInsertWaits::pushInstruction(MachineInstr &MI) { 00206 00207 // Get the hardware counter increments and sum them up 00208 Counters Increment = getHwCounts(MI); 00209 unsigned Sum = 0; 00210 00211 for (unsigned i = 0; i < 3; ++i) { 00212 LastIssued.Array[i] += Increment.Array[i]; 00213 Sum += Increment.Array[i]; 00214 } 00215 00216 // If we don't increase anything then that's it 00217 if (Sum == 0) 00218 return; 00219 00220 // Remember which export instructions we have seen 00221 if (Increment.Named.EXP) { 00222 ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2; 00223 } 00224 00225 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 00226 00227 MachineOperand &Op = MI.getOperand(i); 00228 if (!isOpRelevant(Op)) 00229 continue; 00230 00231 RegInterval Interval = getRegInterval(Op); 00232 for (unsigned j = Interval.first; j < Interval.second; ++j) { 00233 00234 // Remember which registers we define 00235 if (Op.isDef()) 00236 DefinedRegs[j] = LastIssued; 00237 00238 // and which one we are using 00239 if (Op.isUse()) 00240 UsedRegs[j] = LastIssued; 00241 } 00242 } 00243 } 00244 00245 bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, 00246 MachineBasicBlock::iterator I, 00247 const Counters &Required) { 00248 00249 // End of program? No need to wait on anything 00250 if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM) 00251 return false; 00252 00253 // Figure out if the async instructions execute in order 00254 bool Ordered[3]; 00255 00256 // VM_CNT is always ordered 00257 Ordered[0] = true; 00258 00259 // EXP_CNT is unordered if we have both EXP & VM-writes 00260 Ordered[1] = ExpInstrTypesSeen == 3; 00261 00262 // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS 00263 Ordered[2] = false; 00264 00265 // The values we are going to put into the S_WAITCNT instruction 00266 Counters Counts = WaitCounts; 00267 00268 // Do we really need to wait? 00269 bool NeedWait = false; 00270 00271 for (unsigned i = 0; i < 3; ++i) { 00272 00273 if (Required.Array[i] <= WaitedOn.Array[i]) 00274 continue; 00275 00276 NeedWait = true; 00277 00278 if (Ordered[i]) { 00279 unsigned Value = LastIssued.Array[i] - Required.Array[i]; 00280 00281 // Adjust the value to the real hardware possibilities. 00282 Counts.Array[i] = std::min(Value, WaitCounts.Array[i]); 00283 00284 } else 00285 Counts.Array[i] = 0; 00286 00287 // Remember on what we have waited on. 00288 WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i]; 00289 } 00290 00291 if (!NeedWait) 00292 return false; 00293 00294 // Reset EXP_CNT instruction types 00295 if (Counts.Named.EXP == 0) 00296 ExpInstrTypesSeen = 0; 00297 00298 // Build the wait instruction 00299 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) 00300 .addImm((Counts.Named.VM & 0xF) | 00301 ((Counts.Named.EXP & 0x7) << 4) | 00302 ((Counts.Named.LGKM & 0x7) << 8)); 00303 00304 return true; 00305 } 00306 00307 /// \brief helper function for handleOperands 00308 static void increaseCounters(Counters &Dst, const Counters &Src) { 00309 00310 for (unsigned i = 0; i < 3; ++i) 00311 Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]); 00312 } 00313 00314 Counters SIInsertWaits::handleOperands(MachineInstr &MI) { 00315 00316 Counters Result = ZeroCounts; 00317 00318 // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish, 00319 // but we also want to wait for any other outstanding transfers before 00320 // signalling other hardware blocks 00321 if (MI.getOpcode() == AMDGPU::S_SENDMSG) 00322 return LastIssued; 00323 00324 // For each register affected by this 00325 // instruction increase the result sequence 00326 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 00327 00328 MachineOperand &Op = MI.getOperand(i); 00329 RegInterval Interval = getRegInterval(Op); 00330 for (unsigned j = Interval.first; j < Interval.second; ++j) { 00331 00332 if (Op.isDef()) { 00333 increaseCounters(Result, UsedRegs[j]); 00334 increaseCounters(Result, DefinedRegs[j]); 00335 } 00336 00337 if (Op.isUse()) 00338 increaseCounters(Result, DefinedRegs[j]); 00339 } 00340 } 00341 00342 return Result; 00343 } 00344 00345 // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States" 00346 // around other non-memory instructions. 00347 bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { 00348 bool Changes = false; 00349 00350 TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); 00351 TRI = 00352 static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); 00353 00354 MRI = &MF.getRegInfo(); 00355 00356 WaitedOn = ZeroCounts; 00357 LastIssued = ZeroCounts; 00358 00359 memset(&UsedRegs, 0, sizeof(UsedRegs)); 00360 memset(&DefinedRegs, 0, sizeof(DefinedRegs)); 00361 00362 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 00363 BI != BE; ++BI) { 00364 00365 MachineBasicBlock &MBB = *BI; 00366 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 00367 I != E; ++I) { 00368 00369 Changes |= insertWait(MBB, I, handleOperands(*I)); 00370 pushInstruction(*I); 00371 } 00372 00373 // Wait for everything at the end of the MBB 00374 Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued); 00375 } 00376 00377 return Changes; 00378 }