LLVM API Documentation
00001 //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 /// \file 00011 /// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold 00012 /// 128 Alu instructions ; these instructions can access up to 4 prefetched 00013 /// 4 lines of 16 registers from constant buffers. Such ALU clauses are 00014 /// initiated by CF_ALU instructions. 00015 //===----------------------------------------------------------------------===// 00016 00017 #include "AMDGPU.h" 00018 #include "R600Defines.h" 00019 #include "R600InstrInfo.h" 00020 #include "R600MachineFunctionInfo.h" 00021 #include "R600RegisterInfo.h" 00022 #include "AMDGPUSubtarget.h" 00023 #include "llvm/CodeGen/MachineFunctionPass.h" 00024 #include "llvm/CodeGen/MachineInstrBuilder.h" 00025 #include "llvm/CodeGen/MachineRegisterInfo.h" 00026 00027 using namespace llvm; 00028 00029 namespace llvm { 00030 void initializeR600EmitClauseMarkersPass(PassRegistry&); 00031 } 00032 00033 namespace { 00034 00035 class R600EmitClauseMarkers : public MachineFunctionPass { 00036 00037 private: 00038 const R600InstrInfo *TII; 00039 int Address; 00040 00041 unsigned OccupiedDwords(MachineInstr *MI) const { 00042 switch (MI->getOpcode()) { 00043 case AMDGPU::INTERP_PAIR_XY: 00044 case AMDGPU::INTERP_PAIR_ZW: 00045 case AMDGPU::INTERP_VEC_LOAD: 00046 case AMDGPU::DOT_4: 00047 return 4; 00048 case AMDGPU::KILL: 00049 return 0; 00050 default: 00051 break; 00052 } 00053 00054 // These will be expanded to two ALU instructions in the 00055 // ExpandSpecialInstructions pass. 00056 if (TII->isLDSRetInstr(MI->getOpcode())) 00057 return 2; 00058 00059 if(TII->isVector(*MI) || 00060 TII->isCubeOp(MI->getOpcode()) || 00061 TII->isReductionOp(MI->getOpcode())) 00062 return 4; 00063 00064 unsigned NumLiteral = 0; 00065 for (MachineInstr::mop_iterator It = MI->operands_begin(), 00066 E = MI->operands_end(); It != E; ++It) { 00067 MachineOperand &MO = *It; 00068 if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) 00069 ++NumLiteral; 00070 } 00071 return 1 + NumLiteral; 00072 } 00073 00074 bool isALU(const MachineInstr *MI) const { 00075 if (TII->isALUInstr(MI->getOpcode())) 00076 return true; 00077 if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode())) 00078 return true; 00079 switch (MI->getOpcode()) { 00080 case AMDGPU::PRED_X: 00081 case AMDGPU::INTERP_PAIR_XY: 00082 case AMDGPU::INTERP_PAIR_ZW: 00083 case AMDGPU::INTERP_VEC_LOAD: 00084 case AMDGPU::COPY: 00085 case AMDGPU::DOT_4: 00086 return true; 00087 default: 00088 return false; 00089 } 00090 } 00091 00092 bool IsTrivialInst(MachineInstr *MI) const { 00093 switch (MI->getOpcode()) { 00094 case AMDGPU::KILL: 00095 case AMDGPU::RETURN: 00096 case AMDGPU::IMPLICIT_DEF: 00097 return true; 00098 default: 00099 return false; 00100 } 00101 } 00102 00103 std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const { 00104 // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2 00105 // (See also R600ISelLowering.cpp) 00106 // ConstIndex value is in [0, 4095]; 00107 return std::pair<unsigned, unsigned>( 00108 ((Sel >> 2) - 512) >> 12, // KC_BANK 00109 // Line Number of ConstIndex 00110 // A line contains 16 constant registers however KCX bank can lock 00111 // two line at the same time ; thus we want to get an even line number. 00112 // Line number can be retrieved with (>>4), using (>>5) <<1 generates 00113 // an even number. 00114 ((((Sel >> 2) - 512) & 4095) >> 5) << 1); 00115 } 00116 00117 bool SubstituteKCacheBank(MachineInstr *MI, 00118 std::vector<std::pair<unsigned, unsigned> > &CachedConsts, 00119 bool UpdateInstr = true) const { 00120 std::vector<std::pair<unsigned, unsigned> > UsedKCache; 00121 00122 if (!TII->isALUInstr(MI->getOpcode()) && MI->getOpcode() != AMDGPU::DOT_4) 00123 return true; 00124 00125 const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Consts = 00126 TII->getSrcs(MI); 00127 assert((TII->isALUInstr(MI->getOpcode()) || 00128 MI->getOpcode() == AMDGPU::DOT_4) && "Can't assign Const"); 00129 for (unsigned i = 0, n = Consts.size(); i < n; ++i) { 00130 if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) 00131 continue; 00132 unsigned Sel = Consts[i].second; 00133 unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; 00134 unsigned KCacheIndex = Index * 4 + Chan; 00135 const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel); 00136 if (CachedConsts.empty()) { 00137 CachedConsts.push_back(BankLine); 00138 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); 00139 continue; 00140 } 00141 if (CachedConsts[0] == BankLine) { 00142 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); 00143 continue; 00144 } 00145 if (CachedConsts.size() == 1) { 00146 CachedConsts.push_back(BankLine); 00147 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); 00148 continue; 00149 } 00150 if (CachedConsts[1] == BankLine) { 00151 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); 00152 continue; 00153 } 00154 return false; 00155 } 00156 00157 if (!UpdateInstr) 00158 return true; 00159 00160 for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) { 00161 if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) 00162 continue; 00163 switch(UsedKCache[j].first) { 00164 case 0: 00165 Consts[i].first->setReg( 00166 AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second)); 00167 break; 00168 case 1: 00169 Consts[i].first->setReg( 00170 AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second)); 00171 break; 00172 default: 00173 llvm_unreachable("Wrong Cache Line"); 00174 } 00175 j++; 00176 } 00177 return true; 00178 } 00179 00180 bool canClauseLocalKillFitInClause( 00181 unsigned AluInstCount, 00182 std::vector<std::pair<unsigned, unsigned> > KCacheBanks, 00183 MachineBasicBlock::iterator Def, 00184 MachineBasicBlock::iterator BBEnd) { 00185 const R600RegisterInfo &TRI = TII->getRegisterInfo(); 00186 for (MachineInstr::const_mop_iterator 00187 MOI = Def->operands_begin(), 00188 MOE = Def->operands_end(); MOI != MOE; ++MOI) { 00189 if (!MOI->isReg() || !MOI->isDef() || 00190 TRI.isPhysRegLiveAcrossClauses(MOI->getReg())) 00191 continue; 00192 00193 // Def defines a clause local register, so check that its use will fit 00194 // in the clause. 00195 unsigned LastUseCount = 0; 00196 for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) { 00197 AluInstCount += OccupiedDwords(UseI); 00198 // Make sure we won't need to end the clause due to KCache limitations. 00199 if (!SubstituteKCacheBank(UseI, KCacheBanks, false)) 00200 return false; 00201 00202 // We have reached the maximum instruction limit before finding the 00203 // use that kills this register, so we cannot use this def in the 00204 // current clause. 00205 if (AluInstCount >= TII->getMaxAlusPerClause()) 00206 return false; 00207 00208 // Register kill flags have been cleared by the time we get to this 00209 // pass, but it is safe to assume that all uses of this register 00210 // occur in the same basic block as its definition, because 00211 // it is illegal for the scheduler to schedule them in 00212 // different blocks. 00213 if (UseI->findRegisterUseOperandIdx(MOI->getReg())) 00214 LastUseCount = AluInstCount; 00215 00216 if (UseI != Def && UseI->findRegisterDefOperandIdx(MOI->getReg()) != -1) 00217 break; 00218 } 00219 if (LastUseCount) 00220 return LastUseCount <= TII->getMaxAlusPerClause(); 00221 llvm_unreachable("Clause local register live at end of clause."); 00222 } 00223 return true; 00224 } 00225 00226 MachineBasicBlock::iterator 00227 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { 00228 MachineBasicBlock::iterator ClauseHead = I; 00229 std::vector<std::pair<unsigned, unsigned> > KCacheBanks; 00230 bool PushBeforeModifier = false; 00231 unsigned AluInstCount = 0; 00232 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 00233 if (IsTrivialInst(I)) 00234 continue; 00235 if (!isALU(I)) 00236 break; 00237 if (AluInstCount > TII->getMaxAlusPerClause()) 00238 break; 00239 if (I->getOpcode() == AMDGPU::PRED_X) { 00240 // We put PRED_X in its own clause to ensure that ifcvt won't create 00241 // clauses with more than 128 insts. 00242 // IfCvt is indeed checking that "then" and "else" branches of an if 00243 // statement have less than ~60 insts thus converted clauses can't be 00244 // bigger than ~121 insts (predicate setter needs to be in the same 00245 // clause as predicated alus). 00246 if (AluInstCount > 0) 00247 break; 00248 if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH) 00249 PushBeforeModifier = true; 00250 AluInstCount ++; 00251 continue; 00252 } 00253 // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as: 00254 // 00255 // * KILL or INTERP instructions 00256 // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits 00257 // * Uses waterfalling (i.e. INDEX_MODE = AR.X) 00258 // 00259 // XXX: These checks have not been implemented yet. 00260 if (TII->mustBeLastInClause(I->getOpcode())) { 00261 I++; 00262 break; 00263 } 00264 00265 // If this instruction defines a clause local register, make sure 00266 // its use can fit in this clause. 00267 if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E)) 00268 break; 00269 00270 if (!SubstituteKCacheBank(I, KCacheBanks)) 00271 break; 00272 AluInstCount += OccupiedDwords(I); 00273 } 00274 unsigned Opcode = PushBeforeModifier ? 00275 AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU; 00276 BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) 00277 // We don't use the ADDR field until R600ControlFlowFinalizer pass, where 00278 // it is safe to assume it is 0. However if we always put 0 here, the ifcvt 00279 // pass may assume that identical ALU clause starter at the beginning of a 00280 // true and false branch can be factorized which is not the case. 00281 .addImm(Address++) // ADDR 00282 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0 00283 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1 00284 .addImm(KCacheBanks.empty()?0:2) // KM0 00285 .addImm((KCacheBanks.size() < 2)?0:2) // KM1 00286 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0 00287 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1 00288 .addImm(AluInstCount) // COUNT 00289 .addImm(1); // Enabled 00290 return I; 00291 } 00292 00293 public: 00294 static char ID; 00295 R600EmitClauseMarkers() : MachineFunctionPass(ID), TII(nullptr), Address(0) { 00296 00297 initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry()); 00298 } 00299 00300 bool runOnMachineFunction(MachineFunction &MF) override { 00301 TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo()); 00302 00303 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 00304 BB != BB_E; ++BB) { 00305 MachineBasicBlock &MBB = *BB; 00306 MachineBasicBlock::iterator I = MBB.begin(); 00307 if (I->getOpcode() == AMDGPU::CF_ALU) 00308 continue; // BB was already parsed 00309 for (MachineBasicBlock::iterator E = MBB.end(); I != E;) { 00310 if (isALU(I)) 00311 I = MakeALUClause(MBB, I); 00312 else 00313 ++I; 00314 } 00315 } 00316 return false; 00317 } 00318 00319 const char *getPassName() const override { 00320 return "R600 Emit Clause Markers Pass"; 00321 } 00322 }; 00323 00324 char R600EmitClauseMarkers::ID = 0; 00325 00326 } // end anonymous namespace 00327 00328 INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers", 00329 "R600 Emit Clause Markters", false, false) 00330 INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers", 00331 "R600 Emit Clause Markters", false, false) 00332 00333 llvm::FunctionPass *llvm::createR600EmitClauseMarkers() { 00334 return new R600EmitClauseMarkers(); 00335 } 00336