LLVM API Documentation
//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their address on the fly; it also sets the STACK_SIZE info.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <set>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "r600cf"

namespace {

struct CFStack {

  enum StackItem {
    ENTRY = 0,
    SUB_ENTRY = 1,
    FIRST_NON_WQM_PUSH = 2,
    FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
  };

  const AMDGPUSubtarget &ST;
  std::vector<StackItem> BranchStack;
  std::vector<StackItem> LoopStack;
  unsigned MaxStackSize;
  unsigned CurrentEntries;
  unsigned CurrentSubEntries;

  CFStack(const AMDGPUSubtarget &st, unsigned ShaderType) : ST(st),
      // We need to reserve a stack entry for CALL_FS in vertex shaders.
      MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
      CurrentEntries(0), CurrentSubEntries(0) { }

  unsigned getLoopDepth();
  bool branchStackContains(CFStack::StackItem);
  bool requiresWorkAroundForInst(unsigned Opcode);
  unsigned getSubEntrySize(CFStack::StackItem Item);
  void updateMaxStackSize();
  void pushBranch(unsigned Opcode, bool isWQM = false);
  void pushLoop();
  void popBranch();
  void popLoop();
};

unsigned CFStack::getLoopDepth() {
  return LoopStack.size();
}

bool CFStack::branchStackContains(CFStack::StackItem Item) {
  for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
       E = BranchStack.end(); I != E; ++I) {
    if (*I == Item)
      return true;
  }
  return false;
}

bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
  if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() &&
      getLoopDepth() > 1)
    return true;

  if (!ST.hasCFAluBug())
    return false;

  switch (Opcode) {
  default: return false;
  case AMDGPU::CF_ALU_PUSH_BEFORE:
  case AMDGPU::CF_ALU_ELSE_AFTER:
  case AMDGPU::CF_ALU_BREAK:
  case AMDGPU::CF_ALU_CONTINUE:
    if (CurrentSubEntries == 0)
      return false;
    if (ST.getWavefrontSize() == 64) {
      // We are being conservative here. We only require this work-around if
      // CurrentSubEntries > 3 &&
      // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
      //
      // We have to be conservative, because we don't know for certain that
      // our stack allocation algorithm for Evergreen/NI is correct. Applying
      // this work-around when CurrentSubEntries > 3 allows us to over-allocate
      // stack resources without any problems.
      return CurrentSubEntries > 3;
    } else {
      assert(ST.getWavefrontSize() == 32);
      // We are being conservative here. We only require the work-around if
      // CurrentSubEntries > 7 &&
      // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
      // See the comment on the wavefront size == 64 case for why we are
      // being conservative.
      return CurrentSubEntries > 7;
    }
  }
}
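// For illustration: with a wavefront size of 64, the precise overflow
// condition above would only fire for values like CurrentSubEntries == 7
// (7 % 4 == 3) or CurrentSubEntries == 8 (8 % 4 == 0), but not for
// CurrentSubEntries == 5 (5 % 4 == 1). Being unable to rule out allocation
// errors, the pass conservatively applies the work-around for every
// CurrentSubEntries > 3, trading a little stack space for safety.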
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
  switch (Item) {
  default:
    return 0;
  case CFStack::FIRST_NON_WQM_PUSH:
    assert(!ST.hasCaymanISA());
    if (ST.getGeneration() <= AMDGPUSubtarget::R700) {
      // +1 For the push operation.
      // +2 Extra space required.
      return 3;
    } else {
      // Some documentation says that this is not necessary on Evergreen,
      // but experimentation has shown that we need to allocate 1 extra
      // sub-entry for the first non-WQM push.
      // +1 For the push operation.
      // +1 Extra space required.
      return 2;
    }
  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
    assert(ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    // +1 For the push operation.
    // +1 Extra space required.
    return 2;
  case CFStack::SUB_ENTRY:
    return 1;
  }
}

void CFStack::updateMaxStackSize() {
  unsigned CurrentStackSize = CurrentEntries +
                              (RoundUpToAlignment(CurrentSubEntries, 4) / 4);
  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
}

void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
  CFStack::StackItem Item = CFStack::ENTRY;
  switch (Opcode) {
  case AMDGPU::CF_PUSH_EG:
  case AMDGPU::CF_ALU_PUSH_BEFORE:
    if (!isWQM) {
      if (!ST.hasCaymanISA() &&
          !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
        Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on
                                             // Evergreen/NI. See comment in
                                             // CFStack::getSubEntrySize().
      else if (CurrentEntries > 0 &&
               ST.getGeneration() > AMDGPUSubtarget::EVERGREEN &&
               !ST.hasCaymanISA() &&
               !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
        Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
      else
        Item = CFStack::SUB_ENTRY;
    } else
      Item = CFStack::ENTRY;
    break;
  }
  BranchStack.push_back(Item);
  if (Item == CFStack::ENTRY)
    CurrentEntries++;
  else
    CurrentSubEntries += getSubEntrySize(Item);
  updateMaxStackSize();
}

void CFStack::pushLoop() {
  LoopStack.push_back(CFStack::ENTRY);
  CurrentEntries++;
  updateMaxStackSize();
}

void CFStack::popBranch() {
  CFStack::StackItem Top = BranchStack.back();
  if (Top == CFStack::ENTRY)
    CurrentEntries--;
  else
    CurrentSubEntries -= getSubEntrySize(Top);
  BranchStack.pop_back();
}

void CFStack::popLoop() {
  CurrentEntries--;
  LoopStack.pop_back();
}
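// A minimal sketch of how the bookkeeping above combines (hypothetical
// sequence, assuming an R700 target without the Cayman ISA): four
// sub-entries pack into one full stack entry, which is why
// updateMaxStackSize() rounds CurrentSubEntries up to a multiple of 4.
//
//   CFStack S(ST, ShaderType::VERTEX); // MaxStackSize = 1 (reserved CALL_FS)
//   S.pushLoop();                      // CurrentEntries = 1
//   S.pushBranch(AMDGPU::CF_PUSH_EG);  // FIRST_NON_WQM_PUSH: +3 sub-entries
//   // updateMaxStackSize(): 1 + RoundUpToAlignment(3, 4) / 4 = 2
//   S.popBranch();                     // CurrentSubEntries back to 0
//   S.popLoop();                       // CurrentEntries back to 0; max stays 2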
class R600ControlFlowFinalizer : public MachineFunctionPass {

private:
  typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;

  enum ControlFlowInstruction {
    CF_TC,
    CF_VC,
    CF_CALL_FS,
    CF_WHILE_LOOP,
    CF_END_LOOP,
    CF_LOOP_BREAK,
    CF_LOOP_CONTINUE,
    CF_JUMP,
    CF_ELSE,
    CF_POP,
    CF_END
  };

  static char ID;
  const R600InstrInfo *TII;
  const R600RegisterInfo *TRI;
  unsigned MaxFetchInst;
  const AMDGPUSubtarget &ST;

  bool IsTrivialInst(MachineInstr *MI) const {
    switch (MI->getOpcode()) {
    case AMDGPU::KILL:
    case AMDGPU::RETURN:
      return true;
    default:
      return false;
    }
  }

  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
    unsigned Opcode = 0;
    bool isEg = (ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    switch (CFI) {
    case CF_TC:
      Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
      break;
    case CF_VC:
      Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
      break;
    case CF_CALL_FS:
      Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
      break;
    case CF_WHILE_LOOP:
      Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
      break;
    case CF_END_LOOP:
      Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
      break;
    case CF_LOOP_BREAK:
      Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
      break;
    case CF_LOOP_CONTINUE:
      Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
      break;
    case CF_JUMP:
      Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
      break;
    case CF_ELSE:
      Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
      break;
    case CF_POP:
      Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
      break;
    case CF_END:
      if (ST.hasCaymanISA()) {
        Opcode = AMDGPU::CF_END_CM;
        break;
      }
      Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
      break;
    }
    assert(Opcode && "No opcode selected");
    return TII->get(Opcode);
  }

  bool isCompatibleWithClause(const MachineInstr *MI,
                              std::set<unsigned> &DstRegs) const {
    // Initialize to 0 so the lookup below is well defined even when MI has
    // no register defs or uses.
    unsigned DstMI = 0, SrcMI = 0;
    for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
         E = MI->operands_end(); I != E; ++I) {
      const MachineOperand &MO = *I;
      if (!MO.isReg())
        continue;
      if (MO.isDef()) {
        unsigned Reg = MO.getReg();
        if (AMDGPU::R600_Reg128RegClass.contains(Reg))
          DstMI = Reg;
        else
          DstMI = TRI->getMatchingSuperReg(Reg,
              TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &AMDGPU::R600_Reg128RegClass);
      }
      if (MO.isUse()) {
        unsigned Reg = MO.getReg();
        if (AMDGPU::R600_Reg128RegClass.contains(Reg))
          SrcMI = Reg;
        else
          SrcMI = TRI->getMatchingSuperReg(Reg,
              TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &AMDGPU::R600_Reg128RegClass);
      }
    }
    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
      DstRegs.insert(DstMI);
      return true;
    } else
      return false;
  }
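  // For illustration: isCompatibleWithClause() tracks destinations at the
  // granularity of 128-bit super-registers. A fetch writing T1.X records T1
  // in DstRegs; a later fetch in the same clause that reads any channel of
  // T1 maps its source back to T1, finds it in DstRegs, and is rejected, so
  // MakeFetchClause() below closes the clause before the dependent fetch.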
  ClauseFile
  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineBasicBlock::iterator ClauseHead = I;
    std::vector<MachineInstr *> ClauseContent;
    unsigned AluInstCount = 0;
    bool IsTex = TII->usesTextureCache(ClauseHead);
    std::set<unsigned> DstRegs;
    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
      if (IsTrivialInst(I))
        continue;
      if (AluInstCount >= MaxFetchInst)
        break;
      if ((IsTex && !TII->usesTextureCache(I)) ||
          (!IsTex && !TII->usesVertexCache(I)))
        break;
      if (!isCompatibleWithClause(I, DstRegs))
        break;
      AluInstCount++;
      ClauseContent.push_back(I);
    }
    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
                                getHWInstrDesc(IsTex ? CF_TC : CF_VC))
        .addImm(0) // ADDR
        .addImm(AluInstCount - 1); // COUNT
    return ClauseFile(MIb, ClauseContent);
  }

  void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
    static const unsigned LiteralRegs[] = {
      AMDGPU::ALU_LITERAL_X,
      AMDGPU::ALU_LITERAL_Y,
      AMDGPU::ALU_LITERAL_Z,
      AMDGPU::ALU_LITERAL_W
    };
    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
        TII->getSrcs(MI);
    for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
      if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
        continue;
      int64_t Imm = Srcs[i].second;
      std::vector<int64_t>::iterator It =
          std::find(Lits.begin(), Lits.end(), Imm);
      if (It != Lits.end()) {
        // Reuse the literal slot that already holds this immediate.
        unsigned Index = It - Lits.begin();
        Srcs[i].first->setReg(LiteralRegs[Index]);
      } else {
        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
        Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
        Lits.push_back(Imm);
      }
    }
  }

  MachineBasicBlock::iterator insertLiterals(
      MachineBasicBlock::iterator InsertPos,
      const std::vector<unsigned> &Literals) const {
    MachineBasicBlock *MBB = InsertPos->getParent();
    for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
      unsigned LiteralPair0 = Literals[i];
      unsigned LiteralPair1 = (i + 1 < e) ? Literals[i + 1] : 0;
      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
                          TII->get(AMDGPU::LITERALS))
          .addImm(LiteralPair0)
          .addImm(LiteralPair1);
    }
    return InsertPos;
  }

  ClauseFile
  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineBasicBlock::iterator ClauseHead = I;
    std::vector<MachineInstr *> ClauseContent;
    I++;
    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
      if (IsTrivialInst(I)) {
        ++I;
        continue;
      }
      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
        break;
      std::vector<int64_t> Literals;
      if (I->isBundle()) {
        MachineInstr *DeleteMI = I;
        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
        while (++BI != E && BI->isBundledWithPred()) {
          BI->unbundleFromPred();
          for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
            MachineOperand &MO = BI->getOperand(i);
            if (MO.isReg() && MO.isInternalRead())
              MO.setIsInternalRead(false);
          }
          getLiteral(BI, Literals);
          ClauseContent.push_back(BI);
        }
        I = BI;
        DeleteMI->eraseFromParent();
      } else {
        getLiteral(I, Literals);
        ClauseContent.push_back(I);
        I++;
      }
      for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
        unsigned literal0 = Literals[i];
        unsigned literal1 = (i + 1 < e) ? Literals[i + 1] : 0;
        MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
                                      TII->get(AMDGPU::LITERALS))
            .addImm(literal0)
            .addImm(literal1);
        ClauseContent.push_back(MILit);
      }
    }
    assert(ClauseContent.size() < 128 && "ALU clause is too big");
    ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
    return ClauseFile(ClauseHead, ClauseContent);
  }
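  // For illustration: each LITERALS pseudo instruction carries a pair of
  // 32-bit immediates, so an instruction group referencing the literals
  // { 1.0f, 2.0f, 3.0f } is followed by two LITERALS nodes, (1.0f, 2.0f)
  // and (3.0f, 0), with an odd tail padded with 0. getLiteral() rewrites
  // each ALU_LITERAL_X source to the X/Y/Z/W slot its immediate occupies.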
  void
  EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
                  unsigned &CfCount) {
    CounterPropagateAddr(Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
        .addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
      BB->splice(InsertPos, BB, Clause.second[i]);
    }
    CfCount += 2 * Clause.second.size();
  }

  void
  EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
                unsigned &CfCount) {
    Clause.first->getOperand(0).setImm(0);
    CounterPropagateAddr(Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
        .addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
      BB->splice(InsertPos, BB, Clause.second[i]);
    }
    CfCount += Clause.second.size();
  }

  void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
    MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
  }

  void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
      const {
    for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
         It != E; ++It) {
      MachineInstr *MI = *It;
      CounterPropagateAddr(MI, Addr);
    }
  }
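  // For illustration: control flow instructions are first emitted with a
  // placeholder address operand (0 or a small relative offset) and patched
  // once the final position is known. CounterPropagateAddr() adds the
  // resolved address to whatever is already in operand 0, which is how
  // WHILE_LOOP's initial .addImm(1) becomes "loop end + 1" once the
  // matching ENDLOOP is reached.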
public:
  R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
      TII(nullptr), TRI(nullptr),
      ST(tm.getSubtarget<AMDGPUSubtarget>()) {
    MaxFetchInst = ST.getTexVTXClauseSize();
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
    TRI = static_cast<const R600RegisterInfo *>(
        MF.getSubtarget().getRegisterInfo());
    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

    CFStack CFStack(ST, MFI->getShaderType());
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
         ++MB) {
      MachineBasicBlock &MBB = *MB;
      unsigned CfCount = 0;
      std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
      std::vector<MachineInstr *> IfThenElseStack;
      if (MFI->getShaderType() == ShaderType::VERTEX) {
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
                getHWInstrDesc(CF_CALL_FS));
        CfCount++;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;

      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E;) {
        if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
          DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          LastAlu.back() = nullptr;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        if (MI->getOpcode() != AMDGPU::ENDIF)
          LastAlu.back() = nullptr;
        if (MI->getOpcode() == AMDGPU::CF_ALU)
          LastAlu.back() = MI;
        I++;
        bool RequiresWorkAround =
            CFStack.requiresWorkAroundForInst(MI->getOpcode());
        switch (MI->getOpcode()) {
        case AMDGPU::CF_ALU_PUSH_BEFORE:
          if (RequiresWorkAround) {
            DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
            BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                    TII->get(AMDGPU::CF_PUSH_EG))
                .addImm(CfCount + 1)
                .addImm(1);
            MI->setDesc(TII->get(AMDGPU::CF_ALU));
            CfCount++;
            CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
          } else
            CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
          // Fall through: the (possibly rewritten) instruction still heads
          // an ALU clause.
        case AMDGPU::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
        case AMDGPU::WHILELOOP: {
          CFStack.pushLoop();
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_WHILE_LOOP))
              .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(Pair);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::ENDLOOP: {
          CFStack.popLoop();
          std::pair<unsigned, std::set<MachineInstr *> > Pair =
              LoopStack.back();
          LoopStack.pop_back();
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 1);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::IF_PREDICATE_SET: {
          LastAlu.push_back(nullptr);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_JUMP))
              .addImm(0)
              .addImm(0);
          IfThenElseStack.push_back(MIb);
          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::ELSE: {
          MachineInstr *JumpInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(JumpInst, CfCount);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_ELSE))
              .addImm(0)
              .addImm(0);
          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          IfThenElseStack.push_back(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::ENDIF: {
          CFStack.popBranch();
          if (LastAlu.back()) {
            ToPopAfter.push_back(LastAlu.back());
          } else {
            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                        getHWInstrDesc(CF_POP))
                .addImm(CfCount + 1)
                .addImm(1);
            (void)MIb;
            DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
            CfCount++;
          }

          MachineInstr *IfOrElseInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(IfOrElseInst, CfCount);
          IfOrElseInst->getOperand(1).setImm(1);
          LastAlu.pop_back();
          MI->eraseFromParent();
          break;
        }
        case AMDGPU::BREAK: {
          CfCount++;
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_LOOP_BREAK))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          break;
        }
        case AMDGPU::CONTINUE: {
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_LOOP_CONTINUE))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::RETURN: {
          // Grab the debug location before erasing MI; it is still needed
          // for the PAD instruction below.
          DebugLoc DL = MBB.findDebugLoc(MI);
          BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
          CfCount++;
          MI->eraseFromParent();
          if (CfCount % 2) {
            BuildMI(MBB, I, DL, TII->get(AMDGPU::PAD));
            CfCount++;
          }
          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
            EmitFetchClause(I, FetchClauses[i], CfCount);
          for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
            EmitALUClause(I, AluClauses[i], CfCount);
          // Don't fall through to the export check: MI was erased above.
          break;
        }
        default:
          if (TII->isExport(MI->getOpcode())) {
            DEBUG(dbgs() << CfCount << ":"; MI->dump(););
            CfCount++;
          }
          break;
        }
      }
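      // For illustration: ENDIF avoids an explicit POP whenever the
      // preceding control-flow instruction was a CF_ALU. That CF_ALU is
      // queued in ToPopAfter and rewritten below into CF_ALU_POP_AFTER,
      // which performs the pop as a side effect of the clause and saves
      // one CF slot.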
      for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
        MachineInstr *Alu = ToPopAfter[i];
        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
                TII->get(AMDGPU::CF_ALU_POP_AFTER))
            .addImm(Alu->getOperand(0).getImm())
            .addImm(Alu->getOperand(1).getImm())
            .addImm(Alu->getOperand(2).getImm())
            .addImm(Alu->getOperand(3).getImm())
            .addImm(Alu->getOperand(4).getImm())
            .addImm(Alu->getOperand(5).getImm())
            .addImm(Alu->getOperand(6).getImm())
            .addImm(Alu->getOperand(7).getImm())
            .addImm(Alu->getOperand(8).getImm());
        Alu->eraseFromParent();
      }
      MFI->StackSize = CFStack.MaxStackSize;
    }

    return false;
  }

  const char *getPassName() const override {
    return "R600 Control Flow Finalizer Pass";
  }
};

char R600ControlFlowFinalizer::ID = 0;

} // end anonymous namespace

llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
  return new R600ControlFlowFinalizer(TM);
}
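// A minimal usage sketch (assumption: the exact registration point lives in
// the AMDGPU target machine setup, which is not part of this file): the pass
// is created late in the codegen pipeline, after control flow pseudos and
// clauses have been formed.
//
//   // Hypothetical pass-config snippet:
//   void AMDGPUPassConfig::addPreEmitPass() {
//     addPass(createR600ControlFlowFinalizer(*TM));
//   }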