LLVM API Documentation
00001 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 /// \file 00009 //==-----------------------------------------------------------------------===// 00010 00011 #include "AMDGPU.h" 00012 #include "AMDGPUInstrInfo.h" 00013 #include "R600InstrInfo.h" 00014 #include "AMDGPUSubtarget.h" 00015 #include "llvm/ADT/DepthFirstIterator.h" 00016 #include "llvm/ADT/SCCIterator.h" 00017 #include "llvm/ADT/SmallVector.h" 00018 #include "llvm/ADT/Statistic.h" 00019 #include "llvm/CodeGen/MachineDominators.h" 00020 #include "llvm/CodeGen/MachineFunction.h" 00021 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 00022 #include "llvm/CodeGen/MachineFunctionPass.h" 00023 #include "llvm/CodeGen/MachineInstrBuilder.h" 00024 #include "llvm/CodeGen/MachineJumpTableInfo.h" 00025 #include "llvm/CodeGen/MachineLoopInfo.h" 00026 #include "llvm/CodeGen/MachinePostDominators.h" 00027 #include "llvm/CodeGen/MachineRegisterInfo.h" 00028 #include "llvm/IR/Dominators.h" 00029 #include "llvm/Support/Debug.h" 00030 #include "llvm/Support/raw_ostream.h" 00031 #include "llvm/Target/TargetInstrInfo.h" 00032 #include "llvm/Target/TargetMachine.h" 00033 00034 using namespace llvm; 00035 00036 #define DEBUG_TYPE "structcfg" 00037 00038 #define DEFAULT_VEC_SLOTS 8 00039 00040 // TODO: move-begin. 00041 00042 //===----------------------------------------------------------------------===// 00043 // 00044 // Statistics for CFGStructurizer. 00045 // 00046 //===----------------------------------------------------------------------===// 00047 00048 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " 00049 "matched"); 00050 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " 00051 "matched"); 00052 STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " 00053 "pattern matched"); 00054 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); 00055 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); 00056 00057 namespace llvm { 00058 void initializeAMDGPUCFGStructurizerPass(PassRegistry&); 00059 } 00060 00061 //===----------------------------------------------------------------------===// 00062 // 00063 // Miscellaneous utility for CFGStructurizer. 00064 // 00065 //===----------------------------------------------------------------------===// 00066 namespace { 00067 #define SHOWNEWINSTR(i) \ 00068 DEBUG(dbgs() << "New instr: " << *i << "\n"); 00069 00070 #define SHOWNEWBLK(b, msg) \ 00071 DEBUG( \ 00072 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 00073 dbgs() << "\n"; \ 00074 ); 00075 00076 #define SHOWBLK_DETAIL(b, msg) \ 00077 DEBUG( \ 00078 if (b) { \ 00079 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 00080 b->print(dbgs()); \ 00081 dbgs() << "\n"; \ 00082 } \ 00083 ); 00084 00085 #define INVALIDSCCNUM -1 00086 00087 template<class NodeT> 00088 void ReverseVector(SmallVectorImpl<NodeT *> &Src) { 00089 size_t sz = Src.size(); 00090 for (size_t i = 0; i < sz/2; ++i) { 00091 NodeT *t = Src[i]; 00092 Src[i] = Src[sz - i - 1]; 00093 Src[sz - i - 1] = t; 00094 } 00095 } 00096 00097 } // end anonymous namespace 00098 00099 //===----------------------------------------------------------------------===// 00100 // 00101 // supporting data structure for CFGStructurizer 00102 // 00103 //===----------------------------------------------------------------------===// 00104 00105 00106 namespace { 00107 00108 class BlockInformation { 00109 public: 00110 bool IsRetired; 00111 int SccNum; 00112 BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {} 00113 }; 00114 00115 } // end anonymous namespace 00116 00117 //===----------------------------------------------------------------------===// 00118 // 00119 // CFGStructurizer 00120 // 00121 //===----------------------------------------------------------------------===// 00122 00123 namespace { 00124 class AMDGPUCFGStructurizer : public MachineFunctionPass { 00125 public: 00126 typedef SmallVector<MachineBasicBlock *, 32> MBBVector; 00127 typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap; 00128 typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap; 00129 00130 enum PathToKind { 00131 Not_SinglePath = 0, 00132 SinglePath_InPath = 1, 00133 SinglePath_NotInPath = 2 00134 }; 00135 00136 static char ID; 00137 00138 AMDGPUCFGStructurizer() : 00139 MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) { 00140 initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry()); 00141 } 00142 00143 const char *getPassName() const override { 00144 return "AMDGPU Control Flow Graph structurizer Pass"; 00145 } 00146 00147 void getAnalysisUsage(AnalysisUsage &AU) const override { 00148 AU.addPreserved<MachineFunctionAnalysis>(); 00149 AU.addRequired<MachineFunctionAnalysis>(); 00150 AU.addRequired<MachineDominatorTree>(); 00151 AU.addRequired<MachinePostDominatorTree>(); 00152 AU.addRequired<MachineLoopInfo>(); 00153 } 00154 00155 /// Perform the CFG structurization 00156 bool run(); 00157 00158 /// Perform the CFG preparation 00159 /// This step will remove every unconditionnal/dead jump instructions and make 00160 /// sure all loops have an exit block 00161 bool prepare(); 00162 00163 bool runOnMachineFunction(MachineFunction &MF) override { 00164 TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo()); 00165 TRI = &TII->getRegisterInfo(); 00166 DEBUG(MF.dump();); 00167 OrderedBlks.clear(); 00168 FuncRep = &MF; 00169 MLI = &getAnalysis<MachineLoopInfo>(); 00170 DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); 00171 MDT = &getAnalysis<MachineDominatorTree>(); 00172 DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr);); 00173 PDT = &getAnalysis<MachinePostDominatorTree>(); 00174 DEBUG(PDT->print(dbgs());); 00175 prepare(); 00176 run(); 00177 DEBUG(MF.dump();); 00178 return true; 00179 } 00180 00181 protected: 00182 MachineDominatorTree *MDT; 00183 MachinePostDominatorTree *PDT; 00184 MachineLoopInfo *MLI; 00185 const R600InstrInfo *TII; 00186 const AMDGPURegisterInfo *TRI; 00187 00188 // PRINT FUNCTIONS 00189 /// Print the ordered Blocks. 00190 void printOrderedBlocks() const { 00191 size_t i = 0; 00192 for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), 00193 iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { 00194 dbgs() << "BB" << (*iterBlk)->getNumber(); 00195 dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; 00196 if (i != 0 && i % 10 == 0) { 00197 dbgs() << "\n"; 00198 } else { 00199 dbgs() << " "; 00200 } 00201 } 00202 } 00203 static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { 00204 for (MachineLoop::iterator iter = LoopInfo.begin(), 00205 iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) { 00206 (*iter)->print(dbgs(), 0); 00207 } 00208 } 00209 00210 // UTILITY FUNCTIONS 00211 int getSCCNum(MachineBasicBlock *MBB) const; 00212 MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; 00213 bool hasBackEdge(MachineBasicBlock *MBB) const; 00214 static unsigned getLoopDepth(MachineLoop *LoopRep); 00215 bool isRetiredBlock(MachineBasicBlock *MBB) const; 00216 bool isActiveLoophead(MachineBasicBlock *MBB) const; 00217 PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 00218 bool AllowSideEntry = true) const; 00219 int countActiveBlock(MBBVector::const_iterator It, 00220 MBBVector::const_iterator E) const; 00221 bool needMigrateBlock(MachineBasicBlock *MBB) const; 00222 00223 // Utility Functions 00224 void reversePredicateSetter(MachineBasicBlock::iterator I); 00225 /// Compute the reversed DFS post order of Blocks 00226 void orderBlocks(MachineFunction *MF); 00227 00228 // Function originally from CFGStructTraits 00229 void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, 00230 DebugLoc DL = DebugLoc()); 00231 MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, 00232 DebugLoc DL = DebugLoc()); 00233 MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); 00234 void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, 00235 DebugLoc DL); 00236 void insertCondBranchBefore(MachineBasicBlock *MBB, 00237 MachineBasicBlock::iterator I, int NewOpcode, int RegNum, 00238 DebugLoc DL); 00239 void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum); 00240 static int getBranchNzeroOpcode(int OldOpcode); 00241 static int getBranchZeroOpcode(int OldOpcode); 00242 static int getContinueNzeroOpcode(int OldOpcode); 00243 static int getContinueZeroOpcode(int OldOpcode); 00244 static MachineBasicBlock *getTrueBranch(MachineInstr *MI); 00245 static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); 00246 static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, 00247 MachineInstr *MI); 00248 static bool isCondBranch(MachineInstr *MI); 00249 static bool isUncondBranch(MachineInstr *MI); 00250 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); 00251 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); 00252 /// The correct naming for this is getPossibleLoopendBlockBranchInstr. 00253 /// 00254 /// BB with backward-edge could have move instructions after the branch 00255 /// instruction. Such move instruction "belong to" the loop backward-edge. 00256 MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); 00257 static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); 00258 static MachineInstr *getContinueInstr(MachineBasicBlock *MBB); 00259 static bool isReturnBlock(MachineBasicBlock *MBB); 00260 static void cloneSuccessorList(MachineBasicBlock *DstMBB, 00261 MachineBasicBlock *SrcMBB) ; 00262 static MachineBasicBlock *clone(MachineBasicBlock *MBB); 00263 /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose 00264 /// because the AMDGPU instruction is not recognized as terminator fix this 00265 /// and retire this routine 00266 void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, 00267 MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); 00268 static void wrapup(MachineBasicBlock *MBB); 00269 00270 00271 int patternMatch(MachineBasicBlock *MBB); 00272 int patternMatchGroup(MachineBasicBlock *MBB); 00273 int serialPatternMatch(MachineBasicBlock *MBB); 00274 int ifPatternMatch(MachineBasicBlock *MBB); 00275 int loopendPatternMatch(); 00276 int mergeLoop(MachineLoop *LoopRep); 00277 int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader); 00278 00279 void handleLoopcontBlock(MachineBasicBlock *ContingMBB, 00280 MachineLoop *ContingLoop, MachineBasicBlock *ContMBB, 00281 MachineLoop *ContLoop); 00282 /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in 00283 /// the same loop with LoopLandInfo without explicitly keeping track of 00284 /// loopContBlks and loopBreakBlks, this is a method to get the information. 00285 bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB, 00286 MachineBasicBlock *Src2MBB); 00287 int handleJumpintoIf(MachineBasicBlock *HeadMBB, 00288 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 00289 int handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 00290 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 00291 int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 00292 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 00293 MachineBasicBlock **LandMBBPtr); 00294 void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 00295 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 00296 MachineBasicBlock *LandMBB, bool Detail = false); 00297 int cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 00298 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB); 00299 void mergeSerialBlock(MachineBasicBlock *DstMBB, 00300 MachineBasicBlock *SrcMBB); 00301 00302 void mergeIfthenelseBlock(MachineInstr *BranchMI, 00303 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 00304 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB); 00305 void mergeLooplandBlock(MachineBasicBlock *DstMBB, 00306 MachineBasicBlock *LandMBB); 00307 void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 00308 MachineBasicBlock *LandMBB); 00309 void settleLoopcontBlock(MachineBasicBlock *ContingMBB, 00310 MachineBasicBlock *ContMBB); 00311 /// normalizeInfiniteLoopExit change 00312 /// B1: 00313 /// uncond_br LoopHeader 00314 /// 00315 /// to 00316 /// B1: 00317 /// cond_br 1 LoopHeader dummyExit 00318 /// and return the newly added dummy exit block 00319 MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); 00320 void removeUnconditionalBranch(MachineBasicBlock *MBB); 00321 /// Remove duplicate branches instructions in a block. 00322 /// For instance 00323 /// B0: 00324 /// cond_br X B1 B2 00325 /// cond_br X B1 B2 00326 /// is transformed to 00327 /// B0: 00328 /// cond_br X B1 B2 00329 void removeRedundantConditionalBranch(MachineBasicBlock *MBB); 00330 void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB); 00331 void removeSuccessor(MachineBasicBlock *MBB); 00332 MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, 00333 MachineBasicBlock *PredMBB); 00334 void migrateInstruction(MachineBasicBlock *SrcMBB, 00335 MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); 00336 void recordSccnum(MachineBasicBlock *MBB, int SCCNum); 00337 void retireBlock(MachineBasicBlock *MBB); 00338 void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = nullptr); 00339 00340 MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&); 00341 /// This is work around solution for findNearestCommonDominator not available 00342 /// to post dom a proper fix should go to Dominators.h. 00343 MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1, 00344 MachineBasicBlock *MBB2); 00345 00346 private: 00347 MBBInfoMap BlockInfoMap; 00348 LoopLandInfoMap LLInfoMap; 00349 std::map<MachineLoop *, bool> Visited; 00350 MachineFunction *FuncRep; 00351 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks; 00352 }; 00353 00354 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { 00355 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 00356 if (It == BlockInfoMap.end()) 00357 return INVALIDSCCNUM; 00358 return (*It).second->SccNum; 00359 } 00360 00361 MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) 00362 const { 00363 LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); 00364 if (It == LLInfoMap.end()) 00365 return nullptr; 00366 return (*It).second; 00367 } 00368 00369 bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { 00370 MachineLoop *LoopRep = MLI->getLoopFor(MBB); 00371 if (!LoopRep) 00372 return false; 00373 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 00374 return MBB->isSuccessor(LoopHeader); 00375 } 00376 00377 unsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) { 00378 return LoopRep ? LoopRep->getLoopDepth() : 0; 00379 } 00380 00381 bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { 00382 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 00383 if (It == BlockInfoMap.end()) 00384 return false; 00385 return (*It).second->IsRetired; 00386 } 00387 00388 bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { 00389 MachineLoop *LoopRep = MLI->getLoopFor(MBB); 00390 while (LoopRep && LoopRep->getHeader() == MBB) { 00391 MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); 00392 if(!LoopLand) 00393 return true; 00394 if (!isRetiredBlock(LoopLand)) 00395 return true; 00396 LoopRep = LoopRep->getParentLoop(); 00397 } 00398 return false; 00399 } 00400 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo( 00401 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 00402 bool AllowSideEntry) const { 00403 assert(DstMBB); 00404 if (SrcMBB == DstMBB) 00405 return SinglePath_InPath; 00406 while (SrcMBB && SrcMBB->succ_size() == 1) { 00407 SrcMBB = *SrcMBB->succ_begin(); 00408 if (SrcMBB == DstMBB) 00409 return SinglePath_InPath; 00410 if (!AllowSideEntry && SrcMBB->pred_size() > 1) 00411 return Not_SinglePath; 00412 } 00413 if (SrcMBB && SrcMBB->succ_size()==0) 00414 return SinglePath_NotInPath; 00415 return Not_SinglePath; 00416 } 00417 00418 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, 00419 MBBVector::const_iterator E) const { 00420 int Count = 0; 00421 while (It != E) { 00422 if (!isRetiredBlock(*It)) 00423 ++Count; 00424 ++It; 00425 } 00426 return Count; 00427 } 00428 00429 bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { 00430 unsigned BlockSizeThreshold = 30; 00431 unsigned CloneInstrThreshold = 100; 00432 bool MultiplePreds = MBB && (MBB->pred_size() > 1); 00433 00434 if(!MultiplePreds) 00435 return false; 00436 unsigned BlkSize = MBB->size(); 00437 return ((BlkSize > BlockSizeThreshold) && 00438 (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); 00439 } 00440 00441 void AMDGPUCFGStructurizer::reversePredicateSetter( 00442 MachineBasicBlock::iterator I) { 00443 while (I--) { 00444 if (I->getOpcode() == AMDGPU::PRED_X) { 00445 switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) { 00446 case OPCODE_IS_ZERO_INT: 00447 static_cast<MachineInstr *>(I)->getOperand(2) 00448 .setImm(OPCODE_IS_NOT_ZERO_INT); 00449 return; 00450 case OPCODE_IS_NOT_ZERO_INT: 00451 static_cast<MachineInstr *>(I)->getOperand(2) 00452 .setImm(OPCODE_IS_ZERO_INT); 00453 return; 00454 case OPCODE_IS_ZERO: 00455 static_cast<MachineInstr *>(I)->getOperand(2) 00456 .setImm(OPCODE_IS_NOT_ZERO); 00457 return; 00458 case OPCODE_IS_NOT_ZERO: 00459 static_cast<MachineInstr *>(I)->getOperand(2) 00460 .setImm(OPCODE_IS_ZERO); 00461 return; 00462 default: 00463 llvm_unreachable("PRED_X Opcode invalid!"); 00464 } 00465 } 00466 } 00467 } 00468 00469 void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, 00470 int NewOpcode, DebugLoc DL) { 00471 MachineInstr *MI = MBB->getParent() 00472 ->CreateMachineInstr(TII->get(NewOpcode), DL); 00473 MBB->push_back(MI); 00474 //assume the instruction doesn't take any reg operand ... 00475 SHOWNEWINSTR(MI); 00476 } 00477 00478 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, 00479 int NewOpcode, DebugLoc DL) { 00480 MachineInstr *MI = 00481 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); 00482 if (MBB->begin() != MBB->end()) 00483 MBB->insert(MBB->begin(), MI); 00484 else 00485 MBB->push_back(MI); 00486 SHOWNEWINSTR(MI); 00487 return MI; 00488 } 00489 00490 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore( 00491 MachineBasicBlock::iterator I, int NewOpcode) { 00492 MachineInstr *OldMI = &(*I); 00493 MachineBasicBlock *MBB = OldMI->getParent(); 00494 MachineInstr *NewMBB = 00495 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); 00496 MBB->insert(I, NewMBB); 00497 //assume the instruction doesn't take any reg operand ... 00498 SHOWNEWINSTR(NewMBB); 00499 return NewMBB; 00500 } 00501 00502 void AMDGPUCFGStructurizer::insertCondBranchBefore( 00503 MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) { 00504 MachineInstr *OldMI = &(*I); 00505 MachineBasicBlock *MBB = OldMI->getParent(); 00506 MachineFunction *MF = MBB->getParent(); 00507 MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 00508 MBB->insert(I, NewMI); 00509 MachineInstrBuilder MIB(*MF, NewMI); 00510 MIB.addReg(OldMI->getOperand(1).getReg(), false); 00511 SHOWNEWINSTR(NewMI); 00512 //erase later oldInstr->eraseFromParent(); 00513 } 00514 00515 void AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk, 00516 MachineBasicBlock::iterator I, int NewOpcode, int RegNum, 00517 DebugLoc DL) { 00518 MachineFunction *MF = blk->getParent(); 00519 MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 00520 //insert before 00521 blk->insert(I, NewInstr); 00522 MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); 00523 SHOWNEWINSTR(NewInstr); 00524 } 00525 00526 void AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB, 00527 int NewOpcode, int RegNum) { 00528 MachineFunction *MF = MBB->getParent(); 00529 MachineInstr *NewInstr = 00530 MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); 00531 MBB->push_back(NewInstr); 00532 MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); 00533 SHOWNEWINSTR(NewInstr); 00534 } 00535 00536 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { 00537 switch(OldOpcode) { 00538 case AMDGPU::JUMP_COND: 00539 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; 00540 case AMDGPU::BRANCH_COND_i32: 00541 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; 00542 default: llvm_unreachable("internal error"); 00543 } 00544 return -1; 00545 } 00546 00547 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { 00548 switch(OldOpcode) { 00549 case AMDGPU::JUMP_COND: 00550 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; 00551 case AMDGPU::BRANCH_COND_i32: 00552 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; 00553 default: llvm_unreachable("internal error"); 00554 } 00555 return -1; 00556 } 00557 00558 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { 00559 switch(OldOpcode) { 00560 case AMDGPU::JUMP_COND: 00561 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; 00562 default: llvm_unreachable("internal error"); 00563 }; 00564 return -1; 00565 } 00566 00567 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { 00568 switch(OldOpcode) { 00569 case AMDGPU::JUMP_COND: 00570 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; 00571 default: llvm_unreachable("internal error"); 00572 } 00573 return -1; 00574 } 00575 00576 MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) { 00577 return MI->getOperand(0).getMBB(); 00578 } 00579 00580 void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI, 00581 MachineBasicBlock *MBB) { 00582 MI->getOperand(0).setMBB(MBB); 00583 } 00584 00585 MachineBasicBlock * 00586 AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, 00587 MachineInstr *MI) { 00588 assert(MBB->succ_size() == 2); 00589 MachineBasicBlock *TrueBranch = getTrueBranch(MI); 00590 MachineBasicBlock::succ_iterator It = MBB->succ_begin(); 00591 MachineBasicBlock::succ_iterator Next = It; 00592 ++Next; 00593 return (*It == TrueBranch) ? *Next : *It; 00594 } 00595 00596 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) { 00597 switch (MI->getOpcode()) { 00598 case AMDGPU::JUMP_COND: 00599 case AMDGPU::BRANCH_COND_i32: 00600 case AMDGPU::BRANCH_COND_f32: return true; 00601 default: 00602 return false; 00603 } 00604 return false; 00605 } 00606 00607 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) { 00608 switch (MI->getOpcode()) { 00609 case AMDGPU::JUMP: 00610 case AMDGPU::BRANCH: 00611 return true; 00612 default: 00613 return false; 00614 } 00615 return false; 00616 } 00617 00618 DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { 00619 //get DebugLoc from the first MachineBasicBlock instruction with debug info 00620 DebugLoc DL; 00621 for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); 00622 ++It) { 00623 MachineInstr *instr = &(*It); 00624 if (instr->getDebugLoc().isUnknown() == false) 00625 DL = instr->getDebugLoc(); 00626 } 00627 return DL; 00628 } 00629 00630 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr( 00631 MachineBasicBlock *MBB) { 00632 MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 00633 MachineInstr *MI = &*It; 00634 if (MI && (isCondBranch(MI) || isUncondBranch(MI))) 00635 return MI; 00636 return nullptr; 00637 } 00638 00639 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr( 00640 MachineBasicBlock *MBB) { 00641 for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); 00642 It != E; ++It) { 00643 // FIXME: Simplify 00644 MachineInstr *MI = &*It; 00645 if (MI) { 00646 if (isCondBranch(MI) || isUncondBranch(MI)) 00647 return MI; 00648 else if (!TII->isMov(MI->getOpcode())) 00649 break; 00650 } 00651 } 00652 return nullptr; 00653 } 00654 00655 MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { 00656 MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 00657 if (It != MBB->rend()) { 00658 MachineInstr *instr = &(*It); 00659 if (instr->getOpcode() == AMDGPU::RETURN) 00660 return instr; 00661 } 00662 return nullptr; 00663 } 00664 00665 MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) { 00666 MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 00667 if (It != MBB->rend()) { 00668 MachineInstr *MI = &(*It); 00669 if (MI->getOpcode() == AMDGPU::CONTINUE) 00670 return MI; 00671 } 00672 return nullptr; 00673 } 00674 00675 bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { 00676 MachineInstr *MI = getReturnInstr(MBB); 00677 bool IsReturn = (MBB->succ_size() == 0); 00678 if (MI) 00679 assert(IsReturn); 00680 else if (IsReturn) 00681 DEBUG( 00682 dbgs() << "BB" << MBB->getNumber() 00683 <<" is return block without RETURN instr\n";); 00684 return IsReturn; 00685 } 00686 00687 void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, 00688 MachineBasicBlock *SrcMBB) { 00689 for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(), 00690 iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It) 00691 DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of 00692 } 00693 00694 MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) { 00695 MachineFunction *Func = MBB->getParent(); 00696 MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); 00697 Func->push_back(NewMBB); //insert to function 00698 for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end(); 00699 It != E; ++It) { 00700 MachineInstr *MI = Func->CloneMachineInstr(It); 00701 NewMBB->push_back(MI); 00702 } 00703 return NewMBB; 00704 } 00705 00706 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith( 00707 MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, 00708 MachineBasicBlock *NewBlk) { 00709 MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB); 00710 if (BranchMI && isCondBranch(BranchMI) && 00711 getTrueBranch(BranchMI) == OldMBB) 00712 setTrueBranch(BranchMI, NewBlk); 00713 } 00714 00715 void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) { 00716 assert((!MBB->getParent()->getJumpTableInfo() 00717 || MBB->getParent()->getJumpTableInfo()->isEmpty()) 00718 && "found a jump table"); 00719 00720 //collect continue right before endloop 00721 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr; 00722 MachineBasicBlock::iterator Pre = MBB->begin(); 00723 MachineBasicBlock::iterator E = MBB->end(); 00724 MachineBasicBlock::iterator It = Pre; 00725 while (It != E) { 00726 if (Pre->getOpcode() == AMDGPU::CONTINUE 00727 && It->getOpcode() == AMDGPU::ENDLOOP) 00728 ContInstr.push_back(Pre); 00729 Pre = It; 00730 ++It; 00731 } 00732 00733 //delete continue right before endloop 00734 for (unsigned i = 0; i < ContInstr.size(); ++i) 00735 ContInstr[i]->eraseFromParent(); 00736 00737 // TODO to fix up jump table so later phase won't be confused. if 00738 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but 00739 // there isn't such an interface yet. alternatively, replace all the other 00740 // blocks in the jump table with the entryBlk //} 00741 00742 } 00743 00744 00745 bool AMDGPUCFGStructurizer::prepare() { 00746 bool Changed = false; 00747 00748 //FIXME: if not reducible flow graph, make it so ??? 00749 00750 DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";); 00751 00752 orderBlocks(FuncRep); 00753 00754 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks; 00755 00756 // Add an ExitBlk to loop that don't have one 00757 for (MachineLoopInfo::iterator It = MLI->begin(), 00758 E = MLI->end(); It != E; ++It) { 00759 MachineLoop *LoopRep = (*It); 00760 MBBVector ExitingMBBs; 00761 LoopRep->getExitingBlocks(ExitingMBBs); 00762 00763 if (ExitingMBBs.size() == 0) { 00764 MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep); 00765 if (DummyExitBlk) 00766 RetBlks.push_back(DummyExitBlk); 00767 } 00768 } 00769 00770 // Remove unconditional branch instr. 00771 // Add dummy exit block iff there are multiple returns. 00772 for (SmallVectorImpl<MachineBasicBlock *>::const_iterator 00773 It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) { 00774 MachineBasicBlock *MBB = *It; 00775 removeUnconditionalBranch(MBB); 00776 removeRedundantConditionalBranch(MBB); 00777 if (isReturnBlock(MBB)) { 00778 RetBlks.push_back(MBB); 00779 } 00780 assert(MBB->succ_size() <= 2); 00781 } 00782 00783 if (RetBlks.size() >= 2) { 00784 addDummyExitBlock(RetBlks); 00785 Changed = true; 00786 } 00787 00788 return Changed; 00789 } 00790 00791 bool AMDGPUCFGStructurizer::run() { 00792 00793 //Assume reducible CFG... 00794 DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n"); 00795 00796 #ifdef STRESSTEST 00797 //Use the worse block ordering to test the algorithm. 00798 ReverseVector(orderedBlks); 00799 #endif 00800 00801 DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); 00802 int NumIter = 0; 00803 bool Finish = false; 00804 MachineBasicBlock *MBB; 00805 bool MakeProgress = false; 00806 int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(), 00807 OrderedBlks.end()); 00808 00809 do { 00810 ++NumIter; 00811 DEBUG( 00812 dbgs() << "numIter = " << NumIter 00813 << ", numRemaintedBlk = " << NumRemainedBlk << "\n"; 00814 ); 00815 00816 SmallVectorImpl<MachineBasicBlock *>::const_iterator It = 00817 OrderedBlks.begin(); 00818 SmallVectorImpl<MachineBasicBlock *>::const_iterator E = 00819 OrderedBlks.end(); 00820 00821 SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter = 00822 It; 00823 MachineBasicBlock *SccBeginMBB = nullptr; 00824 int SccNumBlk = 0; // The number of active blocks, init to a 00825 // maximum possible number. 00826 int SccNumIter; // Number of iteration in this SCC. 00827 00828 while (It != E) { 00829 MBB = *It; 00830 00831 if (!SccBeginMBB) { 00832 SccBeginIter = It; 00833 SccBeginMBB = MBB; 00834 SccNumIter = 0; 00835 SccNumBlk = NumRemainedBlk; // Init to maximum possible number. 00836 DEBUG( 00837 dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); 00838 dbgs() << "\n"; 00839 ); 00840 } 00841 00842 if (!isRetiredBlock(MBB)) 00843 patternMatch(MBB); 00844 00845 ++It; 00846 00847 bool ContNextScc = true; 00848 if (It == E 00849 || getSCCNum(SccBeginMBB) != getSCCNum(*It)) { 00850 // Just finish one scc. 00851 ++SccNumIter; 00852 int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); 00853 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { 00854 DEBUG( 00855 dbgs() << "Can't reduce SCC " << getSCCNum(MBB) 00856 << ", sccNumIter = " << SccNumIter; 00857 dbgs() << "doesn't make any progress\n"; 00858 ); 00859 ContNextScc = true; 00860 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { 00861 SccNumBlk = sccRemainedNumBlk; 00862 It = SccBeginIter; 00863 ContNextScc = false; 00864 DEBUG( 00865 dbgs() << "repeat processing SCC" << getSCCNum(MBB) 00866 << "sccNumIter = " << SccNumIter << '\n'; 00867 ); 00868 } else { 00869 // Finish the current scc. 00870 ContNextScc = true; 00871 } 00872 } else { 00873 // Continue on next component in the current scc. 00874 ContNextScc = false; 00875 } 00876 00877 if (ContNextScc) 00878 SccBeginMBB = nullptr; 00879 } //while, "one iteration" over the function. 00880 00881 MachineBasicBlock *EntryMBB = 00882 GraphTraits<MachineFunction *>::nodes_begin(FuncRep); 00883 if (EntryMBB->succ_size() == 0) { 00884 Finish = true; 00885 DEBUG( 00886 dbgs() << "Reduce to one block\n"; 00887 ); 00888 } else { 00889 int NewnumRemainedBlk 00890 = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); 00891 // consider cloned blocks ?? 00892 if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { 00893 MakeProgress = true; 00894 NumRemainedBlk = NewnumRemainedBlk; 00895 } else { 00896 MakeProgress = false; 00897 DEBUG( 00898 dbgs() << "No progress\n"; 00899 ); 00900 } 00901 } 00902 } while (!Finish && MakeProgress); 00903 00904 // Misc wrap up to maintain the consistency of the Function representation. 00905 wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep)); 00906 00907 // Detach retired Block, release memory. 00908 for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end(); 00909 It != E; ++It) { 00910 if ((*It).second && (*It).second->IsRetired) { 00911 assert(((*It).first)->getNumber() != -1); 00912 DEBUG( 00913 dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n"; 00914 ); 00915 (*It).first->eraseFromParent(); //Remove from the parent Function. 00916 } 00917 delete (*It).second; 00918 } 00919 BlockInfoMap.clear(); 00920 LLInfoMap.clear(); 00921 00922 if (!Finish) { 00923 DEBUG(FuncRep->viewCFG()); 00924 llvm_unreachable("IRREDUCIBLE_CFG"); 00925 } 00926 00927 return true; 00928 } 00929 00930 00931 00932 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) { 00933 int SccNum = 0; 00934 MachineBasicBlock *MBB; 00935 for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd(); 00936 ++It, ++SccNum) { 00937 const std::vector<MachineBasicBlock *> &SccNext = *It; 00938 for (std::vector<MachineBasicBlock *>::const_iterator 00939 blockIter = SccNext.begin(), blockEnd = SccNext.end(); 00940 blockIter != blockEnd; ++blockIter) { 00941 MBB = *blockIter; 00942 OrderedBlks.push_back(MBB); 00943 recordSccnum(MBB, SccNum); 00944 } 00945 } 00946 00947 //walk through all the block in func to check for unreachable 00948 typedef GraphTraits<MachineFunction *> GTM; 00949 MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF); 00950 for (; It != E; ++It) { 00951 MachineBasicBlock *MBB = &(*It); 00952 SccNum = getSCCNum(MBB); 00953 if (SccNum == INVALIDSCCNUM) 00954 dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"; 00955 } 00956 } 00957 00958 int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { 00959 int NumMatch = 0; 00960 int CurMatch; 00961 00962 DEBUG( 00963 dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n"; 00964 ); 00965 00966 while ((CurMatch = patternMatchGroup(MBB)) > 0) 00967 NumMatch += CurMatch; 00968 00969 DEBUG( 00970 dbgs() << "End patternMatch BB" << MBB->getNumber() 00971 << ", numMatch = " << NumMatch << "\n"; 00972 ); 00973 00974 return NumMatch; 00975 } 00976 00977 int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { 00978 int NumMatch = 0; 00979 NumMatch += loopendPatternMatch(); 00980 NumMatch += serialPatternMatch(MBB); 00981 NumMatch += ifPatternMatch(MBB); 00982 return NumMatch; 00983 } 00984 00985 00986 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { 00987 if (MBB->succ_size() != 1) 00988 return 0; 00989 00990 MachineBasicBlock *childBlk = *MBB->succ_begin(); 00991 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) 00992 return 0; 00993 00994 mergeSerialBlock(MBB, childBlk); 00995 ++numSerialPatternMatch; 00996 return 1; 00997 } 00998 00999 int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { 01000 //two edges 01001 if (MBB->succ_size() != 2) 01002 return 0; 01003 if (hasBackEdge(MBB)) 01004 return 0; 01005 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 01006 if (!BranchMI) 01007 return 0; 01008 01009 assert(isCondBranch(BranchMI)); 01010 int NumMatch = 0; 01011 01012 MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI); 01013 NumMatch += serialPatternMatch(TrueMBB); 01014 NumMatch += ifPatternMatch(TrueMBB); 01015 MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI); 01016 NumMatch += serialPatternMatch(FalseMBB); 01017 NumMatch += ifPatternMatch(FalseMBB); 01018 MachineBasicBlock *LandBlk; 01019 int Cloned = 0; 01020 01021 assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); 01022 // TODO: Simplify 01023 if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 01024 && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { 01025 // Diamond pattern 01026 LandBlk = *TrueMBB->succ_begin(); 01027 } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { 01028 // Triangle pattern, false is empty 01029 LandBlk = FalseMBB; 01030 FalseMBB = nullptr; 01031 } else if (FalseMBB->succ_size() == 1 01032 && *FalseMBB->succ_begin() == TrueMBB) { 01033 // Triangle pattern, true is empty 01034 // We reverse the predicate to make a triangle, empty false pattern; 01035 std::swap(TrueMBB, FalseMBB); 01036 reversePredicateSetter(MBB->end()); 01037 LandBlk = FalseMBB; 01038 FalseMBB = nullptr; 01039 } else if (FalseMBB->succ_size() == 1 01040 && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { 01041 LandBlk = *FalseMBB->succ_begin(); 01042 } else if (TrueMBB->succ_size() == 1 01043 && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) { 01044 LandBlk = *TrueMBB->succ_begin(); 01045 } else { 01046 return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB); 01047 } 01048 01049 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the 01050 // new BB created for landBlk==NULL may introduce new challenge to the 01051 // reduction process. 01052 if (LandBlk && 01053 ((TrueMBB && TrueMBB->pred_size() > 1) 01054 || (FalseMBB && FalseMBB->pred_size() > 1))) { 01055 Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk); 01056 } 01057 01058 if (TrueMBB && TrueMBB->pred_size() > 1) { 01059 TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB); 01060 ++Cloned; 01061 } 01062 01063 if (FalseMBB && FalseMBB->pred_size() > 1) { 01064 FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB); 01065 ++Cloned; 01066 } 01067 01068 mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk); 01069 01070 ++numIfPatternMatch; 01071 01072 numClonedBlock += Cloned; 01073 01074 return 1 + Cloned + NumMatch; 01075 } 01076 01077 int AMDGPUCFGStructurizer::loopendPatternMatch() { 01078 std::vector<MachineLoop *> NestedLoops; 01079 for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); It != E; 01080 ++It) 01081 for (MachineLoop *ML : depth_first(*It)) 01082 NestedLoops.push_back(ML); 01083 01084 if (NestedLoops.size() == 0) 01085 return 0; 01086 01087 // Process nested loop outside->inside, so "continue" to a outside loop won't 01088 // be mistaken as "break" of the current loop. 01089 int Num = 0; 01090 for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(), 01091 E = NestedLoops.rend(); It != E; ++It) { 01092 MachineLoop *ExaminedLoop = *It; 01093 if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) 01094 continue; 01095 DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); 01096 int NumBreak = mergeLoop(ExaminedLoop); 01097 if (NumBreak == -1) 01098 break; 01099 Num += NumBreak; 01100 } 01101 return Num; 01102 } 01103 01104 int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { 01105 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 01106 MBBVector ExitingMBBs; 01107 LoopRep->getExitingBlocks(ExitingMBBs); 01108 assert(!ExitingMBBs.empty() && "Infinite Loop not supported"); 01109 DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";); 01110 // We assume a single ExitBlk 01111 MBBVector ExitBlks; 01112 LoopRep->getExitBlocks(ExitBlks); 01113 SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet; 01114 for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i) 01115 ExitBlkSet.insert(ExitBlks[i]); 01116 assert(ExitBlkSet.size() == 1); 01117 MachineBasicBlock *ExitBlk = *ExitBlks.begin(); 01118 assert(ExitBlk && "Loop has several exit block"); 01119 MBBVector LatchBlks; 01120 typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits; 01121 InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader), 01122 PE = InvMBBTraits::child_end(LoopHeader); 01123 for (; PI != PE; PI++) { 01124 if (LoopRep->contains(*PI)) 01125 LatchBlks.push_back(*PI); 01126 } 01127 01128 for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i) 01129 mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk); 01130 for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i) 01131 settleLoopcontBlock(LatchBlks[i], LoopHeader); 01132 int Match = 0; 01133 do { 01134 Match = 0; 01135 Match += serialPatternMatch(LoopHeader); 01136 Match += ifPatternMatch(LoopHeader); 01137 } while (Match > 0); 01138 mergeLooplandBlock(LoopHeader, ExitBlk); 01139 MachineLoop *ParentLoop = LoopRep->getParentLoop(); 01140 if (ParentLoop) 01141 MLI->changeLoopFor(LoopHeader, ParentLoop); 01142 else 01143 MLI->removeBlock(LoopHeader); 01144 Visited[LoopRep] = true; 01145 return 1; 01146 } 01147 01148 int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep, 01149 MachineBasicBlock *LoopHeader) { 01150 int NumCont = 0; 01151 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> ContMBB; 01152 typedef GraphTraits<Inverse<MachineBasicBlock *> > GTIM; 01153 GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader), 01154 E = GTIM::child_end(LoopHeader); 01155 for (; It != E; ++It) { 01156 MachineBasicBlock *MBB = *It; 01157 if (LoopRep->contains(MBB)) { 01158 handleLoopcontBlock(MBB, MLI->getLoopFor(MBB), 01159 LoopHeader, LoopRep); 01160 ContMBB.push_back(MBB); 01161 ++NumCont; 01162 } 01163 } 01164 01165 for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(), 01166 E = ContMBB.end(); It != E; ++It) { 01167 (*It)->removeSuccessor(LoopHeader); 01168 } 01169 01170 numLoopcontPatternMatch += NumCont; 01171 01172 return NumCont; 01173 } 01174 01175 01176 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak( 01177 MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { 01178 if (Src1MBB->succ_size() == 0) { 01179 MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); 01180 if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { 01181 MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; 01182 if (TheEntry) { 01183 DEBUG( 01184 dbgs() << "isLoopContBreakBlock yes src1 = BB" 01185 << Src1MBB->getNumber() 01186 << " src2 = BB" << Src2MBB->getNumber() << "\n"; 01187 ); 01188 return true; 01189 } 01190 } 01191 } 01192 return false; 01193 } 01194 01195 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, 01196 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 01197 int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); 01198 if (Num == 0) { 01199 DEBUG( 01200 dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; 01201 ); 01202 Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); 01203 } 01204 return Num; 01205 } 01206 01207 int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 01208 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 01209 int Num = 0; 01210 MachineBasicBlock *DownBlk; 01211 01212 //trueBlk could be the common post dominator 01213 DownBlk = TrueMBB; 01214 01215 DEBUG( 01216 dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() 01217 << " true = BB" << TrueMBB->getNumber() 01218 << ", numSucc=" << TrueMBB->succ_size() 01219 << " false = BB" << FalseMBB->getNumber() << "\n"; 01220 ); 01221 01222 while (DownBlk) { 01223 DEBUG( 01224 dbgs() << "check down = BB" << DownBlk->getNumber(); 01225 ); 01226 01227 if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { 01228 DEBUG( 01229 dbgs() << " working\n"; 01230 ); 01231 01232 Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); 01233 Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); 01234 01235 numClonedBlock += Num; 01236 Num += serialPatternMatch(*HeadMBB->succ_begin()); 01237 Num += serialPatternMatch(*std::next(HeadMBB->succ_begin())); 01238 Num += ifPatternMatch(HeadMBB); 01239 assert(Num > 0); 01240 01241 break; 01242 } 01243 DEBUG( 01244 dbgs() << " not working\n"; 01245 ); 01246 DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; 01247 } // walk down the postDomTree 01248 01249 return Num; 01250 } 01251 01252 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf( 01253 MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, 01254 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { 01255 dbgs() << "head = BB" << HeadMBB->getNumber() 01256 << " size = " << HeadMBB->size(); 01257 if (Detail) { 01258 dbgs() << "\n"; 01259 HeadMBB->print(dbgs()); 01260 dbgs() << "\n"; 01261 } 01262 01263 if (TrueMBB) { 01264 dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " 01265 << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); 01266 if (Detail) { 01267 dbgs() << "\n"; 01268 TrueMBB->print(dbgs()); 01269 dbgs() << "\n"; 01270 } 01271 } 01272 if (FalseMBB) { 01273 dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " 01274 << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); 01275 if (Detail) { 01276 dbgs() << "\n"; 01277 FalseMBB->print(dbgs()); 01278 dbgs() << "\n"; 01279 } 01280 } 01281 if (LandMBB) { 01282 dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " 01283 << LandMBB->size() << " numPred = " << LandMBB->pred_size(); 01284 if (Detail) { 01285 dbgs() << "\n"; 01286 LandMBB->print(dbgs()); 01287 dbgs() << "\n"; 01288 } 01289 } 01290 01291 dbgs() << "\n"; 01292 } 01293 01294 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 01295 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 01296 MachineBasicBlock **LandMBBPtr) { 01297 bool MigrateTrue = false; 01298 bool MigrateFalse = false; 01299 01300 MachineBasicBlock *LandBlk = *LandMBBPtr; 01301 01302 assert((!TrueMBB || TrueMBB->succ_size() <= 1) 01303 && (!FalseMBB || FalseMBB->succ_size() <= 1)); 01304 01305 if (TrueMBB == FalseMBB) 01306 return 0; 01307 01308 MigrateTrue = needMigrateBlock(TrueMBB); 01309 MigrateFalse = needMigrateBlock(FalseMBB); 01310 01311 if (!MigrateTrue && !MigrateFalse) 01312 return 0; 01313 01314 // If we need to migrate either trueBlk and falseBlk, migrate the rest that 01315 // have more than one predecessors. without doing this, its predecessor 01316 // rather than headBlk will have undefined value in initReg. 01317 if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) 01318 MigrateTrue = true; 01319 if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) 01320 MigrateFalse = true; 01321 01322 DEBUG( 01323 dbgs() << "before improveSimpleJumpintoIf: "; 01324 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); 01325 ); 01326 01327 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk 01328 // 01329 // new: headBlk => if () {initReg = 1; org trueBlk branch} else 01330 // {initReg = 0; org falseBlk branch } 01331 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} 01332 // => org landBlk 01333 // if landBlk->pred_size() > 2, put the about if-else inside 01334 // if (initReg !=2) {...} 01335 // 01336 // add initReg = initVal to headBlk 01337 01338 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 01339 if (!MigrateTrue || !MigrateFalse) { 01340 // XXX: We have an opportunity here to optimize the "branch into if" case 01341 // here. Branch into if looks like this: 01342 // entry 01343 // / | 01344 // diamond_head branch_from 01345 // / \ | 01346 // diamond_false diamond_true 01347 // \ / 01348 // done 01349 // 01350 // The diamond_head block begins the "if" and the diamond_true block 01351 // is the block being "branched into". 01352 // 01353 // If MigrateTrue is true, then TrueBB is the block being "branched into" 01354 // and if MigrateFalse is true, then FalseBB is the block being 01355 // "branched into" 01356 // 01357 // Here is the pseudo code for how I think the optimization should work: 01358 // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. 01359 // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. 01360 // 3. Move the branch instruction from diamond_head into its own basic 01361 // block (new_block). 01362 // 4. Add an unconditional branch from diamond_head to new_block 01363 // 5. Replace the branch instruction in branch_from with an unconditional 01364 // branch to new_block. If branch_from has multiple predecessors, then 01365 // we need to replace the True/False block in the branch 01366 // instruction instead of replacing it. 01367 // 6. Change the condition of the branch instruction in new_block from 01368 // COND to (COND || GPR0) 01369 // 01370 // In order insert these MOV instruction, we will need to use the 01371 // RegisterScavenger. Usually liveness stops being tracked during 01372 // the late machine optimization passes, however if we implement 01373 // bool TargetRegisterInfo::requiresRegisterScavenging( 01374 // const MachineFunction &MF) 01375 // and have it return true, liveness will be tracked correctly 01376 // by generic optimization passes. We will also need to make sure that 01377 // all of our target-specific passes that run after regalloc and before 01378 // the CFGStructurizer track liveness and we will need to modify this pass 01379 // to correctly track liveness. 01380 // 01381 // After the above changes, the new CFG should look like this: 01382 // entry 01383 // / | 01384 // diamond_head branch_from 01385 // \ / 01386 // new_block 01387 // / | 01388 // diamond_false diamond_true 01389 // \ / 01390 // done 01391 // 01392 // Without this optimization, we are forced to duplicate the diamond_true 01393 // block and we will end up with a CFG like this: 01394 // 01395 // entry 01396 // / | 01397 // diamond_head branch_from 01398 // / \ | 01399 // diamond_false diamond_true diamond_true (duplicate) 01400 // \ / | 01401 // done --------------------| 01402 // 01403 // Duplicating diamond_true can be very costly especially if it has a 01404 // lot of instructions. 01405 return 0; 01406 } 01407 01408 int NumNewBlk = 0; 01409 01410 bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); 01411 01412 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" 01413 MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF); 01414 01415 if (LandBlkHasOtherPred) { 01416 llvm_unreachable("Extra register needed to handle CFG"); 01417 unsigned CmpResReg = 01418 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 01419 llvm_unreachable("Extra compare instruction needed to handle CFG"); 01420 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, 01421 CmpResReg, DebugLoc()); 01422 } 01423 01424 // XXX: We are running this after RA, so creating virtual registers will 01425 // cause an assertion failure in the PostRA scheduling pass. 01426 unsigned InitReg = 01427 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 01428 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg, 01429 DebugLoc()); 01430 01431 if (MigrateTrue) { 01432 migrateInstruction(TrueMBB, LandBlk, I); 01433 // need to uncondionally insert the assignment to ensure a path from its 01434 // predecessor rather than headBlk has valid value in initReg if 01435 // (initVal != 1). 01436 llvm_unreachable("Extra register needed to handle CFG"); 01437 } 01438 insertInstrBefore(I, AMDGPU::ELSE); 01439 01440 if (MigrateFalse) { 01441 migrateInstruction(FalseMBB, LandBlk, I); 01442 // need to uncondionally insert the assignment to ensure a path from its 01443 // predecessor rather than headBlk has valid value in initReg if 01444 // (initVal != 0) 01445 llvm_unreachable("Extra register needed to handle CFG"); 01446 } 01447 01448 if (LandBlkHasOtherPred) { 01449 // add endif 01450 insertInstrBefore(I, AMDGPU::ENDIF); 01451 01452 // put initReg = 2 to other predecessors of landBlk 01453 for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), 01454 PE = LandBlk->pred_end(); PI != PE; ++PI) { 01455 MachineBasicBlock *MBB = *PI; 01456 if (MBB != TrueMBB && MBB != FalseMBB) 01457 llvm_unreachable("Extra register needed to handle CFG"); 01458 } 01459 } 01460 DEBUG( 01461 dbgs() << "result from improveSimpleJumpintoIf: "; 01462 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); 01463 ); 01464 01465 // update landBlk 01466 *LandMBBPtr = LandBlk; 01467 01468 return NumNewBlk; 01469 } 01470 01471 void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB, 01472 MachineLoop *ContingLoop, MachineBasicBlock *ContMBB, 01473 MachineLoop *ContLoop) { 01474 DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber() 01475 << " header = BB" << ContMBB->getNumber() << "\n"; 01476 dbgs() << "Trying to continue loop-depth = " 01477 << getLoopDepth(ContLoop) 01478 << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";); 01479 settleLoopcontBlock(ContingMBB, ContMBB); 01480 } 01481 01482 void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, 01483 MachineBasicBlock *SrcMBB) { 01484 DEBUG( 01485 dbgs() << "serialPattern BB" << DstMBB->getNumber() 01486 << " <= BB" << SrcMBB->getNumber() << "\n"; 01487 ); 01488 DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); 01489 01490 DstMBB->removeSuccessor(SrcMBB); 01491 cloneSuccessorList(DstMBB, SrcMBB); 01492 01493 removeSuccessor(SrcMBB); 01494 MLI->removeBlock(SrcMBB); 01495 retireBlock(SrcMBB); 01496 } 01497 01498 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, 01499 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 01500 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { 01501 assert (TrueMBB); 01502 DEBUG( 01503 dbgs() << "ifPattern BB" << MBB->getNumber(); 01504 dbgs() << "{ "; 01505 if (TrueMBB) { 01506 dbgs() << "BB" << TrueMBB->getNumber(); 01507 } 01508 dbgs() << " } else "; 01509 dbgs() << "{ "; 01510 if (FalseMBB) { 01511 dbgs() << "BB" << FalseMBB->getNumber(); 01512 } 01513 dbgs() << " }\n "; 01514 dbgs() << "landBlock: "; 01515 if (!LandMBB) { 01516 dbgs() << "NULL"; 01517 } else { 01518 dbgs() << "BB" << LandMBB->getNumber(); 01519 } 01520 dbgs() << "\n"; 01521 ); 01522 01523 int OldOpcode = BranchMI->getOpcode(); 01524 DebugLoc BranchDL = BranchMI->getDebugLoc(); 01525 01526 // transform to 01527 // if cond 01528 // trueBlk 01529 // else 01530 // falseBlk 01531 // endif 01532 // landBlk 01533 01534 MachineBasicBlock::iterator I = BranchMI; 01535 insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), 01536 BranchDL); 01537 01538 if (TrueMBB) { 01539 MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); 01540 MBB->removeSuccessor(TrueMBB); 01541 if (LandMBB && TrueMBB->succ_size()!=0) 01542 TrueMBB->removeSuccessor(LandMBB); 01543 retireBlock(TrueMBB); 01544 MLI->removeBlock(TrueMBB); 01545 } 01546 01547 if (FalseMBB) { 01548 insertInstrBefore(I, AMDGPU::ELSE); 01549 MBB->splice(I, FalseMBB, FalseMBB->begin(), 01550 FalseMBB->end()); 01551 MBB->removeSuccessor(FalseMBB); 01552 if (LandMBB && FalseMBB->succ_size() != 0) 01553 FalseMBB->removeSuccessor(LandMBB); 01554 retireBlock(FalseMBB); 01555 MLI->removeBlock(FalseMBB); 01556 } 01557 insertInstrBefore(I, AMDGPU::ENDIF); 01558 01559 BranchMI->eraseFromParent(); 01560 01561 if (LandMBB && TrueMBB && FalseMBB) 01562 MBB->addSuccessor(LandMBB); 01563 01564 } 01565 01566 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, 01567 MachineBasicBlock *LandMBB) { 01568 DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() 01569 << " land = BB" << LandMBB->getNumber() << "\n";); 01570 01571 insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); 01572 insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); 01573 DstBlk->addSuccessor(LandMBB); 01574 DstBlk->removeSuccessor(DstBlk); 01575 } 01576 01577 01578 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 01579 MachineBasicBlock *LandMBB) { 01580 DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber() 01581 << " land = BB" << LandMBB->getNumber() << "\n";); 01582 MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); 01583 assert(BranchMI && isCondBranch(BranchMI)); 01584 DebugLoc DL = BranchMI->getDebugLoc(); 01585 MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); 01586 MachineBasicBlock::iterator I = BranchMI; 01587 if (TrueBranch != LandMBB) 01588 reversePredicateSetter(I); 01589 insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL); 01590 insertInstrBefore(I, AMDGPU::BREAK); 01591 insertInstrBefore(I, AMDGPU::ENDIF); 01592 //now branchInst can be erase safely 01593 BranchMI->eraseFromParent(); 01594 //now take care of successors, retire blocks 01595 ExitingMBB->removeSuccessor(LandMBB); 01596 } 01597 01598 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, 01599 MachineBasicBlock *ContMBB) { 01600 DEBUG(dbgs() << "settleLoopcontBlock conting = BB" 01601 << ContingMBB->getNumber() 01602 << ", cont = BB" << ContMBB->getNumber() << "\n";); 01603 01604 MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); 01605 if (MI) { 01606 assert(isCondBranch(MI)); 01607 MachineBasicBlock::iterator I = MI; 01608 MachineBasicBlock *TrueBranch = getTrueBranch(MI); 01609 int OldOpcode = MI->getOpcode(); 01610 DebugLoc DL = MI->getDebugLoc(); 01611 01612 bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); 01613 01614 if (UseContinueLogical == false) { 01615 int BranchOpcode = 01616 TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : 01617 getBranchZeroOpcode(OldOpcode); 01618 insertCondBranchBefore(I, BranchOpcode, DL); 01619 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 01620 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL); 01621 insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL); 01622 } else { 01623 int BranchOpcode = 01624 TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : 01625 getContinueZeroOpcode(OldOpcode); 01626 insertCondBranchBefore(I, BranchOpcode, DL); 01627 } 01628 01629 MI->eraseFromParent(); 01630 } else { 01631 // if we've arrived here then we've already erased the branch instruction 01632 // travel back up the basic block to see the last reference of our debug 01633 // location we've just inserted that reference here so it should be 01634 // representative insertEnd to ensure phi-moves, if exist, go before the 01635 // continue-instr. 01636 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, 01637 getLastDebugLocInBB(ContingMBB)); 01638 } 01639 } 01640 01641 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 01642 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { 01643 int Cloned = 0; 01644 assert(PreMBB->isSuccessor(SrcMBB)); 01645 while (SrcMBB && SrcMBB != DstMBB) { 01646 assert(SrcMBB->succ_size() == 1); 01647 if (SrcMBB->pred_size() > 1) { 01648 SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); 01649 ++Cloned; 01650 } 01651 01652 PreMBB = SrcMBB; 01653 SrcMBB = *SrcMBB->succ_begin(); 01654 } 01655 01656 return Cloned; 01657 } 01658 01659 MachineBasicBlock * 01660 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, 01661 MachineBasicBlock *PredMBB) { 01662 assert(PredMBB->isSuccessor(MBB) && 01663 "succBlk is not a prececessor of curBlk"); 01664 01665 MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions 01666 replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); 01667 //srcBlk, oldBlk, newBlk 01668 01669 PredMBB->removeSuccessor(MBB); 01670 PredMBB->addSuccessor(CloneMBB); 01671 01672 // add all successor to cloneBlk 01673 cloneSuccessorList(CloneMBB, MBB); 01674 01675 numClonedInstr += MBB->size(); 01676 01677 DEBUG( 01678 dbgs() << "Cloned block: " << "BB" 01679 << MBB->getNumber() << "size " << MBB->size() << "\n"; 01680 ); 01681 01682 SHOWNEWBLK(CloneMBB, "result of Cloned block: "); 01683 01684 return CloneMBB; 01685 } 01686 01687 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, 01688 MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { 01689 MachineBasicBlock::iterator SpliceEnd; 01690 //look for the input branchinstr, not the AMDGPU branchinstr 01691 MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); 01692 if (!BranchMI) { 01693 DEBUG( 01694 dbgs() << "migrateInstruction don't see branch instr\n" ; 01695 ); 01696 SpliceEnd = SrcMBB->end(); 01697 } else { 01698 DEBUG( 01699 dbgs() << "migrateInstruction see branch instr\n" ; 01700 BranchMI->dump(); 01701 ); 01702 SpliceEnd = BranchMI; 01703 } 01704 DEBUG( 01705 dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size() 01706 << "srcSize = " << SrcMBB->size() << "\n"; 01707 ); 01708 01709 //splice insert before insertPos 01710 DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); 01711 01712 DEBUG( 01713 dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size() 01714 << "srcSize = " << SrcMBB->size() << "\n"; 01715 ); 01716 } 01717 01718 MachineBasicBlock * 01719 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { 01720 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 01721 MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); 01722 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 01723 01724 if (!LoopHeader || !LoopLatch) 01725 return nullptr; 01726 MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); 01727 // Is LoopRep an infinite loop ? 01728 if (!BranchMI || !isUncondBranch(BranchMI)) 01729 return nullptr; 01730 01731 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 01732 FuncRep->push_back(DummyExitBlk); //insert to function 01733 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); 01734 DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); 01735 MachineBasicBlock::iterator I = BranchMI; 01736 unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC); 01737 llvm_unreachable("Extra register needed to handle CFG"); 01738 MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32); 01739 MachineInstrBuilder MIB(*FuncRep, NewMI); 01740 MIB.addMBB(LoopHeader); 01741 MIB.addReg(ImmReg, false); 01742 SHOWNEWINSTR(NewMI); 01743 BranchMI->eraseFromParent(); 01744 LoopLatch->addSuccessor(DummyExitBlk); 01745 01746 return DummyExitBlk; 01747 } 01748 01749 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { 01750 MachineInstr *BranchMI; 01751 01752 // I saw two unconditional branch in one basic block in example 01753 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 01754 while ((BranchMI = getLoopendBlockBranchInstr(MBB)) 01755 && isUncondBranch(BranchMI)) { 01756 DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump();); 01757 BranchMI->eraseFromParent(); 01758 } 01759 } 01760 01761 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch( 01762 MachineBasicBlock *MBB) { 01763 if (MBB->succ_size() != 2) 01764 return; 01765 MachineBasicBlock *MBB1 = *MBB->succ_begin(); 01766 MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin()); 01767 if (MBB1 != MBB2) 01768 return; 01769 01770 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 01771 assert(BranchMI && isCondBranch(BranchMI)); 01772 DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump();); 01773 BranchMI->eraseFromParent(); 01774 SHOWNEWBLK(MBB1, "Removing redundant successor"); 01775 MBB->removeSuccessor(MBB1); 01776 } 01777 01778 void AMDGPUCFGStructurizer::addDummyExitBlock( 01779 SmallVectorImpl<MachineBasicBlock*> &RetMBB) { 01780 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 01781 FuncRep->push_back(DummyExitBlk); //insert to function 01782 insertInstrEnd(DummyExitBlk, AMDGPU::RETURN); 01783 01784 for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(), 01785 E = RetMBB.end(); It != E; ++It) { 01786 MachineBasicBlock *MBB = *It; 01787 MachineInstr *MI = getReturnInstr(MBB); 01788 if (MI) 01789 MI->eraseFromParent(); 01790 MBB->addSuccessor(DummyExitBlk); 01791 DEBUG( 01792 dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() 01793 << " successors\n"; 01794 ); 01795 } 01796 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); 01797 } 01798 01799 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { 01800 while (MBB->succ_size()) 01801 MBB->removeSuccessor(*MBB->succ_begin()); 01802 } 01803 01804 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, 01805 int SccNum) { 01806 BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; 01807 if (!srcBlkInfo) 01808 srcBlkInfo = new BlockInformation(); 01809 srcBlkInfo->SccNum = SccNum; 01810 } 01811 01812 void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { 01813 DEBUG( 01814 dbgs() << "Retiring BB" << MBB->getNumber() << "\n"; 01815 ); 01816 01817 BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; 01818 01819 if (!SrcBlkInfo) 01820 SrcBlkInfo = new BlockInformation(); 01821 01822 SrcBlkInfo->IsRetired = true; 01823 assert(MBB->succ_size() == 0 && MBB->pred_size() == 0 01824 && "can't retire block yet"); 01825 } 01826 01827 void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep, 01828 MachineBasicBlock *MBB) { 01829 MachineBasicBlock *&TheEntry = LLInfoMap[loopRep]; 01830 if (!MBB) { 01831 MBB = FuncRep->CreateMachineBasicBlock(); 01832 FuncRep->push_back(MBB); //insert to function 01833 SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: "); 01834 } 01835 TheEntry = MBB; 01836 DEBUG( 01837 dbgs() << "setLoopLandBlock loop-header = BB" 01838 << loopRep->getHeader()->getNumber() 01839 << " landing-block = BB" << MBB->getNumber() << "\n"; 01840 ); 01841 } 01842 01843 MachineBasicBlock * 01844 AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1, 01845 MachineBasicBlock *MBB2) { 01846 01847 if (PDT->dominates(MBB1, MBB2)) 01848 return MBB1; 01849 if (PDT->dominates(MBB2, MBB1)) 01850 return MBB2; 01851 01852 MachineDomTreeNode *Node1 = PDT->getNode(MBB1); 01853 MachineDomTreeNode *Node2 = PDT->getNode(MBB2); 01854 01855 // Handle newly cloned node. 01856 if (!Node1 && MBB1->succ_size() == 1) 01857 return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2); 01858 if (!Node2 && MBB2->succ_size() == 1) 01859 return findNearestCommonPostDom(MBB1, *MBB2->succ_begin()); 01860 01861 if (!Node1 || !Node2) 01862 return nullptr; 01863 01864 Node1 = Node1->getIDom(); 01865 while (Node1) { 01866 if (PDT->dominates(Node1, Node2)) 01867 return Node1->getBlock(); 01868 Node1 = Node1->getIDom(); 01869 } 01870 01871 return nullptr; 01872 } 01873 01874 MachineBasicBlock * 01875 AMDGPUCFGStructurizer::findNearestCommonPostDom( 01876 std::set<MachineBasicBlock *> &MBBs) { 01877 MachineBasicBlock *CommonDom; 01878 std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin(); 01879 std::set<MachineBasicBlock *>::const_iterator E = MBBs.end(); 01880 for (CommonDom = *It; It != E && CommonDom; ++It) { 01881 MachineBasicBlock *MBB = *It; 01882 if (MBB != CommonDom) 01883 CommonDom = findNearestCommonPostDom(MBB, CommonDom); 01884 } 01885 01886 DEBUG( 01887 dbgs() << "Common post dominator for exit blocks is "; 01888 if (CommonDom) 01889 dbgs() << "BB" << CommonDom->getNumber() << "\n"; 01890 else 01891 dbgs() << "NULL\n"; 01892 ); 01893 01894 return CommonDom; 01895 } 01896 01897 char AMDGPUCFGStructurizer::ID = 0; 01898 01899 } // end anonymous namespace 01900 01901 01902 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", 01903 "AMDGPU CFG Structurizer", false, false) 01904 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 01905 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) 01906 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 01907 INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer", 01908 "AMDGPU CFG Structurizer", false, false) 01909 01910 FunctionPass *llvm::createAMDGPUCFGStructurizerPass() { 01911 return new AMDGPUCFGStructurizer(); 01912 }