LLVM API Documentation

AMDILCFGStructurizer.cpp
Go to the documentation of this file.
00001 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 /// \file
00009 //==-----------------------------------------------------------------------===//
00010 
00011 #include "AMDGPU.h"
00012 #include "AMDGPUInstrInfo.h"
00013 #include "R600InstrInfo.h"
00014 #include "AMDGPUSubtarget.h"
00015 #include "llvm/ADT/DepthFirstIterator.h"
00016 #include "llvm/ADT/SCCIterator.h"
00017 #include "llvm/ADT/SmallVector.h"
00018 #include "llvm/ADT/Statistic.h"
00019 #include "llvm/CodeGen/MachineDominators.h"
00020 #include "llvm/CodeGen/MachineFunction.h"
00021 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
00022 #include "llvm/CodeGen/MachineFunctionPass.h"
00023 #include "llvm/CodeGen/MachineInstrBuilder.h"
00024 #include "llvm/CodeGen/MachineJumpTableInfo.h"
00025 #include "llvm/CodeGen/MachineLoopInfo.h"
00026 #include "llvm/CodeGen/MachinePostDominators.h"
00027 #include "llvm/CodeGen/MachineRegisterInfo.h"
00028 #include "llvm/IR/Dominators.h"
00029 #include "llvm/Support/Debug.h"
00030 #include "llvm/Support/raw_ostream.h"
00031 #include "llvm/Target/TargetInstrInfo.h"
00032 #include "llvm/Target/TargetMachine.h"
00033 
00034 using namespace llvm;
00035 
00036 #define DEBUG_TYPE "structcfg"
00037 
00038 #define DEFAULT_VEC_SLOTS 8
00039 
00040 // TODO: move-begin.
00041 
00042 //===----------------------------------------------------------------------===//
00043 //
00044 // Statistics for CFGStructurizer.
00045 //
00046 //===----------------------------------------------------------------------===//
00047 
00048 STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
00049     "matched");
00050 STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
00051     "matched");
00052 STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
00053     "pattern matched");
00054 STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
00055 STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
00056 
00057 namespace llvm {
00058   void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
00059 }
00060 
00061 //===----------------------------------------------------------------------===//
00062 //
00063 // Miscellaneous utility for CFGStructurizer.
00064 //
00065 //===----------------------------------------------------------------------===//
00066 namespace {
00067 #define SHOWNEWINSTR(i) \
00068   DEBUG(dbgs() << "New instr: " << *i << "\n");
00069 
00070 #define SHOWNEWBLK(b, msg) \
00071 DEBUG( \
00072   dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
00073   dbgs() << "\n"; \
00074 );
00075 
00076 #define SHOWBLK_DETAIL(b, msg) \
00077 DEBUG( \
00078   if (b) { \
00079   dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
00080   b->print(dbgs()); \
00081   dbgs() << "\n"; \
00082   } \
00083 );
00084 
00085 #define INVALIDSCCNUM -1
00086 
00087 template<class NodeT>
00088 void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
00089   size_t sz = Src.size();
00090   for (size_t i = 0; i < sz/2; ++i) {
00091     NodeT *t = Src[i];
00092     Src[i] = Src[sz - i - 1];
00093     Src[sz - i - 1] = t;
00094   }
00095 }
00096 
00097 } // end anonymous namespace
00098 
00099 //===----------------------------------------------------------------------===//
00100 //
00101 // supporting data structure for CFGStructurizer
00102 //
00103 //===----------------------------------------------------------------------===//
00104 
00105 
00106 namespace {
00107 
00108 class BlockInformation {
00109 public:
00110   bool IsRetired;
00111   int  SccNum;
00112   BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
00113 };
00114 
00115 } // end anonymous namespace
00116 
00117 //===----------------------------------------------------------------------===//
00118 //
00119 // CFGStructurizer
00120 //
00121 //===----------------------------------------------------------------------===//
00122 
00123 namespace {
00124 class AMDGPUCFGStructurizer : public MachineFunctionPass {
00125 public:
00126   typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
00127   typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
00128   typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
00129 
00130   enum PathToKind {
00131     Not_SinglePath = 0,
00132     SinglePath_InPath = 1,
00133     SinglePath_NotInPath = 2
00134   };
00135 
00136   static char ID;
00137 
00138   AMDGPUCFGStructurizer() :
00139       MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
00140     initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
00141   }
00142 
00143    const char *getPassName() const override {
00144     return "AMDGPU Control Flow Graph structurizer Pass";
00145   }
00146 
00147   void getAnalysisUsage(AnalysisUsage &AU) const override {
00148     AU.addPreserved<MachineFunctionAnalysis>();
00149     AU.addRequired<MachineFunctionAnalysis>();
00150     AU.addRequired<MachineDominatorTree>();
00151     AU.addRequired<MachinePostDominatorTree>();
00152     AU.addRequired<MachineLoopInfo>();
00153   }
00154 
00155   /// Perform the CFG structurization
00156   bool run();
00157 
00158   /// Perform the CFG preparation
00159   /// This step will remove every unconditionnal/dead jump instructions and make
00160   /// sure all loops have an exit block
00161   bool prepare();
00162 
00163   bool runOnMachineFunction(MachineFunction &MF) override {
00164     TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
00165     TRI = &TII->getRegisterInfo();
00166     DEBUG(MF.dump(););
00167     OrderedBlks.clear();
00168     FuncRep = &MF;
00169     MLI = &getAnalysis<MachineLoopInfo>();
00170     DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
00171     MDT = &getAnalysis<MachineDominatorTree>();
00172     DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
00173     PDT = &getAnalysis<MachinePostDominatorTree>();
00174     DEBUG(PDT->print(dbgs()););
00175     prepare();
00176     run();
00177     DEBUG(MF.dump(););
00178     return true;
00179   }
00180 
00181 protected:
00182   MachineDominatorTree *MDT;
00183   MachinePostDominatorTree *PDT;
00184   MachineLoopInfo *MLI;
00185   const R600InstrInfo *TII;
00186   const AMDGPURegisterInfo *TRI;
00187 
00188   // PRINT FUNCTIONS
00189   /// Print the ordered Blocks.
00190   void printOrderedBlocks() const {
00191     size_t i = 0;
00192     for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
00193         iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
00194       dbgs() << "BB" << (*iterBlk)->getNumber();
00195       dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
00196       if (i != 0 && i % 10 == 0) {
00197         dbgs() << "\n";
00198       } else {
00199         dbgs() << " ";
00200       }
00201     }
00202   }
00203   static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
00204     for (MachineLoop::iterator iter = LoopInfo.begin(),
00205          iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
00206       (*iter)->print(dbgs(), 0);
00207     }
00208   }
00209 
00210   // UTILITY FUNCTIONS
00211   int getSCCNum(MachineBasicBlock *MBB) const;
00212   MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
00213   bool hasBackEdge(MachineBasicBlock *MBB) const;
00214   static unsigned getLoopDepth(MachineLoop *LoopRep);
00215   bool isRetiredBlock(MachineBasicBlock *MBB) const;
00216   bool isActiveLoophead(MachineBasicBlock *MBB) const;
00217   PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
00218       bool AllowSideEntry = true) const;
00219   int countActiveBlock(MBBVector::const_iterator It,
00220       MBBVector::const_iterator E) const;
00221   bool needMigrateBlock(MachineBasicBlock *MBB) const;
00222 
00223   // Utility Functions
00224   void reversePredicateSetter(MachineBasicBlock::iterator I);
00225   /// Compute the reversed DFS post order of Blocks
00226   void orderBlocks(MachineFunction *MF);
00227 
00228   // Function originally from CFGStructTraits
00229   void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
00230       DebugLoc DL = DebugLoc());
00231   MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
00232     DebugLoc DL = DebugLoc());
00233   MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
00234   void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
00235       DebugLoc DL);
00236   void insertCondBranchBefore(MachineBasicBlock *MBB,
00237       MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
00238       DebugLoc DL);
00239   void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum);
00240   static int getBranchNzeroOpcode(int OldOpcode);
00241   static int getBranchZeroOpcode(int OldOpcode);
00242   static int getContinueNzeroOpcode(int OldOpcode);
00243   static int getContinueZeroOpcode(int OldOpcode);
00244   static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
00245   static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
00246   static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
00247       MachineInstr *MI);
00248   static bool isCondBranch(MachineInstr *MI);
00249   static bool isUncondBranch(MachineInstr *MI);
00250   static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
00251   static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
00252   /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
00253   ///
00254   /// BB with backward-edge could have move instructions after the branch
00255   /// instruction.  Such move instruction "belong to" the loop backward-edge.
00256   MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
00257   static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
00258   static MachineInstr *getContinueInstr(MachineBasicBlock *MBB);
00259   static bool isReturnBlock(MachineBasicBlock *MBB);
00260   static void cloneSuccessorList(MachineBasicBlock *DstMBB,
00261       MachineBasicBlock *SrcMBB) ;
00262   static MachineBasicBlock *clone(MachineBasicBlock *MBB);
00263   /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
00264   /// because the AMDGPU instruction is not recognized as terminator fix this
00265   /// and retire this routine
00266   void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
00267       MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
00268   static void wrapup(MachineBasicBlock *MBB);
00269 
00270 
00271   int patternMatch(MachineBasicBlock *MBB);
00272   int patternMatchGroup(MachineBasicBlock *MBB);
00273   int serialPatternMatch(MachineBasicBlock *MBB);
00274   int ifPatternMatch(MachineBasicBlock *MBB);
00275   int loopendPatternMatch();
00276   int mergeLoop(MachineLoop *LoopRep);
00277   int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader);
00278 
00279   void handleLoopcontBlock(MachineBasicBlock *ContingMBB,
00280       MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
00281       MachineLoop *ContLoop);
00282   /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
00283   /// the same loop with LoopLandInfo without explicitly keeping track of
00284   /// loopContBlks and loopBreakBlks, this is a method to get the information.
00285   bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
00286       MachineBasicBlock *Src2MBB);
00287   int handleJumpintoIf(MachineBasicBlock *HeadMBB,
00288       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
00289   int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
00290       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
00291   int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
00292       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
00293       MachineBasicBlock **LandMBBPtr);
00294   void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
00295       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
00296       MachineBasicBlock *LandMBB, bool Detail = false);
00297   int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
00298       MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
00299   void mergeSerialBlock(MachineBasicBlock *DstMBB,
00300       MachineBasicBlock *SrcMBB);
00301 
00302   void mergeIfthenelseBlock(MachineInstr *BranchMI,
00303       MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
00304       MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
00305   void mergeLooplandBlock(MachineBasicBlock *DstMBB,
00306       MachineBasicBlock *LandMBB);
00307   void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
00308       MachineBasicBlock *LandMBB);
00309   void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
00310       MachineBasicBlock *ContMBB);
00311   /// normalizeInfiniteLoopExit change
00312   ///   B1:
00313   ///        uncond_br LoopHeader
00314   ///
00315   /// to
00316   ///   B1:
00317   ///        cond_br 1 LoopHeader dummyExit
00318   /// and return the newly added dummy exit block
00319   MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
00320   void removeUnconditionalBranch(MachineBasicBlock *MBB);
00321   /// Remove duplicate branches instructions in a block.
00322   /// For instance
00323   /// B0:
00324   ///    cond_br X B1 B2
00325   ///    cond_br X B1 B2
00326   /// is transformed to
00327   /// B0:
00328   ///    cond_br X B1 B2
00329   void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
00330   void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
00331   void removeSuccessor(MachineBasicBlock *MBB);
00332   MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
00333       MachineBasicBlock *PredMBB);
00334   void migrateInstruction(MachineBasicBlock *SrcMBB,
00335       MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
00336   void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
00337   void retireBlock(MachineBasicBlock *MBB);
00338   void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = nullptr);
00339 
00340   MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
00341   /// This is work around solution for findNearestCommonDominator not available
00342   /// to post dom a proper fix should go to Dominators.h.
00343   MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
00344       MachineBasicBlock *MBB2);
00345 
00346 private:
00347   MBBInfoMap BlockInfoMap;
00348   LoopLandInfoMap LLInfoMap;
00349   std::map<MachineLoop *, bool> Visited;
00350   MachineFunction *FuncRep;
00351   SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
00352 };
00353 
00354 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
00355   MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
00356   if (It == BlockInfoMap.end())
00357     return INVALIDSCCNUM;
00358   return (*It).second->SccNum;
00359 }
00360 
00361 MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
00362     const {
00363   LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
00364   if (It == LLInfoMap.end())
00365     return nullptr;
00366   return (*It).second;
00367 }
00368 
00369 bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
00370   MachineLoop *LoopRep = MLI->getLoopFor(MBB);
00371   if (!LoopRep)
00372     return false;
00373   MachineBasicBlock *LoopHeader = LoopRep->getHeader();
00374   return MBB->isSuccessor(LoopHeader);
00375 }
00376 
00377 unsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) {
00378   return LoopRep ? LoopRep->getLoopDepth() : 0;
00379 }
00380 
00381 bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
00382   MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
00383   if (It == BlockInfoMap.end())
00384     return false;
00385   return (*It).second->IsRetired;
00386 }
00387 
00388 bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
00389   MachineLoop *LoopRep = MLI->getLoopFor(MBB);
00390   while (LoopRep && LoopRep->getHeader() == MBB) {
00391     MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
00392     if(!LoopLand)
00393       return true;
00394     if (!isRetiredBlock(LoopLand))
00395       return true;
00396     LoopRep = LoopRep->getParentLoop();
00397   }
00398   return false;
00399 }
00400 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
00401     MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
00402     bool AllowSideEntry) const {
00403   assert(DstMBB);
00404   if (SrcMBB == DstMBB)
00405     return SinglePath_InPath;
00406   while (SrcMBB && SrcMBB->succ_size() == 1) {
00407     SrcMBB = *SrcMBB->succ_begin();
00408     if (SrcMBB == DstMBB)
00409       return SinglePath_InPath;
00410     if (!AllowSideEntry && SrcMBB->pred_size() > 1)
00411       return Not_SinglePath;
00412   }
00413   if (SrcMBB && SrcMBB->succ_size()==0)
00414     return SinglePath_NotInPath;
00415   return Not_SinglePath;
00416 }
00417 
00418 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
00419     MBBVector::const_iterator E) const {
00420   int Count = 0;
00421   while (It != E) {
00422     if (!isRetiredBlock(*It))
00423       ++Count;
00424     ++It;
00425   }
00426   return Count;
00427 }
00428 
00429 bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
00430   unsigned BlockSizeThreshold = 30;
00431   unsigned CloneInstrThreshold = 100;
00432   bool MultiplePreds = MBB && (MBB->pred_size() > 1);
00433 
00434   if(!MultiplePreds)
00435     return false;
00436   unsigned BlkSize = MBB->size();
00437   return ((BlkSize > BlockSizeThreshold) &&
00438       (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
00439 }
00440 
00441 void AMDGPUCFGStructurizer::reversePredicateSetter(
00442     MachineBasicBlock::iterator I) {
00443   while (I--) {
00444     if (I->getOpcode() == AMDGPU::PRED_X) {
00445       switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
00446       case OPCODE_IS_ZERO_INT:
00447         static_cast<MachineInstr *>(I)->getOperand(2)
00448             .setImm(OPCODE_IS_NOT_ZERO_INT);
00449         return;
00450       case OPCODE_IS_NOT_ZERO_INT:
00451         static_cast<MachineInstr *>(I)->getOperand(2)
00452             .setImm(OPCODE_IS_ZERO_INT);
00453         return;
00454       case OPCODE_IS_ZERO:
00455         static_cast<MachineInstr *>(I)->getOperand(2)
00456             .setImm(OPCODE_IS_NOT_ZERO);
00457         return;
00458       case OPCODE_IS_NOT_ZERO:
00459         static_cast<MachineInstr *>(I)->getOperand(2)
00460             .setImm(OPCODE_IS_ZERO);
00461         return;
00462       default:
00463         llvm_unreachable("PRED_X Opcode invalid!");
00464       }
00465     }
00466   }
00467 }
00468 
00469 void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
00470     int NewOpcode, DebugLoc DL) {
00471  MachineInstr *MI = MBB->getParent()
00472     ->CreateMachineInstr(TII->get(NewOpcode), DL);
00473   MBB->push_back(MI);
00474   //assume the instruction doesn't take any reg operand ...
00475   SHOWNEWINSTR(MI);
00476 }
00477 
00478 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
00479     int NewOpcode, DebugLoc DL) {
00480   MachineInstr *MI =
00481       MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
00482   if (MBB->begin() != MBB->end())
00483     MBB->insert(MBB->begin(), MI);
00484   else
00485     MBB->push_back(MI);
00486   SHOWNEWINSTR(MI);
00487   return MI;
00488 }
00489 
00490 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
00491     MachineBasicBlock::iterator I, int NewOpcode) {
00492   MachineInstr *OldMI = &(*I);
00493   MachineBasicBlock *MBB = OldMI->getParent();
00494   MachineInstr *NewMBB =
00495       MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
00496   MBB->insert(I, NewMBB);
00497   //assume the instruction doesn't take any reg operand ...
00498   SHOWNEWINSTR(NewMBB);
00499   return NewMBB;
00500 }
00501 
00502 void AMDGPUCFGStructurizer::insertCondBranchBefore(
00503     MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) {
00504   MachineInstr *OldMI = &(*I);
00505   MachineBasicBlock *MBB = OldMI->getParent();
00506   MachineFunction *MF = MBB->getParent();
00507   MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
00508   MBB->insert(I, NewMI);
00509   MachineInstrBuilder MIB(*MF, NewMI);
00510   MIB.addReg(OldMI->getOperand(1).getReg(), false);
00511   SHOWNEWINSTR(NewMI);
00512   //erase later oldInstr->eraseFromParent();
00513 }
00514 
00515 void AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk,
00516     MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
00517     DebugLoc DL) {
00518   MachineFunction *MF = blk->getParent();
00519   MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
00520   //insert before
00521   blk->insert(I, NewInstr);
00522   MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
00523   SHOWNEWINSTR(NewInstr);
00524 }
00525 
00526 void AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB,
00527     int NewOpcode, int RegNum) {
00528   MachineFunction *MF = MBB->getParent();
00529   MachineInstr *NewInstr =
00530     MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
00531   MBB->push_back(NewInstr);
00532   MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
00533   SHOWNEWINSTR(NewInstr);
00534 }
00535 
00536 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
00537   switch(OldOpcode) {
00538   case AMDGPU::JUMP_COND:
00539   case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
00540   case AMDGPU::BRANCH_COND_i32:
00541   case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
00542   default: llvm_unreachable("internal error");
00543   }
00544   return -1;
00545 }
00546 
00547 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
00548   switch(OldOpcode) {
00549   case AMDGPU::JUMP_COND:
00550   case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
00551   case AMDGPU::BRANCH_COND_i32:
00552   case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
00553   default: llvm_unreachable("internal error");
00554   }
00555   return -1;
00556 }
00557 
00558 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
00559   switch(OldOpcode) {
00560   case AMDGPU::JUMP_COND:
00561   case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
00562   default: llvm_unreachable("internal error");
00563   };
00564   return -1;
00565 }
00566 
00567 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
00568   switch(OldOpcode) {
00569   case AMDGPU::JUMP_COND:
00570   case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
00571   default: llvm_unreachable("internal error");
00572   }
00573   return -1;
00574 }
00575 
00576 MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
00577   return MI->getOperand(0).getMBB();
00578 }
00579 
00580 void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
00581     MachineBasicBlock *MBB) {
00582   MI->getOperand(0).setMBB(MBB);
00583 }
00584 
00585 MachineBasicBlock *
00586 AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
00587     MachineInstr *MI) {
00588   assert(MBB->succ_size() == 2);
00589   MachineBasicBlock *TrueBranch = getTrueBranch(MI);
00590   MachineBasicBlock::succ_iterator It = MBB->succ_begin();
00591   MachineBasicBlock::succ_iterator Next = It;
00592   ++Next;
00593   return (*It == TrueBranch) ? *Next : *It;
00594 }
00595 
00596 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
00597   switch (MI->getOpcode()) {
00598     case AMDGPU::JUMP_COND:
00599     case AMDGPU::BRANCH_COND_i32:
00600     case AMDGPU::BRANCH_COND_f32: return true;
00601   default:
00602     return false;
00603   }
00604   return false;
00605 }
00606 
00607 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
00608   switch (MI->getOpcode()) {
00609   case AMDGPU::JUMP:
00610   case AMDGPU::BRANCH:
00611     return true;
00612   default:
00613     return false;
00614   }
00615   return false;
00616 }
00617 
00618 DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
00619   //get DebugLoc from the first MachineBasicBlock instruction with debug info
00620   DebugLoc DL;
00621   for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
00622       ++It) {
00623     MachineInstr *instr = &(*It);
00624     if (instr->getDebugLoc().isUnknown() == false)
00625       DL = instr->getDebugLoc();
00626   }
00627   return DL;
00628 }
00629 
00630 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
00631     MachineBasicBlock *MBB) {
00632   MachineBasicBlock::reverse_iterator It = MBB->rbegin();
00633   MachineInstr *MI = &*It;
00634   if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
00635     return MI;
00636   return nullptr;
00637 }
00638 
00639 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
00640     MachineBasicBlock *MBB) {
00641   for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
00642       It != E; ++It) {
00643     // FIXME: Simplify
00644     MachineInstr *MI = &*It;
00645     if (MI) {
00646       if (isCondBranch(MI) || isUncondBranch(MI))
00647         return MI;
00648       else if (!TII->isMov(MI->getOpcode()))
00649         break;
00650     }
00651   }
00652   return nullptr;
00653 }
00654 
00655 MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
00656   MachineBasicBlock::reverse_iterator It = MBB->rbegin();
00657   if (It != MBB->rend()) {
00658     MachineInstr *instr = &(*It);
00659     if (instr->getOpcode() == AMDGPU::RETURN)
00660       return instr;
00661   }
00662   return nullptr;
00663 }
00664 
00665 MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) {
00666   MachineBasicBlock::reverse_iterator It = MBB->rbegin();
00667   if (It != MBB->rend()) {
00668     MachineInstr *MI = &(*It);
00669     if (MI->getOpcode() == AMDGPU::CONTINUE)
00670       return MI;
00671   }
00672   return nullptr;
00673 }
00674 
00675 bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
00676   MachineInstr *MI = getReturnInstr(MBB);
00677   bool IsReturn = (MBB->succ_size() == 0);
00678   if (MI)
00679     assert(IsReturn);
00680   else if (IsReturn)
00681     DEBUG(
00682       dbgs() << "BB" << MBB->getNumber()
00683              <<" is return block without RETURN instr\n";);
00684   return  IsReturn;
00685 }
00686 
00687 void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
00688     MachineBasicBlock *SrcMBB) {
00689   for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
00690        iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
00691     DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of
00692 }
00693 
00694 MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
00695   MachineFunction *Func = MBB->getParent();
00696   MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
00697   Func->push_back(NewMBB);  //insert to function
00698   for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end();
00699       It != E; ++It) {
00700     MachineInstr *MI = Func->CloneMachineInstr(It);
00701     NewMBB->push_back(MI);
00702   }
00703   return NewMBB;
00704 }
00705 
00706 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
00707     MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
00708     MachineBasicBlock *NewBlk) {
00709   MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
00710   if (BranchMI && isCondBranch(BranchMI) &&
00711       getTrueBranch(BranchMI) == OldMBB)
00712     setTrueBranch(BranchMI, NewBlk);
00713 }
00714 
00715 void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
00716   assert((!MBB->getParent()->getJumpTableInfo()
00717           || MBB->getParent()->getJumpTableInfo()->isEmpty())
00718          && "found a jump table");
00719 
00720    //collect continue right before endloop
00721    SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
00722    MachineBasicBlock::iterator Pre = MBB->begin();
00723    MachineBasicBlock::iterator E = MBB->end();
00724    MachineBasicBlock::iterator It = Pre;
00725    while (It != E) {
00726      if (Pre->getOpcode() == AMDGPU::CONTINUE
00727          && It->getOpcode() == AMDGPU::ENDLOOP)
00728        ContInstr.push_back(Pre);
00729      Pre = It;
00730      ++It;
00731    }
00732 
00733    //delete continue right before endloop
00734    for (unsigned i = 0; i < ContInstr.size(); ++i)
00735       ContInstr[i]->eraseFromParent();
00736 
00737    // TODO to fix up jump table so later phase won't be confused.  if
00738    // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
00739    // there isn't such an interface yet.  alternatively, replace all the other
00740    // blocks in the jump table with the entryBlk //}
00741 
00742 }
00743 
00744 
00745 bool AMDGPUCFGStructurizer::prepare() {
00746   bool Changed = false;
00747 
00748   //FIXME: if not reducible flow graph, make it so ???
00749 
00750   DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
00751 
00752   orderBlocks(FuncRep);
00753 
00754   SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
00755 
00756   // Add an ExitBlk to loop that don't have one
00757   for (MachineLoopInfo::iterator It = MLI->begin(),
00758        E = MLI->end(); It != E; ++It) {
00759     MachineLoop *LoopRep = (*It);
00760     MBBVector ExitingMBBs;
00761     LoopRep->getExitingBlocks(ExitingMBBs);
00762 
00763     if (ExitingMBBs.size() == 0) {
00764       MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
00765       if (DummyExitBlk)
00766         RetBlks.push_back(DummyExitBlk);
00767     }
00768   }
00769 
00770   // Remove unconditional branch instr.
00771   // Add dummy exit block iff there are multiple returns.
00772   for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
00773        It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
00774     MachineBasicBlock *MBB = *It;
00775     removeUnconditionalBranch(MBB);
00776     removeRedundantConditionalBranch(MBB);
00777     if (isReturnBlock(MBB)) {
00778       RetBlks.push_back(MBB);
00779     }
00780     assert(MBB->succ_size() <= 2);
00781   }
00782 
00783   if (RetBlks.size() >= 2) {
00784     addDummyExitBlock(RetBlks);
00785     Changed = true;
00786   }
00787 
00788   return Changed;
00789 }
00790 
00791 bool AMDGPUCFGStructurizer::run() {
00792 
00793   //Assume reducible CFG...
00794   DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
00795 
00796 #ifdef STRESSTEST
00797   //Use the worse block ordering to test the algorithm.
00798   ReverseVector(orderedBlks);
00799 #endif
00800 
00801   DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
00802   int NumIter = 0;
00803   bool Finish = false;
00804   MachineBasicBlock *MBB;
00805   bool MakeProgress = false;
00806   int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
00807                                         OrderedBlks.end());
00808 
00809   do {
00810     ++NumIter;
00811     DEBUG(
00812       dbgs() << "numIter = " << NumIter
00813              << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
00814     );
00815 
00816     SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
00817         OrderedBlks.begin();
00818     SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
00819         OrderedBlks.end();
00820 
00821     SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
00822         It;
00823     MachineBasicBlock *SccBeginMBB = nullptr;
00824     int SccNumBlk = 0;  // The number of active blocks, init to a
00825                         // maximum possible number.
00826     int SccNumIter;     // Number of iteration in this SCC.
00827 
00828     while (It != E) {
00829       MBB = *It;
00830 
00831       if (!SccBeginMBB) {
00832         SccBeginIter = It;
00833         SccBeginMBB = MBB;
00834         SccNumIter = 0;
00835         SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
00836         DEBUG(
00837               dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
00838               dbgs() << "\n";
00839         );
00840       }
00841 
00842       if (!isRetiredBlock(MBB))
00843         patternMatch(MBB);
00844 
00845       ++It;
00846 
00847       bool ContNextScc = true;
00848       if (It == E
00849           || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
00850         // Just finish one scc.
00851         ++SccNumIter;
00852         int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
00853         if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
00854           DEBUG(
00855             dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
00856                    << ", sccNumIter = " << SccNumIter;
00857             dbgs() << "doesn't make any progress\n";
00858           );
00859           ContNextScc = true;
00860         } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
00861           SccNumBlk = sccRemainedNumBlk;
00862           It = SccBeginIter;
00863           ContNextScc = false;
00864           DEBUG(
00865             dbgs() << "repeat processing SCC" << getSCCNum(MBB)
00866                    << "sccNumIter = " << SccNumIter << '\n';
00867           );
00868         } else {
00869           // Finish the current scc.
00870           ContNextScc = true;
00871         }
00872       } else {
00873         // Continue on next component in the current scc.
00874         ContNextScc = false;
00875       }
00876 
00877       if (ContNextScc)
00878         SccBeginMBB = nullptr;
00879     } //while, "one iteration" over the function.
00880 
00881     MachineBasicBlock *EntryMBB =
00882         GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
00883     if (EntryMBB->succ_size() == 0) {
00884       Finish = true;
00885       DEBUG(
00886         dbgs() << "Reduce to one block\n";
00887       );
00888     } else {
00889       int NewnumRemainedBlk
00890         = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
00891       // consider cloned blocks ??
00892       if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
00893         MakeProgress = true;
00894         NumRemainedBlk = NewnumRemainedBlk;
00895       } else {
00896         MakeProgress = false;
00897         DEBUG(
00898           dbgs() << "No progress\n";
00899         );
00900       }
00901     }
00902   } while (!Finish && MakeProgress);
00903 
00904   // Misc wrap up to maintain the consistency of the Function representation.
00905   wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
00906 
00907   // Detach retired Block, release memory.
00908   for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
00909       It != E; ++It) {
00910     if ((*It).second && (*It).second->IsRetired) {
00911       assert(((*It).first)->getNumber() != -1);
00912       DEBUG(
00913         dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
00914       );
00915       (*It).first->eraseFromParent();  //Remove from the parent Function.
00916     }
00917     delete (*It).second;
00918   }
00919   BlockInfoMap.clear();
00920   LLInfoMap.clear();
00921 
00922   if (!Finish) {
00923     DEBUG(FuncRep->viewCFG());
00924     llvm_unreachable("IRREDUCIBLE_CFG");
00925   }
00926 
00927   return true;
00928 }
00929 
00930 
00931 
00932 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
00933   int SccNum = 0;
00934   MachineBasicBlock *MBB;
00935   for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
00936        ++It, ++SccNum) {
00937     const std::vector<MachineBasicBlock *> &SccNext = *It;
00938     for (std::vector<MachineBasicBlock *>::const_iterator
00939          blockIter = SccNext.begin(), blockEnd = SccNext.end();
00940          blockIter != blockEnd; ++blockIter) {
00941       MBB = *blockIter;
00942       OrderedBlks.push_back(MBB);
00943       recordSccnum(MBB, SccNum);
00944     }
00945   }
00946 
00947   //walk through all the block in func to check for unreachable
00948   typedef GraphTraits<MachineFunction *> GTM;
00949   MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
00950   for (; It != E; ++It) {
00951     MachineBasicBlock *MBB = &(*It);
00952     SccNum = getSCCNum(MBB);
00953     if (SccNum == INVALIDSCCNUM)
00954       dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
00955   }
00956 }
00957 
00958 int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
00959   int NumMatch = 0;
00960   int CurMatch;
00961 
00962   DEBUG(
00963         dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
00964   );
00965 
00966   while ((CurMatch = patternMatchGroup(MBB)) > 0)
00967     NumMatch += CurMatch;
00968 
00969   DEBUG(
00970         dbgs() << "End patternMatch BB" << MBB->getNumber()
00971       << ", numMatch = " << NumMatch << "\n";
00972   );
00973 
00974   return NumMatch;
00975 }
00976 
00977 int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
00978   int NumMatch = 0;
00979   NumMatch += loopendPatternMatch();
00980   NumMatch += serialPatternMatch(MBB);
00981   NumMatch += ifPatternMatch(MBB);
00982   return NumMatch;
00983 }
00984 
00985 
00986 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
00987   if (MBB->succ_size() != 1)
00988     return 0;
00989 
00990   MachineBasicBlock *childBlk = *MBB->succ_begin();
00991   if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
00992     return 0;
00993 
00994   mergeSerialBlock(MBB, childBlk);
00995   ++numSerialPatternMatch;
00996   return 1;
00997 }
00998 
00999 int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
01000   //two edges
01001   if (MBB->succ_size() != 2)
01002     return 0;
01003   if (hasBackEdge(MBB))
01004     return 0;
01005   MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
01006   if (!BranchMI)
01007     return 0;
01008 
01009   assert(isCondBranch(BranchMI));
01010   int NumMatch = 0;
01011 
01012   MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
01013   NumMatch += serialPatternMatch(TrueMBB);
01014   NumMatch += ifPatternMatch(TrueMBB);
01015   MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
01016   NumMatch += serialPatternMatch(FalseMBB);
01017   NumMatch += ifPatternMatch(FalseMBB);
01018   MachineBasicBlock *LandBlk;
01019   int Cloned = 0;
01020 
01021   assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
01022   // TODO: Simplify
01023   if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
01024     && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
01025     // Diamond pattern
01026     LandBlk = *TrueMBB->succ_begin();
01027   } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
01028     // Triangle pattern, false is empty
01029     LandBlk = FalseMBB;
01030     FalseMBB = nullptr;
01031   } else if (FalseMBB->succ_size() == 1
01032              && *FalseMBB->succ_begin() == TrueMBB) {
01033     // Triangle pattern, true is empty
01034     // We reverse the predicate to make a triangle, empty false pattern;
01035     std::swap(TrueMBB, FalseMBB);
01036     reversePredicateSetter(MBB->end());
01037     LandBlk = FalseMBB;
01038     FalseMBB = nullptr;
01039   } else if (FalseMBB->succ_size() == 1
01040              && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
01041     LandBlk = *FalseMBB->succ_begin();
01042   } else if (TrueMBB->succ_size() == 1
01043     && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
01044     LandBlk = *TrueMBB->succ_begin();
01045   } else {
01046     return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
01047   }
01048 
01049   // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
01050   // new BB created for landBlk==NULL may introduce new challenge to the
01051   // reduction process.
01052   if (LandBlk &&
01053       ((TrueMBB && TrueMBB->pred_size() > 1)
01054       || (FalseMBB && FalseMBB->pred_size() > 1))) {
01055      Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
01056   }
01057 
01058   if (TrueMBB && TrueMBB->pred_size() > 1) {
01059     TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
01060     ++Cloned;
01061   }
01062 
01063   if (FalseMBB && FalseMBB->pred_size() > 1) {
01064     FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
01065     ++Cloned;
01066   }
01067 
01068   mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
01069 
01070   ++numIfPatternMatch;
01071 
01072   numClonedBlock += Cloned;
01073 
01074   return 1 + Cloned + NumMatch;
01075 }
01076 
01077 int AMDGPUCFGStructurizer::loopendPatternMatch() {
01078   std::vector<MachineLoop *> NestedLoops;
01079   for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); It != E;
01080        ++It)
01081     for (MachineLoop *ML : depth_first(*It))
01082       NestedLoops.push_back(ML);
01083 
01084   if (NestedLoops.size() == 0)
01085     return 0;
01086 
01087   // Process nested loop outside->inside, so "continue" to a outside loop won't
01088   // be mistaken as "break" of the current loop.
01089   int Num = 0;
01090   for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(),
01091       E = NestedLoops.rend(); It != E; ++It) {
01092     MachineLoop *ExaminedLoop = *It;
01093     if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
01094       continue;
01095     DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
01096     int NumBreak = mergeLoop(ExaminedLoop);
01097     if (NumBreak == -1)
01098       break;
01099     Num += NumBreak;
01100   }
01101   return Num;
01102 }
01103 
01104 int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
01105   MachineBasicBlock *LoopHeader = LoopRep->getHeader();
01106   MBBVector ExitingMBBs;
01107   LoopRep->getExitingBlocks(ExitingMBBs);
01108   assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
01109   DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
01110   // We assume a single ExitBlk
01111   MBBVector ExitBlks;
01112   LoopRep->getExitBlocks(ExitBlks);
01113   SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
01114   for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
01115     ExitBlkSet.insert(ExitBlks[i]);
01116   assert(ExitBlkSet.size() == 1);
01117   MachineBasicBlock *ExitBlk = *ExitBlks.begin();
01118   assert(ExitBlk && "Loop has several exit block");
01119   MBBVector LatchBlks;
01120   typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
01121   InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
01122       PE = InvMBBTraits::child_end(LoopHeader);
01123   for (; PI != PE; PI++) {
01124     if (LoopRep->contains(*PI))
01125       LatchBlks.push_back(*PI);
01126   }
01127 
01128   for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
01129     mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
01130   for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
01131     settleLoopcontBlock(LatchBlks[i], LoopHeader);
01132   int Match = 0;
01133   do {
01134     Match = 0;
01135     Match += serialPatternMatch(LoopHeader);
01136     Match += ifPatternMatch(LoopHeader);
01137   } while (Match > 0);
01138   mergeLooplandBlock(LoopHeader, ExitBlk);
01139   MachineLoop *ParentLoop = LoopRep->getParentLoop();
01140   if (ParentLoop)
01141     MLI->changeLoopFor(LoopHeader, ParentLoop);
01142   else
01143     MLI->removeBlock(LoopHeader);
01144   Visited[LoopRep] = true;
01145   return 1;
01146 }
01147 
01148 int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
01149     MachineBasicBlock *LoopHeader) {
01150   int NumCont = 0;
01151   SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> ContMBB;
01152   typedef GraphTraits<Inverse<MachineBasicBlock *> > GTIM;
01153   GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader),
01154       E = GTIM::child_end(LoopHeader);
01155   for (; It != E; ++It) {
01156     MachineBasicBlock *MBB = *It;
01157     if (LoopRep->contains(MBB)) {
01158       handleLoopcontBlock(MBB, MLI->getLoopFor(MBB),
01159                           LoopHeader, LoopRep);
01160       ContMBB.push_back(MBB);
01161       ++NumCont;
01162     }
01163   }
01164 
01165   for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(),
01166       E = ContMBB.end(); It != E; ++It) {
01167     (*It)->removeSuccessor(LoopHeader);
01168   }
01169 
01170   numLoopcontPatternMatch += NumCont;
01171 
01172   return NumCont;
01173 }
01174 
01175 
01176 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
01177     MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
01178   if (Src1MBB->succ_size() == 0) {
01179     MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
01180     if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
01181       MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
01182       if (TheEntry) {
01183         DEBUG(
01184           dbgs() << "isLoopContBreakBlock yes src1 = BB"
01185                  << Src1MBB->getNumber()
01186                  << " src2 = BB" << Src2MBB->getNumber() << "\n";
01187         );
01188         return true;
01189       }
01190     }
01191   }
01192   return false;
01193 }
01194 
01195 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
01196     MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
01197   int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
01198   if (Num == 0) {
01199     DEBUG(
01200       dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
01201     );
01202     Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
01203   }
01204   return Num;
01205 }
01206 
01207 int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
01208     MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
01209   int Num = 0;
01210   MachineBasicBlock *DownBlk;
01211 
01212   //trueBlk could be the common post dominator
01213   DownBlk = TrueMBB;
01214 
01215   DEBUG(
01216     dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
01217            << " true = BB" << TrueMBB->getNumber()
01218            << ", numSucc=" << TrueMBB->succ_size()
01219            << " false = BB" << FalseMBB->getNumber() << "\n";
01220   );
01221 
01222   while (DownBlk) {
01223     DEBUG(
01224       dbgs() << "check down = BB" << DownBlk->getNumber();
01225     );
01226 
01227     if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
01228       DEBUG(
01229         dbgs() << " working\n";
01230       );
01231 
01232       Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
01233       Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
01234 
01235       numClonedBlock += Num;
01236       Num += serialPatternMatch(*HeadMBB->succ_begin());
01237       Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
01238       Num += ifPatternMatch(HeadMBB);
01239       assert(Num > 0);
01240 
01241       break;
01242     }
01243     DEBUG(
01244       dbgs() << " not working\n";
01245     );
01246     DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
01247   } // walk down the postDomTree
01248 
01249   return Num;
01250 }
01251 
01252 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
01253     MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
01254     MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
01255   dbgs() << "head = BB" << HeadMBB->getNumber()
01256          << " size = " << HeadMBB->size();
01257   if (Detail) {
01258     dbgs() << "\n";
01259     HeadMBB->print(dbgs());
01260     dbgs() << "\n";
01261   }
01262 
01263   if (TrueMBB) {
01264     dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
01265            << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
01266     if (Detail) {
01267       dbgs() << "\n";
01268       TrueMBB->print(dbgs());
01269       dbgs() << "\n";
01270     }
01271   }
01272   if (FalseMBB) {
01273     dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
01274            << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
01275     if (Detail) {
01276       dbgs() << "\n";
01277       FalseMBB->print(dbgs());
01278       dbgs() << "\n";
01279     }
01280   }
01281   if (LandMBB) {
01282     dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
01283            << LandMBB->size() << " numPred = " << LandMBB->pred_size();
01284     if (Detail) {
01285       dbgs() << "\n";
01286       LandMBB->print(dbgs());
01287       dbgs() << "\n";
01288     }
01289   }
01290 
01291     dbgs() << "\n";
01292 }
01293 
01294 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
01295     MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
01296     MachineBasicBlock **LandMBBPtr) {
01297   bool MigrateTrue = false;
01298   bool MigrateFalse = false;
01299 
01300   MachineBasicBlock *LandBlk = *LandMBBPtr;
01301 
01302   assert((!TrueMBB || TrueMBB->succ_size() <= 1)
01303          && (!FalseMBB || FalseMBB->succ_size() <= 1));
01304 
01305   if (TrueMBB == FalseMBB)
01306     return 0;
01307 
01308   MigrateTrue = needMigrateBlock(TrueMBB);
01309   MigrateFalse = needMigrateBlock(FalseMBB);
01310 
01311   if (!MigrateTrue && !MigrateFalse)
01312     return 0;
01313 
01314   // If we need to migrate either trueBlk and falseBlk, migrate the rest that
01315   // have more than one predecessors.  without doing this, its predecessor
01316   // rather than headBlk will have undefined value in initReg.
01317   if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
01318     MigrateTrue = true;
01319   if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
01320     MigrateFalse = true;
01321 
01322   DEBUG(
01323     dbgs() << "before improveSimpleJumpintoIf: ";
01324     showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
01325   );
01326 
01327   // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
01328   //
01329   // new: headBlk => if () {initReg = 1; org trueBlk branch} else
01330   //      {initReg = 0; org falseBlk branch }
01331   //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
01332   //      => org landBlk
01333   //      if landBlk->pred_size() > 2, put the about if-else inside
01334   //      if (initReg !=2) {...}
01335   //
01336   // add initReg = initVal to headBlk
01337 
01338   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
01339   if (!MigrateTrue || !MigrateFalse) {
01340     // XXX: We have an opportunity here to optimize the "branch into if" case
01341     // here.  Branch into if looks like this:
01342     //                        entry
01343     //                       /     |
01344     //           diamond_head       branch_from
01345     //             /      \           |
01346     // diamond_false        diamond_true
01347     //             \      /
01348     //               done
01349     //
01350     // The diamond_head block begins the "if" and the diamond_true block
01351     // is the block being "branched into".
01352     //
01353     // If MigrateTrue is true, then TrueBB is the block being "branched into"
01354     // and if MigrateFalse is true, then FalseBB is the block being
01355     // "branched into"
01356     // 
01357     // Here is the pseudo code for how I think the optimization should work:
01358     // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
01359     // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
01360     // 3. Move the branch instruction from diamond_head into its own basic
01361     //    block (new_block).
01362     // 4. Add an unconditional branch from diamond_head to new_block
01363     // 5. Replace the branch instruction in branch_from with an unconditional
01364     //    branch to new_block.  If branch_from has multiple predecessors, then
01365     //    we need to replace the True/False block in the branch
01366     //    instruction instead of replacing it.
01367     // 6. Change the condition of the branch instruction in new_block from
01368     //    COND to (COND || GPR0)
01369     //
01370     // In order insert these MOV instruction, we will need to use the
01371     // RegisterScavenger.  Usually liveness stops being tracked during
01372     // the late machine optimization passes, however if we implement
01373     // bool TargetRegisterInfo::requiresRegisterScavenging(
01374     //                                                const MachineFunction &MF)
01375     // and have it return true, liveness will be tracked correctly 
01376     // by generic optimization passes.  We will also need to make sure that
01377     // all of our target-specific passes that run after regalloc and before
01378     // the CFGStructurizer track liveness and we will need to modify this pass
01379     // to correctly track liveness.
01380     //
01381     // After the above changes, the new CFG should look like this:
01382     //                        entry
01383     //                       /     |
01384     //           diamond_head       branch_from
01385     //                       \     /
01386     //                      new_block
01387     //                      /      |
01388     //         diamond_false        diamond_true
01389     //                      \      /
01390     //                        done
01391     //
01392     // Without this optimization, we are forced to duplicate the diamond_true
01393     // block and we will end up with a CFG like this:
01394     //
01395     //                        entry
01396     //                       /     |
01397     //           diamond_head       branch_from
01398     //             /      \                   |
01399     // diamond_false        diamond_true      diamond_true (duplicate)
01400     //             \      /                   |
01401     //               done --------------------|
01402     //
01403     // Duplicating diamond_true can be very costly especially if it has a
01404     // lot of instructions.
01405     return 0;
01406   }
01407 
01408   int NumNewBlk = 0;
01409 
01410   bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
01411 
01412   //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
01413   MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
01414 
01415   if (LandBlkHasOtherPred) {
01416     llvm_unreachable("Extra register needed to handle CFG");
01417     unsigned CmpResReg =
01418       HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
01419     llvm_unreachable("Extra compare instruction needed to handle CFG");
01420     insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
01421         CmpResReg, DebugLoc());
01422   }
01423 
01424   // XXX: We are running this after RA, so creating virtual registers will
01425   // cause an assertion failure in the PostRA scheduling pass.
01426   unsigned InitReg =
01427     HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
01428   insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
01429       DebugLoc());
01430 
01431   if (MigrateTrue) {
01432     migrateInstruction(TrueMBB, LandBlk, I);
01433     // need to uncondionally insert the assignment to ensure a path from its
01434     // predecessor rather than headBlk has valid value in initReg if
01435     // (initVal != 1).
01436     llvm_unreachable("Extra register needed to handle CFG");
01437   }
01438   insertInstrBefore(I, AMDGPU::ELSE);
01439 
01440   if (MigrateFalse) {
01441     migrateInstruction(FalseMBB, LandBlk, I);
01442     // need to uncondionally insert the assignment to ensure a path from its
01443     // predecessor rather than headBlk has valid value in initReg if
01444     // (initVal != 0)
01445     llvm_unreachable("Extra register needed to handle CFG");
01446   }
01447 
01448   if (LandBlkHasOtherPred) {
01449     // add endif
01450     insertInstrBefore(I, AMDGPU::ENDIF);
01451 
01452     // put initReg = 2 to other predecessors of landBlk
01453     for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
01454          PE = LandBlk->pred_end(); PI != PE; ++PI) {
01455       MachineBasicBlock *MBB = *PI;
01456       if (MBB != TrueMBB && MBB != FalseMBB)
01457         llvm_unreachable("Extra register needed to handle CFG");
01458     }
01459   }
01460   DEBUG(
01461     dbgs() << "result from improveSimpleJumpintoIf: ";
01462     showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
01463   );
01464 
01465   // update landBlk
01466   *LandMBBPtr = LandBlk;
01467 
01468   return NumNewBlk;
01469 }
01470 
01471 void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB,
01472     MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
01473     MachineLoop *ContLoop) {
01474   DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber()
01475                << " header = BB" << ContMBB->getNumber() << "\n";
01476         dbgs() << "Trying to continue loop-depth = "
01477                << getLoopDepth(ContLoop)
01478                << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";);
01479   settleLoopcontBlock(ContingMBB, ContMBB);
01480 }
01481 
01482 void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
01483     MachineBasicBlock *SrcMBB) {
01484   DEBUG(
01485     dbgs() << "serialPattern BB" << DstMBB->getNumber()
01486            << " <= BB" << SrcMBB->getNumber() << "\n";
01487   );
01488   DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
01489 
01490   DstMBB->removeSuccessor(SrcMBB);
01491   cloneSuccessorList(DstMBB, SrcMBB);
01492 
01493   removeSuccessor(SrcMBB);
01494   MLI->removeBlock(SrcMBB);
01495   retireBlock(SrcMBB);
01496 }
01497 
01498 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
01499     MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
01500     MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
01501   assert (TrueMBB);
01502   DEBUG(
01503     dbgs() << "ifPattern BB" << MBB->getNumber();
01504     dbgs() << "{  ";
01505     if (TrueMBB) {
01506       dbgs() << "BB" << TrueMBB->getNumber();
01507     }
01508     dbgs() << "  } else ";
01509     dbgs() << "{  ";
01510     if (FalseMBB) {
01511       dbgs() << "BB" << FalseMBB->getNumber();
01512     }
01513     dbgs() << "  }\n ";
01514     dbgs() << "landBlock: ";
01515     if (!LandMBB) {
01516       dbgs() << "NULL";
01517     } else {
01518       dbgs() << "BB" << LandMBB->getNumber();
01519     }
01520     dbgs() << "\n";
01521   );
01522 
01523   int OldOpcode = BranchMI->getOpcode();
01524   DebugLoc BranchDL = BranchMI->getDebugLoc();
01525 
01526 //    transform to
01527 //    if cond
01528 //       trueBlk
01529 //    else
01530 //       falseBlk
01531 //    endif
01532 //    landBlk
01533 
01534   MachineBasicBlock::iterator I = BranchMI;
01535   insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
01536       BranchDL);
01537 
01538   if (TrueMBB) {
01539     MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
01540     MBB->removeSuccessor(TrueMBB);
01541     if (LandMBB && TrueMBB->succ_size()!=0)
01542       TrueMBB->removeSuccessor(LandMBB);
01543     retireBlock(TrueMBB);
01544     MLI->removeBlock(TrueMBB);
01545   }
01546 
01547   if (FalseMBB) {
01548     insertInstrBefore(I, AMDGPU::ELSE);
01549     MBB->splice(I, FalseMBB, FalseMBB->begin(),
01550                    FalseMBB->end());
01551     MBB->removeSuccessor(FalseMBB);
01552     if (LandMBB && FalseMBB->succ_size() != 0)
01553       FalseMBB->removeSuccessor(LandMBB);
01554     retireBlock(FalseMBB);
01555     MLI->removeBlock(FalseMBB);
01556   }
01557   insertInstrBefore(I, AMDGPU::ENDIF);
01558 
01559   BranchMI->eraseFromParent();
01560 
01561   if (LandMBB && TrueMBB && FalseMBB)
01562     MBB->addSuccessor(LandMBB);
01563 
01564 }
01565 
01566 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
01567     MachineBasicBlock *LandMBB) {
01568   DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
01569                << " land = BB" << LandMBB->getNumber() << "\n";);
01570 
01571   insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
01572   insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
01573   DstBlk->addSuccessor(LandMBB);
01574   DstBlk->removeSuccessor(DstBlk);
01575 }
01576 
01577 
01578 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
01579     MachineBasicBlock *LandMBB) {
01580   DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
01581                << " land = BB" << LandMBB->getNumber() << "\n";);
01582   MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
01583   assert(BranchMI && isCondBranch(BranchMI));
01584   DebugLoc DL = BranchMI->getDebugLoc();
01585   MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
01586   MachineBasicBlock::iterator I = BranchMI;
01587   if (TrueBranch != LandMBB)
01588     reversePredicateSetter(I);
01589   insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
01590   insertInstrBefore(I, AMDGPU::BREAK);
01591   insertInstrBefore(I, AMDGPU::ENDIF);
01592   //now branchInst can be erase safely
01593   BranchMI->eraseFromParent();
01594   //now take care of successors, retire blocks
01595   ExitingMBB->removeSuccessor(LandMBB);
01596 }
01597 
01598 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
01599     MachineBasicBlock *ContMBB) {
01600   DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
01601                << ContingMBB->getNumber()
01602                << ", cont = BB" << ContMBB->getNumber() << "\n";);
01603 
01604   MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
01605   if (MI) {
01606     assert(isCondBranch(MI));
01607     MachineBasicBlock::iterator I = MI;
01608     MachineBasicBlock *TrueBranch = getTrueBranch(MI);
01609     int OldOpcode = MI->getOpcode();
01610     DebugLoc DL = MI->getDebugLoc();
01611 
01612     bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
01613 
01614     if (UseContinueLogical == false) {
01615       int BranchOpcode =
01616           TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
01617           getBranchZeroOpcode(OldOpcode);
01618       insertCondBranchBefore(I, BranchOpcode, DL);
01619       // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
01620       insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
01621       insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
01622     } else {
01623       int BranchOpcode =
01624           TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
01625           getContinueZeroOpcode(OldOpcode);
01626       insertCondBranchBefore(I, BranchOpcode, DL);
01627     }
01628 
01629     MI->eraseFromParent();
01630   } else {
01631     // if we've arrived here then we've already erased the branch instruction
01632     // travel back up the basic block to see the last reference of our debug
01633     // location we've just inserted that reference here so it should be
01634     // representative insertEnd to ensure phi-moves, if exist, go before the
01635     // continue-instr.
01636     insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
01637         getLastDebugLocInBB(ContingMBB));
01638   }
01639 }
01640 
01641 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
01642     MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
01643   int Cloned = 0;
01644   assert(PreMBB->isSuccessor(SrcMBB));
01645   while (SrcMBB && SrcMBB != DstMBB) {
01646     assert(SrcMBB->succ_size() == 1);
01647     if (SrcMBB->pred_size() > 1) {
01648       SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
01649       ++Cloned;
01650     }
01651 
01652     PreMBB = SrcMBB;
01653     SrcMBB = *SrcMBB->succ_begin();
01654   }
01655 
01656   return Cloned;
01657 }
01658 
01659 MachineBasicBlock *
01660 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
01661     MachineBasicBlock *PredMBB) {
01662   assert(PredMBB->isSuccessor(MBB) &&
01663          "succBlk is not a prececessor of curBlk");
01664 
01665   MachineBasicBlock *CloneMBB = clone(MBB);  //clone instructions
01666   replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
01667   //srcBlk, oldBlk, newBlk
01668 
01669   PredMBB->removeSuccessor(MBB);
01670   PredMBB->addSuccessor(CloneMBB);
01671 
01672   // add all successor to cloneBlk
01673   cloneSuccessorList(CloneMBB, MBB);
01674 
01675   numClonedInstr += MBB->size();
01676 
01677   DEBUG(
01678     dbgs() << "Cloned block: " << "BB"
01679            << MBB->getNumber() << "size " << MBB->size() << "\n";
01680   );
01681 
01682   SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
01683 
01684   return CloneMBB;
01685 }
01686 
01687 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
01688     MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
01689   MachineBasicBlock::iterator SpliceEnd;
01690   //look for the input branchinstr, not the AMDGPU branchinstr
01691   MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
01692   if (!BranchMI) {
01693     DEBUG(
01694       dbgs() << "migrateInstruction don't see branch instr\n" ;
01695     );
01696     SpliceEnd = SrcMBB->end();
01697   } else {
01698     DEBUG(
01699       dbgs() << "migrateInstruction see branch instr\n" ;
01700       BranchMI->dump();
01701     );
01702     SpliceEnd = BranchMI;
01703   }
01704   DEBUG(
01705     dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
01706       << "srcSize = " << SrcMBB->size() << "\n";
01707   );
01708 
01709   //splice insert before insertPos
01710   DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
01711 
01712   DEBUG(
01713     dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
01714       << "srcSize = " << SrcMBB->size() << "\n";
01715   );
01716 }
01717 
01718 MachineBasicBlock *
01719 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
01720   MachineBasicBlock *LoopHeader = LoopRep->getHeader();
01721   MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
01722   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
01723 
01724   if (!LoopHeader || !LoopLatch)
01725     return nullptr;
01726   MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
01727   // Is LoopRep an infinite loop ?
01728   if (!BranchMI || !isUncondBranch(BranchMI))
01729     return nullptr;
01730 
01731   MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
01732   FuncRep->push_back(DummyExitBlk);  //insert to function
01733   SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
01734   DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
01735   MachineBasicBlock::iterator I = BranchMI;
01736   unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC);
01737   llvm_unreachable("Extra register needed to handle CFG");
01738   MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32);
01739   MachineInstrBuilder MIB(*FuncRep, NewMI);
01740   MIB.addMBB(LoopHeader);
01741   MIB.addReg(ImmReg, false);
01742   SHOWNEWINSTR(NewMI);
01743   BranchMI->eraseFromParent();
01744   LoopLatch->addSuccessor(DummyExitBlk);
01745 
01746   return DummyExitBlk;
01747 }
01748 
01749 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
01750   MachineInstr *BranchMI;
01751 
01752   // I saw two unconditional branch in one basic block in example
01753   // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
01754   while ((BranchMI = getLoopendBlockBranchInstr(MBB))
01755           && isUncondBranch(BranchMI)) {
01756     DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
01757     BranchMI->eraseFromParent();
01758   }
01759 }
01760 
01761 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
01762     MachineBasicBlock *MBB) {
01763   if (MBB->succ_size() != 2)
01764     return;
01765   MachineBasicBlock *MBB1 = *MBB->succ_begin();
01766   MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
01767   if (MBB1 != MBB2)
01768     return;
01769 
01770   MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
01771   assert(BranchMI && isCondBranch(BranchMI));
01772   DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
01773   BranchMI->eraseFromParent();
01774   SHOWNEWBLK(MBB1, "Removing redundant successor");
01775   MBB->removeSuccessor(MBB1);
01776 }
01777 
01778 void AMDGPUCFGStructurizer::addDummyExitBlock(
01779     SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
01780   MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
01781   FuncRep->push_back(DummyExitBlk);  //insert to function
01782   insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
01783 
01784   for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
01785        E = RetMBB.end(); It != E; ++It) {
01786     MachineBasicBlock *MBB = *It;
01787     MachineInstr *MI = getReturnInstr(MBB);
01788     if (MI)
01789       MI->eraseFromParent();
01790     MBB->addSuccessor(DummyExitBlk);
01791     DEBUG(
01792       dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
01793              << " successors\n";
01794     );
01795   }
01796   SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
01797 }
01798 
01799 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
01800   while (MBB->succ_size())
01801     MBB->removeSuccessor(*MBB->succ_begin());
01802 }
01803 
01804 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
01805     int SccNum) {
01806   BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
01807   if (!srcBlkInfo)
01808     srcBlkInfo = new BlockInformation();
01809   srcBlkInfo->SccNum = SccNum;
01810 }
01811 
01812 void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
01813   DEBUG(
01814         dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
01815   );
01816 
01817   BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
01818 
01819   if (!SrcBlkInfo)
01820     SrcBlkInfo = new BlockInformation();
01821 
01822   SrcBlkInfo->IsRetired = true;
01823   assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
01824          && "can't retire block yet");
01825 }
01826 
01827 void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep,
01828     MachineBasicBlock *MBB) {
01829   MachineBasicBlock *&TheEntry = LLInfoMap[loopRep];
01830   if (!MBB) {
01831     MBB = FuncRep->CreateMachineBasicBlock();
01832     FuncRep->push_back(MBB);  //insert to function
01833     SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: ");
01834   }
01835   TheEntry = MBB;
01836   DEBUG(
01837     dbgs() << "setLoopLandBlock loop-header = BB"
01838            << loopRep->getHeader()->getNumber()
01839            << "  landing-block = BB" << MBB->getNumber() << "\n";
01840   );
01841 }
01842 
01843 MachineBasicBlock *
01844 AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
01845     MachineBasicBlock *MBB2) {
01846 
01847   if (PDT->dominates(MBB1, MBB2))
01848     return MBB1;
01849   if (PDT->dominates(MBB2, MBB1))
01850     return MBB2;
01851 
01852   MachineDomTreeNode *Node1 = PDT->getNode(MBB1);
01853   MachineDomTreeNode *Node2 = PDT->getNode(MBB2);
01854 
01855   // Handle newly cloned node.
01856   if (!Node1 && MBB1->succ_size() == 1)
01857     return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2);
01858   if (!Node2 && MBB2->succ_size() == 1)
01859     return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());
01860 
01861   if (!Node1 || !Node2)
01862     return nullptr;
01863 
01864   Node1 = Node1->getIDom();
01865   while (Node1) {
01866     if (PDT->dominates(Node1, Node2))
01867       return Node1->getBlock();
01868     Node1 = Node1->getIDom();
01869   }
01870 
01871   return nullptr;
01872 }
01873 
01874 MachineBasicBlock *
01875 AMDGPUCFGStructurizer::findNearestCommonPostDom(
01876     std::set<MachineBasicBlock *> &MBBs) {
01877   MachineBasicBlock *CommonDom;
01878   std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin();
01879   std::set<MachineBasicBlock *>::const_iterator E = MBBs.end();
01880   for (CommonDom = *It; It != E && CommonDom; ++It) {
01881     MachineBasicBlock *MBB = *It;
01882     if (MBB != CommonDom)
01883       CommonDom = findNearestCommonPostDom(MBB, CommonDom);
01884   }
01885 
01886   DEBUG(
01887     dbgs() << "Common post dominator for exit blocks is ";
01888     if (CommonDom)
01889           dbgs() << "BB" << CommonDom->getNumber() << "\n";
01890     else
01891       dbgs() << "NULL\n";
01892   );
01893 
01894   return CommonDom;
01895 }
01896 
01897 char AMDGPUCFGStructurizer::ID = 0;
01898 
01899 } // end anonymous namespace
01900 
01901 
01902 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
01903                       "AMDGPU CFG Structurizer", false, false)
01904 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
01905 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
01906 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
01907 INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
01908                       "AMDGPU CFG Structurizer", false, false)
01909 
01910 FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
01911   return new AMDGPUCFGStructurizer();
01912 }