LLVM API Documentation
00001 //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass identifies loops where we can generate the PPC branch instructions 00011 // that decrement and test the count register (CTR) (bdnz and friends). 00012 // 00013 // The pattern that defines the induction variable can changed depending on 00014 // prior optimizations. For example, the IndVarSimplify phase run by 'opt' 00015 // normalizes induction variables, and the Loop Strength Reduction pass 00016 // run by 'llc' may also make changes to the induction variable. 00017 // 00018 // Criteria for CTR loops: 00019 // - Countable loops (w/ ind. var for a trip count) 00020 // - Try inner-most loops first 00021 // - No nested CTR loops. 00022 // - No function calls in loops. 00023 // 00024 //===----------------------------------------------------------------------===// 00025 00026 #include "llvm/Transforms/Scalar.h" 00027 #include "PPC.h" 00028 #include "PPCTargetMachine.h" 00029 #include "llvm/ADT/STLExtras.h" 00030 #include "llvm/ADT/Statistic.h" 00031 #include "llvm/Analysis/LoopInfo.h" 00032 #include "llvm/Analysis/ScalarEvolutionExpander.h" 00033 #include "llvm/IR/Constants.h" 00034 #include "llvm/IR/DerivedTypes.h" 00035 #include "llvm/IR/Dominators.h" 00036 #include "llvm/IR/InlineAsm.h" 00037 #include "llvm/IR/Instructions.h" 00038 #include "llvm/IR/IntrinsicInst.h" 00039 #include "llvm/IR/Module.h" 00040 #include "llvm/IR/ValueHandle.h" 00041 #include "llvm/PassSupport.h" 00042 #include "llvm/Support/CommandLine.h" 00043 #include "llvm/Support/Debug.h" 00044 #include "llvm/Support/raw_ostream.h" 00045 #include "llvm/Target/TargetLibraryInfo.h" 00046 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 00047 #include "llvm/Transforms/Utils/Local.h" 00048 #include "llvm/Transforms/Utils/LoopUtils.h" 00049 00050 #ifndef NDEBUG 00051 #include "llvm/CodeGen/MachineDominators.h" 00052 #include "llvm/CodeGen/MachineFunction.h" 00053 #include "llvm/CodeGen/MachineFunctionPass.h" 00054 #include "llvm/CodeGen/MachineRegisterInfo.h" 00055 #endif 00056 00057 #include <algorithm> 00058 #include <vector> 00059 00060 using namespace llvm; 00061 00062 #define DEBUG_TYPE "ctrloops" 00063 00064 #ifndef NDEBUG 00065 static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); 00066 #endif 00067 00068 STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); 00069 00070 namespace llvm { 00071 void initializePPCCTRLoopsPass(PassRegistry&); 00072 #ifndef NDEBUG 00073 void initializePPCCTRLoopsVerifyPass(PassRegistry&); 00074 #endif 00075 } 00076 00077 namespace { 00078 struct PPCCTRLoops : public FunctionPass { 00079 00080 #ifndef NDEBUG 00081 static int Counter; 00082 #endif 00083 00084 public: 00085 static char ID; 00086 00087 PPCCTRLoops() : FunctionPass(ID), TM(nullptr) { 00088 initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); 00089 } 00090 PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { 00091 initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); 00092 } 00093 00094 bool runOnFunction(Function &F) override; 00095 00096 void getAnalysisUsage(AnalysisUsage &AU) const override { 00097 AU.addRequired<LoopInfo>(); 00098 AU.addPreserved<LoopInfo>(); 00099 AU.addRequired<DominatorTreeWrapperPass>(); 00100 AU.addPreserved<DominatorTreeWrapperPass>(); 00101 AU.addRequired<ScalarEvolution>(); 00102 } 00103 00104 private: 00105 bool mightUseCTR(const Triple &TT, BasicBlock *BB); 00106 bool convertToCTRLoop(Loop *L); 00107 00108 private: 00109 PPCTargetMachine *TM; 00110 LoopInfo *LI; 00111 ScalarEvolution *SE; 00112 const DataLayout *DL; 00113 DominatorTree *DT; 00114 const TargetLibraryInfo *LibInfo; 00115 }; 00116 00117 char PPCCTRLoops::ID = 0; 00118 #ifndef NDEBUG 00119 int PPCCTRLoops::Counter = 0; 00120 #endif 00121 00122 #ifndef NDEBUG 00123 struct PPCCTRLoopsVerify : public MachineFunctionPass { 00124 public: 00125 static char ID; 00126 00127 PPCCTRLoopsVerify() : MachineFunctionPass(ID) { 00128 initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry()); 00129 } 00130 00131 void getAnalysisUsage(AnalysisUsage &AU) const override { 00132 AU.addRequired<MachineDominatorTree>(); 00133 MachineFunctionPass::getAnalysisUsage(AU); 00134 } 00135 00136 bool runOnMachineFunction(MachineFunction &MF) override; 00137 00138 private: 00139 MachineDominatorTree *MDT; 00140 }; 00141 00142 char PPCCTRLoopsVerify::ID = 0; 00143 #endif // NDEBUG 00144 } // end anonymous namespace 00145 00146 INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", 00147 false, false) 00148 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 00149 INITIALIZE_PASS_DEPENDENCY(LoopInfo) 00150 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 00151 INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", 00152 false, false) 00153 00154 FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) { 00155 return new PPCCTRLoops(TM); 00156 } 00157 00158 #ifndef NDEBUG 00159 INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", 00160 "PowerPC CTR Loops Verify", false, false) 00161 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 00162 INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", 00163 "PowerPC CTR Loops Verify", false, false) 00164 00165 FunctionPass *llvm::createPPCCTRLoopsVerify() { 00166 return new PPCCTRLoopsVerify(); 00167 } 00168 #endif // NDEBUG 00169 00170 bool PPCCTRLoops::runOnFunction(Function &F) { 00171 LI = &getAnalysis<LoopInfo>(); 00172 SE = &getAnalysis<ScalarEvolution>(); 00173 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 00174 DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); 00175 DL = DLP ? &DLP->getDataLayout() : nullptr; 00176 LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>(); 00177 00178 bool MadeChange = false; 00179 00180 for (LoopInfo::iterator I = LI->begin(), E = LI->end(); 00181 I != E; ++I) { 00182 Loop *L = *I; 00183 if (!L->getParentLoop()) 00184 MadeChange |= convertToCTRLoop(L); 00185 } 00186 00187 return MadeChange; 00188 } 00189 00190 static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { 00191 if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) 00192 return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); 00193 00194 return false; 00195 } 00196 00197 bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { 00198 for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); 00199 J != JE; ++J) { 00200 if (CallInst *CI = dyn_cast<CallInst>(J)) { 00201 if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { 00202 // Inline ASM is okay, unless it clobbers the ctr register. 00203 InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); 00204 for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { 00205 InlineAsm::ConstraintInfo &C = CIV[i]; 00206 if (C.Type != InlineAsm::isInput) 00207 for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) 00208 if (StringRef(C.Codes[j]).equals_lower("{ctr}")) 00209 return true; 00210 } 00211 00212 continue; 00213 } 00214 00215 if (!TM) 00216 return true; 00217 const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); 00218 00219 if (Function *F = CI->getCalledFunction()) { 00220 // Most intrinsics don't become function calls, but some might. 00221 // sin, cos, exp and log are always calls. 00222 unsigned Opcode; 00223 if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { 00224 switch (F->getIntrinsicID()) { 00225 default: continue; 00226 00227 // VisualStudio defines setjmp as _setjmp 00228 #if defined(_MSC_VER) && defined(setjmp) && \ 00229 !defined(setjmp_undefined_for_msvc) 00230 # pragma push_macro("setjmp") 00231 # undef setjmp 00232 # define setjmp_undefined_for_msvc 00233 #endif 00234 00235 case Intrinsic::setjmp: 00236 00237 #if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) 00238 // let's return it to _setjmp state 00239 # pragma pop_macro("setjmp") 00240 # undef setjmp_undefined_for_msvc 00241 #endif 00242 00243 case Intrinsic::longjmp: 00244 00245 // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp 00246 // because, although it does clobber the counter register, the 00247 // control can't then return to inside the loop unless there is also 00248 // an eh_sjlj_setjmp. 00249 case Intrinsic::eh_sjlj_setjmp: 00250 00251 case Intrinsic::memcpy: 00252 case Intrinsic::memmove: 00253 case Intrinsic::memset: 00254 case Intrinsic::powi: 00255 case Intrinsic::log: 00256 case Intrinsic::log2: 00257 case Intrinsic::log10: 00258 case Intrinsic::exp: 00259 case Intrinsic::exp2: 00260 case Intrinsic::pow: 00261 case Intrinsic::sin: 00262 case Intrinsic::cos: 00263 return true; 00264 case Intrinsic::copysign: 00265 if (CI->getArgOperand(0)->getType()->getScalarType()-> 00266 isPPC_FP128Ty()) 00267 return true; 00268 else 00269 continue; // ISD::FCOPYSIGN is never a library call. 00270 case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; 00271 case Intrinsic::floor: Opcode = ISD::FFLOOR; break; 00272 case Intrinsic::ceil: Opcode = ISD::FCEIL; break; 00273 case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; 00274 case Intrinsic::rint: Opcode = ISD::FRINT; break; 00275 case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; 00276 case Intrinsic::round: Opcode = ISD::FROUND; break; 00277 } 00278 } 00279 00280 // PowerPC does not use [US]DIVREM or other library calls for 00281 // operations on regular types which are not otherwise library calls 00282 // (i.e. soft float or atomics). If adapting for targets that do, 00283 // additional care is required here. 00284 00285 LibFunc::Func Func; 00286 if (!F->hasLocalLinkage() && F->hasName() && LibInfo && 00287 LibInfo->getLibFunc(F->getName(), Func) && 00288 LibInfo->hasOptimizedCodeGen(Func)) { 00289 // Non-read-only functions are never treated as intrinsics. 00290 if (!CI->onlyReadsMemory()) 00291 return true; 00292 00293 // Conversion happens only for FP calls. 00294 if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) 00295 return true; 00296 00297 switch (Func) { 00298 default: return true; 00299 case LibFunc::copysign: 00300 case LibFunc::copysignf: 00301 continue; // ISD::FCOPYSIGN is never a library call. 00302 case LibFunc::copysignl: 00303 return true; 00304 case LibFunc::fabs: 00305 case LibFunc::fabsf: 00306 case LibFunc::fabsl: 00307 continue; // ISD::FABS is never a library call. 00308 case LibFunc::sqrt: 00309 case LibFunc::sqrtf: 00310 case LibFunc::sqrtl: 00311 Opcode = ISD::FSQRT; break; 00312 case LibFunc::floor: 00313 case LibFunc::floorf: 00314 case LibFunc::floorl: 00315 Opcode = ISD::FFLOOR; break; 00316 case LibFunc::nearbyint: 00317 case LibFunc::nearbyintf: 00318 case LibFunc::nearbyintl: 00319 Opcode = ISD::FNEARBYINT; break; 00320 case LibFunc::ceil: 00321 case LibFunc::ceilf: 00322 case LibFunc::ceill: 00323 Opcode = ISD::FCEIL; break; 00324 case LibFunc::rint: 00325 case LibFunc::rintf: 00326 case LibFunc::rintl: 00327 Opcode = ISD::FRINT; break; 00328 case LibFunc::round: 00329 case LibFunc::roundf: 00330 case LibFunc::roundl: 00331 Opcode = ISD::FROUND; break; 00332 case LibFunc::trunc: 00333 case LibFunc::truncf: 00334 case LibFunc::truncl: 00335 Opcode = ISD::FTRUNC; break; 00336 } 00337 00338 MVT VTy = 00339 TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true); 00340 if (VTy == MVT::Other) 00341 return true; 00342 00343 if (TLI->isOperationLegalOrCustom(Opcode, VTy)) 00344 continue; 00345 else if (VTy.isVector() && 00346 TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType())) 00347 continue; 00348 00349 return true; 00350 } 00351 } 00352 00353 return true; 00354 } else if (isa<BinaryOperator>(J) && 00355 J->getType()->getScalarType()->isPPC_FP128Ty()) { 00356 // Most operations on ppc_f128 values become calls. 00357 return true; 00358 } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) || 00359 isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) { 00360 CastInst *CI = cast<CastInst>(J); 00361 if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || 00362 CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || 00363 isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) || 00364 isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType())) 00365 return true; 00366 } else if (isLargeIntegerTy(TT.isArch32Bit(), 00367 J->getType()->getScalarType()) && 00368 (J->getOpcode() == Instruction::UDiv || 00369 J->getOpcode() == Instruction::SDiv || 00370 J->getOpcode() == Instruction::URem || 00371 J->getOpcode() == Instruction::SRem)) { 00372 return true; 00373 } else if (TT.isArch32Bit() && 00374 isLargeIntegerTy(false, J->getType()->getScalarType()) && 00375 (J->getOpcode() == Instruction::Shl || 00376 J->getOpcode() == Instruction::AShr || 00377 J->getOpcode() == Instruction::LShr)) { 00378 // Only on PPC32, for 128-bit integers (specifically not 64-bit 00379 // integers), these might be runtime calls. 00380 return true; 00381 } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) { 00382 // On PowerPC, indirect jumps use the counter register. 00383 return true; 00384 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { 00385 if (!TM) 00386 return true; 00387 const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); 00388 00389 if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) 00390 return true; 00391 } 00392 } 00393 00394 return false; 00395 } 00396 00397 bool PPCCTRLoops::convertToCTRLoop(Loop *L) { 00398 bool MadeChange = false; 00399 00400 Triple TT = Triple(L->getHeader()->getParent()->getParent()-> 00401 getTargetTriple()); 00402 if (!TT.isArch32Bit() && !TT.isArch64Bit()) 00403 return MadeChange; // Unknown arch. type. 00404 00405 // Process nested loops first. 00406 for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { 00407 MadeChange |= convertToCTRLoop(*I); 00408 } 00409 00410 // If a nested loop has been converted, then we can't convert this loop. 00411 if (MadeChange) 00412 return MadeChange; 00413 00414 #ifndef NDEBUG 00415 // Stop trying after reaching the limit (if any). 00416 int Limit = CTRLoopLimit; 00417 if (Limit >= 0) { 00418 if (Counter >= CTRLoopLimit) 00419 return false; 00420 Counter++; 00421 } 00422 #endif 00423 00424 // We don't want to spill/restore the counter register, and so we don't 00425 // want to use the counter register if the loop contains calls. 00426 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 00427 I != IE; ++I) 00428 if (mightUseCTR(TT, *I)) 00429 return MadeChange; 00430 00431 SmallVector<BasicBlock*, 4> ExitingBlocks; 00432 L->getExitingBlocks(ExitingBlocks); 00433 00434 BasicBlock *CountedExitBlock = nullptr; 00435 const SCEV *ExitCount = nullptr; 00436 BranchInst *CountedExitBranch = nullptr; 00437 for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), 00438 IE = ExitingBlocks.end(); I != IE; ++I) { 00439 const SCEV *EC = SE->getExitCount(L, *I); 00440 DEBUG(dbgs() << "Exit Count for " << *L << " from block " << 00441 (*I)->getName() << ": " << *EC << "\n"); 00442 if (isa<SCEVCouldNotCompute>(EC)) 00443 continue; 00444 if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { 00445 if (ConstEC->getValue()->isZero()) 00446 continue; 00447 } else if (!SE->isLoopInvariant(EC, L)) 00448 continue; 00449 00450 if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32)) 00451 continue; 00452 00453 // We now have a loop-invariant count of loop iterations (which is not the 00454 // constant zero) for which we know that this loop will not exit via this 00455 // exisiting block. 00456 00457 // We need to make sure that this block will run on every loop iteration. 00458 // For this to be true, we must dominate all blocks with backedges. Such 00459 // blocks are in-loop predecessors to the header block. 00460 bool NotAlways = false; 00461 for (pred_iterator PI = pred_begin(L->getHeader()), 00462 PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { 00463 if (!L->contains(*PI)) 00464 continue; 00465 00466 if (!DT->dominates(*I, *PI)) { 00467 NotAlways = true; 00468 break; 00469 } 00470 } 00471 00472 if (NotAlways) 00473 continue; 00474 00475 // Make sure this blocks ends with a conditional branch. 00476 Instruction *TI = (*I)->getTerminator(); 00477 if (!TI) 00478 continue; 00479 00480 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 00481 if (!BI->isConditional()) 00482 continue; 00483 00484 CountedExitBranch = BI; 00485 } else 00486 continue; 00487 00488 // Note that this block may not be the loop latch block, even if the loop 00489 // has a latch block. 00490 CountedExitBlock = *I; 00491 ExitCount = EC; 00492 break; 00493 } 00494 00495 if (!CountedExitBlock) 00496 return MadeChange; 00497 00498 BasicBlock *Preheader = L->getLoopPreheader(); 00499 00500 // If we don't have a preheader, then insert one. If we already have a 00501 // preheader, then we can use it (except if the preheader contains a use of 00502 // the CTR register because some such uses might be reordered by the 00503 // selection DAG after the mtctr instruction). 00504 if (!Preheader || mightUseCTR(TT, Preheader)) 00505 Preheader = InsertPreheaderForLoop(L, this); 00506 if (!Preheader) 00507 return MadeChange; 00508 00509 DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n"); 00510 00511 // Insert the count into the preheader and replace the condition used by the 00512 // selected branch. 00513 MadeChange = true; 00514 00515 SCEVExpander SCEVE(*SE, "loopcnt"); 00516 LLVMContext &C = SE->getContext(); 00517 Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) : 00518 Type::getInt32Ty(C); 00519 if (!ExitCount->getType()->isPointerTy() && 00520 ExitCount->getType() != CountType) 00521 ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); 00522 ExitCount = SE->getAddExpr(ExitCount, 00523 SE->getConstant(CountType, 1)); 00524 Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, 00525 Preheader->getTerminator()); 00526 00527 IRBuilder<> CountBuilder(Preheader->getTerminator()); 00528 Module *M = Preheader->getParent()->getParent(); 00529 Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, 00530 CountType); 00531 CountBuilder.CreateCall(MTCTRFunc, ECValue); 00532 00533 IRBuilder<> CondBuilder(CountedExitBranch); 00534 Value *DecFunc = 00535 Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); 00536 Value *NewCond = CondBuilder.CreateCall(DecFunc); 00537 Value *OldCond = CountedExitBranch->getCondition(); 00538 CountedExitBranch->setCondition(NewCond); 00539 00540 // The false branch must exit the loop. 00541 if (!L->contains(CountedExitBranch->getSuccessor(0))) 00542 CountedExitBranch->swapSuccessors(); 00543 00544 // The old condition may be dead now, and may have even created a dead PHI 00545 // (the original induction variable). 00546 RecursivelyDeleteTriviallyDeadInstructions(OldCond); 00547 DeleteDeadPHIs(CountedExitBlock); 00548 00549 ++NumCTRLoops; 00550 return MadeChange; 00551 } 00552 00553 #ifndef NDEBUG 00554 static bool clobbersCTR(const MachineInstr *MI) { 00555 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 00556 const MachineOperand &MO = MI->getOperand(i); 00557 if (MO.isReg()) { 00558 if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) 00559 return true; 00560 } else if (MO.isRegMask()) { 00561 if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8)) 00562 return true; 00563 } 00564 } 00565 00566 return false; 00567 } 00568 00569 static bool verifyCTRBranch(MachineBasicBlock *MBB, 00570 MachineBasicBlock::iterator I) { 00571 MachineBasicBlock::iterator BI = I; 00572 SmallSet<MachineBasicBlock *, 16> Visited; 00573 SmallVector<MachineBasicBlock *, 8> Preds; 00574 bool CheckPreds; 00575 00576 if (I == MBB->begin()) { 00577 Visited.insert(MBB); 00578 goto queue_preds; 00579 } else 00580 --I; 00581 00582 check_block: 00583 Visited.insert(MBB); 00584 if (I == MBB->end()) 00585 goto queue_preds; 00586 00587 CheckPreds = true; 00588 for (MachineBasicBlock::iterator IE = MBB->begin();; --I) { 00589 unsigned Opc = I->getOpcode(); 00590 if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) { 00591 CheckPreds = false; 00592 break; 00593 } 00594 00595 if (I != BI && clobbersCTR(I)) { 00596 DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" << 00597 MBB->getFullName() << ") instruction " << *I << 00598 " clobbers CTR, invalidating " << "BB#" << 00599 BI->getParent()->getNumber() << " (" << 00600 BI->getParent()->getFullName() << ") instruction " << 00601 *BI << "\n"); 00602 return false; 00603 } 00604 00605 if (I == IE) 00606 break; 00607 } 00608 00609 if (!CheckPreds && Preds.empty()) 00610 return true; 00611 00612 if (CheckPreds) { 00613 queue_preds: 00614 if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) { 00615 DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" << 00616 BI->getParent()->getNumber() << " (" << 00617 BI->getParent()->getFullName() << ") instruction " << 00618 *BI << "\n"); 00619 return false; 00620 } 00621 00622 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 00623 PIE = MBB->pred_end(); PI != PIE; ++PI) 00624 Preds.push_back(*PI); 00625 } 00626 00627 do { 00628 MBB = Preds.pop_back_val(); 00629 if (!Visited.count(MBB)) { 00630 I = MBB->getLastNonDebugInstr(); 00631 goto check_block; 00632 } 00633 } while (!Preds.empty()); 00634 00635 return true; 00636 } 00637 00638 bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { 00639 MDT = &getAnalysis<MachineDominatorTree>(); 00640 00641 // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before 00642 // any other instructions that might clobber the ctr register. 00643 for (MachineFunction::iterator I = MF.begin(), IE = MF.end(); 00644 I != IE; ++I) { 00645 MachineBasicBlock *MBB = I; 00646 if (!MDT->isReachableFromEntry(MBB)) 00647 continue; 00648 00649 for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(), 00650 MIIE = MBB->end(); MII != MIIE; ++MII) { 00651 unsigned Opc = MII->getOpcode(); 00652 if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ || 00653 Opc == PPC::BDZ8 || Opc == PPC::BDZ) 00654 if (!verifyCTRBranch(MBB, MII)) 00655 llvm_unreachable("Invalid PPC CTR loop!"); 00656 } 00657 } 00658 00659 return false; 00660 } 00661 #endif // NDEBUG 00662