LLVM API Documentation
00001 //===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass identifies loops where we can generate the Hexagon hardware 00011 // loop instruction. The hardware loop can perform loop branches with a 00012 // zero-cycle overhead. 00013 // 00014 // The pattern that defines the induction variable can changed depending on 00015 // prior optimizations. For example, the IndVarSimplify phase run by 'opt' 00016 // normalizes induction variables, and the Loop Strength Reduction pass 00017 // run by 'llc' may also make changes to the induction variable. 00018 // The pattern detected by this phase is due to running Strength Reduction. 00019 // 00020 // Criteria for hardware loops: 00021 // - Countable loops (w/ ind. var for a trip count) 00022 // - Assumes loops are normalized by IndVarSimplify 00023 // - Try inner-most loops first 00024 // - No nested hardware loops. 00025 // - No function calls in loops. 00026 // 00027 //===----------------------------------------------------------------------===// 00028 00029 #include "llvm/ADT/SmallSet.h" 00030 #include "Hexagon.h" 00031 #include "HexagonTargetMachine.h" 00032 #include "llvm/ADT/Statistic.h" 00033 #include "llvm/CodeGen/MachineDominators.h" 00034 #include "llvm/CodeGen/MachineFunction.h" 00035 #include "llvm/CodeGen/MachineFunctionPass.h" 00036 #include "llvm/CodeGen/MachineInstrBuilder.h" 00037 #include "llvm/CodeGen/MachineLoopInfo.h" 00038 #include "llvm/CodeGen/MachineRegisterInfo.h" 00039 #include "llvm/PassSupport.h" 00040 #include "llvm/Support/CommandLine.h" 00041 #include "llvm/Support/Debug.h" 00042 #include "llvm/Support/raw_ostream.h" 00043 #include "llvm/Target/TargetInstrInfo.h" 00044 #include <algorithm> 00045 #include <vector> 00046 00047 using namespace llvm; 00048 00049 #define DEBUG_TYPE "hwloops" 00050 00051 #ifndef NDEBUG 00052 static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1)); 00053 #endif 00054 00055 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); 00056 00057 namespace llvm { 00058 void initializeHexagonHardwareLoopsPass(PassRegistry&); 00059 } 00060 00061 namespace { 00062 class CountValue; 00063 struct HexagonHardwareLoops : public MachineFunctionPass { 00064 MachineLoopInfo *MLI; 00065 MachineRegisterInfo *MRI; 00066 MachineDominatorTree *MDT; 00067 const HexagonTargetMachine *TM; 00068 const HexagonInstrInfo *TII; 00069 const HexagonRegisterInfo *TRI; 00070 #ifndef NDEBUG 00071 static int Counter; 00072 #endif 00073 00074 public: 00075 static char ID; 00076 00077 HexagonHardwareLoops() : MachineFunctionPass(ID) { 00078 initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry()); 00079 } 00080 00081 bool runOnMachineFunction(MachineFunction &MF) override; 00082 00083 const char *getPassName() const override { return "Hexagon Hardware Loops"; } 00084 00085 void getAnalysisUsage(AnalysisUsage &AU) const override { 00086 AU.addRequired<MachineDominatorTree>(); 00087 AU.addRequired<MachineLoopInfo>(); 00088 MachineFunctionPass::getAnalysisUsage(AU); 00089 } 00090 00091 private: 00092 /// Kinds of comparisons in the compare instructions. 00093 struct Comparison { 00094 enum Kind { 00095 EQ = 0x01, 00096 NE = 0x02, 00097 L = 0x04, // Less-than property. 00098 G = 0x08, // Greater-than property. 00099 U = 0x40, // Unsigned property. 00100 LTs = L, 00101 LEs = L | EQ, 00102 GTs = G, 00103 GEs = G | EQ, 00104 LTu = L | U, 00105 LEu = L | EQ | U, 00106 GTu = G | U, 00107 GEu = G | EQ | U 00108 }; 00109 00110 static Kind getSwappedComparison(Kind Cmp) { 00111 assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator"); 00112 if ((Cmp & L) || (Cmp & G)) 00113 return (Kind)(Cmp ^ (L|G)); 00114 return Cmp; 00115 } 00116 }; 00117 00118 /// \brief Find the register that contains the loop controlling 00119 /// induction variable. 00120 /// If successful, it will return true and set the \p Reg, \p IVBump 00121 /// and \p IVOp arguments. Otherwise it will return false. 00122 /// The returned induction register is the register R that follows the 00123 /// following induction pattern: 00124 /// loop: 00125 /// R = phi ..., [ R.next, LatchBlock ] 00126 /// R.next = R + #bump 00127 /// if (R.next < #N) goto loop 00128 /// IVBump is the immediate value added to R, and IVOp is the instruction 00129 /// "R.next = R + #bump". 00130 bool findInductionRegister(MachineLoop *L, unsigned &Reg, 00131 int64_t &IVBump, MachineInstr *&IVOp) const; 00132 00133 /// \brief Analyze the statements in a loop to determine if the loop 00134 /// has a computable trip count and, if so, return a value that represents 00135 /// the trip count expression. 00136 CountValue *getLoopTripCount(MachineLoop *L, 00137 SmallVectorImpl<MachineInstr *> &OldInsts); 00138 00139 /// \brief Return the expression that represents the number of times 00140 /// a loop iterates. The function takes the operands that represent the 00141 /// loop start value, loop end value, and induction value. Based upon 00142 /// these operands, the function attempts to compute the trip count. 00143 /// If the trip count is not directly available (as an immediate value, 00144 /// or a register), the function will attempt to insert computation of it 00145 /// to the loop's preheader. 00146 CountValue *computeCount(MachineLoop *Loop, 00147 const MachineOperand *Start, 00148 const MachineOperand *End, 00149 unsigned IVReg, 00150 int64_t IVBump, 00151 Comparison::Kind Cmp) const; 00152 00153 /// \brief Return true if the instruction is not valid within a hardware 00154 /// loop. 00155 bool isInvalidLoopOperation(const MachineInstr *MI) const; 00156 00157 /// \brief Return true if the loop contains an instruction that inhibits 00158 /// using the hardware loop. 00159 bool containsInvalidInstruction(MachineLoop *L) const; 00160 00161 /// \brief Given a loop, check if we can convert it to a hardware loop. 00162 /// If so, then perform the conversion and return true. 00163 bool convertToHardwareLoop(MachineLoop *L); 00164 00165 /// \brief Return true if the instruction is now dead. 00166 bool isDead(const MachineInstr *MI, 00167 SmallVectorImpl<MachineInstr *> &DeadPhis) const; 00168 00169 /// \brief Remove the instruction if it is now dead. 00170 void removeIfDead(MachineInstr *MI); 00171 00172 /// \brief Make sure that the "bump" instruction executes before the 00173 /// compare. We need that for the IV fixup, so that the compare 00174 /// instruction would not use a bumped value that has not yet been 00175 /// defined. If the instructions are out of order, try to reorder them. 00176 bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); 00177 00178 /// \brief Get the instruction that loads an immediate value into \p R, 00179 /// or 0 if such an instruction does not exist. 00180 MachineInstr *defWithImmediate(unsigned R); 00181 00182 /// \brief Get the immediate value referenced to by \p MO, either for 00183 /// immediate operands, or for register operands, where the register 00184 /// was defined with an immediate value. 00185 int64_t getImmediate(MachineOperand &MO); 00186 00187 /// \brief Reset the given machine operand to now refer to a new immediate 00188 /// value. Assumes that the operand was already referencing an immediate 00189 /// value, either directly, or via a register. 00190 void setImmediate(MachineOperand &MO, int64_t Val); 00191 00192 /// \brief Fix the data flow of the induction varible. 00193 /// The desired flow is: phi ---> bump -+-> comparison-in-latch. 00194 /// | 00195 /// +-> back to phi 00196 /// where "bump" is the increment of the induction variable: 00197 /// iv = iv + #const. 00198 /// Due to some prior code transformations, the actual flow may look 00199 /// like this: 00200 /// phi -+-> bump ---> back to phi 00201 /// | 00202 /// +-> comparison-in-latch (against upper_bound-bump), 00203 /// i.e. the comparison that controls the loop execution may be using 00204 /// the value of the induction variable from before the increment. 00205 /// 00206 /// Return true if the loop's flow is the desired one (i.e. it's 00207 /// either been fixed, or no fixing was necessary). 00208 /// Otherwise, return false. This can happen if the induction variable 00209 /// couldn't be identified, or if the value in the latch's comparison 00210 /// cannot be adjusted to reflect the post-bump value. 00211 bool fixupInductionVariable(MachineLoop *L); 00212 00213 /// \brief Given a loop, if it does not have a preheader, create one. 00214 /// Return the block that is the preheader. 00215 MachineBasicBlock *createPreheaderForLoop(MachineLoop *L); 00216 }; 00217 00218 char HexagonHardwareLoops::ID = 0; 00219 #ifndef NDEBUG 00220 int HexagonHardwareLoops::Counter = 0; 00221 #endif 00222 00223 /// \brief Abstraction for a trip count of a loop. A smaller version 00224 /// of the MachineOperand class without the concerns of changing the 00225 /// operand representation. 00226 class CountValue { 00227 public: 00228 enum CountValueType { 00229 CV_Register, 00230 CV_Immediate 00231 }; 00232 private: 00233 CountValueType Kind; 00234 union Values { 00235 struct { 00236 unsigned Reg; 00237 unsigned Sub; 00238 } R; 00239 unsigned ImmVal; 00240 } Contents; 00241 00242 public: 00243 explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) { 00244 Kind = t; 00245 if (Kind == CV_Register) { 00246 Contents.R.Reg = v; 00247 Contents.R.Sub = u; 00248 } else { 00249 Contents.ImmVal = v; 00250 } 00251 } 00252 bool isReg() const { return Kind == CV_Register; } 00253 bool isImm() const { return Kind == CV_Immediate; } 00254 00255 unsigned getReg() const { 00256 assert(isReg() && "Wrong CountValue accessor"); 00257 return Contents.R.Reg; 00258 } 00259 unsigned getSubReg() const { 00260 assert(isReg() && "Wrong CountValue accessor"); 00261 return Contents.R.Sub; 00262 } 00263 unsigned getImm() const { 00264 assert(isImm() && "Wrong CountValue accessor"); 00265 return Contents.ImmVal; 00266 } 00267 00268 void print(raw_ostream &OS, const TargetMachine *TM = nullptr) const { 00269 const TargetRegisterInfo *TRI = 00270 TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr; 00271 if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); } 00272 if (isImm()) { OS << Contents.ImmVal; } 00273 } 00274 }; 00275 } // end anonymous namespace 00276 00277 00278 INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", 00279 "Hexagon Hardware Loops", false, false) 00280 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 00281 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 00282 INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", 00283 "Hexagon Hardware Loops", false, false) 00284 00285 00286 /// \brief Returns true if the instruction is a hardware loop instruction. 00287 static bool isHardwareLoop(const MachineInstr *MI) { 00288 return MI->getOpcode() == Hexagon::LOOP0_r || 00289 MI->getOpcode() == Hexagon::LOOP0_i; 00290 } 00291 00292 FunctionPass *llvm::createHexagonHardwareLoops() { 00293 return new HexagonHardwareLoops(); 00294 } 00295 00296 00297 bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { 00298 DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); 00299 00300 bool Changed = false; 00301 00302 MLI = &getAnalysis<MachineLoopInfo>(); 00303 MRI = &MF.getRegInfo(); 00304 MDT = &getAnalysis<MachineDominatorTree>(); 00305 TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget()); 00306 TII = static_cast<const HexagonInstrInfo *>( 00307 TM->getSubtargetImpl()->getInstrInfo()); 00308 TRI = static_cast<const HexagonRegisterInfo *>( 00309 TM->getSubtargetImpl()->getRegisterInfo()); 00310 00311 for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); 00312 I != E; ++I) { 00313 MachineLoop *L = *I; 00314 if (!L->getParentLoop()) 00315 Changed |= convertToHardwareLoop(L); 00316 } 00317 00318 return Changed; 00319 } 00320 00321 00322 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, 00323 unsigned &Reg, 00324 int64_t &IVBump, 00325 MachineInstr *&IVOp 00326 ) const { 00327 MachineBasicBlock *Header = L->getHeader(); 00328 MachineBasicBlock *Preheader = L->getLoopPreheader(); 00329 MachineBasicBlock *Latch = L->getLoopLatch(); 00330 if (!Header || !Preheader || !Latch) 00331 return false; 00332 00333 // This pair represents an induction register together with an immediate 00334 // value that will be added to it in each loop iteration. 00335 typedef std::pair<unsigned,int64_t> RegisterBump; 00336 00337 // Mapping: R.next -> (R, bump), where R, R.next and bump are derived 00338 // from an induction operation 00339 // R.next = R + bump 00340 // where bump is an immediate value. 00341 typedef std::map<unsigned,RegisterBump> InductionMap; 00342 00343 InductionMap IndMap; 00344 00345 typedef MachineBasicBlock::instr_iterator instr_iterator; 00346 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 00347 I != E && I->isPHI(); ++I) { 00348 MachineInstr *Phi = &*I; 00349 00350 // Have a PHI instruction. Get the operand that corresponds to the 00351 // latch block, and see if is a result of an addition of form "reg+imm", 00352 // where the "reg" is defined by the PHI node we are looking at. 00353 for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { 00354 if (Phi->getOperand(i+1).getMBB() != Latch) 00355 continue; 00356 00357 unsigned PhiOpReg = Phi->getOperand(i).getReg(); 00358 MachineInstr *DI = MRI->getVRegDef(PhiOpReg); 00359 unsigned UpdOpc = DI->getOpcode(); 00360 bool isAdd = (UpdOpc == Hexagon::ADD_ri); 00361 00362 if (isAdd) { 00363 // If the register operand to the add is the PHI we're 00364 // looking at, this meets the induction pattern. 00365 unsigned IndReg = DI->getOperand(1).getReg(); 00366 if (MRI->getVRegDef(IndReg) == Phi) { 00367 unsigned UpdReg = DI->getOperand(0).getReg(); 00368 int64_t V = DI->getOperand(2).getImm(); 00369 IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); 00370 } 00371 } 00372 } // for (i) 00373 } // for (instr) 00374 00375 SmallVector<MachineOperand,2> Cond; 00376 MachineBasicBlock *TB = nullptr, *FB = nullptr; 00377 bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); 00378 if (NotAnalyzed) 00379 return false; 00380 00381 unsigned CSz = Cond.size(); 00382 assert (CSz == 1 || CSz == 2); 00383 unsigned PredR = Cond[CSz-1].getReg(); 00384 00385 MachineInstr *PredI = MRI->getVRegDef(PredR); 00386 if (!PredI->isCompare()) 00387 return false; 00388 00389 unsigned CmpReg1 = 0, CmpReg2 = 0; 00390 int CmpImm = 0, CmpMask = 0; 00391 bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2, 00392 CmpMask, CmpImm); 00393 // Fail if the compare was not analyzed, or it's not comparing a register 00394 // with an immediate value. Not checking the mask here, since we handle 00395 // the individual compare opcodes (including CMPb) later on. 00396 if (!CmpAnalyzed) 00397 return false; 00398 00399 // Exactly one of the input registers to the comparison should be among 00400 // the induction registers. 00401 InductionMap::iterator IndMapEnd = IndMap.end(); 00402 InductionMap::iterator F = IndMapEnd; 00403 if (CmpReg1 != 0) { 00404 InductionMap::iterator F1 = IndMap.find(CmpReg1); 00405 if (F1 != IndMapEnd) 00406 F = F1; 00407 } 00408 if (CmpReg2 != 0) { 00409 InductionMap::iterator F2 = IndMap.find(CmpReg2); 00410 if (F2 != IndMapEnd) { 00411 if (F != IndMapEnd) 00412 return false; 00413 F = F2; 00414 } 00415 } 00416 if (F == IndMapEnd) 00417 return false; 00418 00419 Reg = F->second.first; 00420 IVBump = F->second.second; 00421 IVOp = MRI->getVRegDef(F->first); 00422 return true; 00423 } 00424 00425 00426 /// \brief Analyze the statements in a loop to determine if the loop has 00427 /// a computable trip count and, if so, return a value that represents 00428 /// the trip count expression. 00429 /// 00430 /// This function iterates over the phi nodes in the loop to check for 00431 /// induction variable patterns that are used in the calculation for 00432 /// the number of time the loop is executed. 00433 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, 00434 SmallVectorImpl<MachineInstr *> &OldInsts) { 00435 MachineBasicBlock *TopMBB = L->getTopBlock(); 00436 MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); 00437 assert(PI != TopMBB->pred_end() && 00438 "Loop must have more than one incoming edge!"); 00439 MachineBasicBlock *Backedge = *PI++; 00440 if (PI == TopMBB->pred_end()) // dead loop? 00441 return nullptr; 00442 MachineBasicBlock *Incoming = *PI++; 00443 if (PI != TopMBB->pred_end()) // multiple backedges? 00444 return nullptr; 00445 00446 // Make sure there is one incoming and one backedge and determine which 00447 // is which. 00448 if (L->contains(Incoming)) { 00449 if (L->contains(Backedge)) 00450 return nullptr; 00451 std::swap(Incoming, Backedge); 00452 } else if (!L->contains(Backedge)) 00453 return nullptr; 00454 00455 // Look for the cmp instruction to determine if we can get a useful trip 00456 // count. The trip count can be either a register or an immediate. The 00457 // location of the value depends upon the type (reg or imm). 00458 MachineBasicBlock *Latch = L->getLoopLatch(); 00459 if (!Latch) 00460 return nullptr; 00461 00462 unsigned IVReg = 0; 00463 int64_t IVBump = 0; 00464 MachineInstr *IVOp; 00465 bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); 00466 if (!FoundIV) 00467 return nullptr; 00468 00469 MachineBasicBlock *Preheader = L->getLoopPreheader(); 00470 00471 MachineOperand *InitialValue = nullptr; 00472 MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); 00473 for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { 00474 MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); 00475 if (MBB == Preheader) 00476 InitialValue = &IV_Phi->getOperand(i); 00477 else if (MBB == Latch) 00478 IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. 00479 } 00480 if (!InitialValue) 00481 return nullptr; 00482 00483 SmallVector<MachineOperand,2> Cond; 00484 MachineBasicBlock *TB = nullptr, *FB = nullptr; 00485 bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); 00486 if (NotAnalyzed) 00487 return nullptr; 00488 00489 MachineBasicBlock *Header = L->getHeader(); 00490 // TB must be non-null. If FB is also non-null, one of them must be 00491 // the header. Otherwise, branch to TB could be exiting the loop, and 00492 // the fall through can go to the header. 00493 assert (TB && "Latch block without a branch?"); 00494 assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); 00495 if (!TB || (FB && TB != Header && FB != Header)) 00496 return nullptr; 00497 00498 // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch 00499 // to put imm(0), followed by P in the vector Cond. 00500 // If TB is not the header, it means that the "not-taken" path must lead 00501 // to the header. 00502 bool Negated = (Cond.size() > 1) ^ (TB != Header); 00503 unsigned PredReg = Cond[Cond.size()-1].getReg(); 00504 MachineInstr *CondI = MRI->getVRegDef(PredReg); 00505 unsigned CondOpc = CondI->getOpcode(); 00506 00507 unsigned CmpReg1 = 0, CmpReg2 = 0; 00508 int Mask = 0, ImmValue = 0; 00509 bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2, 00510 Mask, ImmValue); 00511 if (!AnalyzedCmp) 00512 return nullptr; 00513 00514 // The comparison operator type determines how we compute the loop 00515 // trip count. 00516 OldInsts.push_back(CondI); 00517 OldInsts.push_back(IVOp); 00518 00519 // Sadly, the following code gets information based on the position 00520 // of the operands in the compare instruction. This has to be done 00521 // this way, because the comparisons check for a specific relationship 00522 // between the operands (e.g. is-less-than), rather than to find out 00523 // what relationship the operands are in (as on PPC). 00524 Comparison::Kind Cmp; 00525 bool isSwapped = false; 00526 const MachineOperand &Op1 = CondI->getOperand(1); 00527 const MachineOperand &Op2 = CondI->getOperand(2); 00528 const MachineOperand *EndValue = nullptr; 00529 00530 if (Op1.isReg()) { 00531 if (Op2.isImm() || Op1.getReg() == IVReg) 00532 EndValue = &Op2; 00533 else { 00534 EndValue = &Op1; 00535 isSwapped = true; 00536 } 00537 } 00538 00539 if (!EndValue) 00540 return nullptr; 00541 00542 switch (CondOpc) { 00543 case Hexagon::CMPEQri: 00544 case Hexagon::CMPEQrr: 00545 Cmp = !Negated ? Comparison::EQ : Comparison::NE; 00546 break; 00547 case Hexagon::CMPGTUri: 00548 case Hexagon::CMPGTUrr: 00549 Cmp = !Negated ? Comparison::GTu : Comparison::LEu; 00550 break; 00551 case Hexagon::CMPGTri: 00552 case Hexagon::CMPGTrr: 00553 Cmp = !Negated ? Comparison::GTs : Comparison::LEs; 00554 break; 00555 // Very limited support for byte/halfword compares. 00556 case Hexagon::CMPbEQri_V4: 00557 case Hexagon::CMPhEQri_V4: { 00558 if (IVBump != 1) 00559 return nullptr; 00560 00561 int64_t InitV, EndV; 00562 // Since the comparisons are "ri", the EndValue should be an 00563 // immediate. Check it just in case. 00564 assert(EndValue->isImm() && "Unrecognized latch comparison"); 00565 EndV = EndValue->getImm(); 00566 // Allow InitialValue to be a register defined with an immediate. 00567 if (InitialValue->isReg()) { 00568 if (!defWithImmediate(InitialValue->getReg())) 00569 return nullptr; 00570 InitV = getImmediate(*InitialValue); 00571 } else { 00572 assert(InitialValue->isImm()); 00573 InitV = InitialValue->getImm(); 00574 } 00575 if (InitV >= EndV) 00576 return nullptr; 00577 if (CondOpc == Hexagon::CMPbEQri_V4) { 00578 if (!isInt<8>(InitV) || !isInt<8>(EndV)) 00579 return nullptr; 00580 } else { // Hexagon::CMPhEQri_V4 00581 if (!isInt<16>(InitV) || !isInt<16>(EndV)) 00582 return nullptr; 00583 } 00584 Cmp = !Negated ? Comparison::EQ : Comparison::NE; 00585 break; 00586 } 00587 default: 00588 return nullptr; 00589 } 00590 00591 if (isSwapped) 00592 Cmp = Comparison::getSwappedComparison(Cmp); 00593 00594 if (InitialValue->isReg()) { 00595 unsigned R = InitialValue->getReg(); 00596 MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); 00597 if (!MDT->properlyDominates(DefBB, Header)) 00598 return nullptr; 00599 OldInsts.push_back(MRI->getVRegDef(R)); 00600 } 00601 if (EndValue->isReg()) { 00602 unsigned R = EndValue->getReg(); 00603 MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); 00604 if (!MDT->properlyDominates(DefBB, Header)) 00605 return nullptr; 00606 } 00607 00608 return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); 00609 } 00610 00611 /// \brief Helper function that returns the expression that represents the 00612 /// number of times a loop iterates. The function takes the operands that 00613 /// represent the loop start value, loop end value, and induction value. 00614 /// Based upon these operands, the function attempts to compute the trip count. 00615 CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, 00616 const MachineOperand *Start, 00617 const MachineOperand *End, 00618 unsigned IVReg, 00619 int64_t IVBump, 00620 Comparison::Kind Cmp) const { 00621 // Cannot handle comparison EQ, i.e. while (A == B). 00622 if (Cmp == Comparison::EQ) 00623 return nullptr; 00624 00625 // Check if either the start or end values are an assignment of an immediate. 00626 // If so, use the immediate value rather than the register. 00627 if (Start->isReg()) { 00628 const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg()); 00629 if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI) 00630 Start = &StartValInstr->getOperand(1); 00631 } 00632 if (End->isReg()) { 00633 const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); 00634 if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI) 00635 End = &EndValInstr->getOperand(1); 00636 } 00637 00638 assert (Start->isReg() || Start->isImm()); 00639 assert (End->isReg() || End->isImm()); 00640 00641 bool CmpLess = Cmp & Comparison::L; 00642 bool CmpGreater = Cmp & Comparison::G; 00643 bool CmpHasEqual = Cmp & Comparison::EQ; 00644 00645 // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds. 00646 // If loop executes while iv is "less" with the iv value going down, then 00647 // the iv must wrap. 00648 if (CmpLess && IVBump < 0) 00649 return nullptr; 00650 // If loop executes while iv is "greater" with the iv value going up, then 00651 // the iv must wrap. 00652 if (CmpGreater && IVBump > 0) 00653 return nullptr; 00654 00655 if (Start->isImm() && End->isImm()) { 00656 // Both, start and end are immediates. 00657 int64_t StartV = Start->getImm(); 00658 int64_t EndV = End->getImm(); 00659 int64_t Dist = EndV - StartV; 00660 if (Dist == 0) 00661 return nullptr; 00662 00663 bool Exact = (Dist % IVBump) == 0; 00664 00665 if (Cmp == Comparison::NE) { 00666 if (!Exact) 00667 return nullptr; 00668 if ((Dist < 0) ^ (IVBump < 0)) 00669 return nullptr; 00670 } 00671 00672 // For comparisons that include the final value (i.e. include equality 00673 // with the final value), we need to increase the distance by 1. 00674 if (CmpHasEqual) 00675 Dist = Dist > 0 ? Dist+1 : Dist-1; 00676 00677 // assert (CmpLess => Dist > 0); 00678 assert ((!CmpLess || Dist > 0) && "Loop should never iterate!"); 00679 // assert (CmpGreater => Dist < 0); 00680 assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!"); 00681 00682 // "Normalized" distance, i.e. with the bump set to +-1. 00683 int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump 00684 : (-Dist + (-IVBump-1)) / (-IVBump); 00685 assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign."); 00686 00687 uint64_t Count = Dist1; 00688 00689 if (Count > 0xFFFFFFFFULL) 00690 return nullptr; 00691 00692 return new CountValue(CountValue::CV_Immediate, Count); 00693 } 00694 00695 // A general case: Start and End are some values, but the actual 00696 // iteration count may not be available. If it is not, insert 00697 // a computation of it into the preheader. 00698 00699 // If the induction variable bump is not a power of 2, quit. 00700 // Othwerise we'd need a general integer division. 00701 if (!isPowerOf2_64(abs64(IVBump))) 00702 return nullptr; 00703 00704 MachineBasicBlock *PH = Loop->getLoopPreheader(); 00705 assert (PH && "Should have a preheader by now"); 00706 MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); 00707 DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc() 00708 : DebugLoc(); 00709 00710 // If Start is an immediate and End is a register, the trip count 00711 // will be "reg - imm". Hexagon's "subtract immediate" instruction 00712 // is actually "reg + -imm". 00713 00714 // If the loop IV is going downwards, i.e. if the bump is negative, 00715 // then the iteration count (computed as End-Start) will need to be 00716 // negated. To avoid the negation, just swap Start and End. 00717 if (IVBump < 0) { 00718 std::swap(Start, End); 00719 IVBump = -IVBump; 00720 } 00721 // Cmp may now have a wrong direction, e.g. LEs may now be GEs. 00722 // Signedness, and "including equality" are preserved. 00723 00724 bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm) 00725 bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg) 00726 00727 int64_t StartV = 0, EndV = 0; 00728 if (Start->isImm()) 00729 StartV = Start->getImm(); 00730 if (End->isImm()) 00731 EndV = End->getImm(); 00732 00733 int64_t AdjV = 0; 00734 // To compute the iteration count, we would need this computation: 00735 // Count = (End - Start + (IVBump-1)) / IVBump 00736 // or, when CmpHasEqual: 00737 // Count = (End - Start + (IVBump-1)+1) / IVBump 00738 // The "IVBump-1" part is the adjustment (AdjV). We can avoid 00739 // generating an instruction specifically to add it if we can adjust 00740 // the immediate values for Start or End. 00741 00742 if (CmpHasEqual) { 00743 // Need to add 1 to the total iteration count. 00744 if (Start->isImm()) 00745 StartV--; 00746 else if (End->isImm()) 00747 EndV++; 00748 else 00749 AdjV += 1; 00750 } 00751 00752 if (Cmp != Comparison::NE) { 00753 if (Start->isImm()) 00754 StartV -= (IVBump-1); 00755 else if (End->isImm()) 00756 EndV += (IVBump-1); 00757 else 00758 AdjV += (IVBump-1); 00759 } 00760 00761 unsigned R = 0, SR = 0; 00762 if (Start->isReg()) { 00763 R = Start->getReg(); 00764 SR = Start->getSubReg(); 00765 } else { 00766 R = End->getReg(); 00767 SR = End->getSubReg(); 00768 } 00769 const TargetRegisterClass *RC = MRI->getRegClass(R); 00770 // Hardware loops cannot handle 64-bit registers. If it's a double 00771 // register, it has to have a subregister. 00772 if (!SR && RC == &Hexagon::DoubleRegsRegClass) 00773 return nullptr; 00774 const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; 00775 00776 // Compute DistR (register with the distance between Start and End). 00777 unsigned DistR, DistSR; 00778 00779 // Avoid special case, where the start value is an imm(0). 00780 if (Start->isImm() && StartV == 0) { 00781 DistR = End->getReg(); 00782 DistSR = End->getSubReg(); 00783 } else { 00784 const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) : 00785 (RegToImm ? TII->get(Hexagon::SUB_ri) : 00786 TII->get(Hexagon::ADD_ri)); 00787 unsigned SubR = MRI->createVirtualRegister(IntRC); 00788 MachineInstrBuilder SubIB = 00789 BuildMI(*PH, InsertPos, DL, SubD, SubR); 00790 00791 if (RegToReg) { 00792 SubIB.addReg(End->getReg(), 0, End->getSubReg()) 00793 .addReg(Start->getReg(), 0, Start->getSubReg()); 00794 } else if (RegToImm) { 00795 SubIB.addImm(EndV) 00796 .addReg(Start->getReg(), 0, Start->getSubReg()); 00797 } else { // ImmToReg 00798 SubIB.addReg(End->getReg(), 0, End->getSubReg()) 00799 .addImm(-StartV); 00800 } 00801 DistR = SubR; 00802 DistSR = 0; 00803 } 00804 00805 // From DistR, compute AdjR (register with the adjusted distance). 00806 unsigned AdjR, AdjSR; 00807 00808 if (AdjV == 0) { 00809 AdjR = DistR; 00810 AdjSR = DistSR; 00811 } else { 00812 // Generate CountR = ADD DistR, AdjVal 00813 unsigned AddR = MRI->createVirtualRegister(IntRC); 00814 const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri); 00815 BuildMI(*PH, InsertPos, DL, AddD, AddR) 00816 .addReg(DistR, 0, DistSR) 00817 .addImm(AdjV); 00818 00819 AdjR = AddR; 00820 AdjSR = 0; 00821 } 00822 00823 // From AdjR, compute CountR (register with the final count). 00824 unsigned CountR, CountSR; 00825 00826 if (IVBump == 1) { 00827 CountR = AdjR; 00828 CountSR = AdjSR; 00829 } else { 00830 // The IV bump is a power of two. Log_2(IV bump) is the shift amount. 00831 unsigned Shift = Log2_32(IVBump); 00832 00833 // Generate NormR = LSR DistR, Shift. 00834 unsigned LsrR = MRI->createVirtualRegister(IntRC); 00835 const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri); 00836 BuildMI(*PH, InsertPos, DL, LsrD, LsrR) 00837 .addReg(AdjR, 0, AdjSR) 00838 .addImm(Shift); 00839 00840 CountR = LsrR; 00841 CountSR = 0; 00842 } 00843 00844 return new CountValue(CountValue::CV_Register, CountR, CountSR); 00845 } 00846 00847 00848 /// \brief Return true if the operation is invalid within hardware loop. 00849 bool HexagonHardwareLoops::isInvalidLoopOperation( 00850 const MachineInstr *MI) const { 00851 00852 // call is not allowed because the callee may use a hardware loop 00853 if (MI->getDesc().isCall()) 00854 return true; 00855 00856 // do not allow nested hardware loops 00857 if (isHardwareLoop(MI)) 00858 return true; 00859 00860 // check if the instruction defines a hardware loop register 00861 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 00862 const MachineOperand &MO = MI->getOperand(i); 00863 if (!MO.isReg() || !MO.isDef()) 00864 continue; 00865 unsigned R = MO.getReg(); 00866 if (R == Hexagon::LC0 || R == Hexagon::LC1 || 00867 R == Hexagon::SA0 || R == Hexagon::SA1) 00868 return true; 00869 } 00870 return false; 00871 } 00872 00873 00874 /// \brief - Return true if the loop contains an instruction that inhibits 00875 /// the use of the hardware loop function. 00876 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const { 00877 const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); 00878 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 00879 MachineBasicBlock *MBB = Blocks[i]; 00880 for (MachineBasicBlock::iterator 00881 MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { 00882 const MachineInstr *MI = &*MII; 00883 if (isInvalidLoopOperation(MI)) 00884 return true; 00885 } 00886 } 00887 return false; 00888 } 00889 00890 00891 /// \brief Returns true if the instruction is dead. This was essentially 00892 /// copied from DeadMachineInstructionElim::isDead, but with special cases 00893 /// for inline asm, physical registers and instructions with side effects 00894 /// removed. 00895 bool HexagonHardwareLoops::isDead(const MachineInstr *MI, 00896 SmallVectorImpl<MachineInstr *> &DeadPhis) const { 00897 // Examine each operand. 00898 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 00899 const MachineOperand &MO = MI->getOperand(i); 00900 if (!MO.isReg() || !MO.isDef()) 00901 continue; 00902 00903 unsigned Reg = MO.getReg(); 00904 if (MRI->use_nodbg_empty(Reg)) 00905 continue; 00906 00907 typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator; 00908 00909 // This instruction has users, but if the only user is the phi node for the 00910 // parent block, and the only use of that phi node is this instruction, then 00911 // this instruction is dead: both it (and the phi node) can be removed. 00912 use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); 00913 use_nodbg_iterator End = MRI->use_nodbg_end(); 00914 if (std::next(I) != End || !I->getParent()->isPHI()) 00915 return false; 00916 00917 MachineInstr *OnePhi = I->getParent(); 00918 for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { 00919 const MachineOperand &OPO = OnePhi->getOperand(j); 00920 if (!OPO.isReg() || !OPO.isDef()) 00921 continue; 00922 00923 unsigned OPReg = OPO.getReg(); 00924 use_nodbg_iterator nextJ; 00925 for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); 00926 J != End; J = nextJ) { 00927 nextJ = std::next(J); 00928 MachineOperand &Use = *J; 00929 MachineInstr *UseMI = Use.getParent(); 00930 00931 // If the phi node has a user that is not MI, bail... 00932 if (MI != UseMI) 00933 return false; 00934 } 00935 } 00936 DeadPhis.push_back(OnePhi); 00937 } 00938 00939 // If there are no defs with uses, the instruction is dead. 00940 return true; 00941 } 00942 00943 void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { 00944 // This procedure was essentially copied from DeadMachineInstructionElim. 00945 00946 SmallVector<MachineInstr*, 1> DeadPhis; 00947 if (isDead(MI, DeadPhis)) { 00948 DEBUG(dbgs() << "HW looping will remove: " << *MI); 00949 00950 // It is possible that some DBG_VALUE instructions refer to this 00951 // instruction. Examine each def operand for such references; 00952 // if found, mark the DBG_VALUE as undef (but don't delete it). 00953 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 00954 const MachineOperand &MO = MI->getOperand(i); 00955 if (!MO.isReg() || !MO.isDef()) 00956 continue; 00957 unsigned Reg = MO.getReg(); 00958 MachineRegisterInfo::use_iterator nextI; 00959 for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), 00960 E = MRI->use_end(); I != E; I = nextI) { 00961 nextI = std::next(I); // I is invalidated by the setReg 00962 MachineOperand &Use = *I; 00963 MachineInstr *UseMI = I->getParent(); 00964 if (UseMI == MI) 00965 continue; 00966 if (Use.isDebug()) 00967 UseMI->getOperand(0).setReg(0U); 00968 // This may also be a "instr -> phi -> instr" case which can 00969 // be removed too. 00970 } 00971 } 00972 00973 MI->eraseFromParent(); 00974 for (unsigned i = 0; i < DeadPhis.size(); ++i) 00975 DeadPhis[i]->eraseFromParent(); 00976 } 00977 } 00978 00979 /// \brief Check if the loop is a candidate for converting to a hardware 00980 /// loop. If so, then perform the transformation. 00981 /// 00982 /// This function works on innermost loops first. A loop can be converted 00983 /// if it is a counting loop; either a register value or an immediate. 00984 /// 00985 /// The code makes several assumptions about the representation of the loop 00986 /// in llvm. 00987 bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { 00988 // This is just for sanity. 00989 assert(L->getHeader() && "Loop without a header?"); 00990 00991 bool Changed = false; 00992 // Process nested loops first. 00993 for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) 00994 Changed |= convertToHardwareLoop(*I); 00995 00996 // If a nested loop has been converted, then we can't convert this loop. 00997 if (Changed) 00998 return Changed; 00999 01000 #ifndef NDEBUG 01001 // Stop trying after reaching the limit (if any). 01002 int Limit = HWLoopLimit; 01003 if (Limit >= 0) { 01004 if (Counter >= HWLoopLimit) 01005 return false; 01006 Counter++; 01007 } 01008 #endif 01009 01010 // Does the loop contain any invalid instructions? 01011 if (containsInvalidInstruction(L)) 01012 return false; 01013 01014 // Is the induction variable bump feeding the latch condition? 01015 if (!fixupInductionVariable(L)) 01016 return false; 01017 01018 MachineBasicBlock *LastMBB = L->getExitingBlock(); 01019 // Don't generate hw loop if the loop has more than one exit. 01020 if (!LastMBB) 01021 return false; 01022 01023 MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); 01024 if (LastI == LastMBB->end()) 01025 return false; 01026 01027 // Ensure the loop has a preheader: the loop instruction will be 01028 // placed there. 01029 bool NewPreheader = false; 01030 MachineBasicBlock *Preheader = L->getLoopPreheader(); 01031 if (!Preheader) { 01032 Preheader = createPreheaderForLoop(L); 01033 if (!Preheader) 01034 return false; 01035 NewPreheader = true; 01036 } 01037 MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); 01038 01039 SmallVector<MachineInstr*, 2> OldInsts; 01040 // Are we able to determine the trip count for the loop? 01041 CountValue *TripCount = getLoopTripCount(L, OldInsts); 01042 if (!TripCount) 01043 return false; 01044 01045 // Is the trip count available in the preheader? 01046 if (TripCount->isReg()) { 01047 // There will be a use of the register inserted into the preheader, 01048 // so make sure that the register is actually defined at that point. 01049 MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); 01050 MachineBasicBlock *BBDef = TCDef->getParent(); 01051 if (!NewPreheader) { 01052 if (!MDT->dominates(BBDef, Preheader)) 01053 return false; 01054 } else { 01055 // If we have just created a preheader, the dominator tree won't be 01056 // aware of it. Check if the definition of the register dominates 01057 // the header, but is not the header itself. 01058 if (!MDT->properlyDominates(BBDef, L->getHeader())) 01059 return false; 01060 } 01061 } 01062 01063 // Determine the loop start. 01064 MachineBasicBlock *LoopStart = L->getTopBlock(); 01065 if (L->getLoopLatch() != LastMBB) { 01066 // When the exit and latch are not the same, use the latch block as the 01067 // start. 01068 // The loop start address is used only after the 1st iteration, and the 01069 // loop latch may contains instrs. that need to be executed after the 01070 // first iteration. 01071 LoopStart = L->getLoopLatch(); 01072 // Make sure the latch is a successor of the exit, otherwise it won't work. 01073 if (!LastMBB->isSuccessor(LoopStart)) 01074 return false; 01075 } 01076 01077 // Convert the loop to a hardware loop. 01078 DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); 01079 DebugLoc DL; 01080 if (InsertPos != Preheader->end()) 01081 DL = InsertPos->getDebugLoc(); 01082 01083 if (TripCount->isReg()) { 01084 // Create a copy of the loop count register. 01085 unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); 01086 BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) 01087 .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); 01088 // Add the Loop instruction to the beginning of the loop. 01089 BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r)) 01090 .addMBB(LoopStart) 01091 .addReg(CountReg); 01092 } else { 01093 assert(TripCount->isImm() && "Expecting immediate value for trip count"); 01094 // Add the Loop immediate instruction to the beginning of the loop, 01095 // if the immediate fits in the instructions. Otherwise, we need to 01096 // create a new virtual register. 01097 int64_t CountImm = TripCount->getImm(); 01098 if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) { 01099 unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); 01100 BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg) 01101 .addImm(CountImm); 01102 BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r)) 01103 .addMBB(LoopStart).addReg(CountReg); 01104 } else 01105 BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i)) 01106 .addMBB(LoopStart).addImm(CountImm); 01107 } 01108 01109 // Make sure the loop start always has a reference in the CFG. We need 01110 // to create a BlockAddress operand to get this mechanism to work both the 01111 // MachineBasicBlock and BasicBlock objects need the flag set. 01112 LoopStart->setHasAddressTaken(); 01113 // This line is needed to set the hasAddressTaken flag on the BasicBlock 01114 // object. 01115 BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock())); 01116 01117 // Replace the loop branch with an endloop instruction. 01118 DebugLoc LastIDL = LastI->getDebugLoc(); 01119 BuildMI(*LastMBB, LastI, LastIDL, 01120 TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart); 01121 01122 // The loop ends with either: 01123 // - a conditional branch followed by an unconditional branch, or 01124 // - a conditional branch to the loop start. 01125 if (LastI->getOpcode() == Hexagon::JMP_t || 01126 LastI->getOpcode() == Hexagon::JMP_f) { 01127 // Delete one and change/add an uncond. branch to out of the loop. 01128 MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); 01129 LastI = LastMBB->erase(LastI); 01130 if (!L->contains(BranchTarget)) { 01131 if (LastI != LastMBB->end()) 01132 LastI = LastMBB->erase(LastI); 01133 SmallVector<MachineOperand, 0> Cond; 01134 TII->InsertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL); 01135 } 01136 } else { 01137 // Conditional branch to loop start; just delete it. 01138 LastMBB->erase(LastI); 01139 } 01140 delete TripCount; 01141 01142 // The induction operation and the comparison may now be 01143 // unneeded. If these are unneeded, then remove them. 01144 for (unsigned i = 0; i < OldInsts.size(); ++i) 01145 removeIfDead(OldInsts[i]); 01146 01147 ++NumHWLoops; 01148 return true; 01149 } 01150 01151 01152 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, 01153 MachineInstr *CmpI) { 01154 assert (BumpI != CmpI && "Bump and compare in the same instruction?"); 01155 01156 MachineBasicBlock *BB = BumpI->getParent(); 01157 if (CmpI->getParent() != BB) 01158 return false; 01159 01160 typedef MachineBasicBlock::instr_iterator instr_iterator; 01161 // Check if things are in order to begin with. 01162 for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I) 01163 if (&*I == CmpI) 01164 return true; 01165 01166 // Out of order. 01167 unsigned PredR = CmpI->getOperand(0).getReg(); 01168 bool FoundBump = false; 01169 instr_iterator CmpIt = CmpI, NextIt = std::next(CmpIt); 01170 for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) { 01171 MachineInstr *In = &*I; 01172 for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) { 01173 MachineOperand &MO = In->getOperand(i); 01174 if (MO.isReg() && MO.isUse()) { 01175 if (MO.getReg() == PredR) // Found an intervening use of PredR. 01176 return false; 01177 } 01178 } 01179 01180 if (In == BumpI) { 01181 instr_iterator After = BumpI; 01182 instr_iterator From = CmpI; 01183 BB->splice(std::next(After), BB, From); 01184 FoundBump = true; 01185 break; 01186 } 01187 } 01188 assert (FoundBump && "Cannot determine instruction order"); 01189 return FoundBump; 01190 } 01191 01192 01193 MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) { 01194 MachineInstr *DI = MRI->getVRegDef(R); 01195 unsigned DOpc = DI->getOpcode(); 01196 switch (DOpc) { 01197 case Hexagon::TFRI: 01198 case Hexagon::TFRI64: 01199 case Hexagon::CONST32_Int_Real: 01200 case Hexagon::CONST64_Int_Real: 01201 return DI; 01202 } 01203 return nullptr; 01204 } 01205 01206 01207 int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) { 01208 if (MO.isImm()) 01209 return MO.getImm(); 01210 assert(MO.isReg()); 01211 unsigned R = MO.getReg(); 01212 MachineInstr *DI = defWithImmediate(R); 01213 assert(DI && "Need an immediate operand"); 01214 // All currently supported "define-with-immediate" instructions have the 01215 // actual immediate value in the operand(1). 01216 int64_t v = DI->getOperand(1).getImm(); 01217 return v; 01218 } 01219 01220 01221 void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { 01222 if (MO.isImm()) { 01223 MO.setImm(Val); 01224 return; 01225 } 01226 01227 assert(MO.isReg()); 01228 unsigned R = MO.getReg(); 01229 MachineInstr *DI = defWithImmediate(R); 01230 if (MRI->hasOneNonDBGUse(R)) { 01231 // If R has only one use, then just change its defining instruction to 01232 // the new immediate value. 01233 DI->getOperand(1).setImm(Val); 01234 return; 01235 } 01236 01237 const TargetRegisterClass *RC = MRI->getRegClass(R); 01238 unsigned NewR = MRI->createVirtualRegister(RC); 01239 MachineBasicBlock &B = *DI->getParent(); 01240 DebugLoc DL = DI->getDebugLoc(); 01241 BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR) 01242 .addImm(Val); 01243 MO.setReg(NewR); 01244 } 01245 01246 01247 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { 01248 MachineBasicBlock *Header = L->getHeader(); 01249 MachineBasicBlock *Preheader = L->getLoopPreheader(); 01250 MachineBasicBlock *Latch = L->getLoopLatch(); 01251 01252 if (!Header || !Preheader || !Latch) 01253 return false; 01254 01255 // These data structures follow the same concept as the corresponding 01256 // ones in findInductionRegister (where some comments are). 01257 typedef std::pair<unsigned,int64_t> RegisterBump; 01258 typedef std::pair<unsigned,RegisterBump> RegisterInduction; 01259 typedef std::set<RegisterInduction> RegisterInductionSet; 01260 01261 // Register candidates for induction variables, with their associated bumps. 01262 RegisterInductionSet IndRegs; 01263 01264 // Look for induction patterns: 01265 // vreg1 = PHI ..., [ latch, vreg2 ] 01266 // vreg2 = ADD vreg1, imm 01267 typedef MachineBasicBlock::instr_iterator instr_iterator; 01268 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 01269 I != E && I->isPHI(); ++I) { 01270 MachineInstr *Phi = &*I; 01271 01272 // Have a PHI instruction. 01273 for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { 01274 if (Phi->getOperand(i+1).getMBB() != Latch) 01275 continue; 01276 01277 unsigned PhiReg = Phi->getOperand(i).getReg(); 01278 MachineInstr *DI = MRI->getVRegDef(PhiReg); 01279 unsigned UpdOpc = DI->getOpcode(); 01280 bool isAdd = (UpdOpc == Hexagon::ADD_ri); 01281 01282 if (isAdd) { 01283 // If the register operand to the add/sub is the PHI we are looking 01284 // at, this meets the induction pattern. 01285 unsigned IndReg = DI->getOperand(1).getReg(); 01286 if (MRI->getVRegDef(IndReg) == Phi) { 01287 unsigned UpdReg = DI->getOperand(0).getReg(); 01288 int64_t V = DI->getOperand(2).getImm(); 01289 IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); 01290 } 01291 } 01292 } // for (i) 01293 } // for (instr) 01294 01295 if (IndRegs.empty()) 01296 return false; 01297 01298 MachineBasicBlock *TB = nullptr, *FB = nullptr; 01299 SmallVector<MachineOperand,2> Cond; 01300 // AnalyzeBranch returns true if it fails to analyze branch. 01301 bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); 01302 if (NotAnalyzed) 01303 return false; 01304 01305 // Check if the latch branch is unconditional. 01306 if (Cond.empty()) 01307 return false; 01308 01309 if (TB != Header && FB != Header) 01310 // The latch does not go back to the header. Not a latch we know and love. 01311 return false; 01312 01313 // Expecting a predicate register as a condition. It won't be a hardware 01314 // predicate register at this point yet, just a vreg. 01315 // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0) 01316 // into Cond, followed by the predicate register. For non-negated branches 01317 // it's just the register. 01318 unsigned CSz = Cond.size(); 01319 if (CSz != 1 && CSz != 2) 01320 return false; 01321 01322 unsigned P = Cond[CSz-1].getReg(); 01323 MachineInstr *PredDef = MRI->getVRegDef(P); 01324 01325 if (!PredDef->isCompare()) 01326 return false; 01327 01328 SmallSet<unsigned,2> CmpRegs; 01329 MachineOperand *CmpImmOp = nullptr; 01330 01331 // Go over all operands to the compare and look for immediate and register 01332 // operands. Assume that if the compare has a single register use and a 01333 // single immediate operand, then the register is being compared with the 01334 // immediate value. 01335 for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { 01336 MachineOperand &MO = PredDef->getOperand(i); 01337 if (MO.isReg()) { 01338 // Skip all implicit references. In one case there was: 01339 // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use> 01340 if (MO.isImplicit()) 01341 continue; 01342 if (MO.isUse()) { 01343 unsigned R = MO.getReg(); 01344 if (!defWithImmediate(R)) { 01345 CmpRegs.insert(MO.getReg()); 01346 continue; 01347 } 01348 // Consider the register to be the "immediate" operand. 01349 if (CmpImmOp) 01350 return false; 01351 CmpImmOp = &MO; 01352 } 01353 } else if (MO.isImm()) { 01354 if (CmpImmOp) // A second immediate argument? Confusing. Bail out. 01355 return false; 01356 CmpImmOp = &MO; 01357 } 01358 } 01359 01360 if (CmpRegs.empty()) 01361 return false; 01362 01363 // Check if the compared register follows the order we want. Fix if needed. 01364 for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end(); 01365 I != E; ++I) { 01366 // This is a success. If the register used in the comparison is one that 01367 // we have identified as a bumped (updated) induction register, there is 01368 // nothing to do. 01369 if (CmpRegs.count(I->first)) 01370 return true; 01371 01372 // Otherwise, if the register being compared comes out of a PHI node, 01373 // and has been recognized as following the induction pattern, and is 01374 // compared against an immediate, we can fix it. 01375 const RegisterBump &RB = I->second; 01376 if (CmpRegs.count(RB.first)) { 01377 if (!CmpImmOp) 01378 return false; 01379 01380 int64_t CmpImm = getImmediate(*CmpImmOp); 01381 int64_t V = RB.second; 01382 if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit). 01383 return false; 01384 if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit). 01385 return false; 01386 CmpImm += V; 01387 // Some forms of cmp-immediate allow u9 and s10. Assume the worst case 01388 // scenario, i.e. an 8-bit value. 01389 if (CmpImmOp->isImm() && !isInt<8>(CmpImm)) 01390 return false; 01391 01392 // Make sure that the compare happens after the bump. Otherwise, 01393 // after the fixup, the compare would use a yet-undefined register. 01394 MachineInstr *BumpI = MRI->getVRegDef(I->first); 01395 bool Order = orderBumpCompare(BumpI, PredDef); 01396 if (!Order) 01397 return false; 01398 01399 // Finally, fix the compare instruction. 01400 setImmediate(*CmpImmOp, CmpImm); 01401 for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { 01402 MachineOperand &MO = PredDef->getOperand(i); 01403 if (MO.isReg() && MO.getReg() == RB.first) { 01404 MO.setReg(I->first); 01405 return true; 01406 } 01407 } 01408 } 01409 } 01410 01411 return false; 01412 } 01413 01414 01415 /// \brief Create a preheader for a given loop. 01416 MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( 01417 MachineLoop *L) { 01418 if (MachineBasicBlock *TmpPH = L->getLoopPreheader()) 01419 return TmpPH; 01420 01421 MachineBasicBlock *Header = L->getHeader(); 01422 MachineBasicBlock *Latch = L->getLoopLatch(); 01423 MachineFunction *MF = Header->getParent(); 01424 DebugLoc DL; 01425 01426 if (!Latch || Header->hasAddressTaken()) 01427 return nullptr; 01428 01429 typedef MachineBasicBlock::instr_iterator instr_iterator; 01430 01431 // Verify that all existing predecessors have analyzable branches 01432 // (or no branches at all). 01433 typedef std::vector<MachineBasicBlock*> MBBVector; 01434 MBBVector Preds(Header->pred_begin(), Header->pred_end()); 01435 SmallVector<MachineOperand,2> Tmp1; 01436 MachineBasicBlock *TB = nullptr, *FB = nullptr; 01437 01438 if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false)) 01439 return nullptr; 01440 01441 for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { 01442 MachineBasicBlock *PB = *I; 01443 if (PB != Latch) { 01444 bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false); 01445 if (NotAnalyzed) 01446 return nullptr; 01447 } 01448 } 01449 01450 MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); 01451 MF->insert(Header, NewPH); 01452 01453 if (Header->pred_size() > 2) { 01454 // Ensure that the header has only two predecessors: the preheader and 01455 // the loop latch. Any additional predecessors of the header should 01456 // join at the newly created preheader. Inspect all PHI nodes from the 01457 // header and create appropriate corresponding PHI nodes in the preheader. 01458 01459 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 01460 I != E && I->isPHI(); ++I) { 01461 MachineInstr *PN = &*I; 01462 01463 const MCInstrDesc &PD = TII->get(TargetOpcode::PHI); 01464 MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); 01465 NewPH->insert(NewPH->end(), NewPN); 01466 01467 unsigned PR = PN->getOperand(0).getReg(); 01468 const TargetRegisterClass *RC = MRI->getRegClass(PR); 01469 unsigned NewPR = MRI->createVirtualRegister(RC); 01470 NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); 01471 01472 // Copy all non-latch operands of a header's PHI node to the newly 01473 // created PHI node in the preheader. 01474 for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { 01475 unsigned PredR = PN->getOperand(i).getReg(); 01476 MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); 01477 if (PredB == Latch) 01478 continue; 01479 01480 NewPN->addOperand(MachineOperand::CreateReg(PredR, false)); 01481 NewPN->addOperand(MachineOperand::CreateMBB(PredB)); 01482 } 01483 01484 // Remove copied operands from the old PHI node and add the value 01485 // coming from the preheader's PHI. 01486 for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { 01487 MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); 01488 if (PredB != Latch) { 01489 PN->RemoveOperand(i+1); 01490 PN->RemoveOperand(i); 01491 } 01492 } 01493 PN->addOperand(MachineOperand::CreateReg(NewPR, false)); 01494 PN->addOperand(MachineOperand::CreateMBB(NewPH)); 01495 } 01496 01497 } else { 01498 assert(Header->pred_size() == 2); 01499 01500 // The header has only two predecessors, but the non-latch predecessor 01501 // is not a preheader (e.g. it has other successors, etc.) 01502 // In such a case we don't need any extra PHI nodes in the new preheader, 01503 // all we need is to adjust existing PHIs in the header to now refer to 01504 // the new preheader. 01505 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 01506 I != E && I->isPHI(); ++I) { 01507 MachineInstr *PN = &*I; 01508 for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { 01509 MachineOperand &MO = PN->getOperand(i+1); 01510 if (MO.getMBB() != Latch) 01511 MO.setMBB(NewPH); 01512 } 01513 } 01514 } 01515 01516 // "Reroute" the CFG edges to link in the new preheader. 01517 // If any of the predecessors falls through to the header, insert a branch 01518 // to the new preheader in that place. 01519 SmallVector<MachineOperand,1> Tmp2; 01520 SmallVector<MachineOperand,1> EmptyCond; 01521 01522 TB = FB = nullptr; 01523 01524 for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { 01525 MachineBasicBlock *PB = *I; 01526 if (PB != Latch) { 01527 Tmp2.clear(); 01528 bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false); 01529 (void)NotAnalyzed; // suppress compiler warning 01530 assert (!NotAnalyzed && "Should be analyzable!"); 01531 if (TB != Header && (Tmp2.empty() || FB != Header)) 01532 TII->InsertBranch(*PB, NewPH, nullptr, EmptyCond, DL); 01533 PB->ReplaceUsesOfBlockWith(Header, NewPH); 01534 } 01535 } 01536 01537 // It can happen that the latch block will fall through into the header. 01538 // Insert an unconditional branch to the header. 01539 TB = FB = nullptr; 01540 bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false); 01541 (void)LatchNotAnalyzed; // suppress compiler warning 01542 assert (!LatchNotAnalyzed && "Should be analyzable!"); 01543 if (!TB && !FB) 01544 TII->InsertBranch(*Latch, Header, nullptr, EmptyCond, DL); 01545 01546 // Finally, the branch from the preheader to the header. 01547 TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL); 01548 NewPH->addSuccessor(Header); 01549 01550 return NewPH; 01551 }