LLVM API Documentation
00001 //===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // Perform peephole optimizations on the machine code: 00011 // 00012 // - Optimize Extensions 00013 // 00014 // Optimization of sign / zero extension instructions. It may be extended to 00015 // handle other instructions with similar properties. 00016 // 00017 // On some targets, some instructions, e.g. X86 sign / zero extension, may 00018 // leave the source value in the lower part of the result. This optimization 00019 // will replace some uses of the pre-extension value with uses of the 00020 // sub-register of the results. 00021 // 00022 // - Optimize Comparisons 00023 // 00024 // Optimization of comparison instructions. For instance, in this code: 00025 // 00026 // sub r1, 1 00027 // cmp r1, 0 00028 // bz L1 00029 // 00030 // If the "sub" instruction all ready sets (or could be modified to set) the 00031 // same flag that the "cmp" instruction sets and that "bz" uses, then we can 00032 // eliminate the "cmp" instruction. 00033 // 00034 // Another instance, in this code: 00035 // 00036 // sub r1, r3 | sub r1, imm 00037 // cmp r3, r1 or cmp r1, r3 | cmp r1, imm 00038 // bge L1 00039 // 00040 // If the branch instruction can use flag from "sub", then we can replace 00041 // "sub" with "subs" and eliminate the "cmp" instruction. 00042 // 00043 // - Optimize Loads: 00044 // 00045 // Loads that can be folded into a later instruction. A load is foldable 00046 // if it loads to virtual registers and the virtual register defined has 00047 // a single use. 00048 // 00049 // - Optimize Copies and Bitcast (more generally, target specific copies): 00050 // 00051 // Rewrite copies and bitcasts to avoid cross register bank copies 00052 // when possible. 00053 // E.g., Consider the following example, where capital and lower 00054 // letters denote different register file: 00055 // b = copy A <-- cross-bank copy 00056 // C = copy b <-- cross-bank copy 00057 // => 00058 // b = copy A <-- cross-bank copy 00059 // C = copy A <-- same-bank copy 00060 // 00061 // E.g., for bitcast: 00062 // b = bitcast A <-- cross-bank copy 00063 // C = bitcast b <-- cross-bank copy 00064 // => 00065 // b = bitcast A <-- cross-bank copy 00066 // C = copy A <-- same-bank copy 00067 //===----------------------------------------------------------------------===// 00068 00069 #include "llvm/CodeGen/Passes.h" 00070 #include "llvm/ADT/DenseMap.h" 00071 #include "llvm/ADT/SmallPtrSet.h" 00072 #include "llvm/ADT/SmallSet.h" 00073 #include "llvm/ADT/Statistic.h" 00074 #include "llvm/CodeGen/MachineDominators.h" 00075 #include "llvm/CodeGen/MachineInstrBuilder.h" 00076 #include "llvm/CodeGen/MachineRegisterInfo.h" 00077 #include "llvm/Support/CommandLine.h" 00078 #include "llvm/Support/Debug.h" 00079 #include "llvm/Target/TargetInstrInfo.h" 00080 #include "llvm/Target/TargetRegisterInfo.h" 00081 #include "llvm/Target/TargetSubtargetInfo.h" 00082 #include <utility> 00083 using namespace llvm; 00084 00085 #define DEBUG_TYPE "peephole-opt" 00086 00087 // Optimize Extensions 00088 static cl::opt<bool> 00089 Aggressive("aggressive-ext-opt", cl::Hidden, 00090 cl::desc("Aggressive extension optimization")); 00091 00092 static cl::opt<bool> 00093 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), 00094 cl::desc("Disable the peephole optimizer")); 00095 00096 static cl::opt<bool> 00097 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), 00098 cl::desc("Disable advanced copy optimization")); 00099 00100 STATISTIC(NumReuse, "Number of extension results reused"); 00101 STATISTIC(NumCmps, "Number of compares eliminated"); 00102 STATISTIC(NumImmFold, "Number of move immediate folded"); 00103 STATISTIC(NumLoadFold, "Number of loads folded"); 00104 STATISTIC(NumSelects, "Number of selects optimized"); 00105 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); 00106 STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); 00107 00108 namespace { 00109 class PeepholeOptimizer : public MachineFunctionPass { 00110 const TargetMachine *TM; 00111 const TargetInstrInfo *TII; 00112 MachineRegisterInfo *MRI; 00113 MachineDominatorTree *DT; // Machine dominator tree 00114 00115 public: 00116 static char ID; // Pass identification 00117 PeepholeOptimizer() : MachineFunctionPass(ID) { 00118 initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); 00119 } 00120 00121 bool runOnMachineFunction(MachineFunction &MF) override; 00122 00123 void getAnalysisUsage(AnalysisUsage &AU) const override { 00124 AU.setPreservesCFG(); 00125 MachineFunctionPass::getAnalysisUsage(AU); 00126 if (Aggressive) { 00127 AU.addRequired<MachineDominatorTree>(); 00128 AU.addPreserved<MachineDominatorTree>(); 00129 } 00130 } 00131 00132 private: 00133 bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); 00134 bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 00135 SmallPtrSetImpl<MachineInstr*> &LocalMIs); 00136 bool optimizeSelect(MachineInstr *MI); 00137 bool optimizeCopyOrBitcast(MachineInstr *MI); 00138 bool optimizeCoalescableCopy(MachineInstr *MI); 00139 bool optimizeUncoalescableCopy(MachineInstr *MI, 00140 SmallPtrSetImpl<MachineInstr *> &LocalMIs); 00141 bool findNextSource(unsigned &Reg, unsigned &SubReg); 00142 bool isMoveImmediate(MachineInstr *MI, 00143 SmallSet<unsigned, 4> &ImmDefRegs, 00144 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 00145 bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 00146 SmallSet<unsigned, 4> &ImmDefRegs, 00147 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 00148 bool isLoadFoldable(MachineInstr *MI, 00149 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); 00150 00151 /// \brief Check whether \p MI is understood by the register coalescer 00152 /// but may require some rewriting. 00153 bool isCoalescableCopy(const MachineInstr &MI) { 00154 // SubregToRegs are not interesting, because they are already register 00155 // coalescer friendly. 00156 return MI.isCopy() || (!DisableAdvCopyOpt && 00157 (MI.isRegSequence() || MI.isInsertSubreg() || 00158 MI.isExtractSubreg())); 00159 } 00160 00161 /// \brief Check whether \p MI is a copy like instruction that is 00162 /// not recognized by the register coalescer. 00163 bool isUncoalescableCopy(const MachineInstr &MI) { 00164 return MI.isBitcast() || 00165 (!DisableAdvCopyOpt && 00166 (MI.isRegSequenceLike() || MI.isInsertSubregLike() || 00167 MI.isExtractSubregLike())); 00168 } 00169 }; 00170 00171 /// \brief Helper class to track the possible sources of a value defined by 00172 /// a (chain of) copy related instructions. 00173 /// Given a definition (instruction and definition index), this class 00174 /// follows the use-def chain to find successive suitable sources. 00175 /// The given source can be used to rewrite the definition into 00176 /// def = COPY src. 00177 /// 00178 /// For instance, let us consider the following snippet: 00179 /// v0 = 00180 /// v2 = INSERT_SUBREG v1, v0, sub0 00181 /// def = COPY v2.sub0 00182 /// 00183 /// Using a ValueTracker for def = COPY v2.sub0 will give the following 00184 /// suitable sources: 00185 /// v2.sub0 and v0. 00186 /// Then, def can be rewritten into def = COPY v0. 00187 class ValueTracker { 00188 private: 00189 /// The current point into the use-def chain. 00190 const MachineInstr *Def; 00191 /// The index of the definition in Def. 00192 unsigned DefIdx; 00193 /// The sub register index of the definition. 00194 unsigned DefSubReg; 00195 /// The register where the value can be found. 00196 unsigned Reg; 00197 /// Specifiy whether or not the value tracking looks through 00198 /// complex instructions. When this is false, the value tracker 00199 /// bails on everything that is not a copy or a bitcast. 00200 /// 00201 /// Note: This could have been implemented as a specialized version of 00202 /// the ValueTracker class but that would have complicated the code of 00203 /// the users of this class. 00204 bool UseAdvancedTracking; 00205 /// MachineRegisterInfo used to perform tracking. 00206 const MachineRegisterInfo &MRI; 00207 /// Optional TargetInstrInfo used to perform some complex 00208 /// tracking. 00209 const TargetInstrInfo *TII; 00210 00211 /// \brief Dispatcher to the right underlying implementation of 00212 /// getNextSource. 00213 bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg); 00214 /// \brief Specialized version of getNextSource for Copy instructions. 00215 bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg); 00216 /// \brief Specialized version of getNextSource for Bitcast instructions. 00217 bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg); 00218 /// \brief Specialized version of getNextSource for RegSequence 00219 /// instructions. 00220 bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg); 00221 /// \brief Specialized version of getNextSource for InsertSubreg 00222 /// instructions. 00223 bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg); 00224 /// \brief Specialized version of getNextSource for ExtractSubreg 00225 /// instructions. 00226 bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg); 00227 /// \brief Specialized version of getNextSource for SubregToReg 00228 /// instructions. 00229 bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg); 00230 00231 public: 00232 /// \brief Create a ValueTracker instance for the value defined by \p Reg. 00233 /// \p DefSubReg represents the sub register index the value tracker will 00234 /// track. It does not need to match the sub register index used in the 00235 /// definition of \p Reg. 00236 /// \p UseAdvancedTracking specifies whether or not the value tracker looks 00237 /// through complex instructions. By default (false), it handles only copy 00238 /// and bitcast instructions. 00239 /// If \p Reg is a physical register, a value tracker constructed with 00240 /// this constructor will not find any alternative source. 00241 /// Indeed, when \p Reg is a physical register that constructor does not 00242 /// know which definition of \p Reg it should track. 00243 /// Use the next constructor to track a physical register. 00244 ValueTracker(unsigned Reg, unsigned DefSubReg, 00245 const MachineRegisterInfo &MRI, 00246 bool UseAdvancedTracking = false, 00247 const TargetInstrInfo *TII = nullptr) 00248 : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg), 00249 UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { 00250 if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { 00251 Def = MRI.getVRegDef(Reg); 00252 DefIdx = MRI.def_begin(Reg).getOperandNo(); 00253 } 00254 } 00255 00256 /// \brief Create a ValueTracker instance for the value defined by 00257 /// the pair \p MI, \p DefIdx. 00258 /// Unlike the other constructor, the value tracker produced by this one 00259 /// may be able to find a new source when the definition is a physical 00260 /// register. 00261 /// This could be useful to rewrite target specific instructions into 00262 /// generic copy instructions. 00263 ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, 00264 const MachineRegisterInfo &MRI, 00265 bool UseAdvancedTracking = false, 00266 const TargetInstrInfo *TII = nullptr) 00267 : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), 00268 UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { 00269 assert(DefIdx < Def->getDesc().getNumDefs() && 00270 Def->getOperand(DefIdx).isReg() && "Invalid definition"); 00271 Reg = Def->getOperand(DefIdx).getReg(); 00272 } 00273 00274 /// \brief Following the use-def chain, get the next available source 00275 /// for the tracked value. 00276 /// When the returned value is not nullptr, \p SrcReg gives the register 00277 /// that contain the tracked value. 00278 /// \note The sub register index returned in \p SrcSubReg must be used 00279 /// on \p SrcReg to access the actual value. 00280 /// \return Unless the returned value is nullptr (i.e., no source found), 00281 /// \p SrcReg gives the register of the next source used in the returned 00282 /// instruction and \p SrcSubReg the sub-register index to be used on that 00283 /// source to get the tracked value. When nullptr is returned, no 00284 /// alternative source has been found. 00285 const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg); 00286 00287 /// \brief Get the last register where the initial value can be found. 00288 /// Initially this is the register of the definition. 00289 /// Then, after each successful call to getNextSource, this is the 00290 /// register of the last source. 00291 unsigned getReg() const { return Reg; } 00292 }; 00293 } 00294 00295 char PeepholeOptimizer::ID = 0; 00296 char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; 00297 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", 00298 "Peephole Optimizations", false, false) 00299 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 00300 INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", 00301 "Peephole Optimizations", false, false) 00302 00303 /// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads 00304 /// a single register and writes a single register and it does not modify the 00305 /// source, and if the source value is preserved as a sub-register of the 00306 /// result, then replace all reachable uses of the source with the subreg of the 00307 /// result. 00308 /// 00309 /// Do not generate an EXTRACT that is used only in a debug use, as this changes 00310 /// the code. Since this code does not currently share EXTRACTs, just ignore all 00311 /// debug uses. 00312 bool PeepholeOptimizer:: 00313 optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 00314 SmallPtrSetImpl<MachineInstr*> &LocalMIs) { 00315 unsigned SrcReg, DstReg, SubIdx; 00316 if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) 00317 return false; 00318 00319 if (TargetRegisterInfo::isPhysicalRegister(DstReg) || 00320 TargetRegisterInfo::isPhysicalRegister(SrcReg)) 00321 return false; 00322 00323 if (MRI->hasOneNonDBGUse(SrcReg)) 00324 // No other uses. 00325 return false; 00326 00327 // Ensure DstReg can get a register class that actually supports 00328 // sub-registers. Don't change the class until we commit. 00329 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); 00330 DstRC = TM->getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg( 00331 DstRC, SubIdx); 00332 if (!DstRC) 00333 return false; 00334 00335 // The ext instr may be operating on a sub-register of SrcReg as well. 00336 // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit 00337 // register. 00338 // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of 00339 // SrcReg:SubIdx should be replaced. 00340 bool UseSrcSubIdx = 00341 TM->getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg( 00342 MRI->getRegClass(SrcReg), SubIdx) != nullptr; 00343 00344 // The source has other uses. See if we can replace the other uses with use of 00345 // the result of the extension. 00346 SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; 00347 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 00348 ReachedBBs.insert(UI.getParent()); 00349 00350 // Uses that are in the same BB of uses of the result of the instruction. 00351 SmallVector<MachineOperand*, 8> Uses; 00352 00353 // Uses that the result of the instruction can reach. 00354 SmallVector<MachineOperand*, 8> ExtendedUses; 00355 00356 bool ExtendLife = true; 00357 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { 00358 MachineInstr *UseMI = UseMO.getParent(); 00359 if (UseMI == MI) 00360 continue; 00361 00362 if (UseMI->isPHI()) { 00363 ExtendLife = false; 00364 continue; 00365 } 00366 00367 // Only accept uses of SrcReg:SubIdx. 00368 if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) 00369 continue; 00370 00371 // It's an error to translate this: 00372 // 00373 // %reg1025 = <sext> %reg1024 00374 // ... 00375 // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 00376 // 00377 // into this: 00378 // 00379 // %reg1025 = <sext> %reg1024 00380 // ... 00381 // %reg1027 = COPY %reg1025:4 00382 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 00383 // 00384 // The problem here is that SUBREG_TO_REG is there to assert that an 00385 // implicit zext occurs. It doesn't insert a zext instruction. If we allow 00386 // the COPY here, it will give us the value after the <sext>, not the 00387 // original value of %reg1024 before <sext>. 00388 if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) 00389 continue; 00390 00391 MachineBasicBlock *UseMBB = UseMI->getParent(); 00392 if (UseMBB == MBB) { 00393 // Local uses that come after the extension. 00394 if (!LocalMIs.count(UseMI)) 00395 Uses.push_back(&UseMO); 00396 } else if (ReachedBBs.count(UseMBB)) { 00397 // Non-local uses where the result of the extension is used. Always 00398 // replace these unless it's a PHI. 00399 Uses.push_back(&UseMO); 00400 } else if (Aggressive && DT->dominates(MBB, UseMBB)) { 00401 // We may want to extend the live range of the extension result in order 00402 // to replace these uses. 00403 ExtendedUses.push_back(&UseMO); 00404 } else { 00405 // Both will be live out of the def MBB anyway. Don't extend live range of 00406 // the extension result. 00407 ExtendLife = false; 00408 break; 00409 } 00410 } 00411 00412 if (ExtendLife && !ExtendedUses.empty()) 00413 // Extend the liveness of the extension result. 00414 std::copy(ExtendedUses.begin(), ExtendedUses.end(), 00415 std::back_inserter(Uses)); 00416 00417 // Now replace all uses. 00418 bool Changed = false; 00419 if (!Uses.empty()) { 00420 SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; 00421 00422 // Look for PHI uses of the extended result, we don't want to extend the 00423 // liveness of a PHI input. It breaks all kinds of assumptions down 00424 // stream. A PHI use is expected to be the kill of its source values. 00425 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 00426 if (UI.isPHI()) 00427 PHIBBs.insert(UI.getParent()); 00428 00429 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 00430 for (unsigned i = 0, e = Uses.size(); i != e; ++i) { 00431 MachineOperand *UseMO = Uses[i]; 00432 MachineInstr *UseMI = UseMO->getParent(); 00433 MachineBasicBlock *UseMBB = UseMI->getParent(); 00434 if (PHIBBs.count(UseMBB)) 00435 continue; 00436 00437 // About to add uses of DstReg, clear DstReg's kill flags. 00438 if (!Changed) { 00439 MRI->clearKillFlags(DstReg); 00440 MRI->constrainRegClass(DstReg, DstRC); 00441 } 00442 00443 unsigned NewVR = MRI->createVirtualRegister(RC); 00444 MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), 00445 TII->get(TargetOpcode::COPY), NewVR) 00446 .addReg(DstReg, 0, SubIdx); 00447 // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. 00448 if (UseSrcSubIdx) { 00449 Copy->getOperand(0).setSubReg(SubIdx); 00450 Copy->getOperand(0).setIsUndef(); 00451 } 00452 UseMO->setReg(NewVR); 00453 ++NumReuse; 00454 Changed = true; 00455 } 00456 } 00457 00458 return Changed; 00459 } 00460 00461 /// optimizeCmpInstr - If the instruction is a compare and the previous 00462 /// instruction it's comparing against all ready sets (or could be modified to 00463 /// set) the same flag as the compare, then we can remove the comparison and use 00464 /// the flag from the previous instruction. 00465 bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, 00466 MachineBasicBlock *MBB) { 00467 // If this instruction is a comparison against zero and isn't comparing a 00468 // physical register, we can try to optimize it. 00469 unsigned SrcReg, SrcReg2; 00470 int CmpMask, CmpValue; 00471 if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || 00472 TargetRegisterInfo::isPhysicalRegister(SrcReg) || 00473 (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) 00474 return false; 00475 00476 // Attempt to optimize the comparison instruction. 00477 if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { 00478 ++NumCmps; 00479 return true; 00480 } 00481 00482 return false; 00483 } 00484 00485 /// Optimize a select instruction. 00486 bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { 00487 unsigned TrueOp = 0; 00488 unsigned FalseOp = 0; 00489 bool Optimizable = false; 00490 SmallVector<MachineOperand, 4> Cond; 00491 if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) 00492 return false; 00493 if (!Optimizable) 00494 return false; 00495 if (!TII->optimizeSelect(MI)) 00496 return false; 00497 MI->eraseFromParent(); 00498 ++NumSelects; 00499 return true; 00500 } 00501 00502 /// \brief Check if the registers defined by the pair (RegisterClass, SubReg) 00503 /// share the same register file. 00504 static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, 00505 const TargetRegisterClass *DefRC, 00506 unsigned DefSubReg, 00507 const TargetRegisterClass *SrcRC, 00508 unsigned SrcSubReg) { 00509 // Same register class. 00510 if (DefRC == SrcRC) 00511 return true; 00512 00513 // Both operands are sub registers. Check if they share a register class. 00514 unsigned SrcIdx, DefIdx; 00515 if (SrcSubReg && DefSubReg) 00516 return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, 00517 SrcIdx, DefIdx) != nullptr; 00518 // At most one of the register is a sub register, make it Src to avoid 00519 // duplicating the test. 00520 if (!SrcSubReg) { 00521 std::swap(DefSubReg, SrcSubReg); 00522 std::swap(DefRC, SrcRC); 00523 } 00524 00525 // One of the register is a sub register, check if we can get a superclass. 00526 if (SrcSubReg) 00527 return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; 00528 // Plain copy. 00529 return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; 00530 } 00531 00532 /// \brief Try to find the next source that share the same register file 00533 /// for the value defined by \p Reg and \p SubReg. 00534 /// When true is returned, \p Reg and \p SubReg are updated with the 00535 /// register number and sub-register index of the new source. 00536 /// \return False if no alternative sources are available. True otherwise. 00537 bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { 00538 // Do not try to find a new source for a physical register. 00539 // So far we do not have any motivating example for doing that. 00540 // Thus, instead of maintaining untested code, we will revisit that if 00541 // that changes at some point. 00542 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 00543 return false; 00544 00545 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); 00546 unsigned DefSubReg = SubReg; 00547 00548 unsigned Src; 00549 unsigned SrcSubReg; 00550 bool ShouldRewrite = false; 00551 const TargetRegisterInfo &TRI = *TM->getSubtargetImpl()->getRegisterInfo(); 00552 00553 // Follow the chain of copies until we reach the top of the use-def chain 00554 // or find a more suitable source. 00555 ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); 00556 do { 00557 unsigned CopySrcReg, CopySrcSubReg; 00558 if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg)) 00559 break; 00560 Src = CopySrcReg; 00561 SrcSubReg = CopySrcSubReg; 00562 00563 // Do not extend the live-ranges of physical registers as they add 00564 // constraints to the register allocator. 00565 // Moreover, if we want to extend the live-range of a physical register, 00566 // unlike SSA virtual register, we will have to check that they are not 00567 // redefine before the related use. 00568 if (TargetRegisterInfo::isPhysicalRegister(Src)) 00569 break; 00570 00571 const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); 00572 00573 // If this source does not incur a cross register bank copy, use it. 00574 ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC, 00575 SrcSubReg); 00576 } while (!ShouldRewrite); 00577 00578 // If we did not find a more suitable source, there is nothing to optimize. 00579 if (!ShouldRewrite || Src == Reg) 00580 return false; 00581 00582 Reg = Src; 00583 SubReg = SrcSubReg; 00584 return true; 00585 } 00586 00587 namespace { 00588 /// \brief Helper class to rewrite the arguments of a copy-like instruction. 00589 class CopyRewriter { 00590 protected: 00591 /// The copy-like instruction. 00592 MachineInstr &CopyLike; 00593 /// The index of the source being rewritten. 00594 unsigned CurrentSrcIdx; 00595 00596 public: 00597 CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {} 00598 00599 virtual ~CopyRewriter() {} 00600 00601 /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and 00602 /// the related value that it affects (TrackReg, TrackSubReg). 00603 /// A source is considered rewritable if its register class and the 00604 /// register class of the related TrackReg may not be register 00605 /// coalescer friendly. In other words, given a copy-like instruction 00606 /// not all the arguments may be returned at rewritable source, since 00607 /// some arguments are none to be register coalescer friendly. 00608 /// 00609 /// Each call of this method moves the current source to the next 00610 /// rewritable source. 00611 /// For instance, let CopyLike be the instruction to rewrite. 00612 /// CopyLike has one definition and one source: 00613 /// dst.dstSubIdx = CopyLike src.srcSubIdx. 00614 /// 00615 /// The first call will give the first rewritable source, i.e., 00616 /// the only source this instruction has: 00617 /// (SrcReg, SrcSubReg) = (src, srcSubIdx). 00618 /// This source defines the whole definition, i.e., 00619 /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). 00620 /// 00621 /// The second and subsequent calls will return false, has there is only one 00622 /// rewritable source. 00623 /// 00624 /// \return True if a rewritable source has been found, false otherwise. 00625 /// The output arguments are valid if and only if true is returned. 00626 virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 00627 unsigned &TrackReg, 00628 unsigned &TrackSubReg) { 00629 // If CurrentSrcIdx == 1, this means this function has already been 00630 // called once. CopyLike has one defintiion and one argument, thus, 00631 // there is nothing else to rewrite. 00632 if (!CopyLike.isCopy() || CurrentSrcIdx == 1) 00633 return false; 00634 // This is the first call to getNextRewritableSource. 00635 // Move the CurrentSrcIdx to remember that we made that call. 00636 CurrentSrcIdx = 1; 00637 // The rewritable source is the argument. 00638 const MachineOperand &MOSrc = CopyLike.getOperand(1); 00639 SrcReg = MOSrc.getReg(); 00640 SrcSubReg = MOSrc.getSubReg(); 00641 // What we track are the alternative sources of the definition. 00642 const MachineOperand &MODef = CopyLike.getOperand(0); 00643 TrackReg = MODef.getReg(); 00644 TrackSubReg = MODef.getSubReg(); 00645 return true; 00646 } 00647 00648 /// \brief Rewrite the current source with \p NewReg and \p NewSubReg 00649 /// if possible. 00650 /// \return True if the rewritting was possible, false otherwise. 00651 virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { 00652 if (!CopyLike.isCopy() || CurrentSrcIdx != 1) 00653 return false; 00654 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); 00655 MOSrc.setReg(NewReg); 00656 MOSrc.setSubReg(NewSubReg); 00657 return true; 00658 } 00659 }; 00660 00661 /// \brief Specialized rewriter for INSERT_SUBREG instruction. 00662 class InsertSubregRewriter : public CopyRewriter { 00663 public: 00664 InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) { 00665 assert(MI.isInsertSubreg() && "Invalid instruction"); 00666 } 00667 00668 /// \brief See CopyRewriter::getNextRewritableSource. 00669 /// Here CopyLike has the following form: 00670 /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. 00671 /// Src1 has the same register class has dst, hence, there is 00672 /// nothing to rewrite. 00673 /// Src2.src2SubIdx, may not be register coalescer friendly. 00674 /// Therefore, the first call to this method returns: 00675 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 00676 /// (TrackReg, TrackSubReg) = (dst, subIdx). 00677 /// 00678 /// Subsequence calls will return false. 00679 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 00680 unsigned &TrackReg, 00681 unsigned &TrackSubReg) override { 00682 // If we already get the only source we can rewrite, return false. 00683 if (CurrentSrcIdx == 2) 00684 return false; 00685 // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. 00686 CurrentSrcIdx = 2; 00687 const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); 00688 SrcReg = MOInsertedReg.getReg(); 00689 SrcSubReg = MOInsertedReg.getSubReg(); 00690 const MachineOperand &MODef = CopyLike.getOperand(0); 00691 00692 // We want to track something that is compatible with the 00693 // partial definition. 00694 TrackReg = MODef.getReg(); 00695 if (MODef.getSubReg()) 00696 // Bails if we have to compose sub-register indices. 00697 return false; 00698 TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); 00699 return true; 00700 } 00701 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 00702 if (CurrentSrcIdx != 2) 00703 return false; 00704 // We are rewriting the inserted reg. 00705 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 00706 MO.setReg(NewReg); 00707 MO.setSubReg(NewSubReg); 00708 return true; 00709 } 00710 }; 00711 00712 /// \brief Specialized rewriter for EXTRACT_SUBREG instruction. 00713 class ExtractSubregRewriter : public CopyRewriter { 00714 const TargetInstrInfo &TII; 00715 00716 public: 00717 ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) 00718 : CopyRewriter(MI), TII(TII) { 00719 assert(MI.isExtractSubreg() && "Invalid instruction"); 00720 } 00721 00722 /// \brief See CopyRewriter::getNextRewritableSource. 00723 /// Here CopyLike has the following form: 00724 /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. 00725 /// There is only one rewritable source: Src.subIdx, 00726 /// which defines dst.dstSubIdx. 00727 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 00728 unsigned &TrackReg, 00729 unsigned &TrackSubReg) override { 00730 // If we already get the only source we can rewrite, return false. 00731 if (CurrentSrcIdx == 1) 00732 return false; 00733 // We are looking at v1 = EXTRACT_SUBREG v0, sub0. 00734 CurrentSrcIdx = 1; 00735 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); 00736 SrcReg = MOExtractedReg.getReg(); 00737 // If we have to compose sub-register indices, bails out. 00738 if (MOExtractedReg.getSubReg()) 00739 return false; 00740 00741 SrcSubReg = CopyLike.getOperand(2).getImm(); 00742 00743 // We want to track something that is compatible with the definition. 00744 const MachineOperand &MODef = CopyLike.getOperand(0); 00745 TrackReg = MODef.getReg(); 00746 TrackSubReg = MODef.getSubReg(); 00747 return true; 00748 } 00749 00750 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 00751 // The only source we can rewrite is the input register. 00752 if (CurrentSrcIdx != 1) 00753 return false; 00754 00755 CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); 00756 00757 // If we find a source that does not require to extract something, 00758 // rewrite the operation with a copy. 00759 if (!NewSubReg) { 00760 // Move the current index to an invalid position. 00761 // We do not want another call to this method to be able 00762 // to do any change. 00763 CurrentSrcIdx = -1; 00764 // Rewrite the operation as a COPY. 00765 // Get rid of the sub-register index. 00766 CopyLike.RemoveOperand(2); 00767 // Morph the operation into a COPY. 00768 CopyLike.setDesc(TII.get(TargetOpcode::COPY)); 00769 return true; 00770 } 00771 CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); 00772 return true; 00773 } 00774 }; 00775 00776 /// \brief Specialized rewriter for REG_SEQUENCE instruction. 00777 class RegSequenceRewriter : public CopyRewriter { 00778 public: 00779 RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) { 00780 assert(MI.isRegSequence() && "Invalid instruction"); 00781 } 00782 00783 /// \brief See CopyRewriter::getNextRewritableSource. 00784 /// Here CopyLike has the following form: 00785 /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. 00786 /// Each call will return a different source, walking all the available 00787 /// source. 00788 /// 00789 /// The first call returns: 00790 /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). 00791 /// (TrackReg, TrackSubReg) = (dst, subIdx1). 00792 /// 00793 /// The second call returns: 00794 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 00795 /// (TrackReg, TrackSubReg) = (dst, subIdx2). 00796 /// 00797 /// And so on, until all the sources have been traversed, then 00798 /// it returns false. 00799 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 00800 unsigned &TrackReg, 00801 unsigned &TrackSubReg) override { 00802 // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. 00803 00804 // If this is the first call, move to the first argument. 00805 if (CurrentSrcIdx == 0) { 00806 CurrentSrcIdx = 1; 00807 } else { 00808 // Otherwise, move to the next argument and check that it is valid. 00809 CurrentSrcIdx += 2; 00810 if (CurrentSrcIdx >= CopyLike.getNumOperands()) 00811 return false; 00812 } 00813 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); 00814 SrcReg = MOInsertedReg.getReg(); 00815 // If we have to compose sub-register indices, bails out. 00816 if ((SrcSubReg = MOInsertedReg.getSubReg())) 00817 return false; 00818 00819 // We want to track something that is compatible with the related 00820 // partial definition. 00821 TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); 00822 00823 const MachineOperand &MODef = CopyLike.getOperand(0); 00824 TrackReg = MODef.getReg(); 00825 // If we have to compose sub-registers, bails. 00826 return MODef.getSubReg() == 0; 00827 } 00828 00829 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 00830 // We cannot rewrite out of bound operands. 00831 // Moreover, rewritable sources are at odd positions. 00832 if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) 00833 return false; 00834 00835 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 00836 MO.setReg(NewReg); 00837 MO.setSubReg(NewSubReg); 00838 return true; 00839 } 00840 }; 00841 } // End namespace. 00842 00843 /// \brief Get the appropriated CopyRewriter for \p MI. 00844 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr 00845 /// if no rewriter works for \p MI. 00846 static CopyRewriter *getCopyRewriter(MachineInstr &MI, 00847 const TargetInstrInfo &TII) { 00848 switch (MI.getOpcode()) { 00849 default: 00850 return nullptr; 00851 case TargetOpcode::COPY: 00852 return new CopyRewriter(MI); 00853 case TargetOpcode::INSERT_SUBREG: 00854 return new InsertSubregRewriter(MI); 00855 case TargetOpcode::EXTRACT_SUBREG: 00856 return new ExtractSubregRewriter(MI, TII); 00857 case TargetOpcode::REG_SEQUENCE: 00858 return new RegSequenceRewriter(MI); 00859 } 00860 llvm_unreachable(nullptr); 00861 } 00862 00863 /// \brief Optimize generic copy instructions to avoid cross 00864 /// register bank copy. The optimization looks through a chain of 00865 /// copies and tries to find a source that has a compatible register 00866 /// class. 00867 /// Two register classes are considered to be compatible if they share 00868 /// the same register bank. 00869 /// New copies issued by this optimization are register allocator 00870 /// friendly. This optimization does not remove any copy as it may 00871 /// overconstraint the register allocator, but replaces some operands 00872 /// when possible. 00873 /// \pre isCoalescableCopy(*MI) is true. 00874 /// \return True, when \p MI has been rewritten. False otherwise. 00875 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { 00876 assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); 00877 assert(MI->getDesc().getNumDefs() == 1 && 00878 "Coalescer can understand multiple defs?!"); 00879 const MachineOperand &MODef = MI->getOperand(0); 00880 // Do not rewrite physical definitions. 00881 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 00882 return false; 00883 00884 bool Changed = false; 00885 // Get the right rewriter for the current copy. 00886 std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII)); 00887 // If none exists, bails out. 00888 if (!CpyRewriter) 00889 return false; 00890 // Rewrite each rewritable source. 00891 unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; 00892 while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, 00893 TrackSubReg)) { 00894 unsigned NewSrc = TrackReg; 00895 unsigned NewSubReg = TrackSubReg; 00896 // Try to find a more suitable source. 00897 // If we failed to do so, or get the actual source, 00898 // move to the next source. 00899 if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) 00900 continue; 00901 // Rewrite source. 00902 if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { 00903 // We may have extended the live-range of NewSrc, account for that. 00904 MRI->clearKillFlags(NewSrc); 00905 Changed = true; 00906 } 00907 } 00908 // TODO: We could have a clean-up method to tidy the instruction. 00909 // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 00910 // => v0 = COPY v1 00911 // Currently we haven't seen motivating example for that and we 00912 // want to avoid untested code. 00913 NumRewrittenCopies += Changed == true; 00914 return Changed; 00915 } 00916 00917 /// \brief Optimize copy-like instructions to create 00918 /// register coalescer friendly instruction. 00919 /// The optimization tries to kill-off the \p MI by looking 00920 /// through a chain of copies to find a source that has a compatible 00921 /// register class. 00922 /// If such a source is found, it replace \p MI by a generic COPY 00923 /// operation. 00924 /// \pre isUncoalescableCopy(*MI) is true. 00925 /// \return True, when \p MI has been optimized. In that case, \p MI has 00926 /// been removed from its parent. 00927 /// All COPY instructions created, are inserted in \p LocalMIs. 00928 bool PeepholeOptimizer::optimizeUncoalescableCopy( 00929 MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { 00930 assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); 00931 00932 // Check if we can rewrite all the values defined by this instruction. 00933 SmallVector< 00934 std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>, 00935 4> RewritePairs; 00936 for (const MachineOperand &MODef : MI->defs()) { 00937 if (MODef.isDead()) 00938 // We can ignore those. 00939 continue; 00940 00941 // If a physical register is here, this is probably for a good reason. 00942 // Do not rewrite that. 00943 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 00944 return false; 00945 00946 // If we do not know how to rewrite this definition, there is no point 00947 // in trying to kill this instruction. 00948 TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg()); 00949 TargetInstrInfo::RegSubRegPair Src = Def; 00950 if (!findNextSource(Src.Reg, Src.SubReg)) 00951 return false; 00952 RewritePairs.push_back(std::make_pair(Def, Src)); 00953 } 00954 // The change is possible for all defs, do it. 00955 for (const auto &PairDefSrc : RewritePairs) { 00956 const auto &Def = PairDefSrc.first; 00957 const auto &Src = PairDefSrc.second; 00958 // Rewrite the "copy" in a way the register coalescer understands. 00959 assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && 00960 "We do not rewrite physical registers"); 00961 const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); 00962 unsigned NewVR = MRI->createVirtualRegister(DefRC); 00963 MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 00964 TII->get(TargetOpcode::COPY), 00965 NewVR).addReg(Src.Reg, 0, Src.SubReg); 00966 NewCopy->getOperand(0).setSubReg(Def.SubReg); 00967 if (Def.SubReg) 00968 NewCopy->getOperand(0).setIsUndef(); 00969 LocalMIs.insert(NewCopy); 00970 MRI->replaceRegWith(Def.Reg, NewVR); 00971 MRI->clearKillFlags(NewVR); 00972 // We extended the lifetime of Src. 00973 // Clear the kill flags to account for that. 00974 MRI->clearKillFlags(Src.Reg); 00975 } 00976 // MI is now dead. 00977 MI->eraseFromParent(); 00978 ++NumUncoalescableCopies; 00979 return true; 00980 } 00981 00982 /// isLoadFoldable - Check whether MI is a candidate for folding into a later 00983 /// instruction. We only fold loads to virtual registers and the virtual 00984 /// register defined has a single use. 00985 bool PeepholeOptimizer::isLoadFoldable( 00986 MachineInstr *MI, 00987 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { 00988 if (!MI->canFoldAsLoad() || !MI->mayLoad()) 00989 return false; 00990 const MCInstrDesc &MCID = MI->getDesc(); 00991 if (MCID.getNumDefs() != 1) 00992 return false; 00993 00994 unsigned Reg = MI->getOperand(0).getReg(); 00995 // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting 00996 // loads. It should be checked when processing uses of the load, since 00997 // uses can be removed during peephole. 00998 if (!MI->getOperand(0).getSubReg() && 00999 TargetRegisterInfo::isVirtualRegister(Reg) && 01000 MRI->hasOneNonDBGUse(Reg)) { 01001 FoldAsLoadDefCandidates.insert(Reg); 01002 return true; 01003 } 01004 return false; 01005 } 01006 01007 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, 01008 SmallSet<unsigned, 4> &ImmDefRegs, 01009 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 01010 const MCInstrDesc &MCID = MI->getDesc(); 01011 if (!MI->isMoveImmediate()) 01012 return false; 01013 if (MCID.getNumDefs() != 1) 01014 return false; 01015 unsigned Reg = MI->getOperand(0).getReg(); 01016 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 01017 ImmDefMIs.insert(std::make_pair(Reg, MI)); 01018 ImmDefRegs.insert(Reg); 01019 return true; 01020 } 01021 01022 return false; 01023 } 01024 01025 /// foldImmediate - Try folding register operands that are defined by move 01026 /// immediate instructions, i.e. a trivial constant folding optimization, if 01027 /// and only if the def and use are in the same BB. 01028 bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 01029 SmallSet<unsigned, 4> &ImmDefRegs, 01030 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 01031 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 01032 MachineOperand &MO = MI->getOperand(i); 01033 if (!MO.isReg() || MO.isDef()) 01034 continue; 01035 unsigned Reg = MO.getReg(); 01036 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 01037 continue; 01038 if (ImmDefRegs.count(Reg) == 0) 01039 continue; 01040 DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); 01041 assert(II != ImmDefMIs.end()); 01042 if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { 01043 ++NumImmFold; 01044 return true; 01045 } 01046 } 01047 return false; 01048 } 01049 01050 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { 01051 if (skipOptnoneFunction(*MF.getFunction())) 01052 return false; 01053 01054 DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); 01055 DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); 01056 01057 if (DisablePeephole) 01058 return false; 01059 01060 TM = &MF.getTarget(); 01061 TII = TM->getSubtargetImpl()->getInstrInfo(); 01062 MRI = &MF.getRegInfo(); 01063 DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr; 01064 01065 bool Changed = false; 01066 01067 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 01068 MachineBasicBlock *MBB = &*I; 01069 01070 bool SeenMoveImm = false; 01071 SmallPtrSet<MachineInstr*, 16> LocalMIs; 01072 SmallSet<unsigned, 4> ImmDefRegs; 01073 DenseMap<unsigned, MachineInstr*> ImmDefMIs; 01074 SmallSet<unsigned, 16> FoldAsLoadDefCandidates; 01075 01076 for (MachineBasicBlock::iterator 01077 MII = I->begin(), MIE = I->end(); MII != MIE; ) { 01078 MachineInstr *MI = &*MII; 01079 // We may be erasing MI below, increment MII now. 01080 ++MII; 01081 LocalMIs.insert(MI); 01082 01083 // Skip debug values. They should not affect this peephole optimization. 01084 if (MI->isDebugValue()) 01085 continue; 01086 01087 // If there exists an instruction which belongs to the following 01088 // categories, we will discard the load candidates. 01089 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || 01090 MI->isKill() || MI->isInlineAsm() || 01091 MI->hasUnmodeledSideEffects()) { 01092 FoldAsLoadDefCandidates.clear(); 01093 continue; 01094 } 01095 if (MI->mayStore() || MI->isCall()) 01096 FoldAsLoadDefCandidates.clear(); 01097 01098 if ((isUncoalescableCopy(*MI) && 01099 optimizeUncoalescableCopy(MI, LocalMIs)) || 01100 (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || 01101 (MI->isSelect() && optimizeSelect(MI))) { 01102 // MI is deleted. 01103 LocalMIs.erase(MI); 01104 Changed = true; 01105 continue; 01106 } 01107 01108 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { 01109 // MI is just rewritten. 01110 Changed = true; 01111 continue; 01112 } 01113 01114 if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { 01115 SeenMoveImm = true; 01116 } else { 01117 Changed |= optimizeExtInstr(MI, MBB, LocalMIs); 01118 // optimizeExtInstr might have created new instructions after MI 01119 // and before the already incremented MII. Adjust MII so that the 01120 // next iteration sees the new instructions. 01121 MII = MI; 01122 ++MII; 01123 if (SeenMoveImm) 01124 Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); 01125 } 01126 01127 // Check whether MI is a load candidate for folding into a later 01128 // instruction. If MI is not a candidate, check whether we can fold an 01129 // earlier load into MI. 01130 if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && 01131 !FoldAsLoadDefCandidates.empty()) { 01132 const MCInstrDesc &MIDesc = MI->getDesc(); 01133 for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands(); 01134 ++i) { 01135 const MachineOperand &MOp = MI->getOperand(i); 01136 if (!MOp.isReg()) 01137 continue; 01138 unsigned FoldAsLoadDefReg = MOp.getReg(); 01139 if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) { 01140 // We need to fold load after optimizeCmpInstr, since 01141 // optimizeCmpInstr can enable folding by converting SUB to CMP. 01142 // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and 01143 // we need it for markUsesInDebugValueAsUndef(). 01144 unsigned FoldedReg = FoldAsLoadDefReg; 01145 MachineInstr *DefMI = nullptr; 01146 MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, 01147 FoldAsLoadDefReg, 01148 DefMI); 01149 if (FoldMI) { 01150 // Update LocalMIs since we replaced MI with FoldMI and deleted 01151 // DefMI. 01152 DEBUG(dbgs() << "Replacing: " << *MI); 01153 DEBUG(dbgs() << " With: " << *FoldMI); 01154 LocalMIs.erase(MI); 01155 LocalMIs.erase(DefMI); 01156 LocalMIs.insert(FoldMI); 01157 MI->eraseFromParent(); 01158 DefMI->eraseFromParent(); 01159 MRI->markUsesInDebugValueAsUndef(FoldedReg); 01160 FoldAsLoadDefCandidates.erase(FoldedReg); 01161 ++NumLoadFold; 01162 // MI is replaced with FoldMI. 01163 Changed = true; 01164 break; 01165 } 01166 } 01167 } 01168 } 01169 } 01170 } 01171 01172 return Changed; 01173 } 01174 01175 bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, 01176 unsigned &SrcSubReg) { 01177 assert(Def->isCopy() && "Invalid definition"); 01178 // Copy instruction are supposed to be: Def = Src. 01179 // If someone breaks this assumption, bad things will happen everywhere. 01180 assert(Def->getNumOperands() == 2 && "Invalid number of operands"); 01181 01182 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 01183 // If we look for a different subreg, it means we want a subreg of src. 01184 // Bails as we do not support composing subreg yet. 01185 return false; 01186 // Otherwise, we want the whole source. 01187 const MachineOperand &Src = Def->getOperand(1); 01188 SrcReg = Src.getReg(); 01189 SrcSubReg = Src.getSubReg(); 01190 return true; 01191 } 01192 01193 bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, 01194 unsigned &SrcSubReg) { 01195 assert(Def->isBitcast() && "Invalid definition"); 01196 01197 // Bail if there are effects that a plain copy will not expose. 01198 if (Def->hasUnmodeledSideEffects()) 01199 return false; 01200 01201 // Bitcasts with more than one def are not supported. 01202 if (Def->getDesc().getNumDefs() != 1) 01203 return false; 01204 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 01205 // If we look for a different subreg, it means we want a subreg of the src. 01206 // Bails as we do not support composing subreg yet. 01207 return false; 01208 01209 unsigned SrcIdx = Def->getNumOperands(); 01210 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; 01211 ++OpIdx) { 01212 const MachineOperand &MO = Def->getOperand(OpIdx); 01213 if (!MO.isReg() || !MO.getReg()) 01214 continue; 01215 assert(!MO.isDef() && "We should have skipped all the definitions by now"); 01216 if (SrcIdx != EndOpIdx) 01217 // Multiple sources? 01218 return false; 01219 SrcIdx = OpIdx; 01220 } 01221 const MachineOperand &Src = Def->getOperand(SrcIdx); 01222 SrcReg = Src.getReg(); 01223 SrcSubReg = Src.getSubReg(); 01224 return true; 01225 } 01226 01227 bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, 01228 unsigned &SrcSubReg) { 01229 assert((Def->isRegSequence() || Def->isRegSequenceLike()) && 01230 "Invalid definition"); 01231 01232 if (Def->getOperand(DefIdx).getSubReg()) 01233 // If we are composing subreg, bails out. 01234 // The case we are checking is Def.<subreg> = REG_SEQUENCE. 01235 // This should almost never happen as the SSA property is tracked at 01236 // the register level (as opposed to the subreg level). 01237 // I.e., 01238 // Def.sub0 = 01239 // Def.sub1 = 01240 // is a valid SSA representation for Def.sub0 and Def.sub1, but not for 01241 // Def. Thus, it must not be generated. 01242 // However, some code could theoretically generates a single 01243 // Def.sub0 (i.e, not defining the other subregs) and we would 01244 // have this case. 01245 // If we can ascertain (or force) that this never happens, we could 01246 // turn that into an assertion. 01247 return false; 01248 01249 if (!TII) 01250 // We could handle the REG_SEQUENCE here, but we do not want to 01251 // duplicate the code from the generic TII. 01252 return false; 01253 01254 SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs; 01255 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) 01256 return false; 01257 01258 // We are looking at: 01259 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... 01260 // Check if one of the operand defines the subreg we are interested in. 01261 for (auto &RegSeqInput : RegSeqInputRegs) { 01262 if (RegSeqInput.SubIdx == DefSubReg) { 01263 if (RegSeqInput.SubReg) 01264 // Bails if we have to compose sub registers. 01265 return false; 01266 01267 SrcReg = RegSeqInput.Reg; 01268 SrcSubReg = RegSeqInput.SubReg; 01269 return true; 01270 } 01271 } 01272 01273 // If the subreg we are tracking is super-defined by another subreg, 01274 // we could follow this value. However, this would require to compose 01275 // the subreg and we do not do that for now. 01276 return false; 01277 } 01278 01279 bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, 01280 unsigned &SrcSubReg) { 01281 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && 01282 "Invalid definition"); 01283 01284 if (Def->getOperand(DefIdx).getSubReg()) 01285 // If we are composing subreg, bails out. 01286 // Same remark as getNextSourceFromRegSequence. 01287 // I.e., this may be turned into an assert. 01288 return false; 01289 01290 if (!TII) 01291 // We could handle the REG_SEQUENCE here, but we do not want to 01292 // duplicate the code from the generic TII. 01293 return false; 01294 01295 TargetInstrInfo::RegSubRegPair BaseReg; 01296 TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; 01297 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) 01298 return false; 01299 01300 // We are looking at: 01301 // Def = INSERT_SUBREG v0, v1, sub1 01302 // There are two cases: 01303 // 1. DefSubReg == sub1, get v1. 01304 // 2. DefSubReg != sub1, the value may be available through v0. 01305 01306 // #1 Check if the inserted register matches the required sub index. 01307 if (InsertedReg.SubIdx == DefSubReg) { 01308 SrcReg = InsertedReg.Reg; 01309 SrcSubReg = InsertedReg.SubReg; 01310 return true; 01311 } 01312 // #2 Otherwise, if the sub register we are looking for is not partial 01313 // defined by the inserted element, we can look through the main 01314 // register (v0). 01315 const MachineOperand &MODef = Def->getOperand(DefIdx); 01316 // If the result register (Def) and the base register (v0) do not 01317 // have the same register class or if we have to compose 01318 // subregisters, bails out. 01319 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || 01320 BaseReg.SubReg) 01321 return false; 01322 01323 // Get the TRI and check if the inserted sub-register overlaps with the 01324 // sub-register we are tracking. 01325 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); 01326 if (!TRI || 01327 (TRI->getSubRegIndexLaneMask(DefSubReg) & 01328 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) 01329 return false; 01330 // At this point, the value is available in v0 via the same subreg 01331 // we used for Def. 01332 SrcReg = BaseReg.Reg; 01333 SrcSubReg = DefSubReg; 01334 return true; 01335 } 01336 01337 bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg, 01338 unsigned &SrcSubReg) { 01339 assert((Def->isExtractSubreg() || 01340 Def->isExtractSubregLike()) && "Invalid definition"); 01341 // We are looking at: 01342 // Def = EXTRACT_SUBREG v0, sub0 01343 01344 // Bails if we have to compose sub registers. 01345 // Indeed, if DefSubReg != 0, we would have to compose it with sub0. 01346 if (DefSubReg) 01347 return false; 01348 01349 if (!TII) 01350 // We could handle the EXTRACT_SUBREG here, but we do not want to 01351 // duplicate the code from the generic TII. 01352 return false; 01353 01354 TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; 01355 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) 01356 return false; 01357 01358 // Bails if we have to compose sub registers. 01359 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. 01360 if (ExtractSubregInputReg.SubReg) 01361 return false; 01362 // Otherwise, the value is available in the v0.sub0. 01363 SrcReg = ExtractSubregInputReg.Reg; 01364 SrcSubReg = ExtractSubregInputReg.SubIdx; 01365 return true; 01366 } 01367 01368 bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg, 01369 unsigned &SrcSubReg) { 01370 assert(Def->isSubregToReg() && "Invalid definition"); 01371 // We are looking at: 01372 // Def = SUBREG_TO_REG Imm, v0, sub0 01373 01374 // Bails if we have to compose sub registers. 01375 // If DefSubReg != sub0, we would have to check that all the bits 01376 // we track are included in sub0 and if yes, we would have to 01377 // determine the right subreg in v0. 01378 if (DefSubReg != Def->getOperand(3).getImm()) 01379 return false; 01380 // Bails if we have to compose sub registers. 01381 // Likewise, if v0.subreg != 0, we would have to compose it with sub0. 01382 if (Def->getOperand(2).getSubReg()) 01383 return false; 01384 01385 SrcReg = Def->getOperand(2).getReg(); 01386 SrcSubReg = Def->getOperand(3).getImm(); 01387 return true; 01388 } 01389 01390 bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) { 01391 assert(Def && "This method needs a valid definition"); 01392 01393 assert( 01394 (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && 01395 Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); 01396 if (Def->isCopy()) 01397 return getNextSourceFromCopy(SrcReg, SrcSubReg); 01398 if (Def->isBitcast()) 01399 return getNextSourceFromBitcast(SrcReg, SrcSubReg); 01400 // All the remaining cases involve "complex" instructions. 01401 // Bails if we did not ask for the advanced tracking. 01402 if (!UseAdvancedTracking) 01403 return false; 01404 if (Def->isRegSequence() || Def->isRegSequenceLike()) 01405 return getNextSourceFromRegSequence(SrcReg, SrcSubReg); 01406 if (Def->isInsertSubreg() || Def->isInsertSubregLike()) 01407 return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg); 01408 if (Def->isExtractSubreg() || Def->isExtractSubregLike()) 01409 return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg); 01410 if (Def->isSubregToReg()) 01411 return getNextSourceFromSubregToReg(SrcReg, SrcSubReg); 01412 return false; 01413 } 01414 01415 const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg, 01416 unsigned &SrcSubReg) { 01417 // If we reach a point where we cannot move up in the use-def chain, 01418 // there is nothing we can get. 01419 if (!Def) 01420 return nullptr; 01421 01422 const MachineInstr *PrevDef = nullptr; 01423 // Try to find the next source. 01424 if (getNextSourceImpl(SrcReg, SrcSubReg)) { 01425 // Update definition, definition index, and subregister for the 01426 // next call of getNextSource. 01427 // Update the current register. 01428 Reg = SrcReg; 01429 // Update the return value before moving up in the use-def chain. 01430 PrevDef = Def; 01431 // If we can still move up in the use-def chain, move to the next 01432 // defintion. 01433 if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { 01434 Def = MRI.getVRegDef(Reg); 01435 DefIdx = MRI.def_begin(Reg).getOperandNo(); 01436 DefSubReg = SrcSubReg; 01437 return PrevDef; 01438 } 01439 } 01440 // If we end up here, this means we will not be able to find another source 01441 // for the next iteration. 01442 // Make sure any new call to getNextSource bails out early by cutting the 01443 // use-def chain. 01444 Def = nullptr; 01445 return PrevDef; 01446 }