LLVM API Documentation

PeepholeOptimizer.cpp
Go to the documentation of this file.
00001 //===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // Perform peephole optimizations on the machine code:
00011 //
00012 // - Optimize Extensions
00013 //
00014 //     Optimization of sign / zero extension instructions. It may be extended to
00015 //     handle other instructions with similar properties.
00016 //
00017 //     On some targets, some instructions, e.g. X86 sign / zero extension, may
00018 //     leave the source value in the lower part of the result. This optimization
00019 //     will replace some uses of the pre-extension value with uses of the
00020 //     sub-register of the results.
00021 //
00022 // - Optimize Comparisons
00023 //
00024 //     Optimization of comparison instructions. For instance, in this code:
00025 //
00026 //       sub r1, 1
00027 //       cmp r1, 0
00028 //       bz  L1
00029 //
00030 //     If the "sub" instruction all ready sets (or could be modified to set) the
00031 //     same flag that the "cmp" instruction sets and that "bz" uses, then we can
00032 //     eliminate the "cmp" instruction.
00033 //
00034 //     Another instance, in this code:
00035 //
00036 //       sub r1, r3 | sub r1, imm
00037 //       cmp r3, r1 or cmp r1, r3 | cmp r1, imm
00038 //       bge L1
00039 //
00040 //     If the branch instruction can use flag from "sub", then we can replace
00041 //     "sub" with "subs" and eliminate the "cmp" instruction.
00042 //
00043 // - Optimize Loads:
00044 //
00045 //     Loads that can be folded into a later instruction. A load is foldable
00046 //     if it loads to virtual registers and the virtual register defined has 
00047 //     a single use.
00048 //
00049 // - Optimize Copies and Bitcast (more generally, target specific copies):
00050 //
00051 //     Rewrite copies and bitcasts to avoid cross register bank copies
00052 //     when possible.
00053 //     E.g., Consider the following example, where capital and lower
00054 //     letters denote different register file:
00055 //     b = copy A <-- cross-bank copy
00056 //     C = copy b <-- cross-bank copy
00057 //   =>
00058 //     b = copy A <-- cross-bank copy
00059 //     C = copy A <-- same-bank copy
00060 //
00061 //     E.g., for bitcast:
00062 //     b = bitcast A <-- cross-bank copy
00063 //     C = bitcast b <-- cross-bank copy
00064 //   =>
00065 //     b = bitcast A <-- cross-bank copy
00066 //     C = copy A    <-- same-bank copy
00067 //===----------------------------------------------------------------------===//
00068 
00069 #include "llvm/CodeGen/Passes.h"
00070 #include "llvm/ADT/DenseMap.h"
00071 #include "llvm/ADT/SmallPtrSet.h"
00072 #include "llvm/ADT/SmallSet.h"
00073 #include "llvm/ADT/Statistic.h"
00074 #include "llvm/CodeGen/MachineDominators.h"
00075 #include "llvm/CodeGen/MachineInstrBuilder.h"
00076 #include "llvm/CodeGen/MachineRegisterInfo.h"
00077 #include "llvm/Support/CommandLine.h"
00078 #include "llvm/Support/Debug.h"
00079 #include "llvm/Target/TargetInstrInfo.h"
00080 #include "llvm/Target/TargetRegisterInfo.h"
00081 #include "llvm/Target/TargetSubtargetInfo.h"
00082 #include <utility>
00083 using namespace llvm;
00084 
00085 #define DEBUG_TYPE "peephole-opt"
00086 
00087 // Optimize Extensions
00088 static cl::opt<bool>
00089 Aggressive("aggressive-ext-opt", cl::Hidden,
00090            cl::desc("Aggressive extension optimization"));
00091 
00092 static cl::opt<bool>
00093 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
00094                 cl::desc("Disable the peephole optimizer"));
00095 
00096 static cl::opt<bool>
00097 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
00098                   cl::desc("Disable advanced copy optimization"));
00099 
00100 STATISTIC(NumReuse,      "Number of extension results reused");
00101 STATISTIC(NumCmps,       "Number of compares eliminated");
00102 STATISTIC(NumImmFold,    "Number of move immediate folded");
00103 STATISTIC(NumLoadFold,   "Number of loads folded");
00104 STATISTIC(NumSelects,    "Number of selects optimized");
00105 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
00106 STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
00107 
00108 namespace {
00109   class PeepholeOptimizer : public MachineFunctionPass {
00110     const TargetMachine   *TM;
00111     const TargetInstrInfo *TII;
00112     MachineRegisterInfo   *MRI;
00113     MachineDominatorTree  *DT;  // Machine dominator tree
00114 
00115   public:
00116     static char ID; // Pass identification
00117     PeepholeOptimizer() : MachineFunctionPass(ID) {
00118       initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
00119     }
00120 
00121     bool runOnMachineFunction(MachineFunction &MF) override;
00122 
00123     void getAnalysisUsage(AnalysisUsage &AU) const override {
00124       AU.setPreservesCFG();
00125       MachineFunctionPass::getAnalysisUsage(AU);
00126       if (Aggressive) {
00127         AU.addRequired<MachineDominatorTree>();
00128         AU.addPreserved<MachineDominatorTree>();
00129       }
00130     }
00131 
00132   private:
00133     bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
00134     bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
00135                           SmallPtrSetImpl<MachineInstr*> &LocalMIs);
00136     bool optimizeSelect(MachineInstr *MI);
00137     bool optimizeCopyOrBitcast(MachineInstr *MI);
00138     bool optimizeCoalescableCopy(MachineInstr *MI);
00139     bool optimizeUncoalescableCopy(MachineInstr *MI,
00140                                    SmallPtrSetImpl<MachineInstr *> &LocalMIs);
00141     bool findNextSource(unsigned &Reg, unsigned &SubReg);
00142     bool isMoveImmediate(MachineInstr *MI,
00143                          SmallSet<unsigned, 4> &ImmDefRegs,
00144                          DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
00145     bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
00146                        SmallSet<unsigned, 4> &ImmDefRegs,
00147                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
00148     bool isLoadFoldable(MachineInstr *MI,
00149                         SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
00150 
00151     /// \brief Check whether \p MI is understood by the register coalescer
00152     /// but may require some rewriting.
00153     bool isCoalescableCopy(const MachineInstr &MI) {
00154       // SubregToRegs are not interesting, because they are already register
00155       // coalescer friendly.
00156       return MI.isCopy() || (!DisableAdvCopyOpt &&
00157                              (MI.isRegSequence() || MI.isInsertSubreg() ||
00158                               MI.isExtractSubreg()));
00159     }
00160 
00161     /// \brief Check whether \p MI is a copy like instruction that is
00162     /// not recognized by the register coalescer.
00163     bool isUncoalescableCopy(const MachineInstr &MI) {
00164       return MI.isBitcast() ||
00165              (!DisableAdvCopyOpt &&
00166               (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
00167                MI.isExtractSubregLike()));
00168     }
00169   };
00170 
00171   /// \brief Helper class to track the possible sources of a value defined by
00172   /// a (chain of) copy related instructions.
00173   /// Given a definition (instruction and definition index), this class
00174   /// follows the use-def chain to find successive suitable sources.
00175   /// The given source can be used to rewrite the definition into
00176   /// def = COPY src.
00177   ///
00178   /// For instance, let us consider the following snippet:
00179   /// v0 =
00180   /// v2 = INSERT_SUBREG v1, v0, sub0
00181   /// def = COPY v2.sub0
00182   ///
00183   /// Using a ValueTracker for def = COPY v2.sub0 will give the following
00184   /// suitable sources:
00185   /// v2.sub0 and v0.
00186   /// Then, def can be rewritten into def = COPY v0.
00187   class ValueTracker {
00188   private:
00189     /// The current point into the use-def chain.
00190     const MachineInstr *Def;
00191     /// The index of the definition in Def.
00192     unsigned DefIdx;
00193     /// The sub register index of the definition.
00194     unsigned DefSubReg;
00195     /// The register where the value can be found.
00196     unsigned Reg;
00197     /// Specifiy whether or not the value tracking looks through
00198     /// complex instructions. When this is false, the value tracker
00199     /// bails on everything that is not a copy or a bitcast.
00200     ///
00201     /// Note: This could have been implemented as a specialized version of
00202     /// the ValueTracker class but that would have complicated the code of
00203     /// the users of this class.
00204     bool UseAdvancedTracking;
00205     /// MachineRegisterInfo used to perform tracking.
00206     const MachineRegisterInfo &MRI;
00207     /// Optional TargetInstrInfo used to perform some complex
00208     /// tracking.
00209     const TargetInstrInfo *TII;
00210 
00211     /// \brief Dispatcher to the right underlying implementation of
00212     /// getNextSource.
00213     bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg);
00214     /// \brief Specialized version of getNextSource for Copy instructions.
00215     bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg);
00216     /// \brief Specialized version of getNextSource for Bitcast instructions.
00217     bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg);
00218     /// \brief Specialized version of getNextSource for RegSequence
00219     /// instructions.
00220     bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg);
00221     /// \brief Specialized version of getNextSource for InsertSubreg
00222     /// instructions.
00223     bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
00224     /// \brief Specialized version of getNextSource for ExtractSubreg
00225     /// instructions.
00226     bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
00227     /// \brief Specialized version of getNextSource for SubregToReg
00228     /// instructions.
00229     bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg);
00230 
00231   public:
00232     /// \brief Create a ValueTracker instance for the value defined by \p Reg.
00233     /// \p DefSubReg represents the sub register index the value tracker will
00234     /// track. It does not need to match the sub register index used in the
00235     /// definition of \p Reg.
00236     /// \p UseAdvancedTracking specifies whether or not the value tracker looks
00237     /// through complex instructions. By default (false), it handles only copy
00238     /// and bitcast instructions.
00239     /// If \p Reg is a physical register, a value tracker constructed with
00240     /// this constructor will not find any alternative source.
00241     /// Indeed, when \p Reg is a physical register that constructor does not
00242     /// know which definition of \p Reg it should track.
00243     /// Use the next constructor to track a physical register.
00244     ValueTracker(unsigned Reg, unsigned DefSubReg,
00245                  const MachineRegisterInfo &MRI,
00246                  bool UseAdvancedTracking = false,
00247                  const TargetInstrInfo *TII = nullptr)
00248         : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg),
00249           UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
00250       if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
00251         Def = MRI.getVRegDef(Reg);
00252         DefIdx = MRI.def_begin(Reg).getOperandNo();
00253       }
00254     }
00255 
00256     /// \brief Create a ValueTracker instance for the value defined by
00257     /// the pair \p MI, \p DefIdx.
00258     /// Unlike the other constructor, the value tracker produced by this one
00259     /// may be able to find a new source when the definition is a physical
00260     /// register.
00261     /// This could be useful to rewrite target specific instructions into
00262     /// generic copy instructions.
00263     ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
00264                  const MachineRegisterInfo &MRI,
00265                  bool UseAdvancedTracking = false,
00266                  const TargetInstrInfo *TII = nullptr)
00267         : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
00268           UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
00269       assert(DefIdx < Def->getDesc().getNumDefs() &&
00270              Def->getOperand(DefIdx).isReg() && "Invalid definition");
00271       Reg = Def->getOperand(DefIdx).getReg();
00272     }
00273 
00274     /// \brief Following the use-def chain, get the next available source
00275     /// for the tracked value.
00276     /// When the returned value is not nullptr, \p SrcReg gives the register
00277     /// that contain the tracked value.
00278     /// \note The sub register index returned in \p SrcSubReg must be used
00279     /// on \p SrcReg to access the actual value.
00280     /// \return Unless the returned value is nullptr (i.e., no source found),
00281     /// \p SrcReg gives the register of the next source used in the returned
00282     /// instruction and \p SrcSubReg the sub-register index to be used on that
00283     /// source to get the tracked value. When nullptr is returned, no
00284     /// alternative source has been found.
00285     const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg);
00286 
00287     /// \brief Get the last register where the initial value can be found.
00288     /// Initially this is the register of the definition.
00289     /// Then, after each successful call to getNextSource, this is the
00290     /// register of the last source.
00291     unsigned getReg() const { return Reg; }
00292   };
00293 }
00294 
00295 char PeepholeOptimizer::ID = 0;
00296 char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
00297 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
00298                 "Peephole Optimizations", false, false)
00299 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
00300 INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
00301                 "Peephole Optimizations", false, false)
00302 
00303 /// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
00304 /// a single register and writes a single register and it does not modify the
00305 /// source, and if the source value is preserved as a sub-register of the
00306 /// result, then replace all reachable uses of the source with the subreg of the
00307 /// result.
00308 ///
00309 /// Do not generate an EXTRACT that is used only in a debug use, as this changes
00310 /// the code. Since this code does not currently share EXTRACTs, just ignore all
00311 /// debug uses.
00312 bool PeepholeOptimizer::
00313 optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
00314                  SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
00315   unsigned SrcReg, DstReg, SubIdx;
00316   if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
00317     return false;
00318 
00319   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
00320       TargetRegisterInfo::isPhysicalRegister(SrcReg))
00321     return false;
00322 
00323   if (MRI->hasOneNonDBGUse(SrcReg))
00324     // No other uses.
00325     return false;
00326 
00327   // Ensure DstReg can get a register class that actually supports
00328   // sub-registers. Don't change the class until we commit.
00329   const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
00330   DstRC = TM->getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
00331       DstRC, SubIdx);
00332   if (!DstRC)
00333     return false;
00334 
00335   // The ext instr may be operating on a sub-register of SrcReg as well.
00336   // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
00337   // register.
00338   // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
00339   // SrcReg:SubIdx should be replaced.
00340   bool UseSrcSubIdx =
00341       TM->getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
00342           MRI->getRegClass(SrcReg), SubIdx) != nullptr;
00343 
00344   // The source has other uses. See if we can replace the other uses with use of
00345   // the result of the extension.
00346   SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
00347   for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
00348     ReachedBBs.insert(UI.getParent());
00349 
00350   // Uses that are in the same BB of uses of the result of the instruction.
00351   SmallVector<MachineOperand*, 8> Uses;
00352 
00353   // Uses that the result of the instruction can reach.
00354   SmallVector<MachineOperand*, 8> ExtendedUses;
00355 
00356   bool ExtendLife = true;
00357   for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
00358     MachineInstr *UseMI = UseMO.getParent();
00359     if (UseMI == MI)
00360       continue;
00361 
00362     if (UseMI->isPHI()) {
00363       ExtendLife = false;
00364       continue;
00365     }
00366 
00367     // Only accept uses of SrcReg:SubIdx.
00368     if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
00369       continue;
00370 
00371     // It's an error to translate this:
00372     //
00373     //    %reg1025 = <sext> %reg1024
00374     //     ...
00375     //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
00376     //
00377     // into this:
00378     //
00379     //    %reg1025 = <sext> %reg1024
00380     //     ...
00381     //    %reg1027 = COPY %reg1025:4
00382     //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
00383     //
00384     // The problem here is that SUBREG_TO_REG is there to assert that an
00385     // implicit zext occurs. It doesn't insert a zext instruction. If we allow
00386     // the COPY here, it will give us the value after the <sext>, not the
00387     // original value of %reg1024 before <sext>.
00388     if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
00389       continue;
00390 
00391     MachineBasicBlock *UseMBB = UseMI->getParent();
00392     if (UseMBB == MBB) {
00393       // Local uses that come after the extension.
00394       if (!LocalMIs.count(UseMI))
00395         Uses.push_back(&UseMO);
00396     } else if (ReachedBBs.count(UseMBB)) {
00397       // Non-local uses where the result of the extension is used. Always
00398       // replace these unless it's a PHI.
00399       Uses.push_back(&UseMO);
00400     } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
00401       // We may want to extend the live range of the extension result in order
00402       // to replace these uses.
00403       ExtendedUses.push_back(&UseMO);
00404     } else {
00405       // Both will be live out of the def MBB anyway. Don't extend live range of
00406       // the extension result.
00407       ExtendLife = false;
00408       break;
00409     }
00410   }
00411 
00412   if (ExtendLife && !ExtendedUses.empty())
00413     // Extend the liveness of the extension result.
00414     std::copy(ExtendedUses.begin(), ExtendedUses.end(),
00415               std::back_inserter(Uses));
00416 
00417   // Now replace all uses.
00418   bool Changed = false;
00419   if (!Uses.empty()) {
00420     SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
00421 
00422     // Look for PHI uses of the extended result, we don't want to extend the
00423     // liveness of a PHI input. It breaks all kinds of assumptions down
00424     // stream. A PHI use is expected to be the kill of its source values.
00425     for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
00426       if (UI.isPHI())
00427         PHIBBs.insert(UI.getParent());
00428 
00429     const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
00430     for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
00431       MachineOperand *UseMO = Uses[i];
00432       MachineInstr *UseMI = UseMO->getParent();
00433       MachineBasicBlock *UseMBB = UseMI->getParent();
00434       if (PHIBBs.count(UseMBB))
00435         continue;
00436 
00437       // About to add uses of DstReg, clear DstReg's kill flags.
00438       if (!Changed) {
00439         MRI->clearKillFlags(DstReg);
00440         MRI->constrainRegClass(DstReg, DstRC);
00441       }
00442 
00443       unsigned NewVR = MRI->createVirtualRegister(RC);
00444       MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
00445                                    TII->get(TargetOpcode::COPY), NewVR)
00446         .addReg(DstReg, 0, SubIdx);
00447       // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
00448       if (UseSrcSubIdx) {
00449         Copy->getOperand(0).setSubReg(SubIdx);
00450         Copy->getOperand(0).setIsUndef();
00451       }
00452       UseMO->setReg(NewVR);
00453       ++NumReuse;
00454       Changed = true;
00455     }
00456   }
00457 
00458   return Changed;
00459 }
00460 
00461 /// optimizeCmpInstr - If the instruction is a compare and the previous
00462 /// instruction it's comparing against all ready sets (or could be modified to
00463 /// set) the same flag as the compare, then we can remove the comparison and use
00464 /// the flag from the previous instruction.
00465 bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
00466                                          MachineBasicBlock *MBB) {
00467   // If this instruction is a comparison against zero and isn't comparing a
00468   // physical register, we can try to optimize it.
00469   unsigned SrcReg, SrcReg2;
00470   int CmpMask, CmpValue;
00471   if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
00472       TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
00473       (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
00474     return false;
00475 
00476   // Attempt to optimize the comparison instruction.
00477   if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
00478     ++NumCmps;
00479     return true;
00480   }
00481 
00482   return false;
00483 }
00484 
00485 /// Optimize a select instruction.
00486 bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
00487   unsigned TrueOp = 0;
00488   unsigned FalseOp = 0;
00489   bool Optimizable = false;
00490   SmallVector<MachineOperand, 4> Cond;
00491   if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
00492     return false;
00493   if (!Optimizable)
00494     return false;
00495   if (!TII->optimizeSelect(MI))
00496     return false;
00497   MI->eraseFromParent();
00498   ++NumSelects;
00499   return true;
00500 }
00501 
00502 /// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
00503 /// share the same register file.
00504 static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
00505                                   const TargetRegisterClass *DefRC,
00506                                   unsigned DefSubReg,
00507                                   const TargetRegisterClass *SrcRC,
00508                                   unsigned SrcSubReg) {
00509   // Same register class.
00510   if (DefRC == SrcRC)
00511     return true;
00512 
00513   // Both operands are sub registers. Check if they share a register class.
00514   unsigned SrcIdx, DefIdx;
00515   if (SrcSubReg && DefSubReg)
00516     return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
00517                                       SrcIdx, DefIdx) != nullptr;
00518   // At most one of the register is a sub register, make it Src to avoid
00519   // duplicating the test.
00520   if (!SrcSubReg) {
00521     std::swap(DefSubReg, SrcSubReg);
00522     std::swap(DefRC, SrcRC);
00523   }
00524 
00525   // One of the register is a sub register, check if we can get a superclass.
00526   if (SrcSubReg)
00527     return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
00528   // Plain copy.
00529   return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
00530 }
00531 
00532 /// \brief Try to find the next source that share the same register file
00533 /// for the value defined by \p Reg and \p SubReg.
00534 /// When true is returned, \p Reg and \p SubReg are updated with the
00535 /// register number and sub-register index of the new source.
00536 /// \return False if no alternative sources are available. True otherwise.
00537 bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) {
00538   // Do not try to find a new source for a physical register.
00539   // So far we do not have any motivating example for doing that.
00540   // Thus, instead of maintaining untested code, we will revisit that if
00541   // that changes at some point.
00542   if (TargetRegisterInfo::isPhysicalRegister(Reg))
00543     return false;
00544 
00545   const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
00546   unsigned DefSubReg = SubReg;
00547 
00548   unsigned Src;
00549   unsigned SrcSubReg;
00550   bool ShouldRewrite = false;
00551   const TargetRegisterInfo &TRI = *TM->getSubtargetImpl()->getRegisterInfo();
00552 
00553   // Follow the chain of copies until we reach the top of the use-def chain
00554   // or find a more suitable source.
00555   ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII);
00556   do {
00557     unsigned CopySrcReg, CopySrcSubReg;
00558     if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg))
00559       break;
00560     Src = CopySrcReg;
00561     SrcSubReg = CopySrcSubReg;
00562 
00563     // Do not extend the live-ranges of physical registers as they add
00564     // constraints to the register allocator.
00565     // Moreover, if we want to extend the live-range of a physical register,
00566     // unlike SSA virtual register, we will have to check that they are not
00567     // redefine before the related use.
00568     if (TargetRegisterInfo::isPhysicalRegister(Src))
00569       break;
00570 
00571     const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
00572 
00573     // If this source does not incur a cross register bank copy, use it.
00574     ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC,
00575                                           SrcSubReg);
00576   } while (!ShouldRewrite);
00577 
00578   // If we did not find a more suitable source, there is nothing to optimize.
00579   if (!ShouldRewrite || Src == Reg)
00580     return false;
00581 
00582   Reg = Src;
00583   SubReg = SrcSubReg;
00584   return true;
00585 }
00586 
00587 namespace {
00588 /// \brief Helper class to rewrite the arguments of a copy-like instruction.
00589 class CopyRewriter {
00590 protected:
00591   /// The copy-like instruction.
00592   MachineInstr &CopyLike;
00593   /// The index of the source being rewritten.
00594   unsigned CurrentSrcIdx;
00595 
00596 public:
00597   CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {}
00598 
00599   virtual ~CopyRewriter() {}
00600 
00601   /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and
00602   /// the related value that it affects (TrackReg, TrackSubReg).
00603   /// A source is considered rewritable if its register class and the
00604   /// register class of the related TrackReg may not be register
00605   /// coalescer friendly. In other words, given a copy-like instruction
00606   /// not all the arguments may be returned at rewritable source, since
00607   /// some arguments are none to be register coalescer friendly.
00608   ///
00609   /// Each call of this method moves the current source to the next
00610   /// rewritable source.
00611   /// For instance, let CopyLike be the instruction to rewrite.
00612   /// CopyLike has one definition and one source:
00613   /// dst.dstSubIdx = CopyLike src.srcSubIdx.
00614   ///
00615   /// The first call will give the first rewritable source, i.e.,
00616   /// the only source this instruction has:
00617   /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
00618   /// This source defines the whole definition, i.e.,
00619   /// (TrackReg, TrackSubReg) = (dst, dstSubIdx).
00620   ///
00621   /// The second and subsequent calls will return false, has there is only one
00622   /// rewritable source.
00623   ///
00624   /// \return True if a rewritable source has been found, false otherwise.
00625   /// The output arguments are valid if and only if true is returned.
00626   virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
00627                                        unsigned &TrackReg,
00628                                        unsigned &TrackSubReg) {
00629     // If CurrentSrcIdx == 1, this means this function has already been
00630     // called once. CopyLike has one defintiion and one argument, thus,
00631     // there is nothing else to rewrite.
00632     if (!CopyLike.isCopy() || CurrentSrcIdx == 1)
00633       return false;
00634     // This is the first call to getNextRewritableSource.
00635     // Move the CurrentSrcIdx to remember that we made that call.
00636     CurrentSrcIdx = 1;
00637     // The rewritable source is the argument.
00638     const MachineOperand &MOSrc = CopyLike.getOperand(1);
00639     SrcReg = MOSrc.getReg();
00640     SrcSubReg = MOSrc.getSubReg();
00641     // What we track are the alternative sources of the definition.
00642     const MachineOperand &MODef = CopyLike.getOperand(0);
00643     TrackReg = MODef.getReg();
00644     TrackSubReg = MODef.getSubReg();
00645     return true;
00646   }
00647 
00648   /// \brief Rewrite the current source with \p NewReg and \p NewSubReg
00649   /// if possible.
00650   /// \return True if the rewritting was possible, false otherwise.
00651   virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) {
00652     if (!CopyLike.isCopy() || CurrentSrcIdx != 1)
00653       return false;
00654     MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
00655     MOSrc.setReg(NewReg);
00656     MOSrc.setSubReg(NewSubReg);
00657     return true;
00658   }
00659 };
00660 
00661 /// \brief Specialized rewriter for INSERT_SUBREG instruction.
00662 class InsertSubregRewriter : public CopyRewriter {
00663 public:
00664   InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) {
00665     assert(MI.isInsertSubreg() && "Invalid instruction");
00666   }
00667 
00668   /// \brief See CopyRewriter::getNextRewritableSource.
00669   /// Here CopyLike has the following form:
00670   /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
00671   /// Src1 has the same register class has dst, hence, there is
00672   /// nothing to rewrite.
00673   /// Src2.src2SubIdx, may not be register coalescer friendly.
00674   /// Therefore, the first call to this method returns:
00675   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
00676   /// (TrackReg, TrackSubReg) = (dst, subIdx).
00677   ///
00678   /// Subsequence calls will return false.
00679   bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
00680                                unsigned &TrackReg,
00681                                unsigned &TrackSubReg) override {
00682     // If we already get the only source we can rewrite, return false.
00683     if (CurrentSrcIdx == 2)
00684       return false;
00685     // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
00686     CurrentSrcIdx = 2;
00687     const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
00688     SrcReg = MOInsertedReg.getReg();
00689     SrcSubReg = MOInsertedReg.getSubReg();
00690     const MachineOperand &MODef = CopyLike.getOperand(0);
00691 
00692     // We want to track something that is compatible with the
00693     // partial definition.
00694     TrackReg = MODef.getReg();
00695     if (MODef.getSubReg())
00696       // Bails if we have to compose sub-register indices.
00697       return false;
00698     TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
00699     return true;
00700   }
00701   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
00702     if (CurrentSrcIdx != 2)
00703       return false;
00704     // We are rewriting the inserted reg.
00705     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
00706     MO.setReg(NewReg);
00707     MO.setSubReg(NewSubReg);
00708     return true;
00709   }
00710 };
00711 
00712 /// \brief Specialized rewriter for EXTRACT_SUBREG instruction.
00713 class ExtractSubregRewriter : public CopyRewriter {
00714   const TargetInstrInfo &TII;
00715 
00716 public:
00717   ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
00718       : CopyRewriter(MI), TII(TII) {
00719     assert(MI.isExtractSubreg() && "Invalid instruction");
00720   }
00721 
00722   /// \brief See CopyRewriter::getNextRewritableSource.
00723   /// Here CopyLike has the following form:
00724   /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
00725   /// There is only one rewritable source: Src.subIdx,
00726   /// which defines dst.dstSubIdx.
00727   bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
00728                                unsigned &TrackReg,
00729                                unsigned &TrackSubReg) override {
00730     // If we already get the only source we can rewrite, return false.
00731     if (CurrentSrcIdx == 1)
00732       return false;
00733     // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
00734     CurrentSrcIdx = 1;
00735     const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
00736     SrcReg = MOExtractedReg.getReg();
00737     // If we have to compose sub-register indices, bails out.
00738     if (MOExtractedReg.getSubReg())
00739       return false;
00740 
00741     SrcSubReg = CopyLike.getOperand(2).getImm();
00742 
00743     // We want to track something that is compatible with the definition.
00744     const MachineOperand &MODef = CopyLike.getOperand(0);
00745     TrackReg = MODef.getReg();
00746     TrackSubReg = MODef.getSubReg();
00747     return true;
00748   }
00749 
00750   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
00751     // The only source we can rewrite is the input register.
00752     if (CurrentSrcIdx != 1)
00753       return false;
00754 
00755     CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
00756 
00757     // If we find a source that does not require to extract something,
00758     // rewrite the operation with a copy.
00759     if (!NewSubReg) {
00760       // Move the current index to an invalid position.
00761       // We do not want another call to this method to be able
00762       // to do any change.
00763       CurrentSrcIdx = -1;
00764       // Rewrite the operation as a COPY.
00765       // Get rid of the sub-register index.
00766       CopyLike.RemoveOperand(2);
00767       // Morph the operation into a COPY.
00768       CopyLike.setDesc(TII.get(TargetOpcode::COPY));
00769       return true;
00770     }
00771     CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
00772     return true;
00773   }
00774 };
00775 
00776 /// \brief Specialized rewriter for REG_SEQUENCE instruction.
00777 class RegSequenceRewriter : public CopyRewriter {
00778 public:
00779   RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) {
00780     assert(MI.isRegSequence() && "Invalid instruction");
00781   }
00782 
00783   /// \brief See CopyRewriter::getNextRewritableSource.
00784   /// Here CopyLike has the following form:
00785   /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
00786   /// Each call will return a different source, walking all the available
00787   /// source.
00788   ///
00789   /// The first call returns:
00790   /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
00791   /// (TrackReg, TrackSubReg) = (dst, subIdx1).
00792   ///
00793   /// The second call returns:
00794   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
00795   /// (TrackReg, TrackSubReg) = (dst, subIdx2).
00796   ///
00797   /// And so on, until all the sources have been traversed, then
00798   /// it returns false.
00799   bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
00800                                unsigned &TrackReg,
00801                                unsigned &TrackSubReg) override {
00802     // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
00803 
00804     // If this is the first call, move to the first argument.
00805     if (CurrentSrcIdx == 0) {
00806       CurrentSrcIdx = 1;
00807     } else {
00808       // Otherwise, move to the next argument and check that it is valid.
00809       CurrentSrcIdx += 2;
00810       if (CurrentSrcIdx >= CopyLike.getNumOperands())
00811         return false;
00812     }
00813     const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
00814     SrcReg = MOInsertedReg.getReg();
00815     // If we have to compose sub-register indices, bails out.
00816     if ((SrcSubReg = MOInsertedReg.getSubReg()))
00817       return false;
00818 
00819     // We want to track something that is compatible with the related
00820     // partial definition.
00821     TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
00822 
00823     const MachineOperand &MODef = CopyLike.getOperand(0);
00824     TrackReg = MODef.getReg();
00825     // If we have to compose sub-registers, bails.
00826     return MODef.getSubReg() == 0;
00827   }
00828 
00829   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
00830     // We cannot rewrite out of bound operands.
00831     // Moreover, rewritable sources are at odd positions.
00832     if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
00833       return false;
00834 
00835     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
00836     MO.setReg(NewReg);
00837     MO.setSubReg(NewSubReg);
00838     return true;
00839   }
00840 };
00841 } // End namespace.
00842 
00843 /// \brief Get the appropriated CopyRewriter for \p MI.
00844 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr
00845 /// if no rewriter works for \p MI.
00846 static CopyRewriter *getCopyRewriter(MachineInstr &MI,
00847                                      const TargetInstrInfo &TII) {
00848   switch (MI.getOpcode()) {
00849   default:
00850     return nullptr;
00851   case TargetOpcode::COPY:
00852     return new CopyRewriter(MI);
00853   case TargetOpcode::INSERT_SUBREG:
00854     return new InsertSubregRewriter(MI);
00855   case TargetOpcode::EXTRACT_SUBREG:
00856     return new ExtractSubregRewriter(MI, TII);
00857   case TargetOpcode::REG_SEQUENCE:
00858     return new RegSequenceRewriter(MI);
00859   }
00860   llvm_unreachable(nullptr);
00861 }
00862 
00863 /// \brief Optimize generic copy instructions to avoid cross
00864 /// register bank copy. The optimization looks through a chain of
00865 /// copies and tries to find a source that has a compatible register
00866 /// class.
00867 /// Two register classes are considered to be compatible if they share
00868 /// the same register bank.
00869 /// New copies issued by this optimization are register allocator
00870 /// friendly. This optimization does not remove any copy as it may
00871 /// overconstraint the register allocator, but replaces some operands
00872 /// when possible.
00873 /// \pre isCoalescableCopy(*MI) is true.
00874 /// \return True, when \p MI has been rewritten. False otherwise.
00875 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
00876   assert(MI && isCoalescableCopy(*MI) && "Invalid argument");
00877   assert(MI->getDesc().getNumDefs() == 1 &&
00878          "Coalescer can understand multiple defs?!");
00879   const MachineOperand &MODef = MI->getOperand(0);
00880   // Do not rewrite physical definitions.
00881   if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
00882     return false;
00883 
00884   bool Changed = false;
00885   // Get the right rewriter for the current copy.
00886   std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII));
00887   // If none exists, bails out.
00888   if (!CpyRewriter)
00889     return false;
00890   // Rewrite each rewritable source.
00891   unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
00892   while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
00893                                               TrackSubReg)) {
00894     unsigned NewSrc = TrackReg;
00895     unsigned NewSubReg = TrackSubReg;
00896     // Try to find a more suitable source.
00897     // If we failed to do so, or get the actual source,
00898     // move to the next source.
00899     if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc)
00900       continue;
00901     // Rewrite source.
00902     if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) {
00903       // We may have extended the live-range of NewSrc, account for that.
00904       MRI->clearKillFlags(NewSrc);
00905       Changed = true;
00906     }
00907   }
00908   // TODO: We could have a clean-up method to tidy the instruction.
00909   // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
00910   // => v0 = COPY v1
00911   // Currently we haven't seen motivating example for that and we
00912   // want to avoid untested code.
00913   NumRewrittenCopies += Changed == true;
00914   return Changed;
00915 }
00916 
00917 /// \brief Optimize copy-like instructions to create
00918 /// register coalescer friendly instruction.
00919 /// The optimization tries to kill-off the \p MI by looking
00920 /// through a chain of copies to find a source that has a compatible
00921 /// register class.
00922 /// If such a source is found, it replace \p MI by a generic COPY
00923 /// operation.
00924 /// \pre isUncoalescableCopy(*MI) is true.
00925 /// \return True, when \p MI has been optimized. In that case, \p MI has
00926 /// been removed from its parent.
00927 /// All COPY instructions created, are inserted in \p LocalMIs.
00928 bool PeepholeOptimizer::optimizeUncoalescableCopy(
00929     MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
00930   assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");
00931 
00932   // Check if we can rewrite all the values defined by this instruction.
00933   SmallVector<
00934       std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>,
00935       4> RewritePairs;
00936   for (const MachineOperand &MODef : MI->defs()) {
00937     if (MODef.isDead())
00938       // We can ignore those.
00939       continue;
00940 
00941     // If a physical register is here, this is probably for a good reason.
00942     // Do not rewrite that.
00943     if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
00944       return false;
00945 
00946     // If we do not know how to rewrite this definition, there is no point
00947     // in trying to kill this instruction.
00948     TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg());
00949     TargetInstrInfo::RegSubRegPair Src = Def;
00950     if (!findNextSource(Src.Reg, Src.SubReg))
00951       return false;
00952     RewritePairs.push_back(std::make_pair(Def, Src));
00953   }
00954   // The change is possible for all defs, do it.
00955   for (const auto &PairDefSrc : RewritePairs) {
00956     const auto &Def = PairDefSrc.first;
00957     const auto &Src = PairDefSrc.second;
00958     // Rewrite the "copy" in a way the register coalescer understands.
00959     assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
00960            "We do not rewrite physical registers");
00961     const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
00962     unsigned NewVR = MRI->createVirtualRegister(DefRC);
00963     MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
00964                                     TII->get(TargetOpcode::COPY),
00965                                     NewVR).addReg(Src.Reg, 0, Src.SubReg);
00966     NewCopy->getOperand(0).setSubReg(Def.SubReg);
00967     if (Def.SubReg)
00968       NewCopy->getOperand(0).setIsUndef();
00969     LocalMIs.insert(NewCopy);
00970     MRI->replaceRegWith(Def.Reg, NewVR);
00971     MRI->clearKillFlags(NewVR);
00972     // We extended the lifetime of Src.
00973     // Clear the kill flags to account for that.
00974     MRI->clearKillFlags(Src.Reg);
00975   }
00976   // MI is now dead.
00977   MI->eraseFromParent();
00978   ++NumUncoalescableCopies;
00979   return true;
00980 }
00981 
00982 /// isLoadFoldable - Check whether MI is a candidate for folding into a later
00983 /// instruction. We only fold loads to virtual registers and the virtual
00984 /// register defined has a single use.
00985 bool PeepholeOptimizer::isLoadFoldable(
00986                               MachineInstr *MI,
00987                               SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
00988   if (!MI->canFoldAsLoad() || !MI->mayLoad())
00989     return false;
00990   const MCInstrDesc &MCID = MI->getDesc();
00991   if (MCID.getNumDefs() != 1)
00992     return false;
00993 
00994   unsigned Reg = MI->getOperand(0).getReg();
00995   // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
00996   // loads. It should be checked when processing uses of the load, since
00997   // uses can be removed during peephole.
00998   if (!MI->getOperand(0).getSubReg() &&
00999       TargetRegisterInfo::isVirtualRegister(Reg) &&
01000       MRI->hasOneNonDBGUse(Reg)) {
01001     FoldAsLoadDefCandidates.insert(Reg);
01002     return true;
01003   }
01004   return false;
01005 }
01006 
01007 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
01008                                         SmallSet<unsigned, 4> &ImmDefRegs,
01009                                  DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
01010   const MCInstrDesc &MCID = MI->getDesc();
01011   if (!MI->isMoveImmediate())
01012     return false;
01013   if (MCID.getNumDefs() != 1)
01014     return false;
01015   unsigned Reg = MI->getOperand(0).getReg();
01016   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
01017     ImmDefMIs.insert(std::make_pair(Reg, MI));
01018     ImmDefRegs.insert(Reg);
01019     return true;
01020   }
01021 
01022   return false;
01023 }
01024 
01025 /// foldImmediate - Try folding register operands that are defined by move
01026 /// immediate instructions, i.e. a trivial constant folding optimization, if
01027 /// and only if the def and use are in the same BB.
01028 bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
01029                                       SmallSet<unsigned, 4> &ImmDefRegs,
01030                                  DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
01031   for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
01032     MachineOperand &MO = MI->getOperand(i);
01033     if (!MO.isReg() || MO.isDef())
01034       continue;
01035     unsigned Reg = MO.getReg();
01036     if (!TargetRegisterInfo::isVirtualRegister(Reg))
01037       continue;
01038     if (ImmDefRegs.count(Reg) == 0)
01039       continue;
01040     DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
01041     assert(II != ImmDefMIs.end());
01042     if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
01043       ++NumImmFold;
01044       return true;
01045     }
01046   }
01047   return false;
01048 }
01049 
01050 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
01051   if (skipOptnoneFunction(*MF.getFunction()))
01052     return false;
01053 
01054   DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
01055   DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
01056 
01057   if (DisablePeephole)
01058     return false;
01059 
01060   TM  = &MF.getTarget();
01061   TII = TM->getSubtargetImpl()->getInstrInfo();
01062   MRI = &MF.getRegInfo();
01063   DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
01064 
01065   bool Changed = false;
01066 
01067   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
01068     MachineBasicBlock *MBB = &*I;
01069 
01070     bool SeenMoveImm = false;
01071     SmallPtrSet<MachineInstr*, 16> LocalMIs;
01072     SmallSet<unsigned, 4> ImmDefRegs;
01073     DenseMap<unsigned, MachineInstr*> ImmDefMIs;
01074     SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
01075 
01076     for (MachineBasicBlock::iterator
01077            MII = I->begin(), MIE = I->end(); MII != MIE; ) {
01078       MachineInstr *MI = &*MII;
01079       // We may be erasing MI below, increment MII now.
01080       ++MII;
01081       LocalMIs.insert(MI);
01082 
01083       // Skip debug values. They should not affect this peephole optimization.
01084       if (MI->isDebugValue())
01085           continue;
01086 
01087       // If there exists an instruction which belongs to the following
01088       // categories, we will discard the load candidates.
01089       if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
01090           MI->isKill() || MI->isInlineAsm() ||
01091           MI->hasUnmodeledSideEffects()) {
01092         FoldAsLoadDefCandidates.clear();
01093         continue;
01094       }
01095       if (MI->mayStore() || MI->isCall())
01096         FoldAsLoadDefCandidates.clear();
01097 
01098       if ((isUncoalescableCopy(*MI) &&
01099            optimizeUncoalescableCopy(MI, LocalMIs)) ||
01100           (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
01101           (MI->isSelect() && optimizeSelect(MI))) {
01102         // MI is deleted.
01103         LocalMIs.erase(MI);
01104         Changed = true;
01105         continue;
01106       }
01107 
01108       if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) {
01109         // MI is just rewritten.
01110         Changed = true;
01111         continue;
01112       }
01113 
01114       if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
01115         SeenMoveImm = true;
01116       } else {
01117         Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
01118         // optimizeExtInstr might have created new instructions after MI
01119         // and before the already incremented MII. Adjust MII so that the
01120         // next iteration sees the new instructions.
01121         MII = MI;
01122         ++MII;
01123         if (SeenMoveImm)
01124           Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
01125       }
01126 
01127       // Check whether MI is a load candidate for folding into a later
01128       // instruction. If MI is not a candidate, check whether we can fold an
01129       // earlier load into MI.
01130       if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
01131           !FoldAsLoadDefCandidates.empty()) {
01132         const MCInstrDesc &MIDesc = MI->getDesc();
01133         for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
01134              ++i) {
01135           const MachineOperand &MOp = MI->getOperand(i);
01136           if (!MOp.isReg())
01137             continue;
01138           unsigned FoldAsLoadDefReg = MOp.getReg();
01139           if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
01140             // We need to fold load after optimizeCmpInstr, since
01141             // optimizeCmpInstr can enable folding by converting SUB to CMP.
01142             // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
01143             // we need it for markUsesInDebugValueAsUndef().
01144             unsigned FoldedReg = FoldAsLoadDefReg;
01145             MachineInstr *DefMI = nullptr;
01146             MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
01147                                                           FoldAsLoadDefReg,
01148                                                           DefMI);
01149             if (FoldMI) {
01150               // Update LocalMIs since we replaced MI with FoldMI and deleted
01151               // DefMI.
01152               DEBUG(dbgs() << "Replacing: " << *MI);
01153               DEBUG(dbgs() << "     With: " << *FoldMI);
01154               LocalMIs.erase(MI);
01155               LocalMIs.erase(DefMI);
01156               LocalMIs.insert(FoldMI);
01157               MI->eraseFromParent();
01158               DefMI->eraseFromParent();
01159               MRI->markUsesInDebugValueAsUndef(FoldedReg);
01160               FoldAsLoadDefCandidates.erase(FoldedReg);
01161               ++NumLoadFold;
01162               // MI is replaced with FoldMI.
01163               Changed = true;
01164               break;
01165             }
01166           }
01167         }
01168       }
01169     }
01170   }
01171 
01172   return Changed;
01173 }
01174 
01175 bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg,
01176                                          unsigned &SrcSubReg) {
01177   assert(Def->isCopy() && "Invalid definition");
01178   // Copy instruction are supposed to be: Def = Src.
01179   // If someone breaks this assumption, bad things will happen everywhere.
01180   assert(Def->getNumOperands() == 2 && "Invalid number of operands");
01181 
01182   if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
01183     // If we look for a different subreg, it means we want a subreg of src.
01184     // Bails as we do not support composing subreg yet.
01185     return false;
01186   // Otherwise, we want the whole source.
01187   const MachineOperand &Src = Def->getOperand(1);
01188   SrcReg = Src.getReg();
01189   SrcSubReg = Src.getSubReg();
01190   return true;
01191 }
01192 
01193 bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg,
01194                                             unsigned &SrcSubReg) {
01195   assert(Def->isBitcast() && "Invalid definition");
01196 
01197   // Bail if there are effects that a plain copy will not expose.
01198   if (Def->hasUnmodeledSideEffects())
01199     return false;
01200 
01201   // Bitcasts with more than one def are not supported.
01202   if (Def->getDesc().getNumDefs() != 1)
01203     return false;
01204   if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
01205     // If we look for a different subreg, it means we want a subreg of the src.
01206     // Bails as we do not support composing subreg yet.
01207     return false;
01208 
01209   unsigned SrcIdx = Def->getNumOperands();
01210   for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
01211        ++OpIdx) {
01212     const MachineOperand &MO = Def->getOperand(OpIdx);
01213     if (!MO.isReg() || !MO.getReg())
01214       continue;
01215     assert(!MO.isDef() && "We should have skipped all the definitions by now");
01216     if (SrcIdx != EndOpIdx)
01217       // Multiple sources?
01218       return false;
01219     SrcIdx = OpIdx;
01220   }
01221   const MachineOperand &Src = Def->getOperand(SrcIdx);
01222   SrcReg = Src.getReg();
01223   SrcSubReg = Src.getSubReg();
01224   return true;
01225 }
01226 
01227 bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
01228                                                 unsigned &SrcSubReg) {
01229   assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
01230          "Invalid definition");
01231 
01232   if (Def->getOperand(DefIdx).getSubReg())
01233     // If we are composing subreg, bails out.
01234     // The case we are checking is Def.<subreg> = REG_SEQUENCE.
01235     // This should almost never happen as the SSA property is tracked at
01236     // the register level (as opposed to the subreg level).
01237     // I.e.,
01238     // Def.sub0 =
01239     // Def.sub1 =
01240     // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
01241     // Def. Thus, it must not be generated.
01242     // However, some code could theoretically generates a single
01243     // Def.sub0 (i.e, not defining the other subregs) and we would
01244     // have this case.
01245     // If we can ascertain (or force) that this never happens, we could
01246     // turn that into an assertion.
01247     return false;
01248 
01249   if (!TII)
01250     // We could handle the REG_SEQUENCE here, but we do not want to
01251     // duplicate the code from the generic TII.
01252     return false;
01253 
01254   SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs;
01255   if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
01256     return false;
01257 
01258   // We are looking at:
01259   // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
01260   // Check if one of the operand defines the subreg we are interested in.
01261   for (auto &RegSeqInput : RegSeqInputRegs) {
01262     if (RegSeqInput.SubIdx == DefSubReg) {
01263       if (RegSeqInput.SubReg)
01264         // Bails if we have to compose sub registers.
01265         return false;
01266 
01267       SrcReg = RegSeqInput.Reg;
01268       SrcSubReg = RegSeqInput.SubReg;
01269       return true;
01270     }
01271   }
01272 
01273   // If the subreg we are tracking is super-defined by another subreg,
01274   // we could follow this value. However, this would require to compose
01275   // the subreg and we do not do that for now.
01276   return false;
01277 }
01278 
01279 bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
01280                                                  unsigned &SrcSubReg) {
01281   assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
01282          "Invalid definition");
01283 
01284   if (Def->getOperand(DefIdx).getSubReg())
01285     // If we are composing subreg, bails out.
01286     // Same remark as getNextSourceFromRegSequence.
01287     // I.e., this may be turned into an assert.
01288     return false;
01289 
01290   if (!TII)
01291     // We could handle the REG_SEQUENCE here, but we do not want to
01292     // duplicate the code from the generic TII.
01293     return false;
01294 
01295   TargetInstrInfo::RegSubRegPair BaseReg;
01296   TargetInstrInfo::RegSubRegPairAndIdx InsertedReg;
01297   if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
01298     return false;
01299 
01300   // We are looking at:
01301   // Def = INSERT_SUBREG v0, v1, sub1
01302   // There are two cases:
01303   // 1. DefSubReg == sub1, get v1.
01304   // 2. DefSubReg != sub1, the value may be available through v0.
01305 
01306   // #1 Check if the inserted register matches the required sub index.
01307   if (InsertedReg.SubIdx == DefSubReg) {
01308     SrcReg = InsertedReg.Reg;
01309     SrcSubReg = InsertedReg.SubReg;
01310     return true;
01311   }
01312   // #2 Otherwise, if the sub register we are looking for is not partial
01313   // defined by the inserted element, we can look through the main
01314   // register (v0).
01315   const MachineOperand &MODef = Def->getOperand(DefIdx);
01316   // If the result register (Def) and the base register (v0) do not
01317   // have the same register class or if we have to compose
01318   // subregisters, bails out.
01319   if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
01320       BaseReg.SubReg)
01321     return false;
01322 
01323   // Get the TRI and check if the inserted sub-register overlaps with the
01324   // sub-register we are tracking.
01325   const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
01326   if (!TRI ||
01327       (TRI->getSubRegIndexLaneMask(DefSubReg) &
01328        TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
01329     return false;
01330   // At this point, the value is available in v0 via the same subreg
01331   // we used for Def.
01332   SrcReg = BaseReg.Reg;
01333   SrcSubReg = DefSubReg;
01334   return true;
01335 }
01336 
01337 bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg,
01338                                                   unsigned &SrcSubReg) {
01339   assert((Def->isExtractSubreg() ||
01340           Def->isExtractSubregLike()) && "Invalid definition");
01341   // We are looking at:
01342   // Def = EXTRACT_SUBREG v0, sub0
01343 
01344   // Bails if we have to compose sub registers.
01345   // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
01346   if (DefSubReg)
01347     return false;
01348 
01349   if (!TII)
01350     // We could handle the EXTRACT_SUBREG here, but we do not want to
01351     // duplicate the code from the generic TII.
01352     return false;
01353 
01354   TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg;
01355   if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
01356     return false;
01357 
01358   // Bails if we have to compose sub registers.
01359   // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
01360   if (ExtractSubregInputReg.SubReg)
01361     return false;
01362   // Otherwise, the value is available in the v0.sub0.
01363   SrcReg = ExtractSubregInputReg.Reg;
01364   SrcSubReg = ExtractSubregInputReg.SubIdx;
01365   return true;
01366 }
01367 
01368 bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg,
01369                                                 unsigned &SrcSubReg) {
01370   assert(Def->isSubregToReg() && "Invalid definition");
01371   // We are looking at:
01372   // Def = SUBREG_TO_REG Imm, v0, sub0
01373 
01374   // Bails if we have to compose sub registers.
01375   // If DefSubReg != sub0, we would have to check that all the bits
01376   // we track are included in sub0 and if yes, we would have to
01377   // determine the right subreg in v0.
01378   if (DefSubReg != Def->getOperand(3).getImm())
01379     return false;
01380   // Bails if we have to compose sub registers.
01381   // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
01382   if (Def->getOperand(2).getSubReg())
01383     return false;
01384 
01385   SrcReg = Def->getOperand(2).getReg();
01386   SrcSubReg = Def->getOperand(3).getImm();
01387   return true;
01388 }
01389 
01390 bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) {
01391   assert(Def && "This method needs a valid definition");
01392 
01393   assert(
01394       (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
01395       Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
01396   if (Def->isCopy())
01397     return getNextSourceFromCopy(SrcReg, SrcSubReg);
01398   if (Def->isBitcast())
01399     return getNextSourceFromBitcast(SrcReg, SrcSubReg);
01400   // All the remaining cases involve "complex" instructions.
01401   // Bails if we did not ask for the advanced tracking.
01402   if (!UseAdvancedTracking)
01403     return false;
01404   if (Def->isRegSequence() || Def->isRegSequenceLike())
01405     return getNextSourceFromRegSequence(SrcReg, SrcSubReg);
01406   if (Def->isInsertSubreg() || Def->isInsertSubregLike())
01407     return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg);
01408   if (Def->isExtractSubreg() || Def->isExtractSubregLike())
01409     return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg);
01410   if (Def->isSubregToReg())
01411     return getNextSourceFromSubregToReg(SrcReg, SrcSubReg);
01412   return false;
01413 }
01414 
01415 const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg,
01416                                                 unsigned &SrcSubReg) {
01417   // If we reach a point where we cannot move up in the use-def chain,
01418   // there is nothing we can get.
01419   if (!Def)
01420     return nullptr;
01421 
01422   const MachineInstr *PrevDef = nullptr;
01423   // Try to find the next source.
01424   if (getNextSourceImpl(SrcReg, SrcSubReg)) {
01425     // Update definition, definition index, and subregister for the
01426     // next call of getNextSource.
01427     // Update the current register.
01428     Reg = SrcReg;
01429     // Update the return value before moving up in the use-def chain.
01430     PrevDef = Def;
01431     // If we can still move up in the use-def chain, move to the next
01432     // defintion.
01433     if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
01434       Def = MRI.getVRegDef(Reg);
01435       DefIdx = MRI.def_begin(Reg).getOperandNo();
01436       DefSubReg = SrcSubReg;
01437       return PrevDef;
01438     }
01439   }
01440   // If we end up here, this means we will not be able to find another source
01441   // for the next iteration.
01442   // Make sure any new call to getNextSource bails out early by cutting the
01443   // use-def chain.
01444   Def = nullptr;
01445   return PrevDef;
01446 }