LLVM API Documentation

MachineCSE.cpp
Go to the documentation of this file.
00001 //===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This pass performs global common subexpression elimination on machine
00011 // instructions using a scoped hash table based value numbering scheme. It
00012 // must be run while the machine function is still in SSA form.
00013 //
00014 //===----------------------------------------------------------------------===//
00015 
00016 #include "llvm/CodeGen/Passes.h"
00017 #include "llvm/ADT/DenseMap.h"
00018 #include "llvm/ADT/ScopedHashTable.h"
00019 #include "llvm/ADT/SmallSet.h"
00020 #include "llvm/ADT/Statistic.h"
00021 #include "llvm/Analysis/AliasAnalysis.h"
00022 #include "llvm/CodeGen/MachineDominators.h"
00023 #include "llvm/CodeGen/MachineInstr.h"
00024 #include "llvm/CodeGen/MachineRegisterInfo.h"
00025 #include "llvm/Support/Debug.h"
00026 #include "llvm/Support/RecyclingAllocator.h"
00027 #include "llvm/Target/TargetInstrInfo.h"
00028 #include "llvm/Target/TargetSubtargetInfo.h"
00029 using namespace llvm;
00030 
00031 #define DEBUG_TYPE "machine-cse"
00032 
00033 STATISTIC(NumCoalesces, "Number of copies coalesced");
00034 STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
00035 STATISTIC(NumPhysCSEs,
00036           "Number of physreg referencing common subexpr eliminated");
00037 STATISTIC(NumCrossBBCSEs,
00038           "Number of cross-MBB physreg referencing CS eliminated");
00039 STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");
00040 
00041 namespace {
00042   class MachineCSE : public MachineFunctionPass {
00043     const TargetInstrInfo *TII;
00044     const TargetRegisterInfo *TRI;
00045     AliasAnalysis *AA;
00046     MachineDominatorTree *DT;
00047     MachineRegisterInfo *MRI;
00048   public:
00049     static char ID; // Pass identification
00050     MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
00051       initializeMachineCSEPass(*PassRegistry::getPassRegistry());
00052     }
00053 
00054     bool runOnMachineFunction(MachineFunction &MF) override;
00055 
00056     void getAnalysisUsage(AnalysisUsage &AU) const override {
00057       AU.setPreservesCFG();
00058       MachineFunctionPass::getAnalysisUsage(AU);
00059       AU.addRequired<AliasAnalysis>();
00060       AU.addPreservedID(MachineLoopInfoID);
00061       AU.addRequired<MachineDominatorTree>();
00062       AU.addPreserved<MachineDominatorTree>();
00063     }
00064 
00065     void releaseMemory() override {
00066       ScopeMap.clear();
00067       Exps.clear();
00068     }
00069 
00070   private:
00071     const unsigned LookAheadLimit;
00072     typedef RecyclingAllocator<BumpPtrAllocator,
00073         ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
00074     typedef ScopedHashTable<MachineInstr*, unsigned,
00075         MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
00076     typedef ScopedHTType::ScopeTy ScopeType;
00077     DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
00078     ScopedHTType VNT;
00079     SmallVector<MachineInstr*, 64> Exps;
00080     unsigned CurrVN;
00081 
00082     bool PerformTrivialCopyPropagation(MachineInstr *MI,
00083                                        MachineBasicBlock *MBB);
00084     bool isPhysDefTriviallyDead(unsigned Reg,
00085                                 MachineBasicBlock::const_iterator I,
00086                                 MachineBasicBlock::const_iterator E) const;
00087     bool hasLivePhysRegDefUses(const MachineInstr *MI,
00088                                const MachineBasicBlock *MBB,
00089                                SmallSet<unsigned,8> &PhysRefs,
00090                                SmallVectorImpl<unsigned> &PhysDefs,
00091                                bool &PhysUseDef) const;
00092     bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
00093                           SmallSet<unsigned,8> &PhysRefs,
00094                           SmallVectorImpl<unsigned> &PhysDefs,
00095                           bool &NonLocal) const;
00096     bool isCSECandidate(MachineInstr *MI);
00097     bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
00098                            MachineInstr *CSMI, MachineInstr *MI);
00099     void EnterScope(MachineBasicBlock *MBB);
00100     void ExitScope(MachineBasicBlock *MBB);
00101     bool ProcessBlock(MachineBasicBlock *MBB);
00102     void ExitScopeIfDone(MachineDomTreeNode *Node,
00103                          DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
00104     bool PerformCSE(MachineDomTreeNode *Node);
00105   };
00106 } // end anonymous namespace
00107 
00108 char MachineCSE::ID = 0;
00109 char &llvm::MachineCSEID = MachineCSE::ID;
00110 INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
00111                 "Machine Common Subexpression Elimination", false, false)
00112 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
00113 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
00114 INITIALIZE_PASS_END(MachineCSE, "machine-cse",
00115                 "Machine Common Subexpression Elimination", false, false)
00116 
00117 /// The source register of a COPY machine instruction can be propagated to all
00118 /// its users, and this propagation could increase the probability of finding
00119 /// common subexpressions. If the COPY has only one user, the COPY itself can
00120 /// be removed.
00121 bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
00122                                                MachineBasicBlock *MBB) {
00123   bool Changed = false;
00124   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
00125     MachineOperand &MO = MI->getOperand(i);
00126     if (!MO.isReg() || !MO.isUse())
00127       continue;
00128     unsigned Reg = MO.getReg();
00129     if (!TargetRegisterInfo::isVirtualRegister(Reg))
00130       continue;
00131     bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
00132     MachineInstr *DefMI = MRI->getVRegDef(Reg);
00133     if (!DefMI->isCopy())
00134       continue;
00135     unsigned SrcReg = DefMI->getOperand(1).getReg();
00136     if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
00137       continue;
00138     if (DefMI->getOperand(0).getSubReg())
00139       continue;
00140     // FIXME: We should trivially coalesce subregister copies to expose CSE
00141     // opportunities on instructions with truncated operands (see
00142     // cse-add-with-overflow.ll). This can be done here as follows:
00143     // if (SrcSubReg)
00144     //  RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
00145     //                                     SrcSubReg);
00146     // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
00147     //
00148     // The 2-addr pass has been updated to handle coalesced subregs. However,
00149     // some machine-specific code still can't handle it.
00150     // To handle it properly we also need a way find a constrained subregister
00151     // class given a super-reg class and subreg index.
00152     if (DefMI->getOperand(1).getSubReg())
00153       continue;
00154     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
00155     if (!MRI->constrainRegClass(SrcReg, RC))
00156       continue;
00157     DEBUG(dbgs() << "Coalescing: " << *DefMI);
00158     DEBUG(dbgs() << "***     to: " << *MI);
00159     // Propagate SrcReg of copies to MI.
00160     MO.setReg(SrcReg);
00161     MRI->clearKillFlags(SrcReg);
00162     // Coalesce single use copies.
00163     if (OnlyOneUse) {
00164       DefMI->eraseFromParent();
00165       ++NumCoalesces;
00166     }
00167     Changed = true;
00168   }
00169 
00170   return Changed;
00171 }
00172 
00173 bool
00174 MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
00175                                    MachineBasicBlock::const_iterator I,
00176                                    MachineBasicBlock::const_iterator E) const {
00177   unsigned LookAheadLeft = LookAheadLimit;
00178   while (LookAheadLeft) {
00179     // Skip over dbg_value's.
00180     while (I != E && I->isDebugValue())
00181       ++I;
00182 
00183     if (I == E)
00184       // Reached end of block, register is obviously dead.
00185       return true;
00186 
00187     bool SeenDef = false;
00188     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
00189       const MachineOperand &MO = I->getOperand(i);
00190       if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
00191         SeenDef = true;
00192       if (!MO.isReg() || !MO.getReg())
00193         continue;
00194       if (!TRI->regsOverlap(MO.getReg(), Reg))
00195         continue;
00196       if (MO.isUse())
00197         // Found a use!
00198         return false;
00199       SeenDef = true;
00200     }
00201     if (SeenDef)
00202       // See a def of Reg (or an alias) before encountering any use, it's
00203       // trivially dead.
00204       return true;
00205 
00206     --LookAheadLeft;
00207     ++I;
00208   }
00209   return false;
00210 }
00211 
00212 /// hasLivePhysRegDefUses - Return true if the specified instruction read/write
00213 /// physical registers (except for dead defs of physical registers). It also
00214 /// returns the physical register def by reference if it's the only one and the
00215 /// instruction does not uses a physical register.
00216 bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
00217                                        const MachineBasicBlock *MBB,
00218                                        SmallSet<unsigned,8> &PhysRefs,
00219                                        SmallVectorImpl<unsigned> &PhysDefs,
00220                                        bool &PhysUseDef) const{
00221   // First, add all uses to PhysRefs.
00222   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
00223     const MachineOperand &MO = MI->getOperand(i);
00224     if (!MO.isReg() || MO.isDef())
00225       continue;
00226     unsigned Reg = MO.getReg();
00227     if (!Reg)
00228       continue;
00229     if (TargetRegisterInfo::isVirtualRegister(Reg))
00230       continue;
00231     // Reading constant physregs is ok.
00232     if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
00233       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
00234         PhysRefs.insert(*AI);
00235   }
00236 
00237   // Next, collect all defs into PhysDefs.  If any is already in PhysRefs
00238   // (which currently contains only uses), set the PhysUseDef flag.
00239   PhysUseDef = false;
00240   MachineBasicBlock::const_iterator I = MI; I = std::next(I);
00241   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
00242     const MachineOperand &MO = MI->getOperand(i);
00243     if (!MO.isReg() || !MO.isDef())
00244       continue;
00245     unsigned Reg = MO.getReg();
00246     if (!Reg)
00247       continue;
00248     if (TargetRegisterInfo::isVirtualRegister(Reg))
00249       continue;
00250     // Check against PhysRefs even if the def is "dead".
00251     if (PhysRefs.count(Reg))
00252       PhysUseDef = true;
00253     // If the def is dead, it's ok. But the def may not marked "dead". That's
00254     // common since this pass is run before livevariables. We can scan
00255     // forward a few instructions and check if it is obviously dead.
00256     if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
00257       PhysDefs.push_back(Reg);
00258   }
00259 
00260   // Finally, add all defs to PhysRefs as well.
00261   for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
00262     for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
00263       PhysRefs.insert(*AI);
00264 
00265   return !PhysRefs.empty();
00266 }
00267 
00268 bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
00269                                   SmallSet<unsigned,8> &PhysRefs,
00270                                   SmallVectorImpl<unsigned> &PhysDefs,
00271                                   bool &NonLocal) const {
00272   // For now conservatively returns false if the common subexpression is
00273   // not in the same basic block as the given instruction. The only exception
00274   // is if the common subexpression is in the sole predecessor block.
00275   const MachineBasicBlock *MBB = MI->getParent();
00276   const MachineBasicBlock *CSMBB = CSMI->getParent();
00277 
00278   bool CrossMBB = false;
00279   if (CSMBB != MBB) {
00280     if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
00281       return false;
00282 
00283     for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
00284       if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
00285         // Avoid extending live range of physical registers if they are
00286         //allocatable or reserved.
00287         return false;
00288     }
00289     CrossMBB = true;
00290   }
00291   MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
00292   MachineBasicBlock::const_iterator E = MI;
00293   MachineBasicBlock::const_iterator EE = CSMBB->end();
00294   unsigned LookAheadLeft = LookAheadLimit;
00295   while (LookAheadLeft) {
00296     // Skip over dbg_value's.
00297     while (I != E && I != EE && I->isDebugValue())
00298       ++I;
00299 
00300     if (I == EE) {
00301       assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
00302       (void)CrossMBB;
00303       CrossMBB = false;
00304       NonLocal = true;
00305       I = MBB->begin();
00306       EE = MBB->end();
00307       continue;
00308     }
00309 
00310     if (I == E)
00311       return true;
00312 
00313     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
00314       const MachineOperand &MO = I->getOperand(i);
00315       // RegMasks go on instructions like calls that clobber lots of physregs.
00316       // Don't attempt to CSE across such an instruction.
00317       if (MO.isRegMask())
00318         return false;
00319       if (!MO.isReg() || !MO.isDef())
00320         continue;
00321       unsigned MOReg = MO.getReg();
00322       if (TargetRegisterInfo::isVirtualRegister(MOReg))
00323         continue;
00324       if (PhysRefs.count(MOReg))
00325         return false;
00326     }
00327 
00328     --LookAheadLeft;
00329     ++I;
00330   }
00331 
00332   return false;
00333 }
00334 
00335 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
00336   if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
00337       MI->isInlineAsm() || MI->isDebugValue())
00338     return false;
00339 
00340   // Ignore copies.
00341   if (MI->isCopyLike())
00342     return false;
00343 
00344   // Ignore stuff that we obviously can't move.
00345   if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
00346       MI->hasUnmodeledSideEffects())
00347     return false;
00348 
00349   if (MI->mayLoad()) {
00350     // Okay, this instruction does a load. As a refinement, we allow the target
00351     // to decide whether the loaded value is actually a constant. If so, we can
00352     // actually use it as a load.
00353     if (!MI->isInvariantLoad(AA))
00354       // FIXME: we should be able to hoist loads with no other side effects if
00355       // there are no other instructions which can change memory in this loop.
00356       // This is a trivial form of alias analysis.
00357       return false;
00358   }
00359   return true;
00360 }
00361 
00362 /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
00363 /// common expression that defines Reg.
00364 bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
00365                                    MachineInstr *CSMI, MachineInstr *MI) {
00366   // FIXME: Heuristics that works around the lack the live range splitting.
00367 
00368   // If CSReg is used at all uses of Reg, CSE should not increase register
00369   // pressure of CSReg.
00370   bool MayIncreasePressure = true;
00371   if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
00372       TargetRegisterInfo::isVirtualRegister(Reg)) {
00373     MayIncreasePressure = false;
00374     SmallPtrSet<MachineInstr*, 8> CSUses;
00375     for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
00376       CSUses.insert(&MI);
00377     }
00378     for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
00379       if (!CSUses.count(&MI)) {
00380         MayIncreasePressure = true;
00381         break;
00382       }
00383     }
00384   }
00385   if (!MayIncreasePressure) return true;
00386 
00387   // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
00388   // an immediate predecessor. We don't want to increase register pressure and
00389   // end up causing other computation to be spilled.
00390   if (TII->isAsCheapAsAMove(MI)) {
00391     MachineBasicBlock *CSBB = CSMI->getParent();
00392     MachineBasicBlock *BB = MI->getParent();
00393     if (CSBB != BB && !CSBB->isSuccessor(BB))
00394       return false;
00395   }
00396 
00397   // Heuristics #2: If the expression doesn't not use a vr and the only use
00398   // of the redundant computation are copies, do not cse.
00399   bool HasVRegUse = false;
00400   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
00401     const MachineOperand &MO = MI->getOperand(i);
00402     if (MO.isReg() && MO.isUse() &&
00403         TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
00404       HasVRegUse = true;
00405       break;
00406     }
00407   }
00408   if (!HasVRegUse) {
00409     bool HasNonCopyUse = false;
00410     for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
00411       // Ignore copies.
00412       if (!MI.isCopyLike()) {
00413         HasNonCopyUse = true;
00414         break;
00415       }
00416     }
00417     if (!HasNonCopyUse)
00418       return false;
00419   }
00420 
00421   // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
00422   // it unless the defined value is already used in the BB of the new use.
00423   bool HasPHI = false;
00424   SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
00425   for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
00426     HasPHI |= MI.isPHI();
00427     CSBBs.insert(MI.getParent());
00428   }
00429 
00430   if (!HasPHI)
00431     return true;
00432   return CSBBs.count(MI->getParent());
00433 }
00434 
00435 void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
00436   DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
00437   ScopeType *Scope = new ScopeType(VNT);
00438   ScopeMap[MBB] = Scope;
00439 }
00440 
00441 void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
00442   DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
00443   DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
00444   assert(SI != ScopeMap.end());
00445   delete SI->second;
00446   ScopeMap.erase(SI);
00447 }
00448 
00449 bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
00450   bool Changed = false;
00451 
00452   SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
00453   SmallVector<unsigned, 2> ImplicitDefsToUpdate;
00454   for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
00455     MachineInstr *MI = &*I;
00456     ++I;
00457 
00458     if (!isCSECandidate(MI))
00459       continue;
00460 
00461     bool FoundCSE = VNT.count(MI);
00462     if (!FoundCSE) {
00463       // Using trivial copy propagation to find more CSE opportunities.
00464       if (PerformTrivialCopyPropagation(MI, MBB)) {
00465         Changed = true;
00466 
00467         // After coalescing MI itself may become a copy.
00468         if (MI->isCopyLike())
00469           continue;
00470 
00471         // Try again to see if CSE is possible.
00472         FoundCSE = VNT.count(MI);
00473       }
00474     }
00475 
00476     // Commute commutable instructions.
00477     bool Commuted = false;
00478     if (!FoundCSE && MI->isCommutable()) {
00479       MachineInstr *NewMI = TII->commuteInstruction(MI);
00480       if (NewMI) {
00481         Commuted = true;
00482         FoundCSE = VNT.count(NewMI);
00483         if (NewMI != MI) {
00484           // New instruction. It doesn't need to be kept.
00485           NewMI->eraseFromParent();
00486           Changed = true;
00487         } else if (!FoundCSE)
00488           // MI was changed but it didn't help, commute it back!
00489           (void)TII->commuteInstruction(MI);
00490       }
00491     }
00492 
00493     // If the instruction defines physical registers and the values *may* be
00494     // used, then it's not safe to replace it with a common subexpression.
00495     // It's also not safe if the instruction uses physical registers.
00496     bool CrossMBBPhysDef = false;
00497     SmallSet<unsigned, 8> PhysRefs;
00498     SmallVector<unsigned, 2> PhysDefs;
00499     bool PhysUseDef = false;
00500     if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
00501                                           PhysDefs, PhysUseDef)) {
00502       FoundCSE = false;
00503 
00504       // ... Unless the CS is local or is in the sole predecessor block
00505       // and it also defines the physical register which is not clobbered
00506       // in between and the physical register uses were not clobbered.
00507       // This can never be the case if the instruction both uses and
00508       // defines the same physical register, which was detected above.
00509       if (!PhysUseDef) {
00510         unsigned CSVN = VNT.lookup(MI);
00511         MachineInstr *CSMI = Exps[CSVN];
00512         if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
00513           FoundCSE = true;
00514       }
00515     }
00516 
00517     if (!FoundCSE) {
00518       VNT.insert(MI, CurrVN++);
00519       Exps.push_back(MI);
00520       continue;
00521     }
00522 
00523     // Found a common subexpression, eliminate it.
00524     unsigned CSVN = VNT.lookup(MI);
00525     MachineInstr *CSMI = Exps[CSVN];
00526     DEBUG(dbgs() << "Examining: " << *MI);
00527     DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
00528 
00529     // Check if it's profitable to perform this CSE.
00530     bool DoCSE = true;
00531     unsigned NumDefs = MI->getDesc().getNumDefs() +
00532                        MI->getDesc().getNumImplicitDefs();
00533 
00534     for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
00535       MachineOperand &MO = MI->getOperand(i);
00536       if (!MO.isReg() || !MO.isDef())
00537         continue;
00538       unsigned OldReg = MO.getReg();
00539       unsigned NewReg = CSMI->getOperand(i).getReg();
00540 
00541       // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
00542       // we should make sure it is not dead at CSMI.
00543       if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
00544         ImplicitDefsToUpdate.push_back(i);
00545       if (OldReg == NewReg) {
00546         --NumDefs;
00547         continue;
00548       }
00549 
00550       assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
00551              TargetRegisterInfo::isVirtualRegister(NewReg) &&
00552              "Do not CSE physical register defs!");
00553 
00554       if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
00555         DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
00556         DoCSE = false;
00557         break;
00558       }
00559 
00560       // Don't perform CSE if the result of the old instruction cannot exist
00561       // within the register class of the new instruction.
00562       const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
00563       if (!MRI->constrainRegClass(NewReg, OldRC)) {
00564         DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
00565         DoCSE = false;
00566         break;
00567       }
00568 
00569       CSEPairs.push_back(std::make_pair(OldReg, NewReg));
00570       --NumDefs;
00571     }
00572 
00573     // Actually perform the elimination.
00574     if (DoCSE) {
00575       for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
00576         MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
00577         MRI->clearKillFlags(CSEPairs[i].second);
00578       }
00579 
00580       // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
00581       // we should make sure it is not dead at CSMI.
00582       for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
00583         CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
00584 
00585       if (CrossMBBPhysDef) {
00586         // Add physical register defs now coming in from a predecessor to MBB
00587         // livein list.
00588         while (!PhysDefs.empty()) {
00589           unsigned LiveIn = PhysDefs.pop_back_val();
00590           if (!MBB->isLiveIn(LiveIn))
00591             MBB->addLiveIn(LiveIn);
00592         }
00593         ++NumCrossBBCSEs;
00594       }
00595 
00596       MI->eraseFromParent();
00597       ++NumCSEs;
00598       if (!PhysRefs.empty())
00599         ++NumPhysCSEs;
00600       if (Commuted)
00601         ++NumCommutes;
00602       Changed = true;
00603     } else {
00604       VNT.insert(MI, CurrVN++);
00605       Exps.push_back(MI);
00606     }
00607     CSEPairs.clear();
00608     ImplicitDefsToUpdate.clear();
00609   }
00610 
00611   return Changed;
00612 }
00613 
00614 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
00615 /// dominator tree node if its a leaf or all of its children are done. Walk
00616 /// up the dominator tree to destroy ancestors which are now done.
00617 void
00618 MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
00619                         DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
00620   if (OpenChildren[Node])
00621     return;
00622 
00623   // Pop scope.
00624   ExitScope(Node->getBlock());
00625 
00626   // Now traverse upwards to pop ancestors whose offsprings are all done.
00627   while (MachineDomTreeNode *Parent = Node->getIDom()) {
00628     unsigned Left = --OpenChildren[Parent];
00629     if (Left != 0)
00630       break;
00631     ExitScope(Parent->getBlock());
00632     Node = Parent;
00633   }
00634 }
00635 
00636 bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
00637   SmallVector<MachineDomTreeNode*, 32> Scopes;
00638   SmallVector<MachineDomTreeNode*, 8> WorkList;
00639   DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
00640 
00641   CurrVN = 0;
00642 
00643   // Perform a DFS walk to determine the order of visit.
00644   WorkList.push_back(Node);
00645   do {
00646     Node = WorkList.pop_back_val();
00647     Scopes.push_back(Node);
00648     const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
00649     unsigned NumChildren = Children.size();
00650     OpenChildren[Node] = NumChildren;
00651     for (unsigned i = 0; i != NumChildren; ++i) {
00652       MachineDomTreeNode *Child = Children[i];
00653       WorkList.push_back(Child);
00654     }
00655   } while (!WorkList.empty());
00656 
00657   // Now perform CSE.
00658   bool Changed = false;
00659   for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
00660     MachineDomTreeNode *Node = Scopes[i];
00661     MachineBasicBlock *MBB = Node->getBlock();
00662     EnterScope(MBB);
00663     Changed |= ProcessBlock(MBB);
00664     // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
00665     ExitScopeIfDone(Node, OpenChildren);
00666   }
00667 
00668   return Changed;
00669 }
00670 
00671 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
00672   if (skipOptnoneFunction(*MF.getFunction()))
00673     return false;
00674 
00675   TII = MF.getSubtarget().getInstrInfo();
00676   TRI = MF.getSubtarget().getRegisterInfo();
00677   MRI = &MF.getRegInfo();
00678   AA = &getAnalysis<AliasAnalysis>();
00679   DT = &getAnalysis<MachineDominatorTree>();
00680   return PerformCSE(DT->getRootNode());
00681 }