LLVM API Documentation

ARMLoadStoreOptimizer.cpp
Go to the documentation of this file.
00001 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains a pass that performs load / store related peephole
00011 // optimizations. This pass should be run after register allocation.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <climits>
#include <map>
#include <utility>
00044 using namespace llvm;
00045 
00046 #define DEBUG_TYPE "arm-ldst-opt"
00047 
00048 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
00049 STATISTIC(NumSTMGened , "Number of stm instructions generated");
00050 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
00051 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
00052 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
00053 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
00054 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
00055 STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
00056 STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
00057 STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
00058 STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
00059 
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
00062 
00063 namespace {
00064   struct ARMLoadStoreOpt : public MachineFunctionPass {
00065     static char ID;
00066     ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
00067 
00068     const TargetInstrInfo *TII;
00069     const TargetRegisterInfo *TRI;
00070     const ARMSubtarget *STI;
00071     const TargetLowering *TL;
00072     ARMFunctionInfo *AFI;
00073     RegScavenger *RS;
00074     bool isThumb1, isThumb2;
00075 
00076     bool runOnMachineFunction(MachineFunction &Fn) override;
00077 
00078     const char *getPassName() const override {
00079       return "ARM load / store optimization pass";
00080     }
00081 
00082   private:
00083     struct MemOpQueueEntry {
00084       int Offset;
00085       unsigned Reg;
00086       bool isKill;
00087       unsigned Position;
00088       MachineBasicBlock::iterator MBBI;
00089       bool Merged;
00090       MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
00091                       MachineBasicBlock::iterator i)
00092         : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
00093     };
00094     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
00095     typedef MemOpQueue::iterator MemOpQueueIter;
00096 
00097     void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
00098                           const MemOpQueue &MemOps, unsigned DefReg,
00099                           unsigned RangeBegin, unsigned RangeEnd);
00100     bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
00101                   int Offset, unsigned Base, bool BaseKill, int Opcode,
00102                   ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
00103                   DebugLoc dl,
00104                   ArrayRef<std::pair<unsigned, bool> > Regs,
00105                   ArrayRef<unsigned> ImpDefs);
00106     void MergeOpsUpdate(MachineBasicBlock &MBB,
00107                         MemOpQueue &MemOps,
00108                         unsigned memOpsBegin,
00109                         unsigned memOpsEnd,
00110                         unsigned insertAfter,
00111                         int Offset,
00112                         unsigned Base,
00113                         bool BaseKill,
00114                         int Opcode,
00115                         ARMCC::CondCodes Pred,
00116                         unsigned PredReg,
00117                         unsigned Scratch,
00118                         DebugLoc dl,
00119                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
00120     void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
00121                       int Opcode, unsigned Size,
00122                       ARMCC::CondCodes Pred, unsigned PredReg,
00123                       unsigned Scratch, MemOpQueue &MemOps,
00124                       SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
00125     void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
00126     bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
00127                              MachineBasicBlock::iterator &MBBI);
00128     bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
00129                                   MachineBasicBlock::iterator MBBI,
00130                                   const TargetInstrInfo *TII,
00131                                   bool &Advance,
00132                                   MachineBasicBlock::iterator &I);
00133     bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
00134                                    MachineBasicBlock::iterator MBBI,
00135                                    bool &Advance,
00136                                    MachineBasicBlock::iterator &I);
00137     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
00138     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
00139   };
00140   char ARMLoadStoreOpt::ID = 0;
00141 }
00142 
00143 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
00144   switch (Opcode) {
00145   default: llvm_unreachable("Unhandled opcode!");
00146   case ARM::LDRi12:
00147     ++NumLDMGened;
00148     switch (Mode) {
00149     default: llvm_unreachable("Unhandled submode!");
00150     case ARM_AM::ia: return ARM::LDMIA;
00151     case ARM_AM::da: return ARM::LDMDA;
00152     case ARM_AM::db: return ARM::LDMDB;
00153     case ARM_AM::ib: return ARM::LDMIB;
00154     }
00155   case ARM::STRi12:
00156     ++NumSTMGened;
00157     switch (Mode) {
00158     default: llvm_unreachable("Unhandled submode!");
00159     case ARM_AM::ia: return ARM::STMIA;
00160     case ARM_AM::da: return ARM::STMDA;
00161     case ARM_AM::db: return ARM::STMDB;
00162     case ARM_AM::ib: return ARM::STMIB;
00163     }
00164   case ARM::tLDRi:
00165     // tLDMIA is writeback-only - unless the base register is in the input
00166     // reglist.
00167     ++NumLDMGened;
00168     switch (Mode) {
00169     default: llvm_unreachable("Unhandled submode!");
00170     case ARM_AM::ia: return ARM::tLDMIA;
00171     }
00172   case ARM::tSTRi:
00173     // There is no non-writeback tSTMIA either.
00174     ++NumSTMGened;
00175     switch (Mode) {
00176     default: llvm_unreachable("Unhandled submode!");
00177     case ARM_AM::ia: return ARM::tSTMIA_UPD;
00178     }
00179   case ARM::t2LDRi8:
00180   case ARM::t2LDRi12:
00181     ++NumLDMGened;
00182     switch (Mode) {
00183     default: llvm_unreachable("Unhandled submode!");
00184     case ARM_AM::ia: return ARM::t2LDMIA;
00185     case ARM_AM::db: return ARM::t2LDMDB;
00186     }
00187   case ARM::t2STRi8:
00188   case ARM::t2STRi12:
00189     ++NumSTMGened;
00190     switch (Mode) {
00191     default: llvm_unreachable("Unhandled submode!");
00192     case ARM_AM::ia: return ARM::t2STMIA;
00193     case ARM_AM::db: return ARM::t2STMDB;
00194     }
00195   case ARM::VLDRS:
00196     ++NumVLDMGened;
00197     switch (Mode) {
00198     default: llvm_unreachable("Unhandled submode!");
00199     case ARM_AM::ia: return ARM::VLDMSIA;
00200     case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
00201     }
00202   case ARM::VSTRS:
00203     ++NumVSTMGened;
00204     switch (Mode) {
00205     default: llvm_unreachable("Unhandled submode!");
00206     case ARM_AM::ia: return ARM::VSTMSIA;
00207     case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
00208     }
00209   case ARM::VLDRD:
00210     ++NumVLDMGened;
00211     switch (Mode) {
00212     default: llvm_unreachable("Unhandled submode!");
00213     case ARM_AM::ia: return ARM::VLDMDIA;
00214     case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
00215     }
00216   case ARM::VSTRD:
00217     ++NumVSTMGened;
00218     switch (Mode) {
00219     default: llvm_unreachable("Unhandled submode!");
00220     case ARM_AM::ia: return ARM::VSTMDIA;
00221     case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
00222     }
00223   }
00224 }
00225 
00226 namespace llvm {
00227   namespace ARM_AM {
00228 
00229 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
00230   switch (Opcode) {
00231   default: llvm_unreachable("Unhandled opcode!");
00232   case ARM::LDMIA_RET:
00233   case ARM::LDMIA:
00234   case ARM::LDMIA_UPD:
00235   case ARM::STMIA:
00236   case ARM::STMIA_UPD:
00237   case ARM::tLDMIA:
00238   case ARM::tLDMIA_UPD:
00239   case ARM::tSTMIA_UPD:
00240   case ARM::t2LDMIA_RET:
00241   case ARM::t2LDMIA:
00242   case ARM::t2LDMIA_UPD:
00243   case ARM::t2STMIA:
00244   case ARM::t2STMIA_UPD:
00245   case ARM::VLDMSIA:
00246   case ARM::VLDMSIA_UPD:
00247   case ARM::VSTMSIA:
00248   case ARM::VSTMSIA_UPD:
00249   case ARM::VLDMDIA:
00250   case ARM::VLDMDIA_UPD:
00251   case ARM::VSTMDIA:
00252   case ARM::VSTMDIA_UPD:
00253     return ARM_AM::ia;
00254 
00255   case ARM::LDMDA:
00256   case ARM::LDMDA_UPD:
00257   case ARM::STMDA:
00258   case ARM::STMDA_UPD:
00259     return ARM_AM::da;
00260 
00261   case ARM::LDMDB:
00262   case ARM::LDMDB_UPD:
00263   case ARM::STMDB:
00264   case ARM::STMDB_UPD:
00265   case ARM::t2LDMDB:
00266   case ARM::t2LDMDB_UPD:
00267   case ARM::t2STMDB:
00268   case ARM::t2STMDB_UPD:
00269   case ARM::VLDMSDB_UPD:
00270   case ARM::VSTMSDB_UPD:
00271   case ARM::VLDMDDB_UPD:
00272   case ARM::VSTMDDB_UPD:
00273     return ARM_AM::db;
00274 
00275   case ARM::LDMIB:
00276   case ARM::LDMIB_UPD:
00277   case ARM::STMIB:
00278   case ARM::STMIB_UPD:
00279     return ARM_AM::ib;
00280   }
00281 }
00282 
00283   } // end namespace ARM_AM
00284 } // end namespace llvm
00285 
00286 static bool isT1i32Load(unsigned Opc) {
00287   return Opc == ARM::tLDRi;
00288 }
00289 
00290 static bool isT2i32Load(unsigned Opc) {
00291   return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
00292 }
00293 
00294 static bool isi32Load(unsigned Opc) {
00295   return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
00296 }
00297 
00298 static bool isT1i32Store(unsigned Opc) {
00299   return Opc == ARM::tSTRi;
00300 }
00301 
00302 static bool isT2i32Store(unsigned Opc) {
00303   return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
00304 }
00305 
00306 static bool isi32Store(unsigned Opc) {
00307   return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
00308 }
00309 
00310 /// MergeOps - Create and insert a LDM or STM with Base as base register and
00311 /// registers in Regs as the register operands that would be loaded / stored.
00312 /// It returns true if the transformation is done.
00313 bool
00314 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
00315                           MachineBasicBlock::iterator MBBI,
00316                           int Offset, unsigned Base, bool BaseKill,
00317                           int Opcode, ARMCC::CondCodes Pred,
00318                           unsigned PredReg, unsigned Scratch, DebugLoc dl,
00319                           ArrayRef<std::pair<unsigned, bool> > Regs,
00320                           ArrayRef<unsigned> ImpDefs) {
00321   // Only a single register to load / store. Don't bother.
00322   unsigned NumRegs = Regs.size();
00323   if (NumRegs <= 1)
00324     return false;
00325 
00326   // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
00327   // Compute liveness information for that register to make the decision.
00328   bool SafeToClobberCPSR = !isThumb1 ||
00329     (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
00330      MachineBasicBlock::LQR_Dead);
00331 
00332   ARM_AM::AMSubMode Mode = ARM_AM::ia;
00333   // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
00334   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
00335   bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
00336 
00337   if (Offset == 4 && haveIBAndDA) {
00338     Mode = ARM_AM::ib;
00339   } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
00340     Mode = ARM_AM::da;
00341   } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
00342     // VLDM/VSTM do not support DB mode without also updating the base reg.
00343     Mode = ARM_AM::db;
00344   } else if (Offset != 0) {
00345     // Check if this is a supported opcode before inserting instructions to
00346     // calculate a new base register.
00347     if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
00348 
00349     // If starting offset isn't zero, insert a MI to materialize a new base.
00350     // But only do so if it is cost effective, i.e. merging more than two
00351     // loads / stores.
00352     if (NumRegs <= 2)
00353       return false;
00354 
00355     // On Thumb1, it's not worth materializing a new base register without
00356     // clobbering the CPSR (i.e. not using ADDS/SUBS).
00357     if (!SafeToClobberCPSR)
00358       return false;
00359 
00360     unsigned NewBase;
00361     if (isi32Load(Opcode)) {
00362       // If it is a load, then just use one of the destination register to
00363       // use as the new base.
00364       NewBase = Regs[NumRegs-1].first;
00365     } else {
00366       // Use the scratch register to use as a new base.
00367       NewBase = Scratch;
00368       if (NewBase == 0)
00369         return false;
00370     }
00371 
00372     int BaseOpc =
00373       isThumb2 ? ARM::t2ADDri :
00374       (isThumb1 && Offset < 8) ? ARM::tADDi3 :
00375       isThumb1 ? ARM::tADDi8  : ARM::ADDri;
00376 
00377     if (Offset < 0) {
00378       Offset = - Offset;
00379       BaseOpc =
00380         isThumb2 ? ARM::t2SUBri :
00381         (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
00382         isThumb1 ? ARM::tSUBi8  : ARM::SUBri;
00383     }
00384 
00385     if (!TL->isLegalAddImmediate(Offset))
00386       // FIXME: Try add with register operand?
00387       return false; // Probably not worth it then.
00388 
00389     if (isThumb1) {
00390       // Thumb1: depending on immediate size, use either
00391       //   ADDS NewBase, Base, #imm3
00392       // or
00393       //   MOV  NewBase, Base
00394       //   ADDS NewBase, #imm8.
00395       if (Base != NewBase && Offset >= 8) {
00396         // Need to insert a MOV to the new base first.
00397         BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
00398           .addReg(Base, getKillRegState(BaseKill))
00399           .addImm(Pred).addReg(PredReg);
00400         // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
00401         Base = NewBase;
00402         BaseKill = false;
00403       }
00404       AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
00405         .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
00406         .addImm(Pred).addReg(PredReg);
00407     } else {
00408       BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
00409         .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
00410         .addImm(Pred).addReg(PredReg).addReg(0);
00411     }
00412     Base = NewBase;
00413     BaseKill = true; // New base is always killed straight away.
00414   }
00415 
00416   bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
00417                 Opcode == ARM::VLDRD);
00418 
00419   // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
00420   // base register writeback.
00421   Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
00422   if (!Opcode) return false;
00423 
00424   bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
00425 
00426   // Exception: If the base register is in the input reglist, Thumb1 LDM is
00427   // non-writeback. Check for this.
00428   if (Opcode == ARM::tLDMIA && isThumb1)
00429     for (unsigned I = 0; I < NumRegs; ++I)
00430       if (Base == Regs[I].first) {
00431         Writeback = false;
00432         break;
00433       }
00434 
00435   // If the merged instruction has writeback and the base register is not killed
00436   // it's not safe to do the merge on Thumb1. This is because resetting the base
00437   // register writeback by inserting a SUBS sets the condition flags.
00438   // FIXME: Try something clever here to see if resetting the base register can
00439   // be avoided, e.g. by updating a later ADD/SUB of the base register with the
00440   // writeback.
00441   if (isThumb1 && Writeback && !BaseKill) return false;
00442 
00443   MachineInstrBuilder MIB;
00444 
00445   if (Writeback) {
00446     if (Opcode == ARM::tLDMIA)
00447       // Update tLDMIA with writeback if necessary.
00448       Opcode = ARM::tLDMIA_UPD;
00449 
00450     MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
00451 
00452     // Thumb1: we might need to set base writeback when building the MI.
00453     MIB.addReg(Base, getDefRegState(true))
00454        .addReg(Base, getKillRegState(BaseKill));
00455   } else {
00456     // No writeback, simply build the MachineInstr.
00457     MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
00458     MIB.addReg(Base, getKillRegState(BaseKill));
00459   }
00460 
00461   MIB.addImm(Pred).addReg(PredReg);
00462 
00463   for (unsigned i = 0; i != NumRegs; ++i)
00464     MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
00465                      | getKillRegState(Regs[i].second));
00466 
00467   // Add implicit defs for super-registers.
00468   for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
00469     MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
00470 
00471   return true;
00472 }
00473 
00474 /// \brief Find all instructions using a given imp-def within a range.
00475 ///
00476 /// We are trying to combine a range of instructions, one of which (located at
00477 /// position RangeBegin) implicitly defines a register. The final LDM/STM will
00478 /// be placed at RangeEnd, and so any uses of this definition between RangeStart
00479 /// and RangeEnd must be modified to use an undefined value.
00480 ///
00481 /// The live range continues until we find a second definition or one of the
00482 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
00483 /// we must consider all uses and decide which are relevant in a second pass.
00484 void ARMLoadStoreOpt::findUsesOfImpDef(
00485     SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
00486     unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
00487   std::map<unsigned, MachineOperand *> Uses;
00488   unsigned LastLivePos = RangeEnd;
00489 
00490   // First we find all uses of this register with Position between RangeBegin
00491   // and RangeEnd, any or all of these could be uses of a definition at
00492   // RangeBegin. We also record the latest position a definition at RangeBegin
00493   // would be considered live.
00494   for (unsigned i = 0; i < MemOps.size(); ++i) {
00495     MachineInstr &MI = *MemOps[i].MBBI;
00496     unsigned MIPosition = MemOps[i].Position;
00497     if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
00498       continue;
00499 
00500     // If this instruction defines the register, then any later use will be of
00501     // that definition rather than ours.
00502     if (MI.definesRegister(DefReg))
00503       LastLivePos = std::min(LastLivePos, MIPosition);
00504 
00505     MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
00506     if (!UseOp)
00507       continue;
00508 
00509     // If this instruction kills the register then (assuming liveness is
00510     // correct when we start) we don't need to think about anything after here.
00511     if (UseOp->isKill())
00512       LastLivePos = std::min(LastLivePos, MIPosition);
00513 
00514     Uses[MIPosition] = UseOp;
00515   }
00516 
00517   // Now we traverse the list of all uses, and append the ones that actually use
00518   // our definition to the requested list.
00519   for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
00520                                                       E = Uses.end();
00521        I != E; ++I) {
00522     // List is sorted by position so once we've found one out of range there
00523     // will be no more to consider.
00524     if (I->first > LastLivePos)
00525       break;
00526     UsesOfImpDefs.push_back(I->second);
00527   }
00528 }
00529 
00530 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
00531 // success.
00532 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
00533                                      MemOpQueue &memOps,
00534                                      unsigned memOpsBegin, unsigned memOpsEnd,
00535                                      unsigned insertAfter, int Offset,
00536                                      unsigned Base, bool BaseKill,
00537                                      int Opcode,
00538                                      ARMCC::CondCodes Pred, unsigned PredReg,
00539                                      unsigned Scratch,
00540                                      DebugLoc dl,
00541                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
00542   // First calculate which of the registers should be killed by the merged
00543   // instruction.
00544   const unsigned insertPos = memOps[insertAfter].Position;
00545   SmallSet<unsigned, 4> KilledRegs;
00546   DenseMap<unsigned, unsigned> Killer;
00547   for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
00548     if (i == memOpsBegin) {
00549       i = memOpsEnd;
00550       if (i == e)
00551         break;
00552     }
00553     if (memOps[i].Position < insertPos && memOps[i].isKill) {
00554       unsigned Reg = memOps[i].Reg;
00555       KilledRegs.insert(Reg);
00556       Killer[Reg] = i;
00557     }
00558   }
00559 
00560   SmallVector<std::pair<unsigned, bool>, 8> Regs;
00561   SmallVector<unsigned, 8> ImpDefs;
00562   SmallVector<MachineOperand *, 8> UsesOfImpDefs;
00563   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
00564     unsigned Reg = memOps[i].Reg;
00565     // If we are inserting the merged operation after an operation that
00566     // uses the same register, make sure to transfer any kill flag.
00567     bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
00568     Regs.push_back(std::make_pair(Reg, isKill));
00569 
00570     // Collect any implicit defs of super-registers. They must be preserved.
00571     for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
00572       if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
00573         continue;
00574       unsigned DefReg = MO->getReg();
00575       if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
00576         ImpDefs.push_back(DefReg);
00577 
00578       // There may be other uses of the definition between this instruction and
00579       // the eventual LDM/STM position. These should be marked undef if the
00580       // merge takes place.
00581       findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
00582                        insertPos);
00583     }
00584   }
00585 
00586   // Try to do the merge.
00587   MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
00588   ++Loc;
00589   if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
00590                 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
00591     return;
00592 
00593   // Merge succeeded, update records.
00594   Merges.push_back(std::prev(Loc));
00595 
00596   // In gathering loads together, we may have moved the imp-def of a register
00597   // past one of its uses. This is OK, since we know better than the rest of
00598   // LLVM what's OK with ARM loads and stores; but we still have to adjust the
00599   // affected uses.
00600   for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
00601                                                    E = UsesOfImpDefs.end();
00602                                                    I != E; ++I)
00603     (*I)->setIsUndef();
00604 
00605   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
00606     // Remove kill flags from any memops that come before insertPos.
00607     if (Regs[i-memOpsBegin].second) {
00608       unsigned Reg = Regs[i-memOpsBegin].first;
00609       if (KilledRegs.count(Reg)) {
00610         unsigned j = Killer[Reg];
00611         int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
00612         assert(Idx >= 0 && "Cannot find killing operand");
00613         memOps[j].MBBI->getOperand(Idx).setIsKill(false);
00614         memOps[j].isKill = false;
00615       }
00616       memOps[i].isKill = true;
00617     }
00618     MBB.erase(memOps[i].MBBI);
00619     // Update this memop to refer to the merged instruction.
00620     // We may need to move kill flags again.
00621     memOps[i].Merged = true;
00622     memOps[i].MBBI = Merges.back();
00623     memOps[i].Position = insertPos;
00624   }
00625 }
00626 
00627 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
00628 /// load / store multiple instructions.
00629 void
00630 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
00631                          unsigned Base, int Opcode, unsigned Size,
00632                          ARMCC::CondCodes Pred, unsigned PredReg,
00633                          unsigned Scratch, MemOpQueue &MemOps,
00634                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
00635   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
00636   int Offset = MemOps[SIndex].Offset;
00637   int SOffset = Offset;
00638   unsigned insertAfter = SIndex;
00639   MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
00640   DebugLoc dl = Loc->getDebugLoc();
00641   const MachineOperand &PMO = Loc->getOperand(0);
00642   unsigned PReg = PMO.getReg();
00643   unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
00644   unsigned Count = 1;
00645   unsigned Limit = ~0U;
00646   bool BaseKill = false;
00647   // vldm / vstm limit are 32 for S variants, 16 for D variants.
00648 
00649   switch (Opcode) {
00650   default: break;
00651   case ARM::VSTRS:
00652     Limit = 32;
00653     break;
00654   case ARM::VSTRD:
00655     Limit = 16;
00656     break;
00657   case ARM::VLDRD:
00658     Limit = 16;
00659     break;
00660   case ARM::VLDRS:
00661     Limit = 32;
00662     break;
00663   }
00664 
00665   for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
00666     int NewOffset = MemOps[i].Offset;
00667     const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
00668     unsigned Reg = MO.getReg();
00669     unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
00670     // Register numbers must be in ascending order. For VFP / NEON load and
00671     // store multiples, the registers must also be consecutive and within the
00672     // limit on the number of registers per instruction.
00673     if (Reg != ARM::SP &&
00674         NewOffset == Offset + (int)Size &&
00675         ((isNotVFP && RegNum > PRegNum) ||
00676          ((Count < Limit) && RegNum == PRegNum+1)) &&
00677         // On Swift we don't want vldm/vstm to start with a odd register num
00678         // because Q register unaligned vldm/vstm need more uops.
00679         (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
00680       Offset += Size;
00681       PRegNum = RegNum;
00682       ++Count;
00683     } else {
00684       // Can't merge this in. Try merge the earlier ones first.
00685       // We need to compute BaseKill here because the MemOps may have been
00686       // reordered.
00687       BaseKill = Loc->killsRegister(Base);
00688 
00689       MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
00690                      BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
00691       MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
00692                    MemOps, Merges);
00693       return;
00694     }
00695 
00696     if (MemOps[i].Position > MemOps[insertAfter].Position) {
00697       insertAfter = i;
00698       Loc = MemOps[i].MBBI;
00699     }
00700   }
00701 
00702   BaseKill =  Loc->killsRegister(Base);
00703   MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
00704                  Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
00705 }
00706 
00707 static bool definesCPSR(MachineInstr *MI) {
00708   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
00709     const MachineOperand &MO = MI->getOperand(i);
00710     if (!MO.isReg())
00711       continue;
00712     if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
00713       // If the instruction has live CPSR def, then it's not safe to fold it
00714       // into load / store.
00715       return true;
00716   }
00717 
00718   return false;
00719 }
00720 
00721 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
00722                                 unsigned Bytes, unsigned Limit,
00723                                 ARMCC::CondCodes Pred, unsigned PredReg) {
00724   unsigned MyPredReg = 0;
00725   if (!MI)
00726     return false;
00727 
00728   bool CheckCPSRDef = false;
00729   switch (MI->getOpcode()) {
00730   default: return false;
00731   case ARM::tSUBi8:
00732   case ARM::t2SUBri:
00733   case ARM::SUBri:
00734     CheckCPSRDef = true;
00735   // fallthrough
00736   case ARM::tSUBspi:
00737     break;
00738   }
00739 
00740   // Make sure the offset fits in 8 bits.
00741   if (Bytes == 0 || (Limit && Bytes >= Limit))
00742     return false;
00743 
00744   unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
00745                     MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
00746   if (!(MI->getOperand(0).getReg() == Base &&
00747         MI->getOperand(1).getReg() == Base &&
00748         (MI->getOperand(2).getImm() * Scale) == Bytes &&
00749         getInstrPredicate(MI, MyPredReg) == Pred &&
00750         MyPredReg == PredReg))
00751     return false;
00752 
00753   return CheckCPSRDef ? !definesCPSR(MI) : true;
00754 }
00755 
00756 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
00757                                 unsigned Bytes, unsigned Limit,
00758                                 ARMCC::CondCodes Pred, unsigned PredReg) {
00759   unsigned MyPredReg = 0;
00760   if (!MI)
00761     return false;
00762 
00763   bool CheckCPSRDef = false;
00764   switch (MI->getOpcode()) {
00765   default: return false;
00766   case ARM::tADDi8:
00767   case ARM::t2ADDri:
00768   case ARM::ADDri:
00769     CheckCPSRDef = true;
00770   // fallthrough
00771   case ARM::tADDspi:
00772     break;
00773   }
00774 
00775   if (Bytes == 0 || (Limit && Bytes >= Limit))
00776     // Make sure the offset fits in 8 bits.
00777     return false;
00778 
00779   unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
00780                     MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
00781   if (!(MI->getOperand(0).getReg() == Base &&
00782         MI->getOperand(1).getReg() == Base &&
00783         (MI->getOperand(2).getImm() * Scale) == Bytes &&
00784         getInstrPredicate(MI, MyPredReg) == Pred &&
00785         MyPredReg == PredReg))
00786     return false;
00787 
00788   return CheckCPSRDef ? !definesCPSR(MI) : true;
00789 }
00790 
/// getLSMultipleTransferSize - Return the number of bytes of memory
/// transferred by MI, or 0 for opcodes this pass does not recognize.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  // Single word-sized transfers (32-bit integer or single-precision FP).
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  // Single double-precision FP transfers.
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  // Load/store multiple: 4 bytes per register in the list. The list length
  // is recovered from the difference between the actual and statically
  // declared operand counts. NOTE(review): the +1 presumably accounts for
  // the variadic register list counting as one operand in the static
  // description — confirm against the .td definitions.
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  // Same, but double-precision lists transfer 8 bytes per register.
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
00831 
/// getUpdatingLSMultipleOpcode - Map a non-writeback load/store-multiple
/// opcode to its base-register-updating (_UPD) counterpart for the given
/// addressing submode. Unsupported opcode/submode combinations are
/// unreachable and indicate a caller bug.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  // ARM-mode LDM: all four submodes have updating forms.
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  // ARM-mode STM: all four submodes have updating forms.
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  // Thumb2 multiples: only the ia and db submodes exist.
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  // VFP multiples: likewise only ia and db submodes.
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
00898 
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
///
/// If the folded inc/dec is the instruction the caller's scan iterator I
/// points at, I is advanced past it and Advance is set so the caller knows.
/// Returns true if the instruction at MBBI was replaced.
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    // Skip over debug values so they don't block the match.
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    // A preceding decrement flips the direction of the walk (see the
    // ldmia -> ldmdb example above): ia becomes db, ib becomes da.
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    // A trailing adjustment in the same direction as the walk folds in
    // without changing the submode (see the stmia example above).
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // Keep the caller's iterator valid if it points at the erased inst.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  // Build the updating form: writeback base def, original base/predicate
  // operands, then the register list copied from the old instruction.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
00996 
/// getPreIndexedLoadStoreOpcode - Return the pre-indexed (base updated
/// before the access) form of a single load/store opcode. VLDR/VSTR have no
/// base-updating forms, so they map to single-register updating VLDM/VSTM
/// variants chosen by the add/sub direction.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  // Thumb2 pre-indexed forms encode the direction in the offset, so both
  // i8 and i12 variants map to the same opcode.
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
01021 
/// getPostIndexedLoadStoreOpcode - Return the post-indexed (base updated
/// after the access) form of a single load/store opcode. As with the
/// pre-indexed mapping, VLDR/VSTR have no base-updating forms and map to
/// single-register updating VLDM/VSTM variants chosen by the direction.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  // Thumb2 post-indexed forms encode the direction in the offset, so both
  // i8 and i12 variants map to the same opcode.
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
01046 
/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
/// a preceding add/sub becomes a pre-indexed access, a trailing one becomes
/// post-indexed. If the folded inc/dec is the instruction the caller's scan
/// iterator I points at, I is advanced past it and Advance is set.
/// Returns true if the instruction at MBBI was replaced.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only instructions whose existing immediate offset is zero can absorb a
  // base update.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    // Skip over debug values so they don't block the match.
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      // Keep the caller's iterator valid if it points at the erased inst.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        // Post-indexed forms still carry the (unused) offset register slot.
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
01188 
01189 /// isMemoryOp - Returns true if instruction is a memory operation that this
01190 /// pass is capable of operating on.
01191 static bool isMemoryOp(const MachineInstr *MI) {
01192   // When no memory operands are present, conservatively assume unaligned,
01193   // volatile, unfoldable.
01194   if (!MI->hasOneMemOperand())
01195     return false;
01196 
01197   const MachineMemOperand *MMO = *MI->memoperands_begin();
01198 
01199   // Don't touch volatile memory accesses - we may be changing their order.
01200   if (MMO->isVolatile())
01201     return false;
01202 
01203   // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
01204   // not.
01205   if (MMO->getAlignment() < 4)
01206     return false;
01207 
01208   // str <undef> could probably be eliminated entirely, but for now we just want
01209   // to avoid making a mess of it.
01210   // FIXME: Use str <undef> as a wildcard to enable better stm folding.
01211   if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
01212       MI->getOperand(0).isUndef())
01213     return false;
01214 
01215   // Likewise don't mess with references to undefined addresses.
01216   if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
01217       MI->getOperand(1).isUndef())
01218     return false;
01219 
01220   int Opcode = MI->getOpcode();
01221   switch (Opcode) {
01222   default: break;
01223   case ARM::VLDRS:
01224   case ARM::VSTRS:
01225     return MI->getOperand(1).isReg();
01226   case ARM::VLDRD:
01227   case ARM::VSTRD:
01228     return MI->getOperand(1).isReg();
01229   case ARM::LDRi12:
01230   case ARM::STRi12:
01231   case ARM::tLDRi:
01232   case ARM::tSTRi:
01233   case ARM::t2LDRi8:
01234   case ARM::t2LDRi12:
01235   case ARM::t2STRi8:
01236   case ARM::t2STRi12:
01237     return MI->getOperand(1).isReg();
01238   }
01239   return false;
01240 }
01241 
01242 /// AdvanceRS - Advance register scavenger to just before the earliest memory
01243 /// op that is being merged.
01244 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
01245   MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
01246   unsigned Position = MemOps[0].Position;
01247   for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
01248     if (MemOps[i].Position < Position) {
01249       Position = MemOps[i].Position;
01250       Loc = MemOps[i].MBBI;
01251     }
01252   }
01253 
01254   if (Loc != MBB.begin())
01255     RS->forward(std::prev(Loc));
01256 }
01257 
01258 static int getMemoryOpOffset(const MachineInstr *MI) {
01259   int Opcode = MI->getOpcode();
01260   bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
01261   unsigned NumOperands = MI->getDesc().getNumOperands();
01262   unsigned OffField = MI->getOperand(NumOperands-3).getImm();
01263 
01264   if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
01265       Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
01266       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
01267       Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
01268     return OffField;
01269 
01270   // Thumb1 immediate offsets are scaled by 4
01271   if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
01272     return OffField * 4;
01273 
01274   int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
01275     : ARM_AM::getAM5Offset(OffField) * 4;
01276   if (isAM3) {
01277     if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
01278       Offset = -Offset;
01279   } else {
01280     if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
01281       Offset = -Offset;
01282   }
01283   return Offset;
01284 }
01285 
01286 static void InsertLDR_STR(MachineBasicBlock &MBB,
01287                           MachineBasicBlock::iterator &MBBI,
01288                           int Offset, bool isDef,
01289                           DebugLoc dl, unsigned NewOpc,
01290                           unsigned Reg, bool RegDeadKill, bool RegUndef,
01291                           unsigned BaseReg, bool BaseKill, bool BaseUndef,
01292                           bool OffKill, bool OffUndef,
01293                           ARMCC::CondCodes Pred, unsigned PredReg,
01294                           const TargetInstrInfo *TII, bool isT2) {
01295   if (isDef) {
01296     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
01297                                       TII->get(NewOpc))
01298       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
01299       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
01300     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
01301   } else {
01302     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
01303                                       TII->get(NewOpc))
01304       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
01305       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
01306     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
01307   }
01308 }
01309 
/// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose transfer registers do
/// not form a legal ascending even/odd pair (or which hits the Cortex-M3
/// LDRD errata): into a two-register LDM/STM when the register numbers
/// ascend and there is no offset, otherwise into two separate word-sized
/// loads/stores. Returns true and repositions MBBI at a replacement
/// instruction when a rewrite happened, so the caller rescans from there.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    // A proper even/odd consecutive pair needs no fixing (unless the errata
    // applies).
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the transfer regs are defs (dead flag); for stores they are
    // uses (kill flag).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // Thumb2 LDRD/STRD has no separate offset-register operand.
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Emit the odd (non-clobbering) load first, then the even one.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Remove the original pair op and resume scanning at the replacement.
    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
01429 
01430 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
01431 /// ops of the same base and incrementing offset into LDM / STM ops.
01432 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
01433   unsigned NumMerges = 0;
01434   unsigned NumMemOps = 0;
01435   MemOpQueue MemOps;
01436   unsigned CurrBase = 0;
01437   int CurrOpc = -1;
01438   unsigned CurrSize = 0;
01439   ARMCC::CondCodes CurrPred = ARMCC::AL;
01440   unsigned CurrPredReg = 0;
01441   unsigned Position = 0;
01442   SmallVector<MachineBasicBlock::iterator,4> Merges;
01443 
01444   RS->enterBasicBlock(&MBB);
01445   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
01446   while (MBBI != E) {
01447     if (FixInvalidRegPairOp(MBB, MBBI))
01448       continue;
01449 
01450     bool Advance  = false;
01451     bool TryMerge = false;
01452     bool Clobber  = false;
01453 
01454     bool isMemOp = isMemoryOp(MBBI);
01455     if (isMemOp) {
01456       int Opcode = MBBI->getOpcode();
01457       unsigned Size = getLSMultipleTransferSize(MBBI);
01458       const MachineOperand &MO = MBBI->getOperand(0);
01459       unsigned Reg = MO.getReg();
01460       bool isKill = MO.isDef() ? false : MO.isKill();
01461       unsigned Base = MBBI->getOperand(1).getReg();
01462       unsigned PredReg = 0;
01463       ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
01464       int Offset = getMemoryOpOffset(MBBI);
01465       // Watch out for:
01466       // r4 := ldr [r5]
01467       // r5 := ldr [r5, #4]
01468       // r6 := ldr [r5, #8]
01469       //
01470       // The second ldr has effectively broken the chain even though it
01471       // looks like the later ldr(s) use the same base register. Try to
01472       // merge the ldr's so far, including this one. But don't try to
01473       // combine the following ldr(s).
01474       Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
01475 
01476       // Watch out for:
01477       // r4 := ldr [r0, #8]
01478       // r4 := ldr [r0, #4]
01479       //
01480       // The optimization may reorder the second ldr in front of the first
01481       // ldr, which violates write after write(WAW) dependence. The same as
01482       // str. Try to merge inst(s) already in MemOps.
01483       bool Overlap = false;
01484       for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
01485         if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
01486           Overlap = true;
01487           break;
01488         }
01489       }
01490 
01491       if (CurrBase == 0 && !Clobber) {
01492         // Start of a new chain.
01493         CurrBase = Base;
01494         CurrOpc  = Opcode;
01495         CurrSize = Size;
01496         CurrPred = Pred;
01497         CurrPredReg = PredReg;
01498         MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
01499         ++NumMemOps;
01500         Advance = true;
01501       } else if (!Overlap) {
01502         if (Clobber) {
01503           TryMerge = true;
01504           Advance = true;
01505         }
01506 
01507         if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
01508           // No need to match PredReg.
01509           // Continue adding to the queue.
01510           if (Offset > MemOps.back().Offset) {
01511             MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
01512                                              Position, MBBI));
01513             ++NumMemOps;
01514             Advance = true;
01515           } else {
01516             for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
01517                  I != E; ++I) {
01518               if (Offset < I->Offset) {
01519                 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
01520                                                  Position, MBBI));
01521                 ++NumMemOps;
01522                 Advance = true;
01523                 break;
01524               } else if (Offset == I->Offset) {
01525                 // Collision! This can't be merged!
01526                 break;
01527               }
01528             }
01529           }
01530         }
01531       }
01532     }
01533 
01534     if (MBBI->isDebugValue()) {
01535       ++MBBI;
01536       if (MBBI == E)
01537         // Reach the end of the block, try merging the memory instructions.
01538         TryMerge = true;
01539     } else if (Advance) {
01540       ++Position;
01541       ++MBBI;
01542       if (MBBI == E)
01543         // Reach the end of the block, try merging the memory instructions.
01544         TryMerge = true;
01545     } else {
01546       TryMerge = true;
01547     }
01548 
01549     if (TryMerge) {
01550       if (NumMemOps > 1) {
01551         // Try to find a free register to use as a new base in case it's needed.
01552         // First advance to the instruction just before the start of the chain.
01553         AdvanceRS(MBB, MemOps);
01554 
01555         // Find a scratch register.
01556         unsigned Scratch =
01557           RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
01558 
01559         // Process the load / store instructions.
01560         RS->forward(std::prev(MBBI));
01561 
01562         // Merge ops.
01563         Merges.clear();
01564         MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
01565                      CurrPred, CurrPredReg, Scratch, MemOps, Merges);
01566 
01567         // Try folding preceding/trailing base inc/dec into the generated
01568         // LDM/STM ops.
01569         for (unsigned i = 0, e = Merges.size(); i < e; ++i)
01570           if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
01571             ++NumMerges;
01572         NumMerges += Merges.size();
01573 
01574         // Try folding preceding/trailing base inc/dec into those load/store
01575         // that were not merged to form LDM/STM ops.
01576         for (unsigned i = 0; i != NumMemOps; ++i)
01577           if (!MemOps[i].Merged)
01578             if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
01579               ++NumMerges;
01580 
01581         // RS may be pointing to an instruction that's deleted.
01582         RS->skipTo(std::prev(MBBI));
01583       } else if (NumMemOps == 1) {
01584         // Try folding preceding/trailing base inc/dec into the single
01585         // load/store.
01586         if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
01587           ++NumMerges;
01588           RS->forward(std::prev(MBBI));
01589         }
01590       }
01591 
01592       CurrBase = 0;
01593       CurrOpc = -1;
01594       CurrSize = 0;
01595       CurrPred = ARMCC::AL;
01596       CurrPredReg = 0;
01597       if (NumMemOps) {
01598         MemOps.clear();
01599         NumMemOps = 0;
01600       }
01601 
01602       // If iterator hasn't been advanced and this is not a memory op, skip it.
01603       // It can't start a new chain anyway.
01604       if (!Advance && !isMemOp && MBBI != E) {
01605         ++Position;
01606         ++MBBI;
01607       }
01608     }
01609   }
01610   return NumMerges > 0;
01611 }
01612 
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so that it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
01623 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
01624   // Thumb1 LDM doesn't allow high registers.
01625   if (isThumb1) return false;
01626   if (MBB.empty()) return false;
01627 
01628   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
01629   if (MBBI != MBB.begin() &&
01630       (MBBI->getOpcode() == ARM::BX_RET ||
01631        MBBI->getOpcode() == ARM::tBX_RET ||
01632        MBBI->getOpcode() == ARM::MOVPCLR)) {
01633     MachineInstr *PrevMI = std::prev(MBBI);
01634     unsigned Opcode = PrevMI->getOpcode();
01635     if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
01636         Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
01637         Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
01638       MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
01639       if (MO.getReg() != ARM::LR)
01640         return false;
01641       unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
01642       assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
01643               Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
01644       PrevMI->setDesc(TII->get(NewOpc));
01645       MO.setReg(ARM::PC);
01646       PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
01647       MBB.erase(MBBI);
01648       return true;
01649     }
01650   }
01651   return false;
01652 }
01653 
01654 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
01655   const TargetMachine &TM = Fn.getTarget();
01656   TL = TM.getSubtargetImpl()->getTargetLowering();
01657   AFI = Fn.getInfo<ARMFunctionInfo>();
01658   TII = TM.getSubtargetImpl()->getInstrInfo();
01659   TRI = TM.getSubtargetImpl()->getRegisterInfo();
01660   STI = &TM.getSubtarget<ARMSubtarget>();
01661   RS = new RegScavenger();
01662   isThumb2 = AFI->isThumb2Function();
01663   isThumb1 = AFI->isThumbFunction() && !isThumb2;
01664 
01665   bool Modified = false;
01666   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
01667        ++MFI) {
01668     MachineBasicBlock &MBB = *MFI;
01669     Modified |= LoadStoreMultipleOpti(MBB);
01670     if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
01671       Modified |= MergeReturnIntoLDM(MBB);
01672   }
01673 
01674   delete RS;
01675   return Modified;
01676 }
01677 
01678 
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it
/// more likely they will be combined later.
01682 
namespace {
  /// Pre-register-allocation pass: reschedules loads / stores that share a
  /// base register so they sit next to each other, making later combining
  /// (e.g. into LDRD/STRD or LDM/STM) more likely.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached target / function info; set up in runOnMachineFunction.
    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // Decide whether the pair Op0/Op1 can be combined into a single
    // LDRD/STRD; on success fills in the new opcode, registers, encoded
    // offset and predicate via the reference out-parameters.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // Move the loads (isLd) or stores in Ops, all off register Base, next to
    // each other when safe and profitable; MI2LocMap gives each instruction's
    // original position for distance checks.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    // Scan MBB, bucket memory ops by base register, and reschedule each
    // bucket. Returns true if anything changed.
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
01716 
01717 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
01718   TD = Fn.getSubtarget().getDataLayout();
01719   TII = Fn.getSubtarget().getInstrInfo();
01720   TRI = Fn.getSubtarget().getRegisterInfo();
01721   STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
01722   MRI = &Fn.getRegInfo();
01723   MF  = &Fn;
01724 
01725   bool Modified = false;
01726   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
01727        ++MFI)
01728     Modified |= RescheduleLoadStoreInstrs(MFI);
01729 
01730   return Modified;
01731 }
01732 
01733 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
01734                                       MachineBasicBlock::iterator I,
01735                                       MachineBasicBlock::iterator E,
01736                                       SmallPtrSetImpl<MachineInstr*> &MemOps,
01737                                       SmallSet<unsigned, 4> &MemRegs,
01738                                       const TargetRegisterInfo *TRI) {
01739   // Are there stores / loads / calls between them?
01740   // FIXME: This is overly conservative. We should make use of alias information
01741   // some day.
01742   SmallSet<unsigned, 4> AddedRegPressure;
01743   while (++I != E) {
01744     if (I->isDebugValue() || MemOps.count(&*I))
01745       continue;
01746     if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
01747       return false;
01748     if (isLd && I->mayStore())
01749       return false;
01750     if (!isLd) {
01751       if (I->mayLoad())
01752         return false;
01753       // It's not safe to move the first 'str' down.
01754       // str r1, [r0]
01755       // strh r5, [r0]
01756       // str r4, [r0, #+4]
01757       if (I->mayStore())
01758         return false;
01759     }
01760     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
01761       MachineOperand &MO = I->getOperand(j);
01762       if (!MO.isReg())
01763         continue;
01764       unsigned Reg = MO.getReg();
01765       if (MO.isDef() && TRI->regsOverlap(Reg, Base))
01766         return false;
01767       if (Reg != Base && !MemRegs.count(Reg))
01768         AddedRegPressure.insert(Reg);
01769     }
01770   }
01771 
01772   // Estimate register pressure increase due to the transformation.
01773   if (MemRegs.size() <= 4)
01774     // Ok if we are moving small number of instructions.
01775     return true;
01776   return AddedRegPressure.size() <= MemRegs.size() * 2;
01777 }
01778 
01779 
01780 /// Copy Op0 and Op1 operands into a new array assigned to MI.
01781 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
01782                                    MachineInstr *Op1) {
01783   assert(MI->memoperands_empty() && "expected a new machineinstr");
01784   size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
01785     + (Op1->memoperands_end() - Op1->memoperands_begin());
01786 
01787   MachineFunction *MF = MI->getParent()->getParent();
01788   MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
01789   MachineSDNode::mmo_iterator MemEnd =
01790     std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
01791   MemEnd =
01792     std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
01793   MI->setMemRefs(MemBegin, MemEnd);
01794 }
01795 
01796 bool
01797 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
01798                                           DebugLoc &dl,
01799                                           unsigned &NewOpc, unsigned &EvenReg,
01800                                           unsigned &OddReg, unsigned &BaseReg,
01801                                           int &Offset, unsigned &PredReg,
01802                                           ARMCC::CondCodes &Pred,
01803                                           bool &isT2) {
01804   // Make sure we're allowed to generate LDRD/STRD.
01805   if (!STI->hasV5TEOps())
01806     return false;
01807 
01808   // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
01809   unsigned Scale = 1;
01810   unsigned Opcode = Op0->getOpcode();
01811   if (Opcode == ARM::LDRi12) {
01812     NewOpc = ARM::LDRD;
01813   } else if (Opcode == ARM::STRi12) {
01814     NewOpc = ARM::STRD;
01815   } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
01816     NewOpc = ARM::t2LDRDi8;
01817     Scale = 4;
01818     isT2 = true;
01819   } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
01820     NewOpc = ARM::t2STRDi8;
01821     Scale = 4;
01822     isT2 = true;
01823   } else {
01824     return false;
01825   }
01826 
01827   // Make sure the base address satisfies i64 ld / st alignment requirement.
01828   // At the moment, we ignore the memoryoperand's value.
01829   // If we want to use AliasAnalysis, we should check it accordingly.
01830   if (!Op0->hasOneMemOperand() ||
01831       (*Op0->memoperands_begin())->isVolatile())
01832     return false;
01833 
01834   unsigned Align = (*Op0->memoperands_begin())->getAlignment();
01835   const Function *Func = MF->getFunction();
01836   unsigned ReqAlign = STI->hasV6Ops()
01837     ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
01838     : 8;  // Pre-v6 need 8-byte align
01839   if (Align < ReqAlign)
01840     return false;
01841 
01842   // Then make sure the immediate offset fits.
01843   int OffImm = getMemoryOpOffset(Op0);
01844   if (isT2) {
01845     int Limit = (1 << 8) * Scale;
01846     if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
01847       return false;
01848     Offset = OffImm;
01849   } else {
01850     ARM_AM::AddrOpc AddSub = ARM_AM::add;
01851     if (OffImm < 0) {
01852       AddSub = ARM_AM::sub;
01853       OffImm = - OffImm;
01854     }
01855     int Limit = (1 << 8) * Scale;
01856     if (OffImm >= Limit || (OffImm & (Scale-1)))
01857       return false;
01858     Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
01859   }
01860   EvenReg = Op0->getOperand(0).getReg();
01861   OddReg  = Op1->getOperand(0).getReg();
01862   if (EvenReg == OddReg)
01863     return false;
01864   BaseReg = Op0->getOperand(1).getReg();
01865   Pred = getInstrPredicate(Op0, PredReg);
01866   dl = Op0->getDebugLoc();
01867   return true;
01868 }
01869 
// RescheduleOps - Given the loads (isLd) or stores in Ops, all off base
// register Base, move runs of them next to each other when safe and
// profitable. MI2LocMap maps each instruction to its original position in the
// block so distances can be estimated. Returns true if anything was moved.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Walk Ops back-to-front (i.e. in increasing offset order) and grow the
    // candidate run as long as the ops have the same kind and consecutive
    // offsets. Track the earliest/latest original positions of the run.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      // All ops in a run must map to the same load/store-multiple opcode.
      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      // Offsets must be consecutive (no gaps) with a uniform transfer size.
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      // No run found starting at the lowest offset; drop it and retry.
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores. Loads gather at
        // the position of the earliest op, stores at the latest.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Constrain the data registers to classes legal for the paired op.
          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          // No pairing possible; just splice the run to the insertion point.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
02028 
02029 bool
02030 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
02031   bool RetVal = false;
02032 
02033   DenseMap<MachineInstr*, unsigned> MI2LocMap;
02034   DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
02035   DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
02036   SmallVector<unsigned, 4> LdBases;
02037   SmallVector<unsigned, 4> StBases;
02038 
02039   unsigned Loc = 0;
02040   MachineBasicBlock::iterator MBBI = MBB->begin();
02041   MachineBasicBlock::iterator E = MBB->end();
02042   while (MBBI != E) {
02043     for (; MBBI != E; ++MBBI) {
02044       MachineInstr *MI = MBBI;
02045       if (MI->isCall() || MI->isTerminator()) {
02046         // Stop at barriers.
02047         ++MBBI;
02048         break;
02049       }
02050 
02051       if (!MI->isDebugValue())
02052         MI2LocMap[MI] = ++Loc;
02053 
02054       if (!isMemoryOp(MI))
02055         continue;
02056       unsigned PredReg = 0;
02057       if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
02058         continue;
02059 
02060       int Opc = MI->getOpcode();
02061       bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
02062       unsigned Base = MI->getOperand(1).getReg();
02063       int Offset = getMemoryOpOffset(MI);
02064 
02065       bool StopHere = false;
02066       if (isLd) {
02067         DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
02068           Base2LdsMap.find(Base);
02069         if (BI != Base2LdsMap.end()) {
02070           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
02071             if (Offset == getMemoryOpOffset(BI->second[i])) {
02072               StopHere = true;
02073               break;
02074             }
02075           }
02076           if (!StopHere)
02077             BI->second.push_back(MI);
02078         } else {
02079           Base2LdsMap[Base].push_back(MI);
02080           LdBases.push_back(Base);
02081         }
02082       } else {
02083         DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
02084           Base2StsMap.find(Base);
02085         if (BI != Base2StsMap.end()) {
02086           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
02087             if (Offset == getMemoryOpOffset(BI->second[i])) {
02088               StopHere = true;
02089               break;
02090             }
02091           }
02092           if (!StopHere)
02093             BI->second.push_back(MI);
02094         } else {
02095           Base2StsMap[Base].push_back(MI);
02096           StBases.push_back(Base);
02097         }
02098       }
02099 
02100       if (StopHere) {
02101         // Found a duplicate (a base+offset combination that's seen earlier).
02102         // Backtrack.
02103         --Loc;
02104         break;
02105       }
02106     }
02107 
02108     // Re-schedule loads.
02109     for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
02110       unsigned Base = LdBases[i];
02111       SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
02112       if (Lds.size() > 1)
02113         RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
02114     }
02115 
02116     // Re-schedule stores.
02117     for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
02118       unsigned Base = StBases[i];
02119       SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
02120       if (Sts.size() > 1)
02121         RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
02122     }
02123 
02124     if (MBBI != E) {
02125       Base2LdsMap.clear();
02126       Base2StsMap.clear();
02127       LdBases.clear();
02128       StBases.clear();
02129     }
02130   }
02131 
02132   return RetVal;
02133 }
02134 
02135 
02136 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
02137 /// optimization pass.
02138 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
02139   if (PreAlloc)
02140     return new ARMPreAllocLoadStoreOpt();
02141   return new ARMLoadStoreOpt();
02142 }