LLVM API Documentation
00001 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file contains a pass that performs load / store related peephole 00011 // optimizations. This pass should be run after register allocation. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "ARM.h" 00016 #include "ARMBaseInstrInfo.h" 00017 #include "ARMBaseRegisterInfo.h" 00018 #include "ARMISelLowering.h" 00019 #include "ARMMachineFunctionInfo.h" 00020 #include "ARMSubtarget.h" 00021 #include "MCTargetDesc/ARMAddressingModes.h" 00022 #include "Thumb1RegisterInfo.h" 00023 #include "llvm/ADT/DenseMap.h" 00024 #include "llvm/ADT/STLExtras.h" 00025 #include "llvm/ADT/SmallPtrSet.h" 00026 #include "llvm/ADT/SmallSet.h" 00027 #include "llvm/ADT/SmallVector.h" 00028 #include "llvm/ADT/Statistic.h" 00029 #include "llvm/CodeGen/MachineBasicBlock.h" 00030 #include "llvm/CodeGen/MachineFunctionPass.h" 00031 #include "llvm/CodeGen/MachineInstr.h" 00032 #include "llvm/CodeGen/MachineInstrBuilder.h" 00033 #include "llvm/CodeGen/MachineRegisterInfo.h" 00034 #include "llvm/CodeGen/RegisterScavenging.h" 00035 #include "llvm/CodeGen/SelectionDAGNodes.h" 00036 #include "llvm/IR/DataLayout.h" 00037 #include "llvm/IR/DerivedTypes.h" 00038 #include "llvm/IR/Function.h" 00039 #include "llvm/Support/Debug.h" 00040 #include "llvm/Support/ErrorHandling.h" 00041 #include "llvm/Target/TargetInstrInfo.h" 00042 #include "llvm/Target/TargetMachine.h" 00043 #include "llvm/Target/TargetRegisterInfo.h" 00044 using namespace llvm; 00045 00046 #define DEBUG_TYPE "arm-ldst-opt" 00047 00048 STATISTIC(NumLDMGened , "Number of ldm instructions 
generated"); 00049 STATISTIC(NumSTMGened , "Number of stm instructions generated"); 00050 STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 00051 STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 00052 STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 00053 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 00054 STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 00055 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 00056 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 00057 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 00058 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 00059 00060 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine 00061 /// load / store instructions to form ldm / stm instructions. 00062 00063 namespace { 00064 struct ARMLoadStoreOpt : public MachineFunctionPass { 00065 static char ID; 00066 ARMLoadStoreOpt() : MachineFunctionPass(ID) {} 00067 00068 const TargetInstrInfo *TII; 00069 const TargetRegisterInfo *TRI; 00070 const ARMSubtarget *STI; 00071 const TargetLowering *TL; 00072 ARMFunctionInfo *AFI; 00073 RegScavenger *RS; 00074 bool isThumb1, isThumb2; 00075 00076 bool runOnMachineFunction(MachineFunction &Fn) override; 00077 00078 const char *getPassName() const override { 00079 return "ARM load / store optimization pass"; 00080 } 00081 00082 private: 00083 struct MemOpQueueEntry { 00084 int Offset; 00085 unsigned Reg; 00086 bool isKill; 00087 unsigned Position; 00088 MachineBasicBlock::iterator MBBI; 00089 bool Merged; 00090 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, 00091 MachineBasicBlock::iterator i) 00092 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} 00093 }; 00094 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 00095 typedef MemOpQueue::iterator MemOpQueueIter; 00096 00097 
void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, 00098 const MemOpQueue &MemOps, unsigned DefReg, 00099 unsigned RangeBegin, unsigned RangeEnd); 00100 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 00101 int Offset, unsigned Base, bool BaseKill, int Opcode, 00102 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, 00103 DebugLoc dl, 00104 ArrayRef<std::pair<unsigned, bool> > Regs, 00105 ArrayRef<unsigned> ImpDefs); 00106 void MergeOpsUpdate(MachineBasicBlock &MBB, 00107 MemOpQueue &MemOps, 00108 unsigned memOpsBegin, 00109 unsigned memOpsEnd, 00110 unsigned insertAfter, 00111 int Offset, 00112 unsigned Base, 00113 bool BaseKill, 00114 int Opcode, 00115 ARMCC::CondCodes Pred, 00116 unsigned PredReg, 00117 unsigned Scratch, 00118 DebugLoc dl, 00119 SmallVectorImpl<MachineBasicBlock::iterator> &Merges); 00120 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, 00121 int Opcode, unsigned Size, 00122 ARMCC::CondCodes Pred, unsigned PredReg, 00123 unsigned Scratch, MemOpQueue &MemOps, 00124 SmallVectorImpl<MachineBasicBlock::iterator> &Merges); 00125 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); 00126 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 00127 MachineBasicBlock::iterator &MBBI); 00128 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 00129 MachineBasicBlock::iterator MBBI, 00130 const TargetInstrInfo *TII, 00131 bool &Advance, 00132 MachineBasicBlock::iterator &I); 00133 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 00134 MachineBasicBlock::iterator MBBI, 00135 bool &Advance, 00136 MachineBasicBlock::iterator &I); 00137 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 00138 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 00139 }; 00140 char ARMLoadStoreOpt::ID = 0; 00141 } 00142 00143 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { 00144 switch (Opcode) { 00145 default: llvm_unreachable("Unhandled opcode!"); 00146 case 
ARM::LDRi12: 00147 ++NumLDMGened; 00148 switch (Mode) { 00149 default: llvm_unreachable("Unhandled submode!"); 00150 case ARM_AM::ia: return ARM::LDMIA; 00151 case ARM_AM::da: return ARM::LDMDA; 00152 case ARM_AM::db: return ARM::LDMDB; 00153 case ARM_AM::ib: return ARM::LDMIB; 00154 } 00155 case ARM::STRi12: 00156 ++NumSTMGened; 00157 switch (Mode) { 00158 default: llvm_unreachable("Unhandled submode!"); 00159 case ARM_AM::ia: return ARM::STMIA; 00160 case ARM_AM::da: return ARM::STMDA; 00161 case ARM_AM::db: return ARM::STMDB; 00162 case ARM_AM::ib: return ARM::STMIB; 00163 } 00164 case ARM::tLDRi: 00165 // tLDMIA is writeback-only - unless the base register is in the input 00166 // reglist. 00167 ++NumLDMGened; 00168 switch (Mode) { 00169 default: llvm_unreachable("Unhandled submode!"); 00170 case ARM_AM::ia: return ARM::tLDMIA; 00171 } 00172 case ARM::tSTRi: 00173 // There is no non-writeback tSTMIA either. 00174 ++NumSTMGened; 00175 switch (Mode) { 00176 default: llvm_unreachable("Unhandled submode!"); 00177 case ARM_AM::ia: return ARM::tSTMIA_UPD; 00178 } 00179 case ARM::t2LDRi8: 00180 case ARM::t2LDRi12: 00181 ++NumLDMGened; 00182 switch (Mode) { 00183 default: llvm_unreachable("Unhandled submode!"); 00184 case ARM_AM::ia: return ARM::t2LDMIA; 00185 case ARM_AM::db: return ARM::t2LDMDB; 00186 } 00187 case ARM::t2STRi8: 00188 case ARM::t2STRi12: 00189 ++NumSTMGened; 00190 switch (Mode) { 00191 default: llvm_unreachable("Unhandled submode!"); 00192 case ARM_AM::ia: return ARM::t2STMIA; 00193 case ARM_AM::db: return ARM::t2STMDB; 00194 } 00195 case ARM::VLDRS: 00196 ++NumVLDMGened; 00197 switch (Mode) { 00198 default: llvm_unreachable("Unhandled submode!"); 00199 case ARM_AM::ia: return ARM::VLDMSIA; 00200 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 
00201 } 00202 case ARM::VSTRS: 00203 ++NumVSTMGened; 00204 switch (Mode) { 00205 default: llvm_unreachable("Unhandled submode!"); 00206 case ARM_AM::ia: return ARM::VSTMSIA; 00207 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 00208 } 00209 case ARM::VLDRD: 00210 ++NumVLDMGened; 00211 switch (Mode) { 00212 default: llvm_unreachable("Unhandled submode!"); 00213 case ARM_AM::ia: return ARM::VLDMDIA; 00214 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 00215 } 00216 case ARM::VSTRD: 00217 ++NumVSTMGened; 00218 switch (Mode) { 00219 default: llvm_unreachable("Unhandled submode!"); 00220 case ARM_AM::ia: return ARM::VSTMDIA; 00221 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 00222 } 00223 } 00224 } 00225 00226 namespace llvm { 00227 namespace ARM_AM { 00228 00229 AMSubMode getLoadStoreMultipleSubMode(int Opcode) { 00230 switch (Opcode) { 00231 default: llvm_unreachable("Unhandled opcode!"); 00232 case ARM::LDMIA_RET: 00233 case ARM::LDMIA: 00234 case ARM::LDMIA_UPD: 00235 case ARM::STMIA: 00236 case ARM::STMIA_UPD: 00237 case ARM::tLDMIA: 00238 case ARM::tLDMIA_UPD: 00239 case ARM::tSTMIA_UPD: 00240 case ARM::t2LDMIA_RET: 00241 case ARM::t2LDMIA: 00242 case ARM::t2LDMIA_UPD: 00243 case ARM::t2STMIA: 00244 case ARM::t2STMIA_UPD: 00245 case ARM::VLDMSIA: 00246 case ARM::VLDMSIA_UPD: 00247 case ARM::VSTMSIA: 00248 case ARM::VSTMSIA_UPD: 00249 case ARM::VLDMDIA: 00250 case ARM::VLDMDIA_UPD: 00251 case ARM::VSTMDIA: 00252 case ARM::VSTMDIA_UPD: 00253 return ARM_AM::ia; 00254 00255 case ARM::LDMDA: 00256 case ARM::LDMDA_UPD: 00257 case ARM::STMDA: 00258 case ARM::STMDA_UPD: 00259 return ARM_AM::da; 00260 00261 case ARM::LDMDB: 00262 case ARM::LDMDB_UPD: 00263 case ARM::STMDB: 00264 case ARM::STMDB_UPD: 00265 case ARM::t2LDMDB: 00266 case ARM::t2LDMDB_UPD: 00267 case ARM::t2STMDB: 00268 case ARM::t2STMDB_UPD: 00269 case ARM::VLDMSDB_UPD: 00270 case ARM::VSTMSDB_UPD: 00271 case ARM::VLDMDDB_UPD: 00272 case ARM::VSTMDDB_UPD: 00273 return ARM_AM::db; 
00274 00275 case ARM::LDMIB: 00276 case ARM::LDMIB_UPD: 00277 case ARM::STMIB: 00278 case ARM::STMIB_UPD: 00279 return ARM_AM::ib; 00280 } 00281 } 00282 00283 } // end namespace ARM_AM 00284 } // end namespace llvm 00285 00286 static bool isT1i32Load(unsigned Opc) { 00287 return Opc == ARM::tLDRi; 00288 } 00289 00290 static bool isT2i32Load(unsigned Opc) { 00291 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 00292 } 00293 00294 static bool isi32Load(unsigned Opc) { 00295 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; 00296 } 00297 00298 static bool isT1i32Store(unsigned Opc) { 00299 return Opc == ARM::tSTRi; 00300 } 00301 00302 static bool isT2i32Store(unsigned Opc) { 00303 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 00304 } 00305 00306 static bool isi32Store(unsigned Opc) { 00307 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); 00308 } 00309 00310 /// MergeOps - Create and insert a LDM or STM with Base as base register and 00311 /// registers in Regs as the register operands that would be loaded / stored. 00312 /// It returns true if the transformation is done. 00313 bool 00314 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, 00315 MachineBasicBlock::iterator MBBI, 00316 int Offset, unsigned Base, bool BaseKill, 00317 int Opcode, ARMCC::CondCodes Pred, 00318 unsigned PredReg, unsigned Scratch, DebugLoc dl, 00319 ArrayRef<std::pair<unsigned, bool> > Regs, 00320 ArrayRef<unsigned> ImpDefs) { 00321 // Only a single register to load / store. Don't bother. 00322 unsigned NumRegs = Regs.size(); 00323 if (NumRegs <= 1) 00324 return false; 00325 00326 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 00327 // Compute liveness information for that register to make the decision. 
00328 bool SafeToClobberCPSR = !isThumb1 || 00329 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == 00330 MachineBasicBlock::LQR_Dead); 00331 00332 ARM_AM::AMSubMode Mode = ARM_AM::ia; 00333 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 00334 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 00335 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 00336 00337 if (Offset == 4 && haveIBAndDA) { 00338 Mode = ARM_AM::ib; 00339 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 00340 Mode = ARM_AM::da; 00341 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 00342 // VLDM/VSTM do not support DB mode without also updating the base reg. 00343 Mode = ARM_AM::db; 00344 } else if (Offset != 0) { 00345 // Check if this is a supported opcode before inserting instructions to 00346 // calculate a new base register. 00347 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; 00348 00349 // If starting offset isn't zero, insert a MI to materialize a new base. 00350 // But only do so if it is cost effective, i.e. merging more than two 00351 // loads / stores. 00352 if (NumRegs <= 2) 00353 return false; 00354 00355 // On Thumb1, it's not worth materializing a new base register without 00356 // clobbering the CPSR (i.e. not using ADDS/SUBS). 00357 if (!SafeToClobberCPSR) 00358 return false; 00359 00360 unsigned NewBase; 00361 if (isi32Load(Opcode)) { 00362 // If it is a load, then just use one of the destination register to 00363 // use as the new base. 00364 NewBase = Regs[NumRegs-1].first; 00365 } else { 00366 // Use the scratch register to use as a new base. 00367 NewBase = Scratch; 00368 if (NewBase == 0) 00369 return false; 00370 } 00371 00372 int BaseOpc = 00373 isThumb2 ? ARM::t2ADDri : 00374 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 00375 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 00376 00377 if (Offset < 0) { 00378 Offset = - Offset; 00379 BaseOpc = 00380 isThumb2 ? 
ARM::t2SUBri : 00381 (isThumb1 && Offset < 8) ? ARM::tSUBi3 : 00382 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 00383 } 00384 00385 if (!TL->isLegalAddImmediate(Offset)) 00386 // FIXME: Try add with register operand? 00387 return false; // Probably not worth it then. 00388 00389 if (isThumb1) { 00390 // Thumb1: depending on immediate size, use either 00391 // ADDS NewBase, Base, #imm3 00392 // or 00393 // MOV NewBase, Base 00394 // ADDS NewBase, #imm8. 00395 if (Base != NewBase && Offset >= 8) { 00396 // Need to insert a MOV to the new base first. 00397 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) 00398 .addReg(Base, getKillRegState(BaseKill)) 00399 .addImm(Pred).addReg(PredReg); 00400 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. 00401 Base = NewBase; 00402 BaseKill = false; 00403 } 00404 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) 00405 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 00406 .addImm(Pred).addReg(PredReg); 00407 } else { 00408 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 00409 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 00410 .addImm(Pred).addReg(PredReg).addReg(0); 00411 } 00412 Base = NewBase; 00413 BaseKill = true; // New base is always killed straight away. 00414 } 00415 00416 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || 00417 Opcode == ARM::VLDRD); 00418 00419 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 00420 // base register writeback. 00421 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 00422 if (!Opcode) return false; 00423 00424 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 00425 00426 // Exception: If the base register is in the input reglist, Thumb1 LDM is 00427 // non-writeback. Check for this. 
00428 if (Opcode == ARM::tLDMIA && isThumb1) 00429 for (unsigned I = 0; I < NumRegs; ++I) 00430 if (Base == Regs[I].first) { 00431 Writeback = false; 00432 break; 00433 } 00434 00435 // If the merged instruction has writeback and the base register is not killed 00436 // it's not safe to do the merge on Thumb1. This is because resetting the base 00437 // register writeback by inserting a SUBS sets the condition flags. 00438 // FIXME: Try something clever here to see if resetting the base register can 00439 // be avoided, e.g. by updating a later ADD/SUB of the base register with the 00440 // writeback. 00441 if (isThumb1 && Writeback && !BaseKill) return false; 00442 00443 MachineInstrBuilder MIB; 00444 00445 if (Writeback) { 00446 if (Opcode == ARM::tLDMIA) 00447 // Update tLDMIA with writeback if necessary. 00448 Opcode = ARM::tLDMIA_UPD; 00449 00450 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); 00451 00452 // Thumb1: we might need to set base writeback when building the MI. 00453 MIB.addReg(Base, getDefRegState(true)) 00454 .addReg(Base, getKillRegState(BaseKill)); 00455 } else { 00456 // No writeback, simply build the MachineInstr. 00457 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); 00458 MIB.addReg(Base, getKillRegState(BaseKill)); 00459 } 00460 00461 MIB.addImm(Pred).addReg(PredReg); 00462 00463 for (unsigned i = 0; i != NumRegs; ++i) 00464 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) 00465 | getKillRegState(Regs[i].second)); 00466 00467 // Add implicit defs for super-registers. 00468 for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i) 00469 MIB.addReg(ImpDefs[i], RegState::ImplicitDefine); 00470 00471 return true; 00472 } 00473 00474 /// \brief Find all instructions using a given imp-def within a range. 00475 /// 00476 /// We are trying to combine a range of instructions, one of which (located at 00477 /// position RangeBegin) implicitly defines a register. 
The final LDM/STM will 00478 /// be placed at RangeEnd, and so any uses of this definition between RangeStart 00479 /// and RangeEnd must be modified to use an undefined value. 00480 /// 00481 /// The live range continues until we find a second definition or one of the 00482 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so 00483 /// we must consider all uses and decide which are relevant in a second pass. 00484 void ARMLoadStoreOpt::findUsesOfImpDef( 00485 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps, 00486 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) { 00487 std::map<unsigned, MachineOperand *> Uses; 00488 unsigned LastLivePos = RangeEnd; 00489 00490 // First we find all uses of this register with Position between RangeBegin 00491 // and RangeEnd, any or all of these could be uses of a definition at 00492 // RangeBegin. We also record the latest position a definition at RangeBegin 00493 // would be considered live. 00494 for (unsigned i = 0; i < MemOps.size(); ++i) { 00495 MachineInstr &MI = *MemOps[i].MBBI; 00496 unsigned MIPosition = MemOps[i].Position; 00497 if (MIPosition <= RangeBegin || MIPosition > RangeEnd) 00498 continue; 00499 00500 // If this instruction defines the register, then any later use will be of 00501 // that definition rather than ours. 00502 if (MI.definesRegister(DefReg)) 00503 LastLivePos = std::min(LastLivePos, MIPosition); 00504 00505 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg); 00506 if (!UseOp) 00507 continue; 00508 00509 // If this instruction kills the register then (assuming liveness is 00510 // correct when we start) we don't need to think about anything after here. 00511 if (UseOp->isKill()) 00512 LastLivePos = std::min(LastLivePos, MIPosition); 00513 00514 Uses[MIPosition] = UseOp; 00515 } 00516 00517 // Now we traverse the list of all uses, and append the ones that actually use 00518 // our definition to the requested list. 
00519 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(), 00520 E = Uses.end(); 00521 I != E; ++I) { 00522 // List is sorted by position so once we've found one out of range there 00523 // will be no more to consider. 00524 if (I->first > LastLivePos) 00525 break; 00526 UsesOfImpDefs.push_back(I->second); 00527 } 00528 } 00529 00530 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on 00531 // success. 00532 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, 00533 MemOpQueue &memOps, 00534 unsigned memOpsBegin, unsigned memOpsEnd, 00535 unsigned insertAfter, int Offset, 00536 unsigned Base, bool BaseKill, 00537 int Opcode, 00538 ARMCC::CondCodes Pred, unsigned PredReg, 00539 unsigned Scratch, 00540 DebugLoc dl, 00541 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { 00542 // First calculate which of the registers should be killed by the merged 00543 // instruction. 00544 const unsigned insertPos = memOps[insertAfter].Position; 00545 SmallSet<unsigned, 4> KilledRegs; 00546 DenseMap<unsigned, unsigned> Killer; 00547 for (unsigned i = 0, e = memOps.size(); i != e; ++i) { 00548 if (i == memOpsBegin) { 00549 i = memOpsEnd; 00550 if (i == e) 00551 break; 00552 } 00553 if (memOps[i].Position < insertPos && memOps[i].isKill) { 00554 unsigned Reg = memOps[i].Reg; 00555 KilledRegs.insert(Reg); 00556 Killer[Reg] = i; 00557 } 00558 } 00559 00560 SmallVector<std::pair<unsigned, bool>, 8> Regs; 00561 SmallVector<unsigned, 8> ImpDefs; 00562 SmallVector<MachineOperand *, 8> UsesOfImpDefs; 00563 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 00564 unsigned Reg = memOps[i].Reg; 00565 // If we are inserting the merged operation after an operation that 00566 // uses the same register, make sure to transfer any kill flag. 00567 bool isKill = memOps[i].isKill || KilledRegs.count(Reg); 00568 Regs.push_back(std::make_pair(Reg, isKill)); 00569 00570 // Collect any implicit defs of super-registers. They must be preserved. 
00571 for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) { 00572 if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead()) 00573 continue; 00574 unsigned DefReg = MO->getReg(); 00575 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) 00576 ImpDefs.push_back(DefReg); 00577 00578 // There may be other uses of the definition between this instruction and 00579 // the eventual LDM/STM position. These should be marked undef if the 00580 // merge takes place. 00581 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, 00582 insertPos); 00583 } 00584 } 00585 00586 // Try to do the merge. 00587 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; 00588 ++Loc; 00589 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, 00590 Pred, PredReg, Scratch, dl, Regs, ImpDefs)) 00591 return; 00592 00593 // Merge succeeded, update records. 00594 Merges.push_back(std::prev(Loc)); 00595 00596 // In gathering loads together, we may have moved the imp-def of a register 00597 // past one of its uses. This is OK, since we know better than the rest of 00598 // LLVM what's OK with ARM loads and stores; but we still have to adjust the 00599 // affected uses. 00600 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(), 00601 E = UsesOfImpDefs.end(); 00602 I != E; ++I) 00603 (*I)->setIsUndef(); 00604 00605 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 00606 // Remove kill flags from any memops that come before insertPos. 
00607 if (Regs[i-memOpsBegin].second) { 00608 unsigned Reg = Regs[i-memOpsBegin].first; 00609 if (KilledRegs.count(Reg)) { 00610 unsigned j = Killer[Reg]; 00611 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); 00612 assert(Idx >= 0 && "Cannot find killing operand"); 00613 memOps[j].MBBI->getOperand(Idx).setIsKill(false); 00614 memOps[j].isKill = false; 00615 } 00616 memOps[i].isKill = true; 00617 } 00618 MBB.erase(memOps[i].MBBI); 00619 // Update this memop to refer to the merged instruction. 00620 // We may need to move kill flags again. 00621 memOps[i].Merged = true; 00622 memOps[i].MBBI = Merges.back(); 00623 memOps[i].Position = insertPos; 00624 } 00625 } 00626 00627 /// MergeLDR_STR - Merge a number of load / store instructions into one or more 00628 /// load / store multiple instructions. 00629 void 00630 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 00631 unsigned Base, int Opcode, unsigned Size, 00632 ARMCC::CondCodes Pred, unsigned PredReg, 00633 unsigned Scratch, MemOpQueue &MemOps, 00634 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { 00635 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 00636 int Offset = MemOps[SIndex].Offset; 00637 int SOffset = Offset; 00638 unsigned insertAfter = SIndex; 00639 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 00640 DebugLoc dl = Loc->getDebugLoc(); 00641 const MachineOperand &PMO = Loc->getOperand(0); 00642 unsigned PReg = PMO.getReg(); 00643 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); 00644 unsigned Count = 1; 00645 unsigned Limit = ~0U; 00646 bool BaseKill = false; 00647 // vldm / vstm limit are 32 for S variants, 16 for D variants. 
00648 00649 switch (Opcode) { 00650 default: break; 00651 case ARM::VSTRS: 00652 Limit = 32; 00653 break; 00654 case ARM::VSTRD: 00655 Limit = 16; 00656 break; 00657 case ARM::VLDRD: 00658 Limit = 16; 00659 break; 00660 case ARM::VLDRS: 00661 Limit = 32; 00662 break; 00663 } 00664 00665 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { 00666 int NewOffset = MemOps[i].Offset; 00667 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); 00668 unsigned Reg = MO.getReg(); 00669 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); 00670 // Register numbers must be in ascending order. For VFP / NEON load and 00671 // store multiples, the registers must also be consecutive and within the 00672 // limit on the number of registers per instruction. 00673 if (Reg != ARM::SP && 00674 NewOffset == Offset + (int)Size && 00675 ((isNotVFP && RegNum > PRegNum) || 00676 ((Count < Limit) && RegNum == PRegNum+1)) && 00677 // On Swift we don't want vldm/vstm to start with a odd register num 00678 // because Q register unaligned vldm/vstm need more uops. 00679 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) { 00680 Offset += Size; 00681 PRegNum = RegNum; 00682 ++Count; 00683 } else { 00684 // Can't merge this in. Try merge the earlier ones first. 00685 // We need to compute BaseKill here because the MemOps may have been 00686 // reordered. 
00687 BaseKill = Loc->killsRegister(Base); 00688 00689 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, 00690 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 00691 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, 00692 MemOps, Merges); 00693 return; 00694 } 00695 00696 if (MemOps[i].Position > MemOps[insertAfter].Position) { 00697 insertAfter = i; 00698 Loc = MemOps[i].MBBI; 00699 } 00700 } 00701 00702 BaseKill = Loc->killsRegister(Base); 00703 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, 00704 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 00705 } 00706 00707 static bool definesCPSR(MachineInstr *MI) { 00708 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 00709 const MachineOperand &MO = MI->getOperand(i); 00710 if (!MO.isReg()) 00711 continue; 00712 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) 00713 // If the instruction has live CPSR def, then it's not safe to fold it 00714 // into load / store. 00715 return true; 00716 } 00717 00718 return false; 00719 } 00720 00721 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 00722 unsigned Bytes, unsigned Limit, 00723 ARMCC::CondCodes Pred, unsigned PredReg) { 00724 unsigned MyPredReg = 0; 00725 if (!MI) 00726 return false; 00727 00728 bool CheckCPSRDef = false; 00729 switch (MI->getOpcode()) { 00730 default: return false; 00731 case ARM::tSUBi8: 00732 case ARM::t2SUBri: 00733 case ARM::SUBri: 00734 CheckCPSRDef = true; 00735 // fallthrough 00736 case ARM::tSUBspi: 00737 break; 00738 } 00739 00740 // Make sure the offset fits in 8 bits. 00741 if (Bytes == 0 || (Limit && Bytes >= Limit)) 00742 return false; 00743 00744 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || 00745 MI->getOpcode() == ARM::tSUBi8) ? 
4 : 1; // FIXME 00746 if (!(MI->getOperand(0).getReg() == Base && 00747 MI->getOperand(1).getReg() == Base && 00748 (MI->getOperand(2).getImm() * Scale) == Bytes && 00749 getInstrPredicate(MI, MyPredReg) == Pred && 00750 MyPredReg == PredReg)) 00751 return false; 00752 00753 return CheckCPSRDef ? !definesCPSR(MI) : true; 00754 } 00755 00756 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 00757 unsigned Bytes, unsigned Limit, 00758 ARMCC::CondCodes Pred, unsigned PredReg) { 00759 unsigned MyPredReg = 0; 00760 if (!MI) 00761 return false; 00762 00763 bool CheckCPSRDef = false; 00764 switch (MI->getOpcode()) { 00765 default: return false; 00766 case ARM::tADDi8: 00767 case ARM::t2ADDri: 00768 case ARM::ADDri: 00769 CheckCPSRDef = true; 00770 // fallthrough 00771 case ARM::tADDspi: 00772 break; 00773 } 00774 00775 if (Bytes == 0 || (Limit && Bytes >= Limit)) 00776 // Make sure the offset fits in 8 bits. 00777 return false; 00778 00779 unsigned Scale = (MI->getOpcode() == ARM::tADDspi || 00780 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME 00781 if (!(MI->getOperand(0).getReg() == Base && 00782 MI->getOperand(1).getReg() == Base && 00783 (MI->getOperand(2).getImm() * Scale) == Bytes && 00784 getInstrPredicate(MI, MyPredReg) == Pred && 00785 MyPredReg == PredReg)) 00786 return false; 00787 00788 return CheckCPSRDef ? 
                           // (tail of a predicate whose body starts before this chunk)
                           !definesCPSR(MI) : true;
}

/// getLSMultipleTransferSize - Return the number of bytes of memory this
/// load/store moves, or 0 for opcodes the pass does not recognize.  For
/// load/store-multiple opcodes the size is derived from the length of the
/// register list: the variadic operands beyond the fixed MCInstrDesc
/// operands, times the element size (4 for GPR/S-reg lists, 8 for D-regs).
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// getUpdatingLSMultipleOpcode - Map a non-writeback load/store-multiple
/// opcode plus the desired addressing submode (ia/ib/da/db) onto the
/// corresponding base-updating (_UPD) opcode.  Unsupported combinations
/// (e.g. Thumb2 or VFP with ib/da) are unreachable by construction.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
///
/// On success the matched ADD/SUB and the original multiple op are erased and
/// replaced with a single writeback instruction.  \p Advance and \p I are
/// updated when the erased instruction is the caller's scan position.
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 bool &Advance,
                                 MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.  A preceding decrement can be
  // folded by flipping the submode (ia -> db, ib -> da).
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.  A following update in the same
  // direction as the submode can be folded without changing the submode.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands (the register list; the original's fixed
  // operands base/pred/predreg occupy indices 0-2).
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// getPreIndexedLoadStoreOpcode - Return the pre-indexed form of \p Opc.
/// VLDR/VSTR have no pre-indexed form, so they map to single-register
/// writeback load/store-multiple opcodes instead.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// getPostIndexedLoadStoreOpcode - Return the post-indexed form of \p Opc.
/// As above, VLDR/VSTR map to single-register writeback multiples.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
/// a preceding update becomes a pre-indexed access, a following update a
/// post-indexed one.  Only zero-offset accesses are candidates.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const TargetInstrInfo *TII,
                                     bool &Advance,
                                     MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.  (AM5/VFP requires an exact match, hence
  // Limit 0.)
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction -> pre-indexed form.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction -> post-indexed form.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    // Register (not frame-index) base only.
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  // Find the queue entry with the smallest Position, i.e. the earliest
  // instruction in block order.
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}

/// getMemoryOpOffset - Decode the signed byte offset from MI's immediate
/// offset operand (always the third-from-last operand).  Thumb1 offsets are
/// stored scaled by 4; AM3 (LDRD/STRD) and AM5 (VLDR/VSTR) offsets carry an
/// add/sub bit in their encoding and AM5 offsets are in words.
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

/// InsertLDR_STR - Emit a single immediate-offset load (isDef) or store
/// before MBBI, carrying over the given kill/dead/undef flags and predicate.
/// Helper for splitting an LDRD/STRD into two word-sized accesses.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose register pair is not a
/// legal ascending even/odd pair (or which trips Cortex-M3 errata 602117)
/// into either an LDM/STM (ascending regs, zero offset) or two single-word
/// loads/stores.  Returns true and repositions MBBI if a rewrite happened.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Emit the odd-register load first so the base survives for it.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;              // Current chain of mergeable memory ops.
  unsigned CurrBase = 0;          // Base register of the current chain (0 = none).
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;          // Monotonic instruction index within the block.
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  // Keep the register scavenger in sync while walking the block.
  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Expand illegal LDRD/STRD first; it may replace the current instruction.
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write after write(WAW) dependence. The same as
      // str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue, keeping MemOps sorted by offset.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      // Reset chain-tracking state for the next chain.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restore the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // LR must be the last register in the popped list.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  TL = TM.getSubtargetImpl()->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getSubtargetImpl()->getInstrInfo();
  TRI = TM.getSubtargetImpl()->getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    // Return merging requires LDM-to-PC semantics, available from v5T up.
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
/// load / stores from consecutive locations close to make it more
/// likely they will be combined later.

namespace {
  /// Pre-regalloc pass that reschedules loads/stores off the same base
  /// register next to each other so the post-regalloc pass above can merge
  /// them into LDRD/STRD or LDM/STM.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached per-function state, initialized in runOnMachineFunction.
    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getSubtarget().getDataLayout();
  TII = Fn.getSubtarget().getInstrInfo();
  TRI = Fn.getSubtarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

/// IsSafeAndProfitableToMove - Check the instructions in (I, E) for anything
/// that forbids moving the memory ops in MemOps next to each other (calls,
/// terminators, intervening stores/loads, redefinition of the base register),
/// and estimate the register-pressure cost of doing so.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  // Allocate the combined memoperand array out of the function's allocator,
  // then copy Op0's and Op1's memoperands into it back to back.
  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

/// CanFormLdStDWord - Decide whether the pair of word loads/stores Op0/Op1
/// can be combined into one LDRD/STRD (or Thumb2 t2LDRDi8/t2STRDi8).  On
/// success fills in the new opcode, register pair, base, encoded offset,
/// predicate and debug location via the out-parameters.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;  // t2 doubleword immediates step in units of 4 bytes.
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM-mode LDRD/STRD encodes magnitude plus an add/sub bit (AM3).
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order.
Scan them from first to 01886 // last and check for the following: 01887 // 1. Any def of base. 01888 // 2. Any gaps. 01889 while (Ops.size() > 1) { 01890 unsigned FirstLoc = ~0U; 01891 unsigned LastLoc = 0; 01892 MachineInstr *FirstOp = nullptr; 01893 MachineInstr *LastOp = nullptr; 01894 int LastOffset = 0; 01895 unsigned LastOpcode = 0; 01896 unsigned LastBytes = 0; 01897 unsigned NumMove = 0; 01898 for (int i = Ops.size() - 1; i >= 0; --i) { 01899 MachineInstr *Op = Ops[i]; 01900 unsigned Loc = MI2LocMap[Op]; 01901 if (Loc <= FirstLoc) { 01902 FirstLoc = Loc; 01903 FirstOp = Op; 01904 } 01905 if (Loc >= LastLoc) { 01906 LastLoc = Loc; 01907 LastOp = Op; 01908 } 01909 01910 unsigned LSMOpcode 01911 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); 01912 if (LastOpcode && LSMOpcode != LastOpcode) 01913 break; 01914 01915 int Offset = getMemoryOpOffset(Op); 01916 unsigned Bytes = getLSMultipleTransferSize(Op); 01917 if (LastBytes) { 01918 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 01919 break; 01920 } 01921 LastOffset = Offset; 01922 LastBytes = Bytes; 01923 LastOpcode = LSMOpcode; 01924 if (++NumMove == 8) // FIXME: Tune this limit. 01925 break; 01926 } 01927 01928 if (NumMove <= 1) 01929 Ops.pop_back(); 01930 else { 01931 SmallPtrSet<MachineInstr*, 4> MemOps; 01932 SmallSet<unsigned, 4> MemRegs; 01933 for (int i = NumMove-1; i >= 0; --i) { 01934 MemOps.insert(Ops[i]); 01935 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 01936 } 01937 01938 // Be conservative, if the instructions are too far apart, don't 01939 // move them. We want to limit the increase of register pressure. 01940 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 01941 if (DoMove) 01942 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 01943 MemOps, MemRegs, TRI); 01944 if (!DoMove) { 01945 for (unsigned i = 0; i != NumMove; ++i) 01946 Ops.pop_back(); 01947 } else { 01948 // This is the new location for the loads / stores. 
01949 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 01950 while (InsertPos != MBB->end() 01951 && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) 01952 ++InsertPos; 01953 01954 // If we are moving a pair of loads / stores, see if it makes sense 01955 // to try to allocate a pair of registers that can form register pairs. 01956 MachineInstr *Op0 = Ops.back(); 01957 MachineInstr *Op1 = Ops[Ops.size()-2]; 01958 unsigned EvenReg = 0, OddReg = 0; 01959 unsigned BaseReg = 0, PredReg = 0; 01960 ARMCC::CondCodes Pred = ARMCC::AL; 01961 bool isT2 = false; 01962 unsigned NewOpc = 0; 01963 int Offset = 0; 01964 DebugLoc dl; 01965 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 01966 EvenReg, OddReg, BaseReg, 01967 Offset, PredReg, Pred, isT2)) { 01968 Ops.pop_back(); 01969 Ops.pop_back(); 01970 01971 const MCInstrDesc &MCID = TII->get(NewOpc); 01972 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); 01973 MRI->constrainRegClass(EvenReg, TRC); 01974 MRI->constrainRegClass(OddReg, TRC); 01975 01976 // Form the pair instruction. 01977 if (isLd) { 01978 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 01979 .addReg(EvenReg, RegState::Define) 01980 .addReg(OddReg, RegState::Define) 01981 .addReg(BaseReg); 01982 // FIXME: We're converting from LDRi12 to an insn that still 01983 // uses addrmode2, so we need an explicit offset reg. It should 01984 // always by reg0 since we're transforming LDRi12s. 01985 if (!isT2) 01986 MIB.addReg(0); 01987 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 01988 concatenateMemOperands(MIB, Op0, Op1); 01989 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 01990 ++NumLDRDFormed; 01991 } else { 01992 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 01993 .addReg(EvenReg) 01994 .addReg(OddReg) 01995 .addReg(BaseReg); 01996 // FIXME: We're converting from LDRi12 to an insn that still 01997 // uses addrmode2, so we need an explicit offset reg. 
It should 01998 // always by reg0 since we're transforming STRi12s. 01999 if (!isT2) 02000 MIB.addReg(0); 02001 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 02002 concatenateMemOperands(MIB, Op0, Op1); 02003 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 02004 ++NumSTRDFormed; 02005 } 02006 MBB->erase(Op0); 02007 MBB->erase(Op1); 02008 02009 // Add register allocation hints to form register pairs. 02010 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); 02011 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); 02012 } else { 02013 for (unsigned i = 0; i != NumMove; ++i) { 02014 MachineInstr *Op = Ops.back(); 02015 Ops.pop_back(); 02016 MBB->splice(InsertPos, MBB, Op); 02017 } 02018 } 02019 02020 NumLdStMoved += NumMove; 02021 RetVal = true; 02022 } 02023 } 02024 } 02025 02026 return RetVal; 02027 } 02028 02029 bool 02030 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 02031 bool RetVal = false; 02032 02033 DenseMap<MachineInstr*, unsigned> MI2LocMap; 02034 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 02035 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 02036 SmallVector<unsigned, 4> LdBases; 02037 SmallVector<unsigned, 4> StBases; 02038 02039 unsigned Loc = 0; 02040 MachineBasicBlock::iterator MBBI = MBB->begin(); 02041 MachineBasicBlock::iterator E = MBB->end(); 02042 while (MBBI != E) { 02043 for (; MBBI != E; ++MBBI) { 02044 MachineInstr *MI = MBBI; 02045 if (MI->isCall() || MI->isTerminator()) { 02046 // Stop at barriers. 
02047 ++MBBI; 02048 break; 02049 } 02050 02051 if (!MI->isDebugValue()) 02052 MI2LocMap[MI] = ++Loc; 02053 02054 if (!isMemoryOp(MI)) 02055 continue; 02056 unsigned PredReg = 0; 02057 if (getInstrPredicate(MI, PredReg) != ARMCC::AL) 02058 continue; 02059 02060 int Opc = MI->getOpcode(); 02061 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 02062 unsigned Base = MI->getOperand(1).getReg(); 02063 int Offset = getMemoryOpOffset(MI); 02064 02065 bool StopHere = false; 02066 if (isLd) { 02067 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 02068 Base2LdsMap.find(Base); 02069 if (BI != Base2LdsMap.end()) { 02070 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 02071 if (Offset == getMemoryOpOffset(BI->second[i])) { 02072 StopHere = true; 02073 break; 02074 } 02075 } 02076 if (!StopHere) 02077 BI->second.push_back(MI); 02078 } else { 02079 Base2LdsMap[Base].push_back(MI); 02080 LdBases.push_back(Base); 02081 } 02082 } else { 02083 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 02084 Base2StsMap.find(Base); 02085 if (BI != Base2StsMap.end()) { 02086 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 02087 if (Offset == getMemoryOpOffset(BI->second[i])) { 02088 StopHere = true; 02089 break; 02090 } 02091 } 02092 if (!StopHere) 02093 BI->second.push_back(MI); 02094 } else { 02095 Base2StsMap[Base].push_back(MI); 02096 StBases.push_back(Base); 02097 } 02098 } 02099 02100 if (StopHere) { 02101 // Found a duplicate (a base+offset combination that's seen earlier). 02102 // Backtrack. 02103 --Loc; 02104 break; 02105 } 02106 } 02107 02108 // Re-schedule loads. 02109 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 02110 unsigned Base = LdBases[i]; 02111 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; 02112 if (Lds.size() > 1) 02113 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 02114 } 02115 02116 // Re-schedule stores. 
02117 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 02118 unsigned Base = StBases[i]; 02119 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; 02120 if (Sts.size() > 1) 02121 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 02122 } 02123 02124 if (MBBI != E) { 02125 Base2LdsMap.clear(); 02126 Base2StsMap.clear(); 02127 LdBases.clear(); 02128 StBases.clear(); 02129 } 02130 } 02131 02132 return RetVal; 02133 } 02134 02135 02136 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store 02137 /// optimization pass. 02138 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 02139 if (PreAlloc) 02140 return new ARMPreAllocLoadStoreOpt(); 02141 return new ARMLoadStoreOpt(); 02142 }