//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "aarch64-ldst-opt"

/// AArch64LoadStoreOpt - Post-register allocation pass to combine
/// load / store instructions to form ldp / stp instructions.

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");

static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// Placeholder while testing unscaled load/store combining.
static cl::opt<bool> EnableAArch64UnscaledMemOp(
    "aarch64-unscaled-mem-op", cl::Hidden,
    cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));
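// Both options above are ordinary cl::opts, so they can be flipped from the
// llc command line when experimenting with this pass; for example (flag names
// taken from the declarations above, input file hypothetical):
//
//   llc -mtriple=aarch64-linux-gnu -aarch64-load-store-scan-limit=40 in.ll
//   llc -mtriple=aarch64-linux-gnu -aarch64-unscaled-mem-op=false in.ll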
namespace {
struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;
  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {}

  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  // Return the matching instruction if one is found, else MBB->end().
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with
  // a pair-wise insn, and false if the reverse is true.
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               bool &MergeForward,
                                               unsigned Limit);
  // Merge the two instructions indicated into a single pair-wise instruction.
  // If MergeForward is true, erase the first instruction and fold its
  // operation into the second. If false, the reverse. Return the instruction
  // following the first instruction (which may change during processing).
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired, bool MergeForward);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan forwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
                                int Value);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan backwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);

  // Merge a pre-index base register update into a ld/st instruction.
  MachineBasicBlock::iterator
  mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator Update);

  // Merge a post-index base register update into a ld/st instruction.
  MachineBasicBlock::iterator
  mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
                         MachineBasicBlock::iterator Update);

  bool optimizeBlock(MachineBasicBlock &MBB);

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "AArch64 load / store optimization pass";
  }

private:
  int getMemSize(MachineInstr *MemMI);
};
char AArch64LoadStoreOpt::ID = 0;
} // namespace

static bool isUnscaledLdst(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}

// Size in bytes of the data moved by a (scaled or unscaled) load or store.
int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
  switch (MemMI->getOpcode()) {
  default:
    llvm_unreachable("Opcode has unknown size!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return 4;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return 8;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return 16;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return 4;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return 8;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return 4;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return 8;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return 16;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return 4;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return 8;
  }
}
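// For orientation, the scaled and unscaled opcodes above differ only in how
// their immediate is encoded, not in the memory access performed. A minimal
// sketch (AArch64 assembly; register names chosen arbitrarily):
//
//   ldr  x0, [x2, #8]   // LDRXui: 12-bit unsigned imm, scaled by the access
//                       // size, so the MI carries imm 1 (1 * 8 bytes).
//   ldur x0, [x2, #8]   // LDURXi: 9-bit signed imm in bytes, so the MI
//                       // carries imm 8.
//
// This is why OffsetStride below is getMemSize() for unscaled instructions
// and 1 for scaled ones.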
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  }
}

static unsigned getPreIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  }
}

static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpost;
  case AArch64::STRDui:
    return AArch64::STRDpost;
  case AArch64::STRQui:
    return AArch64::STRQpost;
  case AArch64::STRWui:
    return AArch64::STRWpost;
  case AArch64::STRXui:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
    return AArch64::LDRQpost;
  case AArch64::LDRWui:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
    return AArch64::LDRXpost;
  }
}
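// As a quick reminder of the two writeback forms these opcodes map to
// (AArch64 assembly; values are illustrative):
//
//   ldr x0, [x1, #8]!   // pre-index:  x1 += 8, then load from the new x1
//   ldr x0, [x1], #8    // post-index: load from x1, then x1 += 8
//
// Both forms update the base register in the same instruction, which is what
// lets this pass fold a neighboring add/sub of the base register away.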
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      bool MergeForward) {
  MachineBasicBlock::iterator NextI = I;
  ++NextI;
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way, merging will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    ++NextI;

  bool IsUnscaled = isUnscaledLdst(I->getOpcode());
  int OffsetStride =
      IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;

  unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // MergeForward also determines from which instruction we copy the base
  // register operand, so that we get flags compatible with the input code.
  MachineOperand &BaseRegOp =
      MergeForward ? Paired->getOperand(1) : I->getOperand(1);

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI, *Rt2MI;
  if (I->getOperand(2).getImm() ==
      Paired->getOperand(2).getImm() + OffsetStride) {
    RtMI = Paired;
    Rt2MI = I;
  } else {
    RtMI = I;
    Rt2MI = Paired;
  }
  // For an unscaled input, convert its byte offset back to an element offset
  // for the scaled pair opcode.
  int OffsetImm = RtMI->getOperand(2).getImm();
  if (IsUnscaled && EnableAArch64UnscaledMemOp)
    OffsetImm /= OffsetStride;

  // Construct the new instruction.
  MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
                                    I->getDebugLoc(), TII->get(NewOpc))
                                .addOperand(RtMI->getOperand(0))
                                .addOperand(Rt2MI->getOperand(0))
                                .addOperand(BaseRegOp)
                                .addImm(OffsetImm);
  (void)MIB;

  // FIXME: Do we need/want to copy the mem operands from the source
  // instructions? Probably. What uses them after this?

  DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  DEBUG(I->print(dbgs()));
  DEBUG(dbgs() << "    ");
  DEBUG(Paired->print(dbgs()));
  DEBUG(dbgs() << "  with instruction:\n    ");
  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}
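// A concrete sketch of the Rt/Rt2 selection above (registers and offsets are
// illustrative, not taken from any particular test):
//
//   ldr x1, [x2, #8]   ; I,      imm 1 (scaled)
//   ldr x0, [x2]       ; Paired, imm 0
//
// Here I's imm equals Paired's imm + OffsetStride (1 == 0 + 1), so Paired
// supplies Rt and I supplies Rt2, and the merged instruction is
//
//   ldp x0, x1, [x2]
//
// i.e. the register accessed at the lower address always ends up as Rt.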
/// trackRegDefsUses - Remember what registers the specified instruction uses
/// and modifies.
static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs,
                             BitVector &UsedRegs,
                             const TargetRegisterInfo *TRI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (MO.isRegMask())
      ModifiedRegs.setBitsNotInMask(MO.getRegMask());

    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (MO.isDef()) {
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        ModifiedRegs.set(*AI);
    } else {
      assert(MO.isUse() && "Reg operand not a def and not a use?!?");
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        UsedRegs.set(*AI);
    }
  }
}

static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  if (!IsUnscaled && (Offset > 63 || Offset < -64))
    return false;
  if (IsUnscaled) {
    // Convert the byte-offset used by unscaled into an "element" offset used
    // by the scaled pair load/store instructions.
    int ElemOffset = Offset / OffsetStride;
    if (ElemOffset > 63 || ElemOffset < -64)
      return false;
  }
  return true;
}

// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint64_t to int when
// using RoundUpToAlignment from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}

/// findMatchingInsn - Scan the instructions looking for a load/store that can
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      bool &MergeForward, unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr *FirstMI = I;
  ++MBBI;

  int Opc = FirstMI->getOpcode();
  bool MayLoad = FirstMI->mayLoad();
  bool IsUnscaled = isUnscaledLdst(Opc);
  unsigned Reg = FirstMI->getOperand(0).getReg();
  unsigned BaseReg = FirstMI->getOperand(1).getReg();
  int Offset = FirstMI->getOperand(2).getImm();

  // Early exit if the first instruction modifies the base register.
  // e.g., ldr x0, [x0]
  // Early exit if the offset is not possible to match. (6 bits of positive
  // range, plus allow an extra one in case we find a later insn that matches
  // with Offset-1.)
  if (FirstMI->modifiesRegister(BaseReg, TRI))
    return E;
  int OffsetStride =
      IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1;
  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return E;

  // Track which registers have been modified and used between the first insn
  // (inclusive) and the second insn.
  BitVector ModifiedRegs, UsedRegs;
  ModifiedRegs.resize(TRI->getNumRegs());
  UsedRegs.resize(TRI->getNumRegs());
  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
    MachineInstr *MI = MBBI;
    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
    // optimization by changing how far we scan.
    if (MI->isDebugValue())
      continue;

    // Now that we know this is a real instruction, count it.
    ++Count;

    if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) {
      // If we've found another instruction with the same opcode, check to see
      // if the base and offset are compatible with our starting instruction.
      // These instructions all have scaled immediate operands, so we just
      // check for +1/-1. Make sure to check the new instruction offset is
      // actually an immediate and not a symbolic reference destined for
      // a relocation.
      //
      // Pairwise instructions have a 7-bit signed offset field. Single insns
      // have a 12-bit unsigned offset field. To be a valid combine, the
      // final offset must be in range.
      unsigned MIBaseReg = MI->getOperand(1).getReg();
      int MIOffset = MI->getOperand(2).getImm();
      if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
                                   (Offset + OffsetStride == MIOffset))) {
        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        // If this is a volatile load/store that otherwise matched, stop
        // looking, as something is going on that we don't have enough
        // information to safely transform. Similarly, stop if we see a hint
        // to avoid pairs.
        if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
          return E;
        // If the resultant immediate offset of merging these instructions
        // is out of range for a pairwise instruction, bail and keep looking.
        bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
        if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          continue;
        }
        // If the alignment requirements of the paired (scaled) instruction
        // can't express the offset of the unscaled input, bail and keep
        // looking.
        if (IsUnscaled && EnableAArch64UnscaledMemOp &&
            (alignTo(MinOffset, OffsetStride) != MinOffset)) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          continue;
        }
        // If the destination register of the loads is the same register, bail
        // and keep looking. A load-pair instruction with both destination
        // registers the same is UNPREDICTABLE and will result in an exception.
        if (MayLoad && Reg == MI->getOperand(0).getReg()) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          continue;
        }

        // If the Rt of the second instruction was not modified or used between
        // the two instructions, we can combine the second into the first.
        if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
            !UsedRegs[MI->getOperand(0).getReg()]) {
          MergeForward = false;
          return MBBI;
        }

        // Likewise, if the Rt of the first instruction is not modified or used
        // between the two instructions, we can combine the first into the
        // second.
        if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
            !UsedRegs[FirstMI->getOperand(0).getReg()]) {
          MergeForward = true;
          return MBBI;
        }
        // Unable to combine these instructions due to interference in between.
        // Keep looking.
      }
    }

    // If the instruction wasn't a matching load or store, but does (or can)
    // modify memory, stop searching, as we don't have alias analysis or
    // anything like that to tell us whether the access is tromping on the
    // locations we care about. The big one we want to catch is calls.
    //
    // FIXME: Theoretically, we can do better than that for SP and FP based
    // references since we can effectively know where those are touching. It's
    // unclear if it's worth the extra code, though. Most paired instructions
    // will be sequential, perhaps with a few intervening non-memory related
    // instructions.
    if (MI->mayStore() || MI->isCall())
      return E;
    // Likewise, if we're matching a store instruction, we don't want to
    // move across a load, as it may be reading the same location.
    if (FirstMI->mayStore() && MI->mayLoad())
      return E;

    // Update modified / uses register lists.
    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (ModifiedRegs[BaseReg])
      return E;
  }
  return E;
}
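// To make the MergeForward distinction concrete, a hand-made sketch (not
// drawn from any particular test case):
//
//   ldr x0, [x2]        ; FirstMI
//   add x3, x1, #1      ; uses x1, the Rt of the second load
//   ldr x1, [x2, #8]    ; match found here
//
// The second load's Rt (x1) is used in between, so it cannot be hoisted up to
// FirstMI (MergeForward = false is rejected). FirstMI's Rt (x0) is untouched,
// so FirstMI can sink down instead (MergeForward = true), giving:
//
//   add x3, x1, #1
//   ldp x0, x1, [x2]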
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator Update) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator NextI = I;
  // Return the instruction following the merged instruction, which is
  // the instruction following our unmerged load. Unless that's the add/sub
  // instruction we're merging, in which case it's the one after that.
  if (++NextI == Update)
    ++NextI;

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = getPreIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB =
      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
          .addOperand(Update->getOperand(0))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1))
          .addImm(Value);
  (void)MIB;

  DEBUG(dbgs() << "Creating pre-indexed load/store.");
  DEBUG(dbgs() << "    Replacing instructions:\n    ");
  DEBUG(I->print(dbgs()));
  DEBUG(dbgs() << "    ");
  DEBUG(Update->print(dbgs()));
  DEBUG(dbgs() << "  with instruction:\n    ");
  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  DEBUG(dbgs() << "\n");

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}

MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn(
    MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator NextI = I;
  // Return the instruction following the merged instruction, which is
  // the instruction following our unmerged load. Unless that's the add/sub
  // instruction we're merging, in which case it's the one after that.
  if (++NextI == Update)
    ++NextI;

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = getPostIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB =
      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
          .addOperand(Update->getOperand(0))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1))
          .addImm(Value);
  (void)MIB;

  DEBUG(dbgs() << "Creating post-indexed load/store.");
  DEBUG(dbgs() << "    Replacing instructions:\n    ");
  DEBUG(I->print(dbgs()));
  DEBUG(dbgs() << "    ");
  DEBUG(Update->print(dbgs()));
  DEBUG(dbgs() << "  with instruction:\n    ");
  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  DEBUG(dbgs() << "\n");

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
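// Note the operand order both builders above produce for the writeback forms:
//   <updated base (def)>, <data register>, <base register>, <imm>
// So merging "ldr x0, [x2]" with "add x2, x2, #8" yields a post-indexed MI
// whose operands are x2 (def), x0, x2, 8 -- printed as "ldr x0, [x2], #8".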
static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg,
                                 int Offset) {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::SUBXri:
    // Negate the offset for a SUB instruction.
    Offset *= -1;
    // FALLTHROUGH
  case AArch64::ADDXri:
    // Make sure it's a vanilla immediate operand, not a relocation or
    // anything else we can't handle.
    if (!MI->getOperand(2).isImm())
      break;
    // Watch out for 1 << 12 shifted value.
    if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm()))
      break;
    // If the instruction has the base register as source and dest and the
    // immediate will fit in a signed 9-bit integer, then we have a match.
    if (MI->getOperand(0).getReg() == BaseReg &&
        MI->getOperand(1).getReg() == BaseReg &&
        MI->getOperand(2).getImm() <= 255 &&
        MI->getOperand(2).getImm() >= -256) {
      // If we have a non-zero Offset, we check that it matches the amount
      // we're adding to the register.
      if (!Offset || Offset == MI->getOperand(2).getImm())
        return true;
    }
    break;
  }
  return false;
}
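// Some illustrative cases for the matcher above, with BaseReg == x2 and
// Offset == 0 (the "any value" query used by the post-index scan):
//
//   add x2, x2, #8            ; matches
//   sub x2, x2, #16           ; matches (treated as -16)
//   add x2, x2, #1, lsl #12   ; no match: shifted immediate (adds 4096)
//   add x2, x3, #8            ; no match: source is not the base register
//   add x2, x2, #512          ; no match: outside the signed 9-bit range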
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, unsigned Limit, int Value) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr *MemMI = I;
  MachineBasicBlock::iterator MBBI = I;
  const MachineFunction &MF = *MemMI->getParent()->getParent();

  unsigned DestReg = MemMI->getOperand(0).getReg();
  unsigned BaseReg = MemMI->getOperand(1).getReg();
  int Offset = MemMI->getOperand(2).getImm() *
               TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();

  // If the base register overlaps the destination register, we can't
  // merge the update.
  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
    return E;

  // Scan forward looking for post-index opportunities.
  // Updating instructions can't be formed if the memory insn already
  // has an offset other than the value we're looking for.
  if (Offset != Value)
    return E;

  // Track which registers have been modified and used between the first insn
  // (inclusive) and the second insn.
  BitVector ModifiedRegs, UsedRegs;
  ModifiedRegs.resize(TRI->getNumRegs());
  UsedRegs.resize(TRI->getNumRegs());
  ++MBBI;
  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
    MachineInstr *MI = MBBI;
    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
    // optimization by changing how far we scan.
    if (MI->isDebugValue())
      continue;

    // Now that we know this is a real instruction, count it.
    ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(MI, BaseReg, Value))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
      return E;
  }
  return E;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr *MemMI = I;
  MachineBasicBlock::iterator MBBI = I;
  const MachineFunction &MF = *MemMI->getParent()->getParent();

  unsigned DestReg = MemMI->getOperand(0).getReg();
  unsigned BaseReg = MemMI->getOperand(1).getReg();
  int Offset = MemMI->getOperand(2).getImm();
  unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();

  // If the load/store is the first instruction in the block, there's obviously
  // not any matching update. Ditto if the memory offset isn't zero.
  if (MBBI == B || Offset != 0)
    return E;
  // If the base register overlaps the destination register, we can't
  // merge the update.
  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
    return E;

  // Track which registers have been modified and used between the first insn
  // (inclusive) and the second insn.
  BitVector ModifiedRegs, UsedRegs;
  ModifiedRegs.resize(TRI->getNumRegs());
  UsedRegs.resize(TRI->getNumRegs());
  --MBBI;
  for (unsigned Count = 0; MBBI != B && Count < Limit; --MBBI) {
    MachineInstr *MI = MBBI;
    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
    // optimization by changing how far we scan.
    if (MI->isDebugValue())
      continue;

    // Now that we know this is a real instruction, count it.
    ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(MI, BaseReg, RegSize))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
      return E;
  }
  return E;
}
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
  bool Modified = false;
  // Two transformations to do here:
  // 1) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  //      e.g.,
  //        ldr x0, [x2]
  //        ldr x1, [x2, #8]
  //        ; becomes
  //        ldp x0, x1, [x2]
  // 2) Find base register updates that can be merged into the load or store
  //    as a base-reg writeback.
  //      e.g.,
  //        ldr x0, [x2]
  //        add x2, x2, #4
  //        ; becomes
  //        ldr x0, [x2], #4

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    switch (MI->getOpcode()) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    // Do the unscaled versions as well.
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi: {
      // If this is a volatile load/store, don't mess with it.
      if (MI->hasOrderedMemoryRef()) {
        ++MBBI;
        break;
      }
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!MI->getOperand(2).isImm()) {
        ++MBBI;
        break;
      }
      // Check if this load/store has a hint to avoid pair formation.
      // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
      if (TII->isLdStPairSuppressed(MI)) {
        ++MBBI;
        break;
      }
      // Look ahead up to ScanLimit instructions for a pairable instruction.
      bool MergeForward = false;
      MachineBasicBlock::iterator Paired =
          findMatchingInsn(MBBI, MergeForward, ScanLimit);
      if (Paired != E) {
        // Merge the loads into a pair. Keeping the iterator straight is a
        // pain, so we let the merge routine tell us what the next instruction
        // is after it's done mucking about.
        MBBI = mergePairedInsns(MBBI, Paired, MergeForward);

        Modified = true;
        ++NumPairCreated;
        if (isUnscaledLdst(MI->getOpcode()))
          ++NumUnscaledPairCreated;
        break;
      }
      ++MBBI;
      break;
    }
      // FIXME: Do the other instructions.
    }
  }

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    // Do update merging. It's simpler to keep this separate from the above
    // switch, though not strictly necessary.
    int Opc = MI->getOpcode();
    switch (Opc) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    // Do the unscaled versions as well.
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi: {
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!MI->getOperand(2).isImm()) {
        ++MBBI;
        break;
      }
      // Look forward up to ScanLimit instructions for a mergeable update
      // instruction.
      MachineBasicBlock::iterator Update =
          findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergePostIdxUpdateInsn(MBBI, Update);
        Modified = true;
        ++NumPostFolded;
        break;
      }
      // Don't know how to handle pre/post-index versions of unscaled
      // instructions, so move to the next instruction.
      if (isUnscaledLdst(Opc)) {
        ++MBBI;
        break;
      }

      // Look back to try to find a pre-index instruction. For example,
      //   add x0, x0, #8
      //   ldr x1, [x0]
      // merged into:
      //   ldr x1, [x0, #8]!
      Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergePreIdxUpdateInsn(MBBI, Update);
        Modified = true;
        ++NumPreFolded;
        break;
      }

      // Look forward to try to find a base update that can still be folded
      // as a pre-index form. For example,
      //   ldr x1, [x0, #64]
      //   add x0, x0, #64
      // merged into:
      //   ldr x1, [x0, #64]!

      // The immediate in the load/store is scaled by the size of the register
      // being loaded. The immediate in the add we're looking for,
      // however, is not, so adjust here.
      int Value = MI->getOperand(2).getImm() *
                  TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent()))
                      ->getSize();
      Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergePreIdxUpdateInsn(MBBI, Update);
        Modified = true;
        ++NumPreFolded;
        break;
      }

      // Nothing found. Just move to the next instruction.
      ++MBBI;
      break;
    }
      // FIXME: Do the other instructions.
    }
  }

  return Modified;
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  TII = static_cast<const AArch64InstrInfo *>(
      TM.getSubtargetImpl()->getInstrInfo());
  TRI = TM.getSubtargetImpl()->getRegisterInfo();

  bool Modified = false;
  for (auto &MBB : Fn)
    Modified |= optimizeBlock(MBB);

  return Modified;
}

// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
// loads and stores near one another?

/// createAArch64LoadStoreOptimizationPass - returns an instance of the
/// load / store optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}
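// For context, a minimal sketch of how a target's pass configuration could
// hook this pass into the codegen pipeline. The addPreSched2 hook is an
// assumption here (modeled on AArch64TargetMachine.cpp), not something this
// file defines:
//
//   bool AArch64PassConfig::addPreSched2() {
//     if (TM->getOptLevel() != CodeGenOpt::None)
//       addPass(createAArch64LoadStoreOptimizationPass());  // post-RA only
//     return true;
//   }
//
// Running after register allocation is what makes the physical-register
// def/use scans in this file valid.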