//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

#define DEBUG_TYPE "arm-instrinfo"

#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

static cl::opt<bool>
WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
           cl::desc("Widen ARM vmovs to vmovd when possible"));

static cl::opt<unsigned>
SwiftPartialUpdateClearance("swift-partial-update-clearance",
                            cl::Hidden, cl::init(12),
                            cl::desc("Clearance before partial register updates"));

/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  uint16_t MLxOpc;    // MLA / MLS opcode
  uint16_t MulOpc;    // Expanded multiplication opcode
  uint16_t AddSubOpc; // Expanded add / sub opcode
  bool NegAcc;        // True if the acc is negated before the add / sub.
  bool HasLane;       // True if instruction has an extra "lane" operand.
};

static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,        MulOpc,         AddSubOpc,     NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,     ARM::VMULS,     ARM::VADDS,    false,  false },
  { ARM::VMLSS,     ARM::VMULS,     ARM::VSUBS,    false,  false },
  { ARM::VMLAD,     ARM::VMULD,     ARM::VADDD,    false,  false },
  { ARM::VMLSD,     ARM::VMULD,     ARM::VSUBD,    false,  false },
  { ARM::VNMLAS,    ARM::VNMULS,    ARM::VSUBS,    true,   false },
  { ARM::VNMLSS,    ARM::VMULS,     ARM::VSUBS,    true,   false },
  { ARM::VNMLAD,    ARM::VNMULD,    ARM::VSUBD,    true,   false },
  { ARM::VNMLSD,    ARM::VMULD,     ARM::VSUBD,    true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,    ARM::VMULfd,    ARM::VADDfd,   false,  false },
  { ARM::VMLSfd,    ARM::VMULfd,    ARM::VSUBfd,   false,  false },
  { ARM::VMLAfq,    ARM::VMULfq,    ARM::VADDfq,   false,  false },
  { ARM::VMLSfq,    ARM::VMULfq,    ARM::VSUBfq,   false,  false },
  { ARM::VMLAslfd,  ARM::VMULslfd,  ARM::VADDfd,   false,  true  },
  { ARM::VMLSslfd,  ARM::VMULslfd,  ARM::VSUBfd,   false,  true  },
  { ARM::VMLAslfq,  ARM::VMULslfq,  ARM::VADDfq,   false,  true  },
  { ARM::VMLSslfq,  ARM::VMULslfq,  ARM::VSUBfq,   false,  true  },
};

ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
    Subtarget(STI) {
  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      assert(false && "Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
  }
}

// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *
ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                               const ScheduleDAG *DAG) const {
  if (usePreRAHazardRecognizer()) {
    const InstrItineraryData *II =
        static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
  }
  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}

ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                   const ScheduleDAG *DAG) const {
  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
    return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}

MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return nullptr;

  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return nullptr;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
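  // Illustrative sketch (not code from this function): a pre-indexed load
  //   ldr r0, [r1, #4]!    ; load and write back r1 = r1 + 4
  // splits into
  //   add r1, r1, #4
  //   ldr r0, [r1]
  // whereas a post-indexed form reads from the old base first and performs
  // the add afterwards.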
  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
  if (MemOpc == 0)
    return nullptr;

  MachineInstr *UpdateMI = nullptr;
  MachineInstr *MemMI = nullptr;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI->getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MI->mayStore();
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI->getOperand(NumOps-2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
  switch (AddrMode) {
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return nullptr;
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
    } else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  case ARMII::AddrMode3: {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  }

  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}

// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  TBB = nullptr;
  FBB = nullptr;

  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false; // Empty blocks are easy.
  --I;

  // Walk backwards from the end of the basic block until the branch is
  // analyzed or we give up.
  while (isPredicated(I) || I->isTerminator() || I->isDebugValue()) {

    // Flag to be raised on unanalyzeable instructions. This is useful in cases
    // where we want to clean up on the end of the basic block before we bail
    // out.
    bool CantAnalyze = false;

    // Skip over DEBUG values and predicated nonterminators.
    while (I->isDebugValue() || !I->isTerminator()) {
      if (I == MBB.begin())
        return false;
      --I;
    }

    if (isIndirectBranchOpcode(I->getOpcode()) ||
        isJumpTableBranchOpcode(I->getOpcode())) {
      // Indirect branches and jump tables can't be analyzed, but we still want
      // to clean up any instructions at the tail of the basic block.
      CantAnalyze = true;
    } else if (isUncondBranchOpcode(I->getOpcode())) {
      TBB = I->getOperand(0).getMBB();
    } else if (isCondBranchOpcode(I->getOpcode())) {
      // Bail out if we encounter multiple conditional branches.
      if (!Cond.empty())
        return true;

      assert(!FBB && "FBB should have been null.");
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(I->getOperand(1));
      Cond.push_back(I->getOperand(2));
    } else if (I->isReturn()) {
      // Returns can't be analyzed, but we should run cleanup.
      CantAnalyze = !isPredicated(I);
    } else {
      // We encountered an unrecognized terminator. Bail out immediately.
      return true;
    }

    // Cleanup code - to be run for unpredicated unconditional branches and
    // returns.
    if (!isPredicated(I) &&
        (isUncondBranchOpcode(I->getOpcode()) ||
         isIndirectBranchOpcode(I->getOpcode()) ||
         isJumpTableBranchOpcode(I->getOpcode()) ||
         I->isReturn())) {
      // Forget any previous condition branch information - it no longer
      // applies.
      Cond.clear();
      FBB = nullptr;

      // If we can modify the function, delete everything below this
      // unconditional branch.
      if (AllowModify) {
        MachineBasicBlock::iterator DI = std::next(I);
        while (DI != MBB.end()) {
          MachineInstr *InstToDelete = DI;
          ++DI;
          InstToDelete->eraseFromParent();
        }
      }
    }

    if (CantAnalyze)
      return true;

    if (I == MBB.begin())
      return false;

    --I;
  }

  // We made it past the terminators without bailing out - we must have
  // analyzed this branch successfully.
  return false;
}


unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc = !AFI->isThumbFunction()
    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();

  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "ARM branch conditions have two components!");

  if (!FBB) {
    if (Cond.empty()) { // Unconditional branch?
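      // Thumb unconditional branches still carry predicate operands, so give
      // them an AL condition with no condition register; plain ARM::B takes
      // only the target block.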
      if (isThumb)
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
      else
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    } else
      BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
    return 1;
  }

  // Two-way conditional branch.
  BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
  else
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}

bool ARMBaseInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
  return false;
}

bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
  if (MI->isBundle()) {
    MachineBasicBlock::const_instr_iterator I = MI;
    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      int PIdx = I->findFirstPredOperandIdx();
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
        return true;
    }
    return false;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
}

bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
                     const SmallVectorImpl<MachineOperand> &Pred) const {
  unsigned Opc = MI->getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI->getParent()->getParent(), MI)
      .addImm(Pred[0].getImm())
      .addReg(Pred[1].getReg());
    return true;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
    return true;
  }
  return false;
}

bool ARMBaseInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                  const SmallVectorImpl<MachineOperand> &Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)
    return false;

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
  if (CC1 == CC2)
    return true;

  switch (CC1) {
  default:
    return false;
  case ARMCC::AL:
    return true;
  case ARMCC::HS:
    return CC2 == ARMCC::HI;
  case ARMCC::LS:
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
  case ARMCC::GE:
    return CC2 == ARMCC::GT;
  case ARMCC::LE:
    return CC2 == ARMCC::LT;
  }
}

bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                        std::vector<MachineOperand> &Pred) const {
  bool Found = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
        (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
      Pred.push_back(MO);
      Found = true;
    }
  }

  return Found;
}

static bool isCPSRDefined(const MachineInstr *MI) {
  for (const auto &MO : MI->operands())
    if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef())
      return true;
  return false;
}

static bool isEligibleForITBlock(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return true;
  case ARM::tADC:   // ADC (register) T1
  case ARM::tADDi3: // ADD (immediate) T1
  case ARM::tADDi8: // ADD (immediate) T2
  case ARM::tADDrr: // ADD (register) T1
  case ARM::tAND:   // AND (register) T1
  case ARM::tASRri: // ASR (immediate) T1
  case ARM::tASRrr: // ASR (register) T1
  case ARM::tBIC:   // BIC (register) T1
  case ARM::tEOR:   // EOR (register) T1
  case ARM::tLSLri: // LSL (immediate) T1
  case ARM::tLSLrr: // LSL (register) T1
  case ARM::tLSRri: // LSR (immediate) T1
  case ARM::tLSRrr: // LSR (register) T1
  case ARM::tMUL:   // MUL T1
  case ARM::tMVN:   // MVN (register) T1
  case ARM::tORR:   // ORR (register) T1
  case ARM::tROR:   // ROR (register) T1
  case ARM::tRSB:   // RSB (immediate) T1
  case ARM::tSBC:   // SBC (register) T1
  case ARM::tSUBi3: // SUB (immediate) T1
  case ARM::tSUBi8: // SUB (immediate) T2
  case ARM::tSUBrr: // SUB (register) T1
    return !isCPSRDefined(MI);
  }
}

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
  if (!MI->isPredicable())
    return false;

  if (!isEligibleForITBlock(MI))
    return false;

  ARMFunctionInfo *AFI =
    MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();

  if (AFI->isThumb2Function()) {
    if (getSubtarget().restrictIT())
      return isV8EligibleForIT(MI);
  } else { // non-Thumb
    if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
      return false;
  }

  return true;
}

namespace llvm {
template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    if (!MO.isDead())
      return false;
  }
  // all definitions of CPSR are dead
  return true;
}
}

/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI) {
  assert(JTI < JT.size());
  return JT[JTI].MBBs.size();
}

/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.getSize())
    return MCID.getSize();

  // If this machine instr is an inline asm, measure it.
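  // (getInlineAsmLength is conservative: broadly, it counts the statements in
  // the asm string and multiplies by the target's maximum instruction size.)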
  if (MI->getOpcode() == ARM::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default:
    // pseudo-instruction sizes are zero.
    return 0;
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case ARM::MOVi16_ga_pcrel:
  case ARM::MOVTi16_ga_pcrel:
  case ARM::t2MOVi16_ga_pcrel:
  case ARM::t2MOVTi16_ga_pcrel:
    return 4;
  case ARM::MOVi32imm:
  case ARM::t2MOVi32imm:
    return 8;
  case ARM::CONSTPOOL_ENTRY:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI->getOperand(2).getImm();
  case ARM::Int_eh_sjlj_longjmp:
    return 16;
  case ARM::tInt_eh_sjlj_longjmp:
    return 10;
  case ARM::Int_eh_sjlj_setjmp:
  case ARM::Int_eh_sjlj_setjmp_nofp:
    return 20;
  case ARM::tInt_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return 12;
  case ARM::BR_JTr:
  case ARM::BR_JTm:
  case ARM::BR_JTadd:
  case ARM::tBR_JTr:
  case ARM::t2BR_JT:
  case ARM::t2TBB_JT:
  case ARM::t2TBH_JT: {
    // These are jumptable branches, i.e. a branch followed by an inlined
    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
    // entry is one byte; TBH entries are two bytes each.
    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
    unsigned NumOps = MCID.getNumOperands();
    MachineOperand JTOP =
      MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
    unsigned JTI = JTOP.getIndex();
    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
    assert(MJTI != nullptr);
    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
    assert(JTI < JT.size());
    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
    // aligned. The assembler / linker may add 2 byte padding just before
    // the JT entries. The size does not include this padding; the
    // constant islands pass does separate bookkeeping for it.
    // FIXME: If we know the size of the function is less than (1 << 16) * 2
    // bytes, we can use 16-bit entries instead. Then there won't be an
    // alignment issue.
    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
    unsigned NumEntries = getNumJTEntries(JT, JTI);
    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
      // Make sure the instruction that follows TBB is 2-byte aligned.
      // FIXME: Constant island pass should insert an "ALIGN" instruction
      // instead.
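      // (TBB entries are a single byte, so an odd count would leave the
      // following instruction on an odd boundary; counting one extra entry
      // accounts for the pad byte.)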
      ++NumEntries;
    return NumEntries * EntrySize + InstSize;
  }
  }
}

unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI;
  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += GetInstSizeInBytes(&*I);
  }
  return Size;
}

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
                                  .addReg(SrcReg, getKillRegState(KillSrc))));
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc = 0;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
           !Subtarget.isFPOnlySP())
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq;

  if (Opc) {
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::VORRq)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    AddDefaultPred(MIB);
    return;
  }

  // Handle register classes that require multiple instructions.
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;
  int Spacing = 1;

  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VORRq;
    BeginIdx = ARM::qsub_0;
    SubRegs = 2;
  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VORRq;
    BeginIdx = ARM::qsub_0;
    SubRegs = 4;
  // Fall back to VMOVD.
  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
    BeginIdx = ARM::gsub_0;
    SubRegs = 2;
  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
    Spacing = 2;
  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
    Spacing = 2;
  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
    Spacing = 2;
  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
             Subtarget.isFPOnlySP()) {
    Opc = ARM::VMOVS;
    BeginIdx = ARM::ssub_0;
    SubRegs = 2;
  }

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with
  // SrcReg.
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
    Spacing = -Spacing;
  }
#ifndef NDEBUG
  SmallSet<unsigned, 4> DstRegs;
#endif
  for (unsigned i = 0; i != SubRegs; ++i) {
    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
    unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
    // VORR takes two source operands.
    if (Opc == ARM::VORRq)
      Mov.addReg(Src);
    Mov = AddDefaultPred(Mov);
    // MOVr can set CC.
    if (Opc == ARM::MOVr)
      Mov = AddDefaultCC(Mov);
  }
  // Add implicit super-register defs and kills to the last instruction.
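  // The per-subregister moves above mention only the pieces; marking the full
  // DestReg defined (and the full SrcReg killed) on the final move keeps the
  // tuple's liveness visible to the verifier.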
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
}

const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOStore,
                            MFI.getObjectSize(FI),
                            Align);

  switch (RC->getSize()) {
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      if (Subtarget.hasV5TEOps()) {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

        AddDefaultPred(MIB);
      } else {
        // Fall back to the STM instruction, which has existed since the dawn
        // of time.
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
                           .addFrameIndex(FI).addMemOperand(MMO));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
      // Use aligned spills if the stack can be realigned.
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
                       .addFrameIndex(FI).addImm(16)
                       .addReg(SrcReg, getKillRegState(isKill))
                       .addMemOperand(MMO));
      } else {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
                       .addReg(SrcReg, getKillRegState(isKill))
                       .addFrameIndex(FI)
                       .addMemOperand(MMO));
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      // Use aligned spills if the stack can be realigned.
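      // The VST1 pseudo below carries a 128-bit alignment operand (the
      // addImm(16)); the VSTM fallback imposes no such requirement.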
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
                       .addFrameIndex(FI).addImm(16)
                       .addReg(SrcReg, getKillRegState(isKill))
                       .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                           .addFrameIndex(FI))
                           .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        // FIXME: It's possible to only store part of the QQ register if the
        // spilled def has a sub-register index.
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
                       .addFrameIndex(FI).addImm(16)
                       .addReg(SrcReg, getKillRegState(isKill))
                       .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                           .addFrameIndex(FI))
                           .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                         .addFrameIndex(FI))
                         .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
      AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown reg class!");
  }
}

unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                     int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
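    // A register-offset store only addresses a frame slot directly when the
    // offset register is noreg and the shift immediate is zero, which is what
    // the checks below require.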
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSTRspi:
  case ARM::VSTRD:
  case ARM::VSTRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64:
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64QPseudo:
    if (MI->getOperand(0).isFI() &&
        MI->getOperand(2).getSubReg() == 0) {
      FrameIndex = MI->getOperand(0).getIndex();
      return MI->getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                                    int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}

void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(FI),
        MachineMemOperand::MOLoad,
        MFI.getObjectSize(FI),
        Align);

  switch (RC->getSize()) {
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
                     .addFrameIndex(FI).addImm(0).addMemOperand(MMO));

    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
                     .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                     .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

        AddDefaultPred(MIB);
      } else {
        // Fall back to the LDM instruction, which has existed since the dawn
        // of time.
        MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
                               .addFrameIndex(FI).addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      }

      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
                       .addFrameIndex(FI).addImm(16)
                       .addMemOperand(MMO));
      } else {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
                       .addFrameIndex(FI)
                       .addMemOperand(MMO));
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
                       .addFrameIndex(FI).addImm(16)
                       .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                           .addFrameIndex(FI)
                           .addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
                       .addFrameIndex(FI).addImm(16)
                       .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                           .addFrameIndex(FI))
                           .addMemOperand(MMO);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                         .addFrameIndex(FI))
                         .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                      int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tLDRspi:
  case ARM::VLDRD:
  case ARM::VLDRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLD1q64:
  case ARM::VLD1d64TPseudo:
  case ARM::VLD1d64QPseudo:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                     int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}

bool
ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineFunction &MF = *MI->getParent()->getParent();
  Reloc::Model RM = MF.getTarget().getRelocationModel();

  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
    assert(getSubtarget().getTargetTriple().getObjectFormat() ==
           Triple::MachO &&
           "LOAD_STACK_GUARD currently supported only for MachO.");
    expandLoadStackGuard(MI, RM);
    MI->getParent()->erase(MI);
    return true;
  }

  // This hook gets to expand COPY instructions before they become
  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
  // widened to VMOVD. We prefer the VMOVD when possible because it may be
  // changed into a VORR that can go down the NEON pipeline.
  if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() ||
      Subtarget.isFPOnlySP())
    return false;

  // Look for a copy between even S-registers. That is where we keep floats
  // when using NEON v2f32 instructions for f32 arithmetic.
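  // Only an even-numbered S-register is the ssub_0 half of a D-register, so
  // the getMatchingSuperReg calls below return 0 for odd registers and the
  // widening is skipped.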
  unsigned DstRegS = MI->getOperand(0).getReg();
  unsigned SrcRegS = MI->getOperand(1).getReg();
  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  if (!DstRegD || !SrcRegD)
    return false;

  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
  // legal if the COPY already defines the full DstRegD, and it isn't a
  // sub-register insertion.
  if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
    return false;

  // A dead copy shouldn't show up here, but reject it just in case.
  if (MI->getOperand(0).isDead())
    return false;

  // All clear, widen the COPY.
  DEBUG(dbgs() << "widening:    " << *MI);
  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);

  // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
  // or some other super-register.
  int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
  if (ImpDefIdx != -1)
    MI->RemoveOperand(ImpDefIdx);

  // Change the opcode and operands.
  MI->setDesc(get(ARM::VMOVD));
  MI->getOperand(0).setReg(DstRegD);
  MI->getOperand(1).setReg(SrcRegD);
  AddDefaultPred(MIB);

  // We are now reading SrcRegD instead of SrcRegS. This may upset the
  // register scavenger and machine verifier, so we need to indicate that we
  // are reading an undefined value from SrcRegD, but a proper value from
  // SrcRegS.
  MI->getOperand(1).setIsUndef();
  MIB.addReg(SrcRegS, RegState::Implicit);

  // SrcRegD may actually contain an unrelated value in the ssub_1
  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
  if (MI->getOperand(1).isKill()) {
    MI->getOperand(1).setIsKill(false);
    MI->addRegisterKilled(SrcRegS, TRI, true);
  }

  DEBUG(dbgs() << "replaced by: " << *MI);
  return true;
}

/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);

  unsigned PCLabelId = AFI->createPICLabelUId();
  ARMConstantPoolValue *NewCPV = nullptr;

  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
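  // Each duplicate gets a fresh PC label UId so every user of the entry can
  // materialize its own PC-relative address.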
  if (ACPV->isGlobalValue())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
             ARMCP::CPValue, 4);
  else if (ACPV->isExtSymbol())
    NewCPV = ARMConstantPoolSymbol::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
             ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
                                             ARMCP::CPLSDA, 4);
  else if (ACPV->isMachineBasicBlock())
    NewCPV = ARMConstantPoolMBB::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
  return PCLabelId;
}

void ARMBaseInstrInfo::
reMaterialize(MachineBasicBlock &MBB,
              MachineBasicBlock::iterator I,
              unsigned DestReg, unsigned SubIdx,
              const MachineInstr *Orig,
              const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig->getOpcode();
  switch (Opcode) {
  default: {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
                                      DestReg)
      .addConstantPoolIndex(CPI).addImm(PCLabelId);
    MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
    break;
  }
  }
}

MachineInstr *
ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
  MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
  switch(Orig->getOpcode()) {
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    Orig->getOperand(1).setIndex(CPI);
    Orig->getOperand(2).setImm(PCLabelId);
    break;
  }
  }
  return MI;
}

bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
                                        const MachineInstr *MI1,
                                        const MachineRegisterInfo *MRI) const {
  int Opcode = MI0->getOpcode();
  if (Opcode == ARM::t2LDRpci ||
      Opcode == ARM::t2LDRpci_pic ||
      Opcode == ARM::tLDRpci ||
      Opcode == ARM::tLDRpci_pic ||
      Opcode == ARM::LDRLIT_ga_pcrel ||
      Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
      Opcode == ARM::tLDRLIT_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel_ldr ||
      Opcode == ARM::t2MOV_ga_pcrel) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    const MachineOperand &MO0 = MI0->getOperand(1);
    const MachineOperand &MO1 = MI1->getOperand(1);
    if (MO0.getOffset() != MO1.getOffset())
      return false;

    if (Opcode == ARM::LDRLIT_ga_pcrel ||
        Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
        Opcode == ARM::tLDRLIT_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel_ldr ||
        Opcode == ARM::t2MOV_ga_pcrel)
      // Ignore the PC labels.
      return MO0.getGlobal() == MO1.getGlobal();

    const MachineFunction *MF = MI0->getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    if (isARMCP0 && isARMCP1) {
      ARMConstantPoolValue *ACPV0 =
        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
      ARMConstantPoolValue *ACPV1 =
        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
      return ACPV0->hasSameValue(ACPV1);
    } else if (!isARMCP0 && !isARMCP1) {
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    }
    return false;
  } else if (Opcode == ARM::PICLDR) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    unsigned Addr0 = MI0->getOperand(1).getReg();
    unsigned Addr1 = MI1->getOperand(1).getReg();
    if (Addr0 != Addr1) {
      if (!MRI ||
          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
          !TargetRegisterInfo::isVirtualRegister(Addr1))
        return false;

      // This assumes SSA form.
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
      // Check if the loaded value, e.g. a constantpool entry or a global
      // address, is the same.
      if (!produceSameValue(Def0, Def1, MRI))
        return false;
    }

    for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
      const MachineOperand &MO0 = MI0->getOperand(i);
      const MachineOperand &MO1 = MI1->getOperand(i);
      if (!MO0.isIdenticalTo(MO1))
        return false;
    }
    return true;
  }

  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only differences
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  switch (Load1->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRBi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRBi12:
  case ARM::t2LDRSHi12:
    break;
  }

  switch (Load2->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRBi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRBi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // Check if base addresses and chain operands match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}

/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                               int64_t Offset1, int64_t Offset2,
                                               unsigned NumLoads) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  assert(Offset2 > Offset1);

  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  // Check if the machine opcodes are different. If they are different
  // then we consider them to not be of the same base address,
  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the
  // other LDRBi12. In this case, they are considered to be the same because
  // they are different encoding forms of the same basic instruction.
01582 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && 01583 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && 01584 Load2->getMachineOpcode() == ARM::t2LDRBi12) || 01585 (Load1->getMachineOpcode() == ARM::t2LDRBi12 && 01586 Load2->getMachineOpcode() == ARM::t2LDRBi8))) 01587 return false; // FIXME: overly conservative? 01588 01589 // Four loads in a row should be sufficient. 01590 if (NumLoads >= 3) 01591 return false; 01592 01593 return true; 01594 } 01595 01596 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, 01597 const MachineBasicBlock *MBB, 01598 const MachineFunction &MF) const { 01599 // Debug info is never a scheduling boundary. It's necessary to be explicit 01600 // due to the special treatment of IT instructions below, otherwise a 01601 // dbg_value followed by an IT will result in the IT instruction being 01602 // considered a scheduling hazard, which is wrong. It should be the actual 01603 // instruction preceding the dbg_value instruction(s), just like it is 01604 // when debug info is not present. 01605 if (MI->isDebugValue()) 01606 return false; 01607 01608 // Terminators and labels can't be scheduled around. 01609 if (MI->isTerminator() || MI->isPosition()) 01610 return true; 01611 01612 // Treat the start of the IT block as a scheduling boundary, but schedule 01613 // t2IT along with all instructions following it. 01614 // FIXME: This is a big hammer. But the alternative is to add all potential 01615 // true and anti dependencies to IT block instructions as implicit operands 01616 // to the t2IT instruction. The added compile time and complexity does not 01617 // seem worth it. 01618 MachineBasicBlock::const_iterator I = MI; 01619 // Make sure to skip any dbg_value instructions 01620 while (++I != MBB->end() && I->isDebugValue()) 01621 ; 01622 if (I != MBB->end() && I->getOpcode() == ARM::t2IT) 01623 return true; 01624 01625 // Don't attempt to schedule around any instruction that defines 01626 // a stack-oriented pointer, as it's unlikely to be profitable. This 01627 // saves compile time, because it doesn't require every single 01628 // stack slot reference to depend on the instruction that does the 01629 // modification. 01630 // Calls don't actually change the stack pointer, even if they have imp-defs. 01631 // No ARM calling conventions change the stack pointer. (X86 calling 01632 // conventions sometimes do). 01633 if (!MI->isCall() && MI->definesRegister(ARM::SP)) 01634 return true; 01635 01636 return false; 01637 } 01638 01639 bool ARMBaseInstrInfo:: 01640 isProfitableToIfCvt(MachineBasicBlock &MBB, 01641 unsigned NumCycles, unsigned ExtraPredCycles, 01642 const BranchProbability &Probability) const { 01643 if (!NumCycles) 01644 return false; 01645 01646 // Attempt to estimate the relative costs of predication versus branching. 01647 unsigned UnpredCost = Probability.getNumerator() * NumCycles; 01648 UnpredCost /= Probability.getDenominator(); 01649 UnpredCost += 1; // The branch itself 01650 UnpredCost += Subtarget.getMispredictionPenalty() / 10; 01651 01652 return (NumCycles + ExtraPredCycles) <= UnpredCost; 01653 } 01654 01655 bool ARMBaseInstrInfo:: 01656 isProfitableToIfCvt(MachineBasicBlock &TMBB, 01657 unsigned TCycles, unsigned TExtra, 01658 MachineBasicBlock &FMBB, 01659 unsigned FCycles, unsigned FExtra, 01660 const BranchProbability &Probability) const { 01661 if (!TCycles || !FCycles) 01662 return false; 01663 01664 // Attempt to estimate the relative costs of predication versus branching. 
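  // Rough worked example (all numbers assumed): with Probability = 1/2,
  // TCycles = FCycles = 4, TExtra = FExtra = 1, and a 10 cycle misprediction
  // penalty, the unpredicated cost is 4/2 + 4/2 + 1 + 10/10 = 6, while the
  // predicated cost is 4 + 4 + 1 + 1 = 10, so if-conversion is rejected.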
01665 unsigned TUnpredCost = Probability.getNumerator() * TCycles; 01666 TUnpredCost /= Probability.getDenominator(); 01667 01668 uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); 01669 unsigned FUnpredCost = Comp * FCycles; 01670 FUnpredCost /= Probability.getDenominator(); 01671 01672 unsigned UnpredCost = TUnpredCost + FUnpredCost; 01673 UnpredCost += 1; // The branch itself 01674 UnpredCost += Subtarget.getMispredictionPenalty() / 10; 01675 01676 return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; 01677 } 01678 01679 bool 01680 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, 01681 MachineBasicBlock &FMBB) const { 01682 // Reduce false anti-dependencies to let Swift's out-of-order execution 01683 // engine do its thing. 01684 return Subtarget.isSwift(); 01685 } 01686 01687 /// getInstrPredicate - If instruction is predicated, returns its predicate 01688 /// condition, otherwise returns AL. It also returns the condition code 01689 /// register by reference. 01690 ARMCC::CondCodes 01691 llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { 01692 int PIdx = MI->findFirstPredOperandIdx(); 01693 if (PIdx == -1) { 01694 PredReg = 0; 01695 return ARMCC::AL; 01696 } 01697 01698 PredReg = MI->getOperand(PIdx+1).getReg(); 01699 return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); 01700 } 01701 01702 01703 int llvm::getMatchingCondBranchOpcode(int Opc) { 01704 if (Opc == ARM::B) 01705 return ARM::Bcc; 01706 if (Opc == ARM::tB) 01707 return ARM::tBcc; 01708 if (Opc == ARM::t2B) 01709 return ARM::t2Bcc; 01710 01711 llvm_unreachable("Unknown unconditional branch opcode!"); 01712 } 01713 01714 /// commuteInstruction - Handle commutable instructions. 01715 MachineInstr * 01716 ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { 01717 switch (MI->getOpcode()) { 01718 case ARM::MOVCCr: 01719 case ARM::t2MOVCCr: { 01720 // MOVCC can be commuted by inverting the condition. 01721 unsigned PredReg = 0; 01722 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); 01723 // MOVCC AL can't be inverted. Shouldn't happen. 01724 if (CC == ARMCC::AL || PredReg != ARM::CPSR) 01725 return nullptr; 01726 MI = TargetInstrInfo::commuteInstruction(MI, NewMI); 01727 if (!MI) 01728 return nullptr; 01729 // After swapping the MOVCC operands, also invert the condition. 01730 MI->getOperand(MI->findFirstPredOperandIdx()) 01731 .setImm(ARMCC::getOppositeCondition(CC)); 01732 return MI; 01733 } 01734 } 01735 return TargetInstrInfo::commuteInstruction(MI, NewMI); 01736 } 01737 01738 /// Identify instructions that can be folded into a MOVCC instruction, and 01739 /// return the defining instruction. 01740 static MachineInstr *canFoldIntoMOVCC(unsigned Reg, 01741 const MachineRegisterInfo &MRI, 01742 const TargetInstrInfo *TII) { 01743 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 01744 return nullptr; 01745 if (!MRI.hasOneNonDBGUse(Reg)) 01746 return nullptr; 01747 MachineInstr *MI = MRI.getVRegDef(Reg); 01748 if (!MI) 01749 return nullptr; 01750 // MI is folded into the MOVCC by predicating it. 01751 if (!MI->isPredicable()) 01752 return nullptr; 01753 // Check if MI has any non-dead defs or physreg uses. This also detects 01754 // predicated instructions which will be reading CPSR. 01755 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 01756 const MachineOperand &MO = MI->getOperand(i); 01757 // Reject frame index operands, PEI can't handle the predicated pseudos. 
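    // (Roughly: PEI later rewrites frame indices into SP/FP-relative address
    // arithmetic, which can require extra scratch instructions that cannot
    // themselves be predicated.)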
01758 if (MO.isFI() || MO.isCPI() || MO.isJTI()) 01759 return nullptr; 01760 if (!MO.isReg()) 01761 continue; 01762 // MI can't have any tied operands, that would conflict with predication. 01763 if (MO.isTied()) 01764 return nullptr; 01765 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 01766 return nullptr; 01767 if (MO.isDef() && !MO.isDead()) 01768 return nullptr; 01769 } 01770 bool DontMoveAcrossStores = true; 01771 if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr, 01772 DontMoveAcrossStores)) 01773 return nullptr; 01774 return MI; 01775 } 01776 01777 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, 01778 SmallVectorImpl<MachineOperand> &Cond, 01779 unsigned &TrueOp, unsigned &FalseOp, 01780 bool &Optimizable) const { 01781 assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && 01782 "Unknown select instruction"); 01783 // MOVCC operands: 01784 // 0: Def. 01785 // 1: True use. 01786 // 2: False use. 01787 // 3: Condition code. 01788 // 4: CPSR use. 01789 TrueOp = 1; 01790 FalseOp = 2; 01791 Cond.push_back(MI->getOperand(3)); 01792 Cond.push_back(MI->getOperand(4)); 01793 // We can always fold a def. 01794 Optimizable = true; 01795 return false; 01796 } 01797 01798 MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, 01799 bool PreferFalse) const { 01800 assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && 01801 "Unknown select instruction"); 01802 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 01803 MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); 01804 bool Invert = !DefMI; 01805 if (!DefMI) 01806 DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); 01807 if (!DefMI) 01808 return nullptr; 01809 01810 // Find new register class to use. 01811 MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); 01812 unsigned DestReg = MI->getOperand(0).getReg(); 01813 const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); 01814 if (!MRI.constrainRegClass(DestReg, PreviousClass)) 01815 return nullptr; 01816 01817 // Create a new predicated version of DefMI. 01818 // Rfalse is the first use. 01819 MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 01820 DefMI->getDesc(), DestReg); 01821 01822 // Copy all the DefMI operands, excluding its (null) predicate. 01823 const MCInstrDesc &DefDesc = DefMI->getDesc(); 01824 for (unsigned i = 1, e = DefDesc.getNumOperands(); 01825 i != e && !DefDesc.OpInfo[i].isPredicate(); ++i) 01826 NewMI.addOperand(DefMI->getOperand(i)); 01827 01828 unsigned CondCode = MI->getOperand(3).getImm(); 01829 if (Invert) 01830 NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode))); 01831 else 01832 NewMI.addImm(CondCode); 01833 NewMI.addOperand(MI->getOperand(4)); 01834 01835 // DefMI is not the -S version that sets CPSR, so add an optional %noreg. 01836 if (NewMI->hasOptionalDef()) 01837 AddDefaultCC(NewMI); 01838 01839 // The output register value when the predicate is false is an implicit 01840 // register operand tied to the first def. 01841 // The tie makes the register allocator ensure the FalseReg is allocated the 01842 // same register as operand 0. 01843 FalseReg.setImplicit(); 01844 NewMI.addOperand(FalseReg); 01845 NewMI->tieOperands(0, NewMI->getNumOperands() - 1); 01846 01847 // The caller will erase MI, but not DefMI. 
01848   DefMI->eraseFromParent();
01849   return NewMI;
01850 }
01851 
01852 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
01853 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
01854 /// def operand.
01855 ///
01856 /// This will go away once we can teach tblgen how to set the optional CPSR def
01857 /// operand itself.
01858 struct AddSubFlagsOpcodePair {
01859   uint16_t PseudoOpc;
01860   uint16_t MachineOpc;
01861 };
01862 
01863 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
01864   {ARM::ADDSri, ARM::ADDri},
01865   {ARM::ADDSrr, ARM::ADDrr},
01866   {ARM::ADDSrsi, ARM::ADDrsi},
01867   {ARM::ADDSrsr, ARM::ADDrsr},
01868 
01869   {ARM::SUBSri, ARM::SUBri},
01870   {ARM::SUBSrr, ARM::SUBrr},
01871   {ARM::SUBSrsi, ARM::SUBrsi},
01872   {ARM::SUBSrsr, ARM::SUBrsr},
01873 
01874   {ARM::RSBSri, ARM::RSBri},
01875   {ARM::RSBSrsi, ARM::RSBrsi},
01876   {ARM::RSBSrsr, ARM::RSBrsr},
01877 
01878   {ARM::t2ADDSri, ARM::t2ADDri},
01879   {ARM::t2ADDSrr, ARM::t2ADDrr},
01880   {ARM::t2ADDSrs, ARM::t2ADDrs},
01881 
01882   {ARM::t2SUBSri, ARM::t2SUBri},
01883   {ARM::t2SUBSrr, ARM::t2SUBrr},
01884   {ARM::t2SUBSrs, ARM::t2SUBrs},
01885 
01886   {ARM::t2RSBSri, ARM::t2RSBri},
01887   {ARM::t2RSBSrs, ARM::t2RSBrs},
01888 };
01889 
01890 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
01891   for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
01892     if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
01893       return AddSubFlagsOpcodeMap[i].MachineOpc;
01894   return 0;
01895 }
01896 
01897 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
01898                                    MachineBasicBlock::iterator &MBBI, DebugLoc dl,
01899                                    unsigned DestReg, unsigned BaseReg, int NumBytes,
01900                                    ARMCC::CondCodes Pred, unsigned PredReg,
01901                                    const ARMBaseInstrInfo &TII, unsigned MIFlags) {
01902   if (NumBytes == 0 && DestReg != BaseReg) {
01903     BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
01904       .addReg(BaseReg, RegState::Kill)
01905       .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
01906       .setMIFlags(MIFlags);
01907     return;
01908   }
01909 
01910   bool isSub = NumBytes < 0;
01911   if (isSub) NumBytes = -NumBytes;
01912 
01913   while (NumBytes) {
01914     unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
01915     unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
01916     assert(ThisVal && "Didn't extract field correctly");
01917 
01918     // We will handle these bits below; clear them from the offset.
01919     NumBytes &= ~ThisVal;
01920 
01921     assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
01922 
01923     // Build the new ADD / SUB.
01924     unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
01925     BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
01926       .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
01927       .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
01928       .setMIFlags(MIFlags);
01929     BaseReg = DestReg;
01930   }
01931 }
01932 
01933 static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI,
01934                             MachineInstr *MI) {
01935   for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true);
01936        Subreg.isValid(); ++Subreg)
01937     if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) !=
01938         MachineBasicBlock::LQR_Dead)
01939       return true;
01940   return false;
01941 }
01942 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
01943                                       MachineFunction &MF, MachineInstr *MI,
01944                                       unsigned NumBytes) {
01945   // This optimisation potentially adds lots of load and store
01946   // micro-operations, so it's only really a benefit for code size.
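  // Illustrative example (registers assumed): in a minsize function, a
  // "sub sp, sp, #8" next to "push {r4, r5}" can be folded by rewriting the
  // push as "push {r2, r3, r4, r5}", where r2 and r3 are dead registers
  // pushed purely to move SP.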
01947   if (!MF.getFunction()->getAttributes().hasAttribute(
01948           AttributeSet::FunctionIndex, Attribute::MinSize))
01949     return false;
01950 
01951   // If only one register is pushed/popped, LLVM can use an LDR/STR
01952   // instead. We can't modify those so make sure we're dealing with an
01953   // instruction we understand.
01954   bool IsPop = isPopOpcode(MI->getOpcode());
01955   bool IsPush = isPushOpcode(MI->getOpcode());
01956   if (!IsPush && !IsPop)
01957     return false;
01958 
01959   bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
01960                       MI->getOpcode() == ARM::VLDMDIA_UPD;
01961   bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
01962                      MI->getOpcode() == ARM::tPOP ||
01963                      MI->getOpcode() == ARM::tPOP_RET;
01964 
01965   assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
01966                           MI->getOperand(1).getReg() == ARM::SP)) &&
01967          "trying to fold sp update into non-sp-updating push/pop");
01968 
01969   // The VFP push & pop act on D-registers, so we can only correctly fold in
01970   // an adjustment that is a multiple of 8 bytes. Similarly, rN registers are
01971   // 4 bytes each. Don't try if this is violated.
01972   if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
01973     return false;
01974 
01975   // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
01976   // pred) so the list starts at 4. Thumb1 starts after the predicate.
01977   int RegListIdx = IsT1PushPop ? 2 : 4;
01978 
01979   // Calculate the space we'll need in terms of registers.
01980   unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
01981   unsigned RD0Reg, RegsNeeded;
01982   if (IsVFPPushPop) {
01983     RD0Reg = ARM::D0;
01984     RegsNeeded = NumBytes / 8;
01985   } else {
01986     RD0Reg = ARM::R0;
01987     RegsNeeded = NumBytes / 4;
01988   }
01989 
01990   // We're going to have to strip all list operands off before
01991   // re-adding them since the order matters, so save the existing ones
01992   // for later.
01993   SmallVector<MachineOperand, 4> RegList;
01994   for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
01995     RegList.push_back(MI->getOperand(i));
01996 
01997   const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
01998   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
01999 
02000   // Now try to find enough space in the reglist to allocate NumBytes.
02001   for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
02002        --CurReg) {
02003     if (!IsPop) {
02004       // Pushing any register is completely harmless; mark the
02005       // register involved as undef since we don't care about it in
02006       // the slightest.
02007       RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
02008                                                   false, false, true));
02009       --RegsNeeded;
02010       continue;
02011     }
02012 
02013     // However, we can only pop an extra register if it's not live. For
02014     // registers live within the function we might clobber a return value
02015     // register; the other way a register can be live here is if it's
02016     // callee-saved.
02017     // TODO: Currently, computeRegisterLiveness() does not report "live" if a
02018     // sub reg is live. When computeRegisterLiveness() works for sub reg, it
02019     // can replace isAnySubRegLive().
02020     if (isCalleeSavedRegister(CurReg, CSRegs) ||
02021         isAnySubRegLive(CurReg, TRI, MI)) {
02022       // VFP pops don't allow holes in the register list, so any skip is fatal
02023       // for our transformation. GPR pops do, so we should just keep looking.
02024       if (IsVFPPushPop)
02025         return false;
02026       else
02027         continue;
02028     }
02029 
02030     // Mark the unimportant registers as <def,dead> in the POP.
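    // (e.g. popping a dead r3 solely to move SP up another 4 bytes; the
    // value loaded into it is never read.)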
02031     RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
02032                                                 true));
02033     --RegsNeeded;
02034   }
02035 
02036   if (RegsNeeded > 0)
02037     return false;
02038 
02039   // Finally we know we can profitably perform the optimisation so go
02040   // ahead: strip all existing registers off and add them back again
02041   // in the right order.
02042   for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
02043     MI->RemoveOperand(i);
02044 
02045   // Add the complete list back in.
02046   MachineInstrBuilder MIB(MF, &*MI);
02047   for (int i = RegList.size() - 1; i >= 0; --i)
02048     MIB.addOperand(RegList[i]);
02049 
02050   return true;
02051 }
02052 
02053 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
02054                                 unsigned FrameReg, int &Offset,
02055                                 const ARMBaseInstrInfo &TII) {
02056   unsigned Opcode = MI.getOpcode();
02057   const MCInstrDesc &Desc = MI.getDesc();
02058   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
02059   bool isSub = false;
02060 
02061   // Memory operands in inline assembly always use AddrMode2.
02062   if (Opcode == ARM::INLINEASM)
02063     AddrMode = ARMII::AddrMode2;
02064 
02065   if (Opcode == ARM::ADDri) {
02066     Offset += MI.getOperand(FrameRegIdx+1).getImm();
02067     if (Offset == 0) {
02068       // Turn it into a move.
02069       MI.setDesc(TII.get(ARM::MOVr));
02070       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
02071       MI.RemoveOperand(FrameRegIdx+1);
02072       Offset = 0;
02073       return true;
02074     } else if (Offset < 0) {
02075       Offset = -Offset;
02076       isSub = true;
02077       MI.setDesc(TII.get(ARM::SUBri));
02078     }
02079 
02080     // Common case: small offset, fits into instruction.
02081     if (ARM_AM::getSOImmVal(Offset) != -1) {
02082       // Replace the FrameIndex with sp / fp
02083       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
02084       MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
02085       Offset = 0;
02086       return true;
02087     }
02088 
02089     // Otherwise, pull as much of the immediate into this ADDri/SUBri
02090     // as possible.
02091     unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
02092     unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
02093 
02094     // We will handle these bits below; clear them from the offset.
02095     Offset &= ~ThisImmVal;
02096 
02097     // Get the properly encoded SOImmVal field.
02098     assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
02099            "Bit extraction didn't work?");
02100     MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
02101   } else {
02102     unsigned ImmIdx = 0;
02103     int InstrOffs = 0;
02104     unsigned NumBits = 0;
02105     unsigned Scale = 1;
02106     switch (AddrMode) {
02107     case ARMII::AddrMode_i12: {
02108       ImmIdx = FrameRegIdx + 1;
02109       InstrOffs = MI.getOperand(ImmIdx).getImm();
02110       NumBits = 12;
02111       break;
02112     }
02113     case ARMII::AddrMode2: {
02114       ImmIdx = FrameRegIdx+2;
02115       InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
02116       if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
02117         InstrOffs *= -1;
02118       NumBits = 12;
02119       break;
02120     }
02121     case ARMII::AddrMode3: {
02122       ImmIdx = FrameRegIdx+2;
02123       InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
02124       if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
02125         InstrOffs *= -1;
02126       NumBits = 8;
02127       break;
02128     }
02129     case ARMII::AddrMode4:
02130     case ARMII::AddrMode6:
02131       // Can't fold any offset even if it's zero.
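      // (AddrMode4 is used by load/store-multiple and AddrMode6 by NEON
      // element/structure accesses; neither encoding has an immediate offset
      // field to fold into.)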
02132       return false;
02133     case ARMII::AddrMode5: {
02134       ImmIdx = FrameRegIdx+1;
02135       InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
02136       if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
02137         InstrOffs *= -1;
02138       NumBits = 8;
02139       Scale = 4;
02140       break;
02141     }
02142     default:
02143       llvm_unreachable("Unsupported addressing mode!");
02144     }
02145 
02146     Offset += InstrOffs * Scale;
02147     assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
02148     if (Offset < 0) {
02149       Offset = -Offset;
02150       isSub = true;
02151     }
02152 
02153     // Attempt to fold the address computation if the opcode has offset bits.
02154     if (NumBits > 0) {
02155       // Common case: small offset, fits into instruction.
02156       MachineOperand &ImmOp = MI.getOperand(ImmIdx);
02157       int ImmedOffset = Offset / Scale;
02158       unsigned Mask = (1 << NumBits) - 1;
02159       if ((unsigned)Offset <= Mask * Scale) {
02160         // Replace the FrameIndex with sp
02161         MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
02162         // FIXME: When addrmode2 goes away, this will simplify (like the
02163         // T2 version), as the LDR.i12 versions don't need the encoding
02164         // tricks for the offset value.
02165         if (isSub) {
02166           if (AddrMode == ARMII::AddrMode_i12)
02167             ImmedOffset = -ImmedOffset;
02168           else
02169             ImmedOffset |= 1 << NumBits;
02170         }
02171         ImmOp.ChangeToImmediate(ImmedOffset);
02172         Offset = 0;
02173         return true;
02174       }
02175 
02176       // Otherwise, it didn't fit. Pull in what we can to simplify the immediate.
02177       ImmedOffset = ImmedOffset & Mask;
02178       if (isSub) {
02179         if (AddrMode == ARMII::AddrMode_i12)
02180           ImmedOffset = -ImmedOffset;
02181         else
02182           ImmedOffset |= 1 << NumBits;
02183       }
02184       ImmOp.ChangeToImmediate(ImmedOffset);
02185       Offset &= ~(Mask*Scale);
02186     }
02187   }
02188 
02189   Offset = (isSub) ? -Offset : Offset;
02190   return Offset == 0;
02191 }
02192 
02193 /// analyzeCompare - For a comparison instruction, return the source registers
02194 /// in SrcReg and SrcReg2 if it has two register operands, and the value it
02195 /// compares against in CmpValue. Return true if the comparison instruction
02196 /// can be analyzed.
02197 bool ARMBaseInstrInfo::
02198 analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
02199                int &CmpMask, int &CmpValue) const {
02200   switch (MI->getOpcode()) {
02201   default: break;
02202   case ARM::CMPri:
02203   case ARM::t2CMPri:
02204     SrcReg = MI->getOperand(0).getReg();
02205     SrcReg2 = 0;
02206     CmpMask = ~0;
02207     CmpValue = MI->getOperand(1).getImm();
02208     return true;
02209   case ARM::CMPrr:
02210   case ARM::t2CMPrr:
02211     SrcReg = MI->getOperand(0).getReg();
02212     SrcReg2 = MI->getOperand(1).getReg();
02213     CmpMask = ~0;
02214     CmpValue = 0;
02215     return true;
02216   case ARM::TSTri:
02217   case ARM::t2TSTri:
02218     SrcReg = MI->getOperand(0).getReg();
02219     SrcReg2 = 0;
02220     CmpMask = MI->getOperand(1).getImm();
02221     CmpValue = 0;
02222     return true;
02223   }
02224 
02225   return false;
02226 }
02227 
02228 /// isSuitableForMask - Identify a suitable 'and' instruction that
02229 /// operates on the given source register and applies the same mask
02230 /// as a 'tst' instruction. Provide a limited look-through for copies.
02231 /// When successful, MI will hold the found instruction.
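/// For example (illustrative registers), "tst r0, #255" applies the same
/// mask as "and r1, r0, #255", so that AND would be matched here.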
02232 static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, 02233 int CmpMask, bool CommonUse) { 02234 switch (MI->getOpcode()) { 02235 case ARM::ANDri: 02236 case ARM::t2ANDri: 02237 if (CmpMask != MI->getOperand(2).getImm()) 02238 return false; 02239 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg()) 02240 return true; 02241 break; 02242 case ARM::COPY: { 02243 // Walk down one instruction which is potentially an 'and'. 02244 const MachineInstr &Copy = *MI; 02245 MachineBasicBlock::iterator AND( 02246 std::next(MachineBasicBlock::iterator(MI))); 02247 if (AND == MI->getParent()->end()) return false; 02248 MI = AND; 02249 return isSuitableForMask(MI, Copy.getOperand(0).getReg(), 02250 CmpMask, true); 02251 } 02252 } 02253 02254 return false; 02255 } 02256 02257 /// getSwappedCondition - assume the flags are set by MI(a,b), return 02258 /// the condition code if we modify the instructions such that flags are 02259 /// set by MI(b,a). 02260 inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { 02261 switch (CC) { 02262 default: return ARMCC::AL; 02263 case ARMCC::EQ: return ARMCC::EQ; 02264 case ARMCC::NE: return ARMCC::NE; 02265 case ARMCC::HS: return ARMCC::LS; 02266 case ARMCC::LO: return ARMCC::HI; 02267 case ARMCC::HI: return ARMCC::LO; 02268 case ARMCC::LS: return ARMCC::HS; 02269 case ARMCC::GE: return ARMCC::LE; 02270 case ARMCC::LT: return ARMCC::GT; 02271 case ARMCC::GT: return ARMCC::LT; 02272 case ARMCC::LE: return ARMCC::GE; 02273 } 02274 } 02275 02276 /// isRedundantFlagInstr - check whether the first instruction, whose only 02277 /// purpose is to update flags, can be made redundant. 02278 /// CMPrr can be made redundant by SUBrr if the operands are the same. 02279 /// CMPri can be made redundant by SUBri if the operands are the same. 02280 /// This function can be extended later on. 02281 inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, 02282 unsigned SrcReg2, int ImmValue, 02283 MachineInstr *OI) { 02284 if ((CmpI->getOpcode() == ARM::CMPrr || 02285 CmpI->getOpcode() == ARM::t2CMPrr) && 02286 (OI->getOpcode() == ARM::SUBrr || 02287 OI->getOpcode() == ARM::t2SUBrr) && 02288 ((OI->getOperand(1).getReg() == SrcReg && 02289 OI->getOperand(2).getReg() == SrcReg2) || 02290 (OI->getOperand(1).getReg() == SrcReg2 && 02291 OI->getOperand(2).getReg() == SrcReg))) 02292 return true; 02293 02294 if ((CmpI->getOpcode() == ARM::CMPri || 02295 CmpI->getOpcode() == ARM::t2CMPri) && 02296 (OI->getOpcode() == ARM::SUBri || 02297 OI->getOpcode() == ARM::t2SUBri) && 02298 OI->getOperand(1).getReg() == SrcReg && 02299 OI->getOperand(2).getImm() == ImmValue) 02300 return true; 02301 return false; 02302 } 02303 02304 /// optimizeCompareInstr - Convert the instruction supplying the argument to the 02305 /// comparison into one that sets the zero bit in the flags register; 02306 /// Remove a redundant Compare instruction if an earlier instruction can set the 02307 /// flags in the same way as Compare. 02308 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two 02309 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the 02310 /// condition code of instructions which use the flags. 02311 bool ARMBaseInstrInfo:: 02312 optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, 02313 int CmpMask, int CmpValue, 02314 const MachineRegisterInfo *MRI) const { 02315 // Get the unique definition of SrcReg. 
02316 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); 02317 if (!MI) return false; 02318 02319 // Masked compares sometimes use the same register as the corresponding 'and'. 02320 if (CmpMask != ~0) { 02321 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) { 02322 MI = nullptr; 02323 for (MachineRegisterInfo::use_instr_iterator 02324 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end(); 02325 UI != UE; ++UI) { 02326 if (UI->getParent() != CmpInstr->getParent()) continue; 02327 MachineInstr *PotentialAND = &*UI; 02328 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || 02329 isPredicated(PotentialAND)) 02330 continue; 02331 MI = PotentialAND; 02332 break; 02333 } 02334 if (!MI) return false; 02335 } 02336 } 02337 02338 // Get ready to iterate backward from CmpInstr. 02339 MachineBasicBlock::iterator I = CmpInstr, E = MI, 02340 B = CmpInstr->getParent()->begin(); 02341 02342 // Early exit if CmpInstr is at the beginning of the BB. 02343 if (I == B) return false; 02344 02345 // There are two possible candidates which can be changed to set CPSR: 02346 // One is MI, the other is a SUB instruction. 02347 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). 02348 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). 02349 MachineInstr *Sub = nullptr; 02350 if (SrcReg2 != 0) 02351 // MI is not a candidate for CMPrr. 02352 MI = nullptr; 02353 else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { 02354 // Conservatively refuse to convert an instruction which isn't in the same 02355 // BB as the comparison. 02356 // For CMPri, we need to check Sub, thus we can't return here. 02357 if (CmpInstr->getOpcode() == ARM::CMPri || 02358 CmpInstr->getOpcode() == ARM::t2CMPri) 02359 MI = nullptr; 02360 else 02361 return false; 02362 } 02363 02364 // Check that CPSR isn't set between the comparison instruction and the one we 02365 // want to change. At the same time, search for Sub. 02366 const TargetRegisterInfo *TRI = &getRegisterInfo(); 02367 --I; 02368 for (; I != E; --I) { 02369 const MachineInstr &Instr = *I; 02370 02371 if (Instr.modifiesRegister(ARM::CPSR, TRI) || 02372 Instr.readsRegister(ARM::CPSR, TRI)) 02373 // This instruction modifies or uses CPSR after the one we want to 02374 // change. We can't do this transformation. 02375 return false; 02376 02377 // Check whether CmpInstr can be made redundant by the current instruction. 02378 if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) { 02379 Sub = &*I; 02380 break; 02381 } 02382 02383 if (I == B) 02384 // The 'and' is below the comparison instruction. 02385 return false; 02386 } 02387 02388 // Return false if no candidates exist. 02389 if (!MI && !Sub) 02390 return false; 02391 02392 // The single candidate is called MI. 02393 if (!MI) MI = Sub; 02394 02395 // We can't use a predicated instruction - it doesn't always write the flags. 
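  // (A predicated instruction whose condition comes out false executes as a
  // no-op and leaves CPSR untouched, which is exactly when we would need it
  // written.)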
02396   if (isPredicated(MI))
02397     return false;
02398 
02399   switch (MI->getOpcode()) {
02400   default: break;
02401   case ARM::RSBrr:
02402   case ARM::RSBri:
02403   case ARM::RSCrr:
02404   case ARM::RSCri:
02405   case ARM::ADDrr:
02406   case ARM::ADDri:
02407   case ARM::ADCrr:
02408   case ARM::ADCri:
02409   case ARM::SUBrr:
02410   case ARM::SUBri:
02411   case ARM::SBCrr:
02412   case ARM::SBCri:
02413   case ARM::t2RSBri:
02414   case ARM::t2ADDrr:
02415   case ARM::t2ADDri:
02416   case ARM::t2ADCrr:
02417   case ARM::t2ADCri:
02418   case ARM::t2SUBrr:
02419   case ARM::t2SUBri:
02420   case ARM::t2SBCrr:
02421   case ARM::t2SBCri:
02422   case ARM::ANDrr:
02423   case ARM::ANDri:
02424   case ARM::t2ANDrr:
02425   case ARM::t2ANDri:
02426   case ARM::ORRrr:
02427   case ARM::ORRri:
02428   case ARM::t2ORRrr:
02429   case ARM::t2ORRri:
02430   case ARM::EORrr:
02431   case ARM::EORri:
02432   case ARM::t2EORrr:
02433   case ARM::t2EORri: {
02434     // Scan forward for the use of CPSR.
02435     // When checking against MI: if it's a condition code that requires
02436     // checking of the V bit, then this is not safe to do.
02437     // It is safe to remove CmpInstr if CPSR is redefined or killed.
02438     // If we are done with the basic block, we need to check whether CPSR is
02439     // live-out.
02440     SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
02441         OperandsToUpdate;
02442     bool isSafe = false;
02443     I = CmpInstr;
02444     E = CmpInstr->getParent()->end();
02445     while (!isSafe && ++I != E) {
02446       const MachineInstr &Instr = *I;
02447       for (unsigned IO = 0, EO = Instr.getNumOperands();
02448            !isSafe && IO != EO; ++IO) {
02449         const MachineOperand &MO = Instr.getOperand(IO);
02450         if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
02451           isSafe = true;
02452           break;
02453         }
02454         if (!MO.isReg() || MO.getReg() != ARM::CPSR)
02455           continue;
02456         if (MO.isDef()) {
02457           isSafe = true;
02458           break;
02459         }
02460         // The condition code operand is immediately before CPSR except for VSELs.
02461         ARMCC::CondCodes CC;
02462         bool IsInstrVSel = true;
02463         switch (Instr.getOpcode()) {
02464         default:
02465           IsInstrVSel = false;
02466           CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
02467           break;
02468         case ARM::VSELEQD:
02469         case ARM::VSELEQS:
02470           CC = ARMCC::EQ;
02471           break;
02472         case ARM::VSELGTD:
02473         case ARM::VSELGTS:
02474           CC = ARMCC::GT;
02475           break;
02476         case ARM::VSELGED:
02477         case ARM::VSELGES:
02478           CC = ARMCC::GE;
02479           break;
02480         case ARM::VSELVSS:
02481         case ARM::VSELVSD:
02482           CC = ARMCC::VS;
02483           break;
02484         }
02485 
02486         if (Sub) {
02487           ARMCC::CondCodes NewCC = getSwappedCondition(CC);
02488           if (NewCC == ARMCC::AL)
02489             return false;
02490           // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
02491           // on CMP needs to be updated to be based on SUB.
02492           // Push the condition code operands to OperandsToUpdate.
02493           // If it is safe to remove CmpInstr, the condition code of these
02494           // operands will be modified.
02495           if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
02496               Sub->getOperand(2).getReg() == SrcReg) {
02497             // VSel doesn't support condition code update.
02498             if (IsInstrVSel)
02499               return false;
02500             OperandsToUpdate.push_back(
02501                 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
02502           }
02503         } else
02504           switch (CC) {
02505           default:
02506             // CPSR can be used multiple times; we should continue.
02507 break; 02508 case ARMCC::VS: 02509 case ARMCC::VC: 02510 case ARMCC::GE: 02511 case ARMCC::LT: 02512 case ARMCC::GT: 02513 case ARMCC::LE: 02514 return false; 02515 } 02516 } 02517 } 02518 02519 // If CPSR is not killed nor re-defined, we should check whether it is 02520 // live-out. If it is live-out, do not optimize. 02521 if (!isSafe) { 02522 MachineBasicBlock *MBB = CmpInstr->getParent(); 02523 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 02524 SE = MBB->succ_end(); SI != SE; ++SI) 02525 if ((*SI)->isLiveIn(ARM::CPSR)) 02526 return false; 02527 } 02528 02529 // Toggle the optional operand to CPSR. 02530 MI->getOperand(5).setReg(ARM::CPSR); 02531 MI->getOperand(5).setIsDef(true); 02532 assert(!isPredicated(MI) && "Can't use flags from predicated instruction"); 02533 CmpInstr->eraseFromParent(); 02534 02535 // Modify the condition code of operands in OperandsToUpdate. 02536 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to 02537 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. 02538 for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) 02539 OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); 02540 return true; 02541 } 02542 } 02543 02544 return false; 02545 } 02546 02547 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, 02548 MachineInstr *DefMI, unsigned Reg, 02549 MachineRegisterInfo *MRI) const { 02550 // Fold large immediates into add, sub, or, xor. 02551 unsigned DefOpc = DefMI->getOpcode(); 02552 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) 02553 return false; 02554 if (!DefMI->getOperand(1).isImm()) 02555 // Could be t2MOVi32imm <ga:xx> 02556 return false; 02557 02558 if (!MRI->hasOneNonDBGUse(Reg)) 02559 return false; 02560 02561 const MCInstrDesc &DefMCID = DefMI->getDesc(); 02562 if (DefMCID.hasOptionalDef()) { 02563 unsigned NumOps = DefMCID.getNumOperands(); 02564 const MachineOperand &MO = DefMI->getOperand(NumOps-1); 02565 if (MO.getReg() == ARM::CPSR && !MO.isDead()) 02566 // If DefMI defines CPSR and it is not dead, it's obviously not safe 02567 // to delete DefMI. 02568 return false; 02569 } 02570 02571 const MCInstrDesc &UseMCID = UseMI->getDesc(); 02572 if (UseMCID.hasOptionalDef()) { 02573 unsigned NumOps = UseMCID.getNumOperands(); 02574 if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR) 02575 // If the instruction sets the flag, do not attempt this optimization 02576 // since it may change the semantics of the code. 
02577 return false; 02578 } 02579 02580 unsigned UseOpc = UseMI->getOpcode(); 02581 unsigned NewUseOpc = 0; 02582 uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm(); 02583 uint32_t SOImmValV1 = 0, SOImmValV2 = 0; 02584 bool Commute = false; 02585 switch (UseOpc) { 02586 default: return false; 02587 case ARM::SUBrr: 02588 case ARM::ADDrr: 02589 case ARM::ORRrr: 02590 case ARM::EORrr: 02591 case ARM::t2SUBrr: 02592 case ARM::t2ADDrr: 02593 case ARM::t2ORRrr: 02594 case ARM::t2EORrr: { 02595 Commute = UseMI->getOperand(2).getReg() != Reg; 02596 switch (UseOpc) { 02597 default: break; 02598 case ARM::SUBrr: { 02599 if (Commute) 02600 return false; 02601 ImmVal = -ImmVal; 02602 NewUseOpc = ARM::SUBri; 02603 // Fallthrough 02604 } 02605 case ARM::ADDrr: 02606 case ARM::ORRrr: 02607 case ARM::EORrr: { 02608 if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) 02609 return false; 02610 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); 02611 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); 02612 switch (UseOpc) { 02613 default: break; 02614 case ARM::ADDrr: NewUseOpc = ARM::ADDri; break; 02615 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; 02616 case ARM::EORrr: NewUseOpc = ARM::EORri; break; 02617 } 02618 break; 02619 } 02620 case ARM::t2SUBrr: { 02621 if (Commute) 02622 return false; 02623 ImmVal = -ImmVal; 02624 NewUseOpc = ARM::t2SUBri; 02625 // Fallthrough 02626 } 02627 case ARM::t2ADDrr: 02628 case ARM::t2ORRrr: 02629 case ARM::t2EORrr: { 02630 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) 02631 return false; 02632 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); 02633 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); 02634 switch (UseOpc) { 02635 default: break; 02636 case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break; 02637 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; 02638 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; 02639 } 02640 break; 02641 } 02642 } 02643 } 02644 } 02645 02646 unsigned OpIdx = Commute ? 
2 : 1; 02647 unsigned Reg1 = UseMI->getOperand(OpIdx).getReg(); 02648 bool isKill = UseMI->getOperand(OpIdx).isKill(); 02649 unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); 02650 AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(), 02651 UseMI, UseMI->getDebugLoc(), 02652 get(NewUseOpc), NewReg) 02653 .addReg(Reg1, getKillRegState(isKill)) 02654 .addImm(SOImmValV1))); 02655 UseMI->setDesc(get(NewUseOpc)); 02656 UseMI->getOperand(1).setReg(NewReg); 02657 UseMI->getOperand(1).setIsKill(); 02658 UseMI->getOperand(2).ChangeToImmediate(SOImmValV2); 02659 DefMI->eraseFromParent(); 02660 return true; 02661 } 02662 02663 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, 02664 const MachineInstr *MI) { 02665 switch (MI->getOpcode()) { 02666 default: { 02667 const MCInstrDesc &Desc = MI->getDesc(); 02668 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass()); 02669 assert(UOps >= 0 && "bad # UOps"); 02670 return UOps; 02671 } 02672 02673 case ARM::LDRrs: 02674 case ARM::LDRBrs: 02675 case ARM::STRrs: 02676 case ARM::STRBrs: { 02677 unsigned ShOpVal = MI->getOperand(3).getImm(); 02678 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 02679 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 02680 if (!isSub && 02681 (ShImm == 0 || 02682 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 02683 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 02684 return 1; 02685 return 2; 02686 } 02687 02688 case ARM::LDRH: 02689 case ARM::STRH: { 02690 if (!MI->getOperand(2).getReg()) 02691 return 1; 02692 02693 unsigned ShOpVal = MI->getOperand(3).getImm(); 02694 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 02695 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 02696 if (!isSub && 02697 (ShImm == 0 || 02698 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 02699 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 02700 return 1; 02701 return 2; 02702 } 02703 02704 case ARM::LDRSB: 02705 case ARM::LDRSH: 02706 return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2; 02707 02708 case ARM::LDRSB_POST: 02709 case ARM::LDRSH_POST: { 02710 unsigned Rt = MI->getOperand(0).getReg(); 02711 unsigned Rm = MI->getOperand(3).getReg(); 02712 return (Rt == Rm) ? 4 : 3; 02713 } 02714 02715 case ARM::LDR_PRE_REG: 02716 case ARM::LDRB_PRE_REG: { 02717 unsigned Rt = MI->getOperand(0).getReg(); 02718 unsigned Rm = MI->getOperand(3).getReg(); 02719 if (Rt == Rm) 02720 return 3; 02721 unsigned ShOpVal = MI->getOperand(4).getImm(); 02722 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 02723 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 02724 if (!isSub && 02725 (ShImm == 0 || 02726 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 02727 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 02728 return 2; 02729 return 3; 02730 } 02731 02732 case ARM::STR_PRE_REG: 02733 case ARM::STRB_PRE_REG: { 02734 unsigned ShOpVal = MI->getOperand(4).getImm(); 02735 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 02736 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 02737 if (!isSub && 02738 (ShImm == 0 || 02739 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 02740 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 02741 return 2; 02742 return 3; 02743 } 02744 02745 case ARM::LDRH_PRE: 02746 case ARM::STRH_PRE: { 02747 unsigned Rt = MI->getOperand(0).getReg(); 02748 unsigned Rm = MI->getOperand(3).getReg(); 02749 if (!Rm) 02750 return 2; 02751 if (Rt == Rm) 02752 return 3; 02753 return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) 02754 ? 
3 : 2; 02755 } 02756 02757 case ARM::LDR_POST_REG: 02758 case ARM::LDRB_POST_REG: 02759 case ARM::LDRH_POST: { 02760 unsigned Rt = MI->getOperand(0).getReg(); 02761 unsigned Rm = MI->getOperand(3).getReg(); 02762 return (Rt == Rm) ? 3 : 2; 02763 } 02764 02765 case ARM::LDR_PRE_IMM: 02766 case ARM::LDRB_PRE_IMM: 02767 case ARM::LDR_POST_IMM: 02768 case ARM::LDRB_POST_IMM: 02769 case ARM::STRB_POST_IMM: 02770 case ARM::STRB_POST_REG: 02771 case ARM::STRB_PRE_IMM: 02772 case ARM::STRH_POST: 02773 case ARM::STR_POST_IMM: 02774 case ARM::STR_POST_REG: 02775 case ARM::STR_PRE_IMM: 02776 return 2; 02777 02778 case ARM::LDRSB_PRE: 02779 case ARM::LDRSH_PRE: { 02780 unsigned Rm = MI->getOperand(3).getReg(); 02781 if (Rm == 0) 02782 return 3; 02783 unsigned Rt = MI->getOperand(0).getReg(); 02784 if (Rt == Rm) 02785 return 4; 02786 unsigned ShOpVal = MI->getOperand(4).getImm(); 02787 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 02788 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 02789 if (!isSub && 02790 (ShImm == 0 || 02791 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 02792 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 02793 return 3; 02794 return 4; 02795 } 02796 02797 case ARM::LDRD: { 02798 unsigned Rt = MI->getOperand(0).getReg(); 02799 unsigned Rn = MI->getOperand(2).getReg(); 02800 unsigned Rm = MI->getOperand(3).getReg(); 02801 if (Rm) 02802 return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; 02803 return (Rt == Rn) ? 3 : 2; 02804 } 02805 02806 case ARM::STRD: { 02807 unsigned Rm = MI->getOperand(3).getReg(); 02808 if (Rm) 02809 return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; 02810 return 2; 02811 } 02812 02813 case ARM::LDRD_POST: 02814 case ARM::t2LDRD_POST: 02815 return 3; 02816 02817 case ARM::STRD_POST: 02818 case ARM::t2STRD_POST: 02819 return 4; 02820 02821 case ARM::LDRD_PRE: { 02822 unsigned Rt = MI->getOperand(0).getReg(); 02823 unsigned Rn = MI->getOperand(3).getReg(); 02824 unsigned Rm = MI->getOperand(4).getReg(); 02825 if (Rm) 02826 return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; 02827 return (Rt == Rn) ? 4 : 3; 02828 } 02829 02830 case ARM::t2LDRD_PRE: { 02831 unsigned Rt = MI->getOperand(0).getReg(); 02832 unsigned Rn = MI->getOperand(3).getReg(); 02833 return (Rt == Rn) ? 4 : 3; 02834 } 02835 02836 case ARM::STRD_PRE: { 02837 unsigned Rm = MI->getOperand(4).getReg(); 02838 if (Rm) 02839 return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; 02840 return 3; 02841 } 02842 02843 case ARM::t2STRD_PRE: 02844 return 3; 02845 02846 case ARM::t2LDR_POST: 02847 case ARM::t2LDRB_POST: 02848 case ARM::t2LDRB_PRE: 02849 case ARM::t2LDRSBi12: 02850 case ARM::t2LDRSBi8: 02851 case ARM::t2LDRSBpci: 02852 case ARM::t2LDRSBs: 02853 case ARM::t2LDRH_POST: 02854 case ARM::t2LDRH_PRE: 02855 case ARM::t2LDRSBT: 02856 case ARM::t2LDRSB_POST: 02857 case ARM::t2LDRSB_PRE: 02858 case ARM::t2LDRSH_POST: 02859 case ARM::t2LDRSH_PRE: 02860 case ARM::t2LDRSHi12: 02861 case ARM::t2LDRSHi8: 02862 case ARM::t2LDRSHpci: 02863 case ARM::t2LDRSHs: 02864 return 2; 02865 02866 case ARM::t2LDRDi8: { 02867 unsigned Rt = MI->getOperand(0).getReg(); 02868 unsigned Rn = MI->getOperand(2).getReg(); 02869 return (Rt == Rn) ? 
              3 : 2;
02870   }
02871 
02872   case ARM::t2STRB_POST:
02873   case ARM::t2STRB_PRE:
02874   case ARM::t2STRBs:
02875   case ARM::t2STRDi8:
02876   case ARM::t2STRH_POST:
02877   case ARM::t2STRH_PRE:
02878   case ARM::t2STRHs:
02879   case ARM::t2STR_POST:
02880   case ARM::t2STR_PRE:
02881   case ARM::t2STRs:
02882     return 2;
02883   }
02884 }
02885 
02886 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
02887 // can't be easily determined, return 0 (missing MachineMemOperand).
02888 //
02889 // FIXME: The current MachineInstr design does not support relying on machine
02890 // mem operands to determine the width of a memory access. Instead, we expect
02891 // the target to provide this information based on the instruction opcode and
02892 // operands. However, using MachineMemOperand is the best solution now for
02893 // two reasons:
02894 //
02895 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
02896 // operands. This is much more dangerous than using the MachineMemOperand
02897 // sizes because CodeGen passes can insert/remove optional machine operands. In
02898 // fact, it's totally incorrect for preRA passes and appears to be wrong for
02899 // postRA passes as well.
02900 //
02901 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
02902 // machine model that calls this should handle the unknown (zero size) case.
02903 //
02904 // Long term, we should require a target hook that verifies MachineMemOperand
02905 // sizes during MC lowering. That target hook should be local to MC lowering
02906 // because we can't ensure that it is aware of other MI forms. Doing this will
02907 // ensure that MachineMemOperands are correctly propagated through all passes.
02908 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
02909   unsigned Size = 0;
02910   for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
02911        E = MI->memoperands_end(); I != E; ++I) {
02912     Size += (*I)->getSize();
02913   }
02914   return Size / 4;
02915 }
02916 
02917 unsigned
02918 ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
02919                                  const MachineInstr *MI) const {
02920   if (!ItinData || ItinData->isEmpty())
02921     return 1;
02922 
02923   const MCInstrDesc &Desc = MI->getDesc();
02924   unsigned Class = Desc.getSchedClass();
02925   int ItinUOps = ItinData->getNumMicroOps(Class);
02926   if (ItinUOps >= 0) {
02927     if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
02928       return getNumMicroOpsSwiftLdSt(ItinData, MI);
02929 
02930     return ItinUOps;
02931   }
02932 
02933   unsigned Opc = MI->getOpcode();
02934   switch (Opc) {
02935   default:
02936     llvm_unreachable("Unexpected multi-uops instruction!");
02937   case ARM::VLDMQIA:
02938   case ARM::VSTMQIA:
02939     return 2;
02940 
02941   // The number of uOps for load / store multiple is determined by the number
02942   // of registers.
02943   //
02944   // On Cortex-A8, each pair of register loads / stores can be scheduled on the
02945   // same cycle. The scheduling for the first load / store must be done
02946   // separately by assuming the address is not 64-bit aligned.
02947   //
02948   // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
02949   // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
02950   // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
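  // Rough worked example (values assumed): a VLDMDIA loading five D registers
  // on an A9-like core costs 5/2 + 5%2 + 1 = 4 uops.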
02951   case ARM::VLDMDIA:
02952   case ARM::VLDMDIA_UPD:
02953   case ARM::VLDMDDB_UPD:
02954   case ARM::VLDMSIA:
02955   case ARM::VLDMSIA_UPD:
02956   case ARM::VLDMSDB_UPD:
02957   case ARM::VSTMDIA:
02958   case ARM::VSTMDIA_UPD:
02959   case ARM::VSTMDDB_UPD:
02960   case ARM::VSTMSIA:
02961   case ARM::VSTMSIA_UPD:
02962   case ARM::VSTMSDB_UPD: {
02963     unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
02964     return (NumRegs / 2) + (NumRegs % 2) + 1;
02965   }
02966 
02967   case ARM::LDMIA_RET:
02968   case ARM::LDMIA:
02969   case ARM::LDMDA:
02970   case ARM::LDMDB:
02971   case ARM::LDMIB:
02972   case ARM::LDMIA_UPD:
02973   case ARM::LDMDA_UPD:
02974   case ARM::LDMDB_UPD:
02975   case ARM::LDMIB_UPD:
02976   case ARM::STMIA:
02977   case ARM::STMDA:
02978   case ARM::STMDB:
02979   case ARM::STMIB:
02980   case ARM::STMIA_UPD:
02981   case ARM::STMDA_UPD:
02982   case ARM::STMDB_UPD:
02983   case ARM::STMIB_UPD:
02984   case ARM::tLDMIA:
02985   case ARM::tLDMIA_UPD:
02986   case ARM::tSTMIA_UPD:
02987   case ARM::tPOP_RET:
02988   case ARM::tPOP:
02989   case ARM::tPUSH:
02990   case ARM::t2LDMIA_RET:
02991   case ARM::t2LDMIA:
02992   case ARM::t2LDMDB:
02993   case ARM::t2LDMIA_UPD:
02994   case ARM::t2LDMDB_UPD:
02995   case ARM::t2STMIA:
02996   case ARM::t2STMDB:
02997   case ARM::t2STMIA_UPD:
02998   case ARM::t2STMDB_UPD: {
02999     unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
03000     if (Subtarget.isSwift()) {
03001       int UOps = 1 + NumRegs;  // One for address computation, one for each ld / st.
03002       switch (Opc) {
03003       default: break;
03004       case ARM::VLDMDIA_UPD:
03005       case ARM::VLDMDDB_UPD:
03006       case ARM::VLDMSIA_UPD:
03007       case ARM::VLDMSDB_UPD:
03008       case ARM::VSTMDIA_UPD:
03009       case ARM::VSTMDDB_UPD:
03010       case ARM::VSTMSIA_UPD:
03011       case ARM::VSTMSDB_UPD:
03012       case ARM::LDMIA_UPD:
03013       case ARM::LDMDA_UPD:
03014       case ARM::LDMDB_UPD:
03015       case ARM::LDMIB_UPD:
03016       case ARM::STMIA_UPD:
03017       case ARM::STMDA_UPD:
03018       case ARM::STMDB_UPD:
03019       case ARM::STMIB_UPD:
03020       case ARM::tLDMIA_UPD:
03021       case ARM::tSTMIA_UPD:
03022       case ARM::t2LDMIA_UPD:
03023       case ARM::t2LDMDB_UPD:
03024       case ARM::t2STMIA_UPD:
03025       case ARM::t2STMDB_UPD:
03026         ++UOps;  // One for base register writeback.
03027         break;
03028       case ARM::LDMIA_RET:
03029       case ARM::tPOP_RET:
03030       case ARM::t2LDMIA_RET:
03031         UOps += 2;  // One for base reg wb, one for write to pc.
03032         break;
03033       }
03034       return UOps;
03035     } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
03036       if (NumRegs < 4)
03037         return 2;
03038       // 4 registers would be issued: 2, 2.
03039       // 5 registers would be issued: 2, 2, 1.
03040       int A8UOps = (NumRegs / 2);
03041       if (NumRegs % 2)
03042         ++A8UOps;
03043       return A8UOps;
03044     } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
03045       int A9UOps = (NumRegs / 2);
03046       // If there is an odd number of registers or if it's not 64-bit aligned,
03047       // then it takes an extra AGU (Address Generation Unit) cycle.
03048       if ((NumRegs % 2) ||
03049           !MI->hasOneMemOperand() ||
03050           (*MI->memoperands_begin())->getAlignment() < 8)
03051         ++A9UOps;
03052       return A9UOps;
03053     } else {
03054       // Assume the worst.
03055       return NumRegs;
03056     }
03057   }
03058   }
03059 }
03060 
03061 int
03062 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
03063                                   const MCInstrDesc &DefMCID,
03064                                   unsigned DefClass,
03065                                   unsigned DefIdx, unsigned DefAlign) const {
03066   int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
03067   if (RegNo <= 0)
03068     // Def is the address writeback.
03069     return ItinData->getOperandCycle(DefClass, DefIdx);
03070 
03071   int DefCycle;
03072   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
03073     // (regno / 2) + (regno % 2) + 1
03074     DefCycle = RegNo / 2 + 1;
03075     if (RegNo % 2)
03076       ++DefCycle;
03077   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
03078     DefCycle = RegNo;
03079     bool isSLoad = false;
03080 
03081     switch (DefMCID.getOpcode()) {
03082     default: break;
03083     case ARM::VLDMSIA:
03084     case ARM::VLDMSIA_UPD:
03085     case ARM::VLDMSDB_UPD:
03086       isSLoad = true;
03087       break;
03088     }
03089 
03090     // If there is an odd number of 'S' registers or if it's not 64-bit
03091     // aligned, then it takes an extra cycle.
03092     if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
03093       ++DefCycle;
03094   } else {
03095     // Assume the worst.
03096     DefCycle = RegNo + 2;
03097   }
03098 
03099   return DefCycle;
03100 }
03101 
03102 int
03103 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
03104                                  const MCInstrDesc &DefMCID,
03105                                  unsigned DefClass,
03106                                  unsigned DefIdx, unsigned DefAlign) const {
03107   int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
03108   if (RegNo <= 0)
03109     // Def is the address writeback.
03110     return ItinData->getOperandCycle(DefClass, DefIdx);
03111 
03112   int DefCycle;
03113   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
03114     // 4 registers would be issued: 1, 2, 1.
03115     // 5 registers would be issued: 1, 2, 2.
03116     DefCycle = RegNo / 2;
03117     if (DefCycle < 1)
03118       DefCycle = 1;
03119     // Result latency is issue cycle + 2: E2.
03120     DefCycle += 2;
03121   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
03122     DefCycle = (RegNo / 2);
03123     // If there is an odd number of registers or if it's not 64-bit aligned,
03124     // then it takes an extra AGU (Address Generation Unit) cycle.
03125     if ((RegNo % 2) || DefAlign < 8)
03126       ++DefCycle;
03127     // Result latency is AGU cycles + 2.
03128     DefCycle += 2;
03129   } else {
03130     // Assume the worst.
03131     DefCycle = RegNo + 2;
03132   }
03133 
03134   return DefCycle;
03135 }
03136 
03137 int
03138 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
03139                                   const MCInstrDesc &UseMCID,
03140                                   unsigned UseClass,
03141                                   unsigned UseIdx, unsigned UseAlign) const {
03142   int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
03143   if (RegNo <= 0)
03144     return ItinData->getOperandCycle(UseClass, UseIdx);
03145 
03146   int UseCycle;
03147   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
03148     // (regno / 2) + (regno % 2) + 1
03149     UseCycle = RegNo / 2 + 1;
03150     if (RegNo % 2)
03151       ++UseCycle;
03152   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
03153     UseCycle = RegNo;
03154     bool isSStore = false;
03155 
03156     switch (UseMCID.getOpcode()) {
03157     default: break;
03158     case ARM::VSTMSIA:
03159     case ARM::VSTMSIA_UPD:
03160     case ARM::VSTMSDB_UPD:
03161       isSStore = true;
03162       break;
03163     }
03164 
03165     // If there is an odd number of 'S' registers or if it's not 64-bit
03166     // aligned, then it takes an extra cycle.
03167     if ((isSStore && (RegNo % 2)) || UseAlign < 8)
03168       ++UseCycle;
03169   } else {
03170     // Assume the worst.
03171     UseCycle = RegNo + 2;
03172   }
03173 
03174   return UseCycle;
03175 }
03176 
03177 int
03178 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
03179                                  const MCInstrDesc &UseMCID,
03180                                  unsigned UseClass,
03181                                  unsigned UseIdx, unsigned UseAlign) const {
03182   int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
03183   if (RegNo <= 0)
03184     return ItinData->getOperandCycle(UseClass, UseIdx);
03185 
03186   int UseCycle;
03187   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
03188     UseCycle = RegNo / 2;
03189     if (UseCycle < 2)
03190       UseCycle = 2;
03191     // Read in E3.
03192     UseCycle += 2;
03193   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
03194     UseCycle = (RegNo / 2);
03195     // If there is an odd number of registers or if it's not 64-bit aligned,
03196     // then it takes an extra AGU (Address Generation Unit) cycle.
03197     if ((RegNo % 2) || UseAlign < 8)
03198       ++UseCycle;
03199   } else {
03200     // Assume the worst.
03201     UseCycle = 1;
03202   }
03203   return UseCycle;
03204 }
03205 
03206 int
03207 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
03208                                     const MCInstrDesc &DefMCID,
03209                                     unsigned DefIdx, unsigned DefAlign,
03210                                     const MCInstrDesc &UseMCID,
03211                                     unsigned UseIdx, unsigned UseAlign) const {
03212   unsigned DefClass = DefMCID.getSchedClass();
03213   unsigned UseClass = UseMCID.getSchedClass();
03214 
03215   if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
03216     return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
03217 
03218   // This may be a def / use of a variable_ops instruction, in which case the
03219   // operand latency might be determinable dynamically. Let the target try to
03220   // figure it out.
03221   int DefCycle = -1;
03222   bool LdmBypass = false;
03223   switch (DefMCID.getOpcode()) {
03224   default:
03225     DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
03226     break;
03227 
03228   case ARM::VLDMDIA:
03229   case ARM::VLDMDIA_UPD:
03230   case ARM::VLDMDDB_UPD:
03231   case ARM::VLDMSIA:
03232   case ARM::VLDMSIA_UPD:
03233   case ARM::VLDMSDB_UPD:
03234     DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
03235     break;
03236 
03237   case ARM::LDMIA_RET:
03238   case ARM::LDMIA:
03239   case ARM::LDMDA:
03240   case ARM::LDMDB:
03241   case ARM::LDMIB:
03242   case ARM::LDMIA_UPD:
03243   case ARM::LDMDA_UPD:
03244   case ARM::LDMDB_UPD:
03245   case ARM::LDMIB_UPD:
03246   case ARM::tLDMIA:
03247   case ARM::tLDMIA_UPD:
03248   case ARM::tPUSH:
03249   case ARM::t2LDMIA_RET:
03250   case ARM::t2LDMIA:
03251   case ARM::t2LDMDB:
03252   case ARM::t2LDMIA_UPD:
03253   case ARM::t2LDMDB_UPD:
03254     LdmBypass = 1;
03255     DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
03256     break;
03257   }
03258 
03259   if (DefCycle == -1)
03260     // We can't seem to determine the result latency of the def; assume it's 2.
03261     DefCycle = 2; 03262 03263   int UseCycle = -1; 03264   switch (UseMCID.getOpcode()) { 03265   default: 03266     UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); 03267     break; 03268 03269   case ARM::VSTMDIA: 03270   case ARM::VSTMDIA_UPD: 03271   case ARM::VSTMDDB_UPD: 03272   case ARM::VSTMSIA: 03273   case ARM::VSTMSIA_UPD: 03274   case ARM::VSTMSDB_UPD: 03275     UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); 03276     break; 03277 03278   case ARM::STMIA: 03279   case ARM::STMDA: 03280   case ARM::STMDB: 03281   case ARM::STMIB: 03282   case ARM::STMIA_UPD: 03283   case ARM::STMDA_UPD: 03284   case ARM::STMDB_UPD: 03285   case ARM::STMIB_UPD: 03286   case ARM::tSTMIA_UPD: 03287   case ARM::tPOP_RET: 03288   case ARM::tPOP: 03289   case ARM::t2STMIA: 03290   case ARM::t2STMDB: 03291   case ARM::t2STMIA_UPD: 03292   case ARM::t2STMDB_UPD: 03293     UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); 03294     break; 03295   } 03296 03297   if (UseCycle == -1) 03298     // Assume it's read in the first stage. 03299     UseCycle = 1; 03300 03301   UseCycle = DefCycle - UseCycle + 1; 03302   if (UseCycle > 0) { 03303     if (LdmBypass) { 03304       // It's a variable_ops instruction so we can't use DefIdx here. Just use 03305       // the first def operand. 03306       if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1, 03307                                           UseClass, UseIdx)) 03308         --UseCycle; 03309     } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, 03310                                                UseClass, UseIdx)) { 03311       --UseCycle; 03312     } 03313   } 03314 03315   return UseCycle; 03316 } 03317 03318 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, 03319                                            const MachineInstr *MI, unsigned Reg, 03320                                            unsigned &DefIdx, unsigned &Dist) { 03321   Dist = 0; 03322 03323   MachineBasicBlock::const_iterator I = MI; ++I; 03324   MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator()); 03325   assert(II->isInsideBundle() && "Empty bundle?"); 03326 03327   int Idx = -1; 03328   while (II->isInsideBundle()) { 03329     Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI); 03330     if (Idx != -1) 03331       break; 03332     --II; 03333     ++Dist; 03334   } 03335 03336   assert(Idx != -1 && "Cannot find bundled definition!"); 03337   DefIdx = Idx; 03338   return II; 03339 } 03340 03341 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, 03342                                            const MachineInstr *MI, unsigned Reg, 03343                                            unsigned &UseIdx, unsigned &Dist) { 03344   Dist = 0; 03345 03346   MachineBasicBlock::const_instr_iterator II = MI; ++II; 03347   assert(II->isInsideBundle() && "Empty bundle?"); 03348   MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); 03349 03350   // FIXME: This doesn't properly handle multiple uses. 03351   int Idx = -1; 03352   while (II != E && II->isInsideBundle()) { 03353     Idx = II->findRegisterUseOperandIdx(Reg, false, TRI); 03354     if (Idx != -1) 03355       break; 03356     if (II->getOpcode() != ARM::t2IT) 03357       ++Dist; 03358     ++II; 03359   } 03360 03361   if (Idx == -1) { 03362     Dist = 0; 03363     return nullptr; 03364   } 03365 03366   UseIdx = Idx; 03367   return II; 03368 } 03369 03370 /// Return the number of cycles to add to (or subtract from) the static 03371 /// itinerary based on the def opcode and alignment. The caller will ensure 03372 /// that the adjusted latency is at least one cycle.
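// Illustrative sketch (not part of the original source): the callers below
// fold the result of adjustDefLatency into an itinerary latency behind a
// guard of the form "Adj >= 0 || (int)Latency > -Adj", so that a negative
// adjustment can never drive the reported latency below zero. A minimal
// standalone version of that combining step, with hypothetical names:
static inline unsigned exampleApplyDefLatencyAdjustment(unsigned Latency,
                                                        int Adj) {
  // Apply the adjustment only when the result cannot become negative.
  if (Adj >= 0 || (int)Latency > -Adj)
    return Latency + Adj;
  // Otherwise report the unadjusted itinerary latency.
  return Latency;
}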
03373 static int adjustDefLatency(const ARMSubtarget &Subtarget, 03374 const MachineInstr *DefMI, 03375 const MCInstrDesc *DefMCID, unsigned DefAlign) { 03376 int Adjust = 0; 03377 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) { 03378 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 03379 // variants are one cycle cheaper. 03380 switch (DefMCID->getOpcode()) { 03381 default: break; 03382 case ARM::LDRrs: 03383 case ARM::LDRBrs: { 03384 unsigned ShOpVal = DefMI->getOperand(3).getImm(); 03385 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 03386 if (ShImm == 0 || 03387 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 03388 --Adjust; 03389 break; 03390 } 03391 case ARM::t2LDRs: 03392 case ARM::t2LDRBs: 03393 case ARM::t2LDRHs: 03394 case ARM::t2LDRSHs: { 03395 // Thumb2 mode: lsl only. 03396 unsigned ShAmt = DefMI->getOperand(3).getImm(); 03397 if (ShAmt == 0 || ShAmt == 2) 03398 --Adjust; 03399 break; 03400 } 03401 } 03402 } else if (Subtarget.isSwift()) { 03403 // FIXME: Properly handle all of the latency adjustments for address 03404 // writeback. 03405 switch (DefMCID->getOpcode()) { 03406 default: break; 03407 case ARM::LDRrs: 03408 case ARM::LDRBrs: { 03409 unsigned ShOpVal = DefMI->getOperand(3).getImm(); 03410 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 03411 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 03412 if (!isSub && 03413 (ShImm == 0 || 03414 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 03415 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 03416 Adjust -= 2; 03417 else if (!isSub && 03418 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) 03419 --Adjust; 03420 break; 03421 } 03422 case ARM::t2LDRs: 03423 case ARM::t2LDRBs: 03424 case ARM::t2LDRHs: 03425 case ARM::t2LDRSHs: { 03426 // Thumb2 mode: lsl only. 
03427       unsigned ShAmt = DefMI->getOperand(3).getImm(); 03428       if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) 03429         Adjust -= 2; 03430       break; 03431     } 03432     } 03433   } 03434 03435   if (DefAlign < 8 && Subtarget.isLikeA9()) { 03436     switch (DefMCID->getOpcode()) { 03437     default: break; 03438     case ARM::VLD1q8: 03439     case ARM::VLD1q16: 03440     case ARM::VLD1q32: 03441     case ARM::VLD1q64: 03442     case ARM::VLD1q8wb_fixed: 03443     case ARM::VLD1q16wb_fixed: 03444     case ARM::VLD1q32wb_fixed: 03445     case ARM::VLD1q64wb_fixed: 03446     case ARM::VLD1q8wb_register: 03447     case ARM::VLD1q16wb_register: 03448     case ARM::VLD1q32wb_register: 03449     case ARM::VLD1q64wb_register: 03450     case ARM::VLD2d8: 03451     case ARM::VLD2d16: 03452     case ARM::VLD2d32: 03453     case ARM::VLD2q8: 03454     case ARM::VLD2q16: 03455     case ARM::VLD2q32: 03456     case ARM::VLD2d8wb_fixed: 03457     case ARM::VLD2d16wb_fixed: 03458     case ARM::VLD2d32wb_fixed: 03459     case ARM::VLD2q8wb_fixed: 03460     case ARM::VLD2q16wb_fixed: 03461     case ARM::VLD2q32wb_fixed: 03462     case ARM::VLD2d8wb_register: 03463     case ARM::VLD2d16wb_register: 03464     case ARM::VLD2d32wb_register: 03465     case ARM::VLD2q8wb_register: 03466     case ARM::VLD2q16wb_register: 03467     case ARM::VLD2q32wb_register: 03468     case ARM::VLD3d8: 03469     case ARM::VLD3d16: 03470     case ARM::VLD3d32: 03471     case ARM::VLD1d64T: 03472     case ARM::VLD3d8_UPD: 03473     case ARM::VLD3d16_UPD: 03474     case ARM::VLD3d32_UPD: 03475     case ARM::VLD1d64Twb_fixed: 03476     case ARM::VLD1d64Twb_register: 03477     case ARM::VLD3q8_UPD: 03478     case ARM::VLD3q16_UPD: 03479     case ARM::VLD3q32_UPD: 03480     case ARM::VLD4d8: 03481     case ARM::VLD4d16: 03482     case ARM::VLD4d32: 03483     case ARM::VLD1d64Q: 03484     case ARM::VLD4d8_UPD: 03485     case ARM::VLD4d16_UPD: 03486     case ARM::VLD4d32_UPD: 03487     case ARM::VLD1d64Qwb_fixed: 03488     case ARM::VLD1d64Qwb_register: 03489     case ARM::VLD4q8_UPD: 03490     case ARM::VLD4q16_UPD: 03491     case ARM::VLD4q32_UPD: 03492     case ARM::VLD1DUPq8: 03493     case ARM::VLD1DUPq16: 03494     case ARM::VLD1DUPq32: 03495     case ARM::VLD1DUPq8wb_fixed: 03496     case ARM::VLD1DUPq16wb_fixed: 03497     case ARM::VLD1DUPq32wb_fixed: 03498     case ARM::VLD1DUPq8wb_register: 03499     case ARM::VLD1DUPq16wb_register: 03500     case ARM::VLD1DUPq32wb_register: 03501     case ARM::VLD2DUPd8: 03502     case ARM::VLD2DUPd16: 03503     case ARM::VLD2DUPd32: 03504     case ARM::VLD2DUPd8wb_fixed: 03505     case ARM::VLD2DUPd16wb_fixed: 03506     case ARM::VLD2DUPd32wb_fixed: 03507     case ARM::VLD2DUPd8wb_register: 03508     case ARM::VLD2DUPd16wb_register: 03509     case ARM::VLD2DUPd32wb_register: 03510     case ARM::VLD4DUPd8: 03511     case ARM::VLD4DUPd16: 03512     case ARM::VLD4DUPd32: 03513     case ARM::VLD4DUPd8_UPD: 03514     case ARM::VLD4DUPd16_UPD: 03515     case ARM::VLD4DUPd32_UPD: 03516     case ARM::VLD1LNd8: 03517     case ARM::VLD1LNd16: 03518     case ARM::VLD1LNd32: 03519     case ARM::VLD1LNd8_UPD: 03520     case ARM::VLD1LNd16_UPD: 03521     case ARM::VLD1LNd32_UPD: 03522     case ARM::VLD2LNd8: 03523     case ARM::VLD2LNd16: 03524     case ARM::VLD2LNd32: 03525     case ARM::VLD2LNq16: 03526     case ARM::VLD2LNq32: 03527     case ARM::VLD2LNd8_UPD: 03528     case ARM::VLD2LNd16_UPD: 03529     case ARM::VLD2LNd32_UPD: 03530     case ARM::VLD2LNq16_UPD: 03531     case ARM::VLD2LNq32_UPD: 03532     case ARM::VLD4LNd8: 03533     case ARM::VLD4LNd16: 03534     case ARM::VLD4LNd32: 03535     case ARM::VLD4LNq16: 03536     case ARM::VLD4LNq32: 03537     case ARM::VLD4LNd8_UPD: 03538     case ARM::VLD4LNd16_UPD: 03539     case ARM::VLD4LNd32_UPD: 03540     case ARM::VLD4LNq16_UPD: 03541     case ARM::VLD4LNq32_UPD: 03542       // If the address is not 64-bit aligned, the latencies of these 03543       // instructions increase by one.
03544       ++Adjust; 03545       break; 03546     } 03547   } 03548   return Adjust; 03549 } 03550 03551 03552 03553 int 03554 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 03555                                     const MachineInstr *DefMI, unsigned DefIdx, 03556                                     const MachineInstr *UseMI, 03557                                     unsigned UseIdx) const { 03558   // No operand latency. The caller may fall back to getInstrLatency. 03559   if (!ItinData || ItinData->isEmpty()) 03560     return -1; 03561 03562   const MachineOperand &DefMO = DefMI->getOperand(DefIdx); 03563   unsigned Reg = DefMO.getReg(); 03564   const MCInstrDesc *DefMCID = &DefMI->getDesc(); 03565   const MCInstrDesc *UseMCID = &UseMI->getDesc(); 03566 03567   unsigned DefAdj = 0; 03568   if (DefMI->isBundle()) { 03569     DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj); 03570     DefMCID = &DefMI->getDesc(); 03571   } 03572   if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || 03573       DefMI->isRegSequence() || DefMI->isImplicitDef()) { 03574     return 1; 03575   } 03576 03577   unsigned UseAdj = 0; 03578   if (UseMI->isBundle()) { 03579     unsigned NewUseIdx; 03580     const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, 03581                                                    Reg, NewUseIdx, UseAdj); 03582     if (!NewUseMI) 03583       return -1; 03584 03585     UseMI = NewUseMI; 03586     UseIdx = NewUseIdx; 03587     UseMCID = &UseMI->getDesc(); 03588   } 03589 03590   if (Reg == ARM::CPSR) { 03591     if (DefMI->getOpcode() == ARM::FMSTAT) { 03592       // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) 03593       return Subtarget.isLikeA9() ? 1 : 20; 03594     } 03595 03596     // CPSR set and branch can be paired in the same cycle. 03597     if (UseMI->isBranch()) 03598       return 0; 03599 03600     // Otherwise it takes the instruction latency (generally one). 03601     unsigned Latency = getInstrLatency(ItinData, DefMI); 03602 03603     // For Thumb2 and -Os, prefer scheduling a CPSR-setting instruction close to 03604     // its uses. Instructions that are otherwise scheduled between them may 03605     // incur a code size penalty (they cannot use the 16-bit CPSR-setting 03606     // instructions). 03607     if (Latency > 0 && Subtarget.isThumb2()) { 03608       const MachineFunction *MF = DefMI->getParent()->getParent(); 03609       if (MF->getFunction()->getAttributes(). 03610             hasAttribute(AttributeSet::FunctionIndex, 03611                          Attribute::OptimizeForSize)) 03612         --Latency; 03613     } 03614     return Latency; 03615   } 03616 03617   if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit()) 03618     return -1; 03619 03620   unsigned DefAlign = DefMI->hasOneMemOperand() 03621     ? (*DefMI->memoperands_begin())->getAlignment() : 0; 03622   unsigned UseAlign = UseMI->hasOneMemOperand() 03623     ? (*UseMI->memoperands_begin())->getAlignment() : 0; 03624 03625   // Get the itinerary's latency if possible, and handle variable_ops. 03626   int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign, 03627                                   *UseMCID, UseIdx, UseAlign); 03628   // Unable to find operand latency. The caller may resort to getInstrLatency. 03629   if (Latency < 0) 03630     return Latency; 03631 03632   // Adjust for IT block position. 03633   int Adj = DefAdj + UseAdj; 03634 03635   // Adjust for dynamic def-side opcode variants not captured by the itinerary. 03636   Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); 03637   if (Adj >= 0 || (int)Latency > -Adj) { 03638     return Latency + Adj; 03639   } 03640   // Return the itinerary latency, which may be zero but not less than zero.
03641 return Latency; 03642 } 03643 03644 int 03645 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 03646 SDNode *DefNode, unsigned DefIdx, 03647 SDNode *UseNode, unsigned UseIdx) const { 03648 if (!DefNode->isMachineOpcode()) 03649 return 1; 03650 03651 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); 03652 03653 if (isZeroCost(DefMCID.Opcode)) 03654 return 0; 03655 03656 if (!ItinData || ItinData->isEmpty()) 03657 return DefMCID.mayLoad() ? 3 : 1; 03658 03659 if (!UseNode->isMachineOpcode()) { 03660 int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); 03661 if (Subtarget.isLikeA9() || Subtarget.isSwift()) 03662 return Latency <= 2 ? 1 : Latency - 1; 03663 else 03664 return Latency <= 3 ? 1 : Latency - 2; 03665 } 03666 03667 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); 03668 const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); 03669 unsigned DefAlign = !DefMN->memoperands_empty() 03670 ? (*DefMN->memoperands_begin())->getAlignment() : 0; 03671 const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); 03672 unsigned UseAlign = !UseMN->memoperands_empty() 03673 ? (*UseMN->memoperands_begin())->getAlignment() : 0; 03674 int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, 03675 UseMCID, UseIdx, UseAlign); 03676 03677 if (Latency > 1 && 03678 (Subtarget.isCortexA8() || Subtarget.isLikeA9() || 03679 Subtarget.isCortexA7())) { 03680 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 03681 // variants are one cycle cheaper. 03682 switch (DefMCID.getOpcode()) { 03683 default: break; 03684 case ARM::LDRrs: 03685 case ARM::LDRBrs: { 03686 unsigned ShOpVal = 03687 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 03688 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 03689 if (ShImm == 0 || 03690 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 03691 --Latency; 03692 break; 03693 } 03694 case ARM::t2LDRs: 03695 case ARM::t2LDRBs: 03696 case ARM::t2LDRHs: 03697 case ARM::t2LDRSHs: { 03698 // Thumb2 mode: lsl only. 03699 unsigned ShAmt = 03700 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 03701 if (ShAmt == 0 || ShAmt == 2) 03702 --Latency; 03703 break; 03704 } 03705 } 03706 } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) { 03707 // FIXME: Properly handle all of the latency adjustments for address 03708 // writeback. 03709 switch (DefMCID.getOpcode()) { 03710 default: break; 03711 case ARM::LDRrs: 03712 case ARM::LDRBrs: { 03713 unsigned ShOpVal = 03714 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 03715 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 03716 if (ShImm == 0 || 03717 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 03718 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 03719 Latency -= 2; 03720 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) 03721 --Latency; 03722 break; 03723 } 03724 case ARM::t2LDRs: 03725 case ARM::t2LDRBs: 03726 case ARM::t2LDRHs: 03727 case ARM::t2LDRSHs: { 03728 // Thumb2 mode: lsl 0-3 only. 
03729 Latency -= 2; 03730 break; 03731 } 03732 } 03733 } 03734 03735 if (DefAlign < 8 && Subtarget.isLikeA9()) 03736 switch (DefMCID.getOpcode()) { 03737 default: break; 03738 case ARM::VLD1q8: 03739 case ARM::VLD1q16: 03740 case ARM::VLD1q32: 03741 case ARM::VLD1q64: 03742 case ARM::VLD1q8wb_register: 03743 case ARM::VLD1q16wb_register: 03744 case ARM::VLD1q32wb_register: 03745 case ARM::VLD1q64wb_register: 03746 case ARM::VLD1q8wb_fixed: 03747 case ARM::VLD1q16wb_fixed: 03748 case ARM::VLD1q32wb_fixed: 03749 case ARM::VLD1q64wb_fixed: 03750 case ARM::VLD2d8: 03751 case ARM::VLD2d16: 03752 case ARM::VLD2d32: 03753 case ARM::VLD2q8Pseudo: 03754 case ARM::VLD2q16Pseudo: 03755 case ARM::VLD2q32Pseudo: 03756 case ARM::VLD2d8wb_fixed: 03757 case ARM::VLD2d16wb_fixed: 03758 case ARM::VLD2d32wb_fixed: 03759 case ARM::VLD2q8PseudoWB_fixed: 03760 case ARM::VLD2q16PseudoWB_fixed: 03761 case ARM::VLD2q32PseudoWB_fixed: 03762 case ARM::VLD2d8wb_register: 03763 case ARM::VLD2d16wb_register: 03764 case ARM::VLD2d32wb_register: 03765 case ARM::VLD2q8PseudoWB_register: 03766 case ARM::VLD2q16PseudoWB_register: 03767 case ARM::VLD2q32PseudoWB_register: 03768 case ARM::VLD3d8Pseudo: 03769 case ARM::VLD3d16Pseudo: 03770 case ARM::VLD3d32Pseudo: 03771 case ARM::VLD1d64TPseudo: 03772 case ARM::VLD1d64TPseudoWB_fixed: 03773 case ARM::VLD3d8Pseudo_UPD: 03774 case ARM::VLD3d16Pseudo_UPD: 03775 case ARM::VLD3d32Pseudo_UPD: 03776 case ARM::VLD3q8Pseudo_UPD: 03777 case ARM::VLD3q16Pseudo_UPD: 03778 case ARM::VLD3q32Pseudo_UPD: 03779 case ARM::VLD3q8oddPseudo: 03780 case ARM::VLD3q16oddPseudo: 03781 case ARM::VLD3q32oddPseudo: 03782 case ARM::VLD3q8oddPseudo_UPD: 03783 case ARM::VLD3q16oddPseudo_UPD: 03784 case ARM::VLD3q32oddPseudo_UPD: 03785 case ARM::VLD4d8Pseudo: 03786 case ARM::VLD4d16Pseudo: 03787 case ARM::VLD4d32Pseudo: 03788 case ARM::VLD1d64QPseudo: 03789 case ARM::VLD1d64QPseudoWB_fixed: 03790 case ARM::VLD4d8Pseudo_UPD: 03791 case ARM::VLD4d16Pseudo_UPD: 03792 case ARM::VLD4d32Pseudo_UPD: 03793 case ARM::VLD4q8Pseudo_UPD: 03794 case ARM::VLD4q16Pseudo_UPD: 03795 case ARM::VLD4q32Pseudo_UPD: 03796 case ARM::VLD4q8oddPseudo: 03797 case ARM::VLD4q16oddPseudo: 03798 case ARM::VLD4q32oddPseudo: 03799 case ARM::VLD4q8oddPseudo_UPD: 03800 case ARM::VLD4q16oddPseudo_UPD: 03801 case ARM::VLD4q32oddPseudo_UPD: 03802 case ARM::VLD1DUPq8: 03803 case ARM::VLD1DUPq16: 03804 case ARM::VLD1DUPq32: 03805 case ARM::VLD1DUPq8wb_fixed: 03806 case ARM::VLD1DUPq16wb_fixed: 03807 case ARM::VLD1DUPq32wb_fixed: 03808 case ARM::VLD1DUPq8wb_register: 03809 case ARM::VLD1DUPq16wb_register: 03810 case ARM::VLD1DUPq32wb_register: 03811 case ARM::VLD2DUPd8: 03812 case ARM::VLD2DUPd16: 03813 case ARM::VLD2DUPd32: 03814 case ARM::VLD2DUPd8wb_fixed: 03815 case ARM::VLD2DUPd16wb_fixed: 03816 case ARM::VLD2DUPd32wb_fixed: 03817 case ARM::VLD2DUPd8wb_register: 03818 case ARM::VLD2DUPd16wb_register: 03819 case ARM::VLD2DUPd32wb_register: 03820 case ARM::VLD4DUPd8Pseudo: 03821 case ARM::VLD4DUPd16Pseudo: 03822 case ARM::VLD4DUPd32Pseudo: 03823 case ARM::VLD4DUPd8Pseudo_UPD: 03824 case ARM::VLD4DUPd16Pseudo_UPD: 03825 case ARM::VLD4DUPd32Pseudo_UPD: 03826 case ARM::VLD1LNq8Pseudo: 03827 case ARM::VLD1LNq16Pseudo: 03828 case ARM::VLD1LNq32Pseudo: 03829 case ARM::VLD1LNq8Pseudo_UPD: 03830 case ARM::VLD1LNq16Pseudo_UPD: 03831 case ARM::VLD1LNq32Pseudo_UPD: 03832 case ARM::VLD2LNd8Pseudo: 03833 case ARM::VLD2LNd16Pseudo: 03834 case ARM::VLD2LNd32Pseudo: 03835 case ARM::VLD2LNq16Pseudo: 03836 case ARM::VLD2LNq32Pseudo: 03837 case 
ARM::VLD2LNd8Pseudo_UPD: 03838     case ARM::VLD2LNd16Pseudo_UPD: 03839     case ARM::VLD2LNd32Pseudo_UPD: 03840     case ARM::VLD2LNq16Pseudo_UPD: 03841     case ARM::VLD2LNq32Pseudo_UPD: 03842     case ARM::VLD4LNd8Pseudo: 03843     case ARM::VLD4LNd16Pseudo: 03844     case ARM::VLD4LNd32Pseudo: 03845     case ARM::VLD4LNq16Pseudo: 03846     case ARM::VLD4LNq32Pseudo: 03847     case ARM::VLD4LNd8Pseudo_UPD: 03848     case ARM::VLD4LNd16Pseudo_UPD: 03849     case ARM::VLD4LNd32Pseudo_UPD: 03850     case ARM::VLD4LNq16Pseudo_UPD: 03851     case ARM::VLD4LNq32Pseudo_UPD: 03852       // If the address is not 64-bit aligned, the latencies of these 03853       // instructions increase by one. 03854       ++Latency; 03855       break; 03856     } 03857 03858   return Latency; 03859 } 03860 03861 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const { 03862    if (MI->isCopyLike() || MI->isInsertSubreg() || 03863       MI->isRegSequence() || MI->isImplicitDef()) 03864     return 0; 03865 03866   if (MI->isBundle()) 03867     return 0; 03868 03869   const MCInstrDesc &MCID = MI->getDesc(); 03870 03871   if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) { 03872     // When predicated, CPSR is an additional source operand for CPSR updating 03873     // instructions, which apparently increases their latencies. 03874     return 1; 03875   } 03876   return 0; 03877 } 03878 03879 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 03880                                            const MachineInstr *MI, 03881                                            unsigned *PredCost) const { 03882   if (MI->isCopyLike() || MI->isInsertSubreg() || 03883       MI->isRegSequence() || MI->isImplicitDef()) 03884     return 1; 03885 03886   // An instruction scheduler typically runs on unbundled instructions; however, 03887   // other passes may query the latency of a bundled instruction. 03888   if (MI->isBundle()) { 03889     unsigned Latency = 0; 03890     MachineBasicBlock::const_instr_iterator I = MI; 03891     MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); 03892     while (++I != E && I->isInsideBundle()) { 03893       if (I->getOpcode() != ARM::t2IT) 03894         Latency += getInstrLatency(ItinData, I, PredCost); 03895     } 03896     return Latency; 03897   } 03898 03899   const MCInstrDesc &MCID = MI->getDesc(); 03900   if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) { 03901     // When predicated, CPSR is an additional source operand for CPSR updating 03902     // instructions, which apparently increases their latencies. 03903     *PredCost = 1; 03904   } 03905   // Be sure to call getStageLatency for an empty itinerary in case it has a 03906   // valid MinLatency property. 03907   if (!ItinData) 03908     return MI->mayLoad() ? 3 : 1; 03909 03910   unsigned Class = MCID.getSchedClass(); 03911 03912   // For instructions with variable uops, use uops as latency. 03913   if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0) 03914     return getNumMicroOps(ItinData, MI); 03915 03916   // For the common case, fall back on the itinerary's latency. 03917   unsigned Latency = ItinData->getStageLatency(Class); 03918 03919   // Adjust for dynamic def-side opcode variants not captured by the itinerary. 03920   unsigned DefAlign = MI->hasOneMemOperand() 03921     ?
(*MI->memoperands_begin())->getAlignment() : 0; 03922   int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign); 03923   if (Adj >= 0 || (int)Latency > -Adj) { 03924     return Latency + Adj; 03925   } 03926   return Latency; 03927 } 03928 03929 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 03930                                       SDNode *Node) const { 03931   if (!Node->isMachineOpcode()) 03932     return 1; 03933 03934   if (!ItinData || ItinData->isEmpty()) 03935     return 1; 03936 03937   unsigned Opcode = Node->getMachineOpcode(); 03938   switch (Opcode) { 03939   default: 03940     return ItinData->getStageLatency(get(Opcode).getSchedClass()); 03941   case ARM::VLDMQIA: 03942   case ARM::VSTMQIA: 03943     return 2; 03944   } 03945 } 03946 03947 bool ARMBaseInstrInfo:: 03948 hasHighOperandLatency(const InstrItineraryData *ItinData, 03949                       const MachineRegisterInfo *MRI, 03950                       const MachineInstr *DefMI, unsigned DefIdx, 03951                       const MachineInstr *UseMI, unsigned UseIdx) const { 03952   unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; 03953   unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask; 03954   if (Subtarget.isCortexA8() && 03955       (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) 03956     // CortexA8 VFP instructions are not pipelined. 03957     return true; 03958 03959   // Hoist VFP / NEON instructions with a latency of 4 or higher. 03960   int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); 03961   if (Latency < 0) 03962     Latency = getInstrLatency(ItinData, DefMI); 03963   if (Latency <= 3) 03964     return false; 03965   return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || 03966          UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; 03967 } 03968 03969 bool ARMBaseInstrInfo:: 03970 hasLowDefLatency(const InstrItineraryData *ItinData, 03971                  const MachineInstr *DefMI, unsigned DefIdx) const { 03972   if (!ItinData || ItinData->isEmpty()) 03973     return false; 03974 03975   unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; 03976   if (DDomain == ARMII::DomainGeneral) { 03977     unsigned DefClass = DefMI->getDesc().getSchedClass(); 03978     int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); 03979     return (DefCycle != -1 && DefCycle <= 2); 03980   } 03981   return false; 03982 } 03983 03984 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, 03985                                          StringRef &ErrInfo) const { 03986   if (convertAddSubFlagsOpcode(MI->getOpcode())) { 03987     ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; 03988     return false; 03989   } 03990   return true; 03991 } 03992 03993 // LoadStackGuard has so far only been implemented for MachO. A different 03994 // code sequence is needed for other targets.
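// Illustrative sketch (not part of the original source): conceptually,
// expandLoadStackGuardBase below emits an address-materializing instruction
// (LoadImmOpc) followed by one load when the guard variable is directly
// addressable, or by two loads when the global must be reached through an
// indirection such as a GOT / non-lazy-pointer slot (GVIsIndirectSymbol).
// The pseudo-assembly strings and array names here are hypothetical:
static const char *const ExampleStackGuardDirect[] = {
  "LoadImmOpc rN, &guard", // materialize the guard variable's address
  "LoadOpc    rN, [rN]",   // load the guard value
};
static const char *const ExampleStackGuardIndirect[] = {
  "LoadImmOpc rN, &guard_ptr", // materialize the indirection slot's address
  "LoadOpc    rN, [rN]",       // dereference the slot to get the guard's address
  "LoadOpc    rN, [rN]",       // load the guard value itself
};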
03995 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, 03996 unsigned LoadImmOpc, 03997 unsigned LoadOpc, 03998 Reloc::Model RM) const { 03999 MachineBasicBlock &MBB = *MI->getParent(); 04000 DebugLoc DL = MI->getDebugLoc(); 04001 unsigned Reg = MI->getOperand(0).getReg(); 04002 const GlobalValue *GV = 04003 cast<GlobalValue>((*MI->memoperands_begin())->getValue()); 04004 MachineInstrBuilder MIB; 04005 04006 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) 04007 .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); 04008 04009 if (Subtarget.GVIsIndirectSymbol(GV, RM)) { 04010 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); 04011 MIB.addReg(Reg, RegState::Kill).addImm(0); 04012 unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; 04013 MachineMemOperand *MMO = MBB.getParent()-> 04014 getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4); 04015 MIB.addMemOperand(MMO); 04016 AddDefaultPred(MIB); 04017 } 04018 04019 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); 04020 MIB.addReg(Reg, RegState::Kill).addImm(0); 04021 MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 04022 AddDefaultPred(MIB); 04023 } 04024 04025 bool 04026 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, 04027 unsigned &AddSubOpc, 04028 bool &NegAcc, bool &HasLane) const { 04029 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode); 04030 if (I == MLxEntryMap.end()) 04031 return false; 04032 04033 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; 04034 MulOpc = Entry.MulOpc; 04035 AddSubOpc = Entry.AddSubOpc; 04036 NegAcc = Entry.NegAcc; 04037 HasLane = Entry.HasLane; 04038 return true; 04039 } 04040 04041 //===----------------------------------------------------------------------===// 04042 // Execution domains. 04043 //===----------------------------------------------------------------------===// 04044 // 04045 // Some instructions go down the NEON pipeline, some go down the VFP pipeline, 04046 // and some can go down both. The vmov instructions go down the VFP pipeline, 04047 // but they can be changed to vorr equivalents that are executed by the NEON 04048 // pipeline. 04049 // 04050 // We use the following execution domain numbering: 04051 // 04052 enum ARMExeDomain { 04053 ExeGeneric = 0, 04054 ExeVFP = 1, 04055 ExeNEON = 2 04056 }; 04057 // 04058 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h 04059 // 04060 std::pair<uint16_t, uint16_t> 04061 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { 04062 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON 04063 // if they are not predicated. 04064 if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) 04065 return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); 04066 04067 // CortexA9 is particularly picky about mixing the two and wants these 04068 // converted. 04069 if (Subtarget.isCortexA9() && !isPredicated(MI) && 04070 (MI->getOpcode() == ARM::VMOVRS || 04071 MI->getOpcode() == ARM::VMOVSR || 04072 MI->getOpcode() == ARM::VMOVS)) 04073 return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); 04074 04075 // No other instructions can be swizzled, so just determine their domain. 04076 unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; 04077 04078 if (Domain & ARMII::DomainNEON) 04079 return std::make_pair(ExeNEON, 0); 04080 04081 // Certain instructions can go either way on Cortex-A8. 04082 // Treat them as NEON instructions. 
04083   if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) 04084     return std::make_pair(ExeNEON, 0); 04085 04086   if (Domain & ARMII::DomainVFP) 04087     return std::make_pair(ExeVFP, 0); 04088 04089   return std::make_pair(ExeGeneric, 0); 04090 } 04091 04092 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, 04093                                             unsigned SReg, unsigned &Lane) { 04094   unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); 04095   Lane = 0; 04096 04097   if (DReg != ARM::NoRegister) 04098     return DReg; 04099 04100   Lane = 1; 04101   DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); 04102 04103   assert(DReg && "S-register with no D super-register?"); 04104   return DReg; 04105 } 04106 04107 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, 04108 /// set ImplicitSReg to a register number that must be marked as implicit-use, 04109 /// or to zero if no register needs to be marked as implicit-use. 04110 /// 04111 /// If the function cannot determine whether an SPR should be marked as an 04112 /// implicit use, it returns false. 04113 /// 04114 /// This function handles cases where an instruction is being modified from taking 04115 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict 04116 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other 04117 /// lane of the DPR). 04118 /// 04119 /// If the other SPR is defined, an implicit-use of it should be added. 04120 /// Otherwise (including the case where the DPR itself is defined), it should not. 04121 /// 04122 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, 04123                                        MachineInstr *MI, 04124                                        unsigned DReg, unsigned Lane, 04125                                        unsigned &ImplicitSReg) { 04126   // If the DPR is defined or used already, the other SPR lane will be chained 04127   // correctly, so there is nothing to be done. 04128   if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) { 04129     ImplicitSReg = 0; 04130     return true; 04131   } 04132 04133   // Otherwise we need to go searching to see if the SPR is set explicitly. 04134   ImplicitSReg = TRI->getSubReg(DReg, 04135                                 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1); 04136   MachineBasicBlock::LivenessQueryResult LQR = 04137     MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI); 04138 04139   if (LQR == MachineBasicBlock::LQR_Live) 04140     return true; 04141   else if (LQR == MachineBasicBlock::LQR_Unknown) 04142     return false; 04143 04144   // If the register is known not to be live, there is no need to add an 04145   // implicit-use. 04146   ImplicitSReg = 0; 04147   return true; 04148 } 04149 04150 void 04151 ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { 04152   unsigned DstReg, SrcReg, DReg; 04153   unsigned Lane; 04154   MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); 04155   const TargetRegisterInfo *TRI = &getRegisterInfo(); 04156   switch (MI->getOpcode()) { 04157     default: 04158       llvm_unreachable("cannot handle opcode!"); 04159       break; 04160     case ARM::VMOVD: 04161       if (Domain != ExeNEON) 04162         break; 04163 04164       // Zap the predicate operands.
04165       assert(!isPredicated(MI) && "Cannot predicate a VORRd"); 04166 04167       // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) 04168       DstReg = MI->getOperand(0).getReg(); 04169       SrcReg = MI->getOperand(1).getReg(); 04170 04171       for (unsigned i = MI->getDesc().getNumOperands(); i; --i) 04172         MI->RemoveOperand(i-1); 04173 04174       // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) 04175       MI->setDesc(get(ARM::VORRd)); 04176       AddDefaultPred(MIB.addReg(DstReg, RegState::Define) 04177                         .addReg(SrcReg) 04178                         .addReg(SrcReg)); 04179       break; 04180     case ARM::VMOVRS: 04181       if (Domain != ExeNEON) 04182         break; 04183       assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); 04184 04185       // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) 04186       DstReg = MI->getOperand(0).getReg(); 04187       SrcReg = MI->getOperand(1).getReg(); 04188 04189       for (unsigned i = MI->getDesc().getNumOperands(); i; --i) 04190         MI->RemoveOperand(i-1); 04191 04192       DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); 04193 04194       // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) 04195       // Note that DSrc has been widened and the other lane may be undef, which 04196       // contaminates the entire register. 04197       MI->setDesc(get(ARM::VGETLNi32)); 04198       AddDefaultPred(MIB.addReg(DstReg, RegState::Define) 04199                         .addReg(DReg, RegState::Undef) 04200                         .addImm(Lane)); 04201 04202       // The old source should be an implicit use; otherwise we might think it 04203       // was dead before this point. 04204       MIB.addReg(SrcReg, RegState::Implicit); 04205       break; 04206     case ARM::VMOVSR: { 04207       if (Domain != ExeNEON) 04208         break; 04209       assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); 04210 04211       // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) 04212       DstReg = MI->getOperand(0).getReg(); 04213       SrcReg = MI->getOperand(1).getReg(); 04214 04215       DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); 04216 04217       unsigned ImplicitSReg; 04218       if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) 04219         break; 04220 04221       for (unsigned i = MI->getDesc().getNumOperands(); i; --i) 04222         MI->RemoveOperand(i-1); 04223 04224       // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) 04225       // Again DDst may be undefined at the beginning of this instruction. 04226       MI->setDesc(get(ARM::VSETLNi32)); 04227       MIB.addReg(DReg, RegState::Define) 04228          .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI))) 04229          .addReg(SrcReg) 04230          .addImm(Lane); 04231       AddDefaultPred(MIB); 04232 04233       // The narrower destination must be marked as set to keep previous chains 04234       // in place.
04235       MIB.addReg(DstReg, RegState::Define | RegState::Implicit); 04236       if (ImplicitSReg != 0) 04237         MIB.addReg(ImplicitSReg, RegState::Implicit); 04238       break; 04239     } 04240     case ARM::VMOVS: { 04241       if (Domain != ExeNEON) 04242         break; 04243 04244       // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) 04245       DstReg = MI->getOperand(0).getReg(); 04246       SrcReg = MI->getOperand(1).getReg(); 04247 04248       unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; 04249       DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); 04250       DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); 04251 04252       unsigned ImplicitSReg; 04253       if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg)) 04254         break; 04255 04256       for (unsigned i = MI->getDesc().getNumOperands(); i; --i) 04257         MI->RemoveOperand(i-1); 04258 04259       if (DSrc == DDst) { 04260         // Destination can be: 04261         //   %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) 04262         MI->setDesc(get(ARM::VDUPLN32d)); 04263         MIB.addReg(DDst, RegState::Define) 04264            .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI))) 04265            .addImm(SrcLane); 04266         AddDefaultPred(MIB); 04267 04268         // Neither the source nor the destination is naturally represented any 04269         // longer, so add them back in manually. 04270         MIB.addReg(DstReg, RegState::Implicit | RegState::Define); 04271         MIB.addReg(SrcReg, RegState::Implicit); 04272         if (ImplicitSReg != 0) 04273           MIB.addReg(ImplicitSReg, RegState::Implicit); 04274         break; 04275       } 04276 04277       // In general there's no single instruction that can perform an S <-> S 04278       // move in NEON space, but a pair of VEXT instructions *can* do the 04279       // job. It turns out that the VEXTs needed will only use DSrc once, with 04280       // the position based purely on the combination of lane-0 and lane-1 04281       // involved. For example 04282       //   vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1 04283       //   vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1 04284       //   vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1 04285       //   vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1 04286       // 04287       // Pattern of the MachineInstrs is: 04288       //   %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) 04289       MachineInstrBuilder NewMIB; 04290       NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 04291                        get(ARM::VEXTd32), DDst); 04292 04293       // On the first instruction, both DSrc and DDst may be <undef> if present, 04294       // specifically when the original instruction didn't have them as an 04295       // <imp-use>. 04296       unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; 04297       bool CurUndef = !MI->readsRegister(CurReg, TRI); 04298       NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); 04299 04300       CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; 04301       CurUndef = !MI->readsRegister(CurReg, TRI); 04302       NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); 04303 04304       NewMIB.addImm(1); 04305       AddDefaultPred(NewMIB); 04306 04307       if (SrcLane == DstLane) 04308         NewMIB.addReg(SrcReg, RegState::Implicit); 04309 04310       MI->setDesc(get(ARM::VEXTd32)); 04311       MIB.addReg(DDst, RegState::Define); 04312 04313       // On the second instruction, DDst has definitely been defined above, so 04314       // it is not <undef>. DSrc, if present, can be <undef> as above. 04315       CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; 04316       CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); 04317       MIB.addReg(CurReg, getUndefRegState(CurUndef)); 04318 04319       CurReg = SrcLane == 0 && DstLane == 1 ?
DSrc : DDst; 04320       CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); 04321       MIB.addReg(CurReg, getUndefRegState(CurUndef)); 04322 04323       MIB.addImm(1); 04324       AddDefaultPred(MIB); 04325 04326       if (SrcLane != DstLane) 04327         MIB.addReg(SrcReg, RegState::Implicit); 04328 04329       // As before, the original destination is no longer represented; add it 04330       // implicitly. 04331       MIB.addReg(DstReg, RegState::Define | RegState::Implicit); 04332       if (ImplicitSReg != 0) 04333         MIB.addReg(ImplicitSReg, RegState::Implicit); 04334       break; 04335     } 04336   } 04337 04338 } 04339 04340 //===----------------------------------------------------------------------===// 04341 // Partial register updates 04342 //===----------------------------------------------------------------------===// 04343 // 04344 // Swift renames NEON registers with 64-bit granularity. That means any 04345 // instruction writing an S-reg implicitly reads the containing D-reg. The 04346 // problem is mostly avoided by translating f32 operations to v2f32 operations 04347 // on D-registers, but f32 loads are still a problem. 04348 // 04349 // These instructions can load an f32 into a NEON register: 04350 // 04351 // VLDRS - Only writes S, partial D update. 04352 // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops. 04353 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. 04354 // 04355 // FCONSTD can be used as a dependency-breaking instruction. 04356 unsigned ARMBaseInstrInfo:: 04357 getPartialRegUpdateClearance(const MachineInstr *MI, 04358                              unsigned OpNum, 04359                              const TargetRegisterInfo *TRI) const { 04360   if (!SwiftPartialUpdateClearance || 04361       !(Subtarget.isSwift() || Subtarget.isCortexA15())) 04362     return 0; 04363 04364   assert(TRI && "Need TRI instance"); 04365 04366   const MachineOperand &MO = MI->getOperand(OpNum); 04367   if (MO.readsReg()) 04368     return 0; 04369   unsigned Reg = MO.getReg(); 04370   int UseOp = -1; 04371 04372   switch(MI->getOpcode()) { 04373     // Normal instructions writing only an S-register. 04374   case ARM::VLDRS: 04375   case ARM::FCONSTS: 04376   case ARM::VMOVSR: 04377   case ARM::VMOVv8i8: 04378   case ARM::VMOVv4i16: 04379   case ARM::VMOVv2i32: 04380   case ARM::VMOVv2f32: 04381   case ARM::VMOVv1i64: 04382     UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI); 04383     break; 04384 04385     // Explicitly reads the dependency. 04386   case ARM::VLD1LNd32: 04387     UseOp = 3; 04388     break; 04389   default: 04390     return 0; 04391   } 04392 04393   // If this instruction actually reads a value from Reg, there is no unwanted 04394   // dependency. 04395   if (UseOp != -1 && MI->getOperand(UseOp).readsReg()) 04396     return 0; 04397 04398   // We must be able to clobber the whole D-reg. 04399   if (TargetRegisterInfo::isVirtualRegister(Reg)) { 04400     // Virtual register must be a foo:ssub_0<def,undef> operand. 04401     if (!MO.getSubReg() || MI->readsVirtualRegister(Reg)) 04402       return 0; 04403   } else if (ARM::SPRRegClass.contains(Reg)) { 04404     // Physical register: MI must define the full D-reg. 04405     unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0, 04406                                              &ARM::DPRRegClass); 04407     if (!DReg || !MI->definesRegister(DReg, TRI)) 04408       return 0; 04409   } 04410 04411   // MI has an unwanted D-register dependency. 04412   // Avoid defs in the previous N instructions. 04413   return SwiftPartialUpdateClearance; 04414 } 04415 04416 // Break a partial register dependency after getPartialRegUpdateClearance 04417 // returned non-zero.
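// Illustrative sketch (not part of the original source): the key step in
// breakPartialRegDependency below is mapping an S-register onto the
// D-register that contains it. With S0..S31 and D0..D15 numbered
// consecutively, S(2n) and S(2n+1) both live in D(n), which is what
// "DReg = ARM::D0 + (Reg - ARM::S0) / 2" computes. The same mapping on
// plain indices, with a hypothetical name:
static inline unsigned exampleContainingDRegIndex(unsigned SRegIndex) {
  return SRegIndex / 2; // e.g. S4 and S5 both map onto D2
}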
04418 void ARMBaseInstrInfo:: 04419 breakPartialRegDependency(MachineBasicBlock::iterator MI, 04420                           unsigned OpNum, 04421                           const TargetRegisterInfo *TRI) const { 04422   assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def"); 04423   assert(TRI && "Need TRI instance"); 04424 04425   const MachineOperand &MO = MI->getOperand(OpNum); 04426   unsigned Reg = MO.getReg(); 04427   assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 04428          "Can't break virtual register dependencies."); 04429   unsigned DReg = Reg; 04430 04431   // If MI defines an S-reg, find the corresponding D super-register. 04432   if (ARM::SPRRegClass.contains(Reg)) { 04433     DReg = ARM::D0 + (Reg - ARM::S0) / 2; 04434     assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken"); 04435   } 04436 04437   assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps"); 04438   assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); 04439 04440   // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines 04441   // the full D-register by loading the same value to both lanes. The 04442   // instruction is micro-coded with 2 uops, so don't do this until we can 04443   // properly schedule micro-coded instructions. The dispatcher stalls cause 04444   // regressions that are too large. 04445 04446   // Insert the dependency-breaking FCONSTD before MI. 04447   // 96 is the encoding of 0.5, but the actual value doesn't matter here. 04448   AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 04449                          get(ARM::FCONSTD), DReg).addImm(96)); 04450   MI->addRegisterKilled(DReg, TRI, true); 04451 } 04452 04453 void ARMBaseInstrInfo::getUnconditionalBranch( 04454     MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const { 04455   if (Subtarget.isThumb()) 04456     Branch.setOpcode(ARM::tB); 04457   else if (Subtarget.isThumb2()) 04458     Branch.setOpcode(ARM::t2B); 04459   else 04460     Branch.setOpcode(ARM::Bcc); 04461 04462   Branch.addOperand(MCOperand::CreateExpr(BranchTarget)); 04463   Branch.addOperand(MCOperand::CreateImm(ARMCC::AL)); 04464   Branch.addOperand(MCOperand::CreateReg(0)); 04465 } 04466 04467 void ARMBaseInstrInfo::getTrap(MCInst &MI) const { 04468   if (Subtarget.isThumb()) 04469     MI.setOpcode(ARM::tTRAP); 04470   else if (Subtarget.useNaClTrap()) 04471     MI.setOpcode(ARM::TRAPNaCl); 04472   else 04473     MI.setOpcode(ARM::TRAP); 04474 } 04475 04476 bool ARMBaseInstrInfo::hasNOP() const { 04477   return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; 04478 } 04479 04480 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { 04481   if (MI->getNumOperands() < 4) 04482     return true; 04483   unsigned ShOpVal = MI->getOperand(3).getImm(); 04484   unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal); 04485   // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. 04486   if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) || 04487       ((ShImm == 1 || ShImm == 2) && 04488        ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl)) 04489     return true; 04490 04491   return false; 04492 } 04493 04494 bool ARMBaseInstrInfo::getRegSequenceLikeInputs( 04495     const MachineInstr &MI, unsigned DefIdx, 04496     SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { 04497   assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); 04498   assert(MI.isRegSequenceLike() && "Invalid kind of instruction"); 04499 04500   switch (MI.getOpcode()) { 04501   case ARM::VMOVDRR: 04502     // dX = VMOVDRR rY, rZ 04503     // is the same as: 04504     // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 04505     // Populate the InputRegs accordingly.
04506 // rY 04507 const MachineOperand *MOReg = &MI.getOperand(1); 04508 InputRegs.push_back( 04509 RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0)); 04510 // rZ 04511 MOReg = &MI.getOperand(2); 04512 InputRegs.push_back( 04513 RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1)); 04514 return true; 04515 } 04516 llvm_unreachable("Target dependent opcode missing"); 04517 } 04518 04519 bool ARMBaseInstrInfo::getExtractSubregLikeInputs( 04520 const MachineInstr &MI, unsigned DefIdx, 04521 RegSubRegPairAndIdx &InputReg) const { 04522 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); 04523 assert(MI.isExtractSubregLike() && "Invalid kind of instruction"); 04524 04525 switch (MI.getOpcode()) { 04526 case ARM::VMOVRRD: 04527 // rX, rY = VMOVRRD dZ 04528 // is the same as: 04529 // rX = EXTRACT_SUBREG dZ, ssub_0 04530 // rY = EXTRACT_SUBREG dZ, ssub_1 04531 const MachineOperand &MOReg = MI.getOperand(2); 04532 InputReg.Reg = MOReg.getReg(); 04533 InputReg.SubReg = MOReg.getSubReg(); 04534 InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1; 04535 return true; 04536 } 04537 llvm_unreachable("Target dependent opcode missing"); 04538 } 04539 04540 bool ARMBaseInstrInfo::getInsertSubregLikeInputs( 04541 const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, 04542 RegSubRegPairAndIdx &InsertedReg) const { 04543 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); 04544 assert(MI.isInsertSubregLike() && "Invalid kind of instruction"); 04545 04546 switch (MI.getOpcode()) { 04547 case ARM::VSETLNi32: 04548 // dX = VSETLNi32 dY, rZ, imm 04549 const MachineOperand &MOBaseReg = MI.getOperand(1); 04550 const MachineOperand &MOInsertedReg = MI.getOperand(2); 04551 const MachineOperand &MOIndex = MI.getOperand(3); 04552 BaseReg.Reg = MOBaseReg.getReg(); 04553 BaseReg.SubReg = MOBaseReg.getSubReg(); 04554 04555 InsertedReg.Reg = MOInsertedReg.getReg(); 04556 InsertedReg.SubReg = MOInsertedReg.getSubReg(); 04557 InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1; 04558 return true; 04559 } 04560 llvm_unreachable("Target dependent opcode missing"); 04561 }
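// Illustrative sketch (not part of the original source): the three hooks
// above all reduce to choosing between the ssub_0 and ssub_1 lanes of a
// D-register. VMOVDRR's two GPR inputs populate lanes 0 and 1, VMOVRRD's
// result index selects the lane it reads, and VSETLNi32's immediate names
// the lane it writes. The same selection on plain enums, with hypothetical
// names standing in for the LLVM sub-register indices:
enum ExampleSLane { ExampleSSub0 = 0, ExampleSSub1 = 1 };
static inline ExampleSLane exampleLaneForVMOVRRDDef(unsigned DefIdx) {
  // rX (DefIdx == 0) comes from ssub_0; rY (DefIdx == 1) from ssub_1.
  return DefIdx == 0 ? ExampleSSub0 : ExampleSSub1;
}
static inline ExampleSLane exampleLaneForVSETLN(int LaneImm) {
  // The immediate lane operand selects the lane being written.
  return LaneImm == 0 ? ExampleSSub0 : ExampleSSub1;
}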