ARMBaseInstrInfo.cpp

//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

#define DEBUG_TYPE "arm-instrinfo"

#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

static cl::opt<bool>
WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
           cl::desc("Widen ARM vmovs to vmovd when possible"));

static cl::opt<unsigned>
SwiftPartialUpdateClearance("swift-partial-update-clearance",
     cl::Hidden, cl::init(12),
     cl::desc("Clearance before partial register updates"));

/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  uint16_t MLxOpc;     // MLA / MLS opcode
  uint16_t MulOpc;     // Expanded multiplication opcode
  uint16_t AddSubOpc;  // Expanded add / sub opcode
  bool NegAcc;         // True if the acc is negated before the add / sub.
  bool HasLane;        // True if instruction has an extra "lane" operand.
};

static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
};

ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
    Subtarget(STI) {
  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      assert(false && "Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
  }
}
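
// Illustrative note (added commentary, not part of the original source): each
// ARM_MLxTable row describes how a fused multiply-accumulate can be expanded
// when a hazard forces it apart. The first entry, for example, stands for:
//
//   VMLAS Sd, Sn, Sm    ==>   VMULS Stmp, Sn, Sm
//                             VADDS Sd, Sd, Stmp
//
// NegAcc entries negate the accumulator via the sub opcode instead, and
// HasLane marks SIMD forms that carry an extra lane-index operand.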

// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *
ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                               const ScheduleDAG *DAG) const {
  if (usePreRAHazardRecognizer()) {
    const InstrItineraryData *II =
        static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
  }
  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}

ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                   const ScheduleDAG *DAG) const {
  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
    return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}

MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return nullptr;

  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return nullptr;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
  if (MemOpc == 0)
    return nullptr;

  MachineInstr *UpdateMI = nullptr;
  MachineInstr *MemMI = nullptr;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI->getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MI->mayStore();
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI->getOperand(NumOps-2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
  switch (AddrMode) {
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return nullptr;
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
    } else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  case ARMII::AddrMode3: {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  }

  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}
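
// Illustrative sketch (assumed example, not from the original source): with
// -enable-arm-3-addr-conv, a pre-indexed load such as
//
//   LDR r0, [r1, #4]!      ; load and write the updated base back into r1
//
// is split into the unindexed pair
//
//   ADD r1, r1, #4
//   LDR r0, [r1]
//
// and the LiveVariables bookkeeping above moves each kill/dead flag onto
// whichever of the two new instructions now reads or defines that register.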

// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  TBB = nullptr;
  FBB = nullptr;

  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false; // Empty blocks are easy.
  --I;

  // Walk backwards from the end of the basic block until the branch is
  // analyzed or we give up.
  while (isPredicated(I) || I->isTerminator() || I->isDebugValue()) {

    // Flag to be raised on unanalyzeable instructions. This is useful in cases
    // where we want to clean up at the end of the basic block before we bail
    // out.
    bool CantAnalyze = false;

    // Skip over DEBUG values and predicated nonterminators.
    while (I->isDebugValue() || !I->isTerminator()) {
      if (I == MBB.begin())
        return false;
      --I;
    }

    if (isIndirectBranchOpcode(I->getOpcode()) ||
        isJumpTableBranchOpcode(I->getOpcode())) {
      // Indirect branches and jump tables can't be analyzed, but we still want
      // to clean up any instructions at the tail of the basic block.
      CantAnalyze = true;
    } else if (isUncondBranchOpcode(I->getOpcode())) {
      TBB = I->getOperand(0).getMBB();
    } else if (isCondBranchOpcode(I->getOpcode())) {
      // Bail out if we encounter multiple conditional branches.
      if (!Cond.empty())
        return true;

      assert(!FBB && "FBB should have been null.");
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(I->getOperand(1));
      Cond.push_back(I->getOperand(2));
    } else if (I->isReturn()) {
      // Returns can't be analyzed, but we should run cleanup.
      CantAnalyze = !isPredicated(I);
    } else {
      // We encountered an unrecognized terminator. Bail out immediately.
      return true;
    }

    // Cleanup code - to be run for unpredicated unconditional branches and
    //                returns.
    if (!isPredicated(I) &&
          (isUncondBranchOpcode(I->getOpcode()) ||
           isIndirectBranchOpcode(I->getOpcode()) ||
           isJumpTableBranchOpcode(I->getOpcode()) ||
           I->isReturn())) {
      // Forget any previous conditional branch information - it no longer
      // applies.
      Cond.clear();
      FBB = nullptr;

      // If we can modify the function, delete everything below this
      // unconditional branch.
      if (AllowModify) {
        MachineBasicBlock::iterator DI = std::next(I);
        while (DI != MBB.end()) {
          MachineInstr *InstToDelete = DI;
          ++DI;
          InstToDelete->eraseFromParent();
        }
      }
    }

    if (CantAnalyze)
      return true;

    if (I == MBB.begin())
      return false;

    --I;
  }

  // We made it past the terminators without bailing out - we must have
  // analyzed this branch successfully.
  return false;
}
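
// Illustrative usage sketch (assumed caller, not part of this file): a
// branch-folding style pass queries this hook roughly as follows:
//
//   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
//   SmallVector<MachineOperand, 4> Cond;
//   if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
//     // Success. The possible outcomes are:
//     //   TBB == nullptr, Cond empty -> block falls through
//     //   TBB set, Cond empty        -> ends in an unconditional branch
//     //   TBB set, Cond = {CC, CPSR} -> ends in a conditional branch
//     //   TBB and FBB set            -> "Bcc TBB" followed by "B FBB"
//   }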

unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc   = !AFI->isThumbFunction()
    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();

  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "ARM branch conditions have two components!");

  if (!FBB) {
    if (Cond.empty()) { // Unconditional branch?
      if (isThumb)
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
      else
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    } else
      BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
    return 1;
  }

  // Two-way conditional branch.
  BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
  else
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}
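
// Illustrative sketch (assumed example, not from the original source): a pass
// that retargets a block's terminators typically pairs these two hooks:
//
//   TII->RemoveBranch(MBB);                      // returns 0, 1, or 2
//   TII->InsertBranch(MBB, TBB, FBB, Cond, DL);  // re-emits the terminators
//
// On an ARM-mode function a two-way branch comes back as "Bcc TBB; B FBB",
// with the condition code taken from Cond[0] and CPSR from Cond[1].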

bool ARMBaseInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
  return false;
}

bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
  if (MI->isBundle()) {
    MachineBasicBlock::const_instr_iterator I = MI;
    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      int PIdx = I->findFirstPredOperandIdx();
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
        return true;
    }
    return false;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
}

bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
                     const SmallVectorImpl<MachineOperand> &Pred) const {
  unsigned Opc = MI->getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI->getParent()->getParent(), MI)
      .addImm(Pred[0].getImm())
      .addReg(Pred[1].getReg());
    return true;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
    return true;
  }
  return false;
}

bool ARMBaseInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                  const SmallVectorImpl<MachineOperand> &Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)
    return false;

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
  if (CC1 == CC2)
    return true;

  switch (CC1) {
  default:
    return false;
  case ARMCC::AL:
    return true;
  case ARMCC::HS:
    return CC2 == ARMCC::HI;
  case ARMCC::LS:
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
  case ARMCC::GE:
    return CC2 == ARMCC::GT;
  case ARMCC::LE:
    return CC2 == ARMCC::LT;
  }
}
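
// Illustrative note (added commentary, not part of the original source):
// "Pred1 subsumes Pred2" means any state satisfying Pred2 also satisfies
// Pred1. For example HS (unsigned >=) subsumes HI (unsigned >), and LS
// (unsigned <=) subsumes both LO and EQ, so an if-converter may fold an
// HI-predicated instruction into a block guarded by HS.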

bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                    std::vector<MachineOperand> &Pred) const {
  bool Found = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
        (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
      Pred.push_back(MO);
      Found = true;
    }
  }

  return Found;
}

static bool isCPSRDefined(const MachineInstr *MI) {
  for (const auto &MO : MI->operands())
    if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef())
      return true;
  return false;
}

static bool isEligibleForITBlock(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return true;
  case ARM::tADC:   // ADC (register) T1
  case ARM::tADDi3: // ADD (immediate) T1
  case ARM::tADDi8: // ADD (immediate) T2
  case ARM::tADDrr: // ADD (register) T1
  case ARM::tAND:   // AND (register) T1
  case ARM::tASRri: // ASR (immediate) T1
  case ARM::tASRrr: // ASR (register) T1
  case ARM::tBIC:   // BIC (register) T1
  case ARM::tEOR:   // EOR (register) T1
  case ARM::tLSLri: // LSL (immediate) T1
  case ARM::tLSLrr: // LSL (register) T1
  case ARM::tLSRri: // LSR (immediate) T1
  case ARM::tLSRrr: // LSR (register) T1
  case ARM::tMUL:   // MUL T1
  case ARM::tMVN:   // MVN (register) T1
  case ARM::tORR:   // ORR (register) T1
  case ARM::tROR:   // ROR (register) T1
  case ARM::tRSB:   // RSB (immediate) T1
  case ARM::tSBC:   // SBC (register) T1
  case ARM::tSUBi3: // SUB (immediate) T1
  case ARM::tSUBi8: // SUB (immediate) T2
  case ARM::tSUBrr: // SUB (register) T1
    return !isCPSRDefined(MI);
  }
}

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
  if (!MI->isPredicable())
    return false;

  if (!isEligibleForITBlock(MI))
    return false;

  ARMFunctionInfo *AFI =
    MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();

  if (AFI->isThumb2Function()) {
    if (getSubtarget().restrictIT())
      return isV8EligibleForIT(MI);
  } else { // non-Thumb
    if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
      return false;
  }

  return true;
}

namespace llvm {
template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    if (!MO.isDead())
      return false;
  }
  // all definitions of CPSR are dead
  return true;
}
}

/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI) {
  assert(JTI < JT.size());
  return JT[JTI].MBBs.size();
}

/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.getSize())
    return MCID.getSize();

  // If this machine instr is an inline asm, measure it.
  if (MI->getOpcode() == ARM::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default:
    // pseudo-instruction sizes are zero.
    return 0;
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case ARM::MOVi16_ga_pcrel:
  case ARM::MOVTi16_ga_pcrel:
  case ARM::t2MOVi16_ga_pcrel:
  case ARM::t2MOVTi16_ga_pcrel:
    return 4;
  case ARM::MOVi32imm:
  case ARM::t2MOVi32imm:
    return 8;
  case ARM::CONSTPOOL_ENTRY:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI->getOperand(2).getImm();
  case ARM::Int_eh_sjlj_longjmp:
    return 16;
  case ARM::tInt_eh_sjlj_longjmp:
    return 10;
  case ARM::Int_eh_sjlj_setjmp:
  case ARM::Int_eh_sjlj_setjmp_nofp:
    return 20;
  case ARM::tInt_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return 12;
  case ARM::BR_JTr:
  case ARM::BR_JTm:
  case ARM::BR_JTadd:
  case ARM::tBR_JTr:
  case ARM::t2BR_JT:
  case ARM::t2TBB_JT:
  case ARM::t2TBH_JT: {
    // These are jumptable branches, i.e. a branch followed by an inlined
    // jumptable. The size is the branch instruction plus one table entry per
    // destination: TBB entries are one byte each, TBH entries two bytes, and
    // the other forms use four-byte entries.
    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
    unsigned NumOps = MCID.getNumOperands();
    MachineOperand JTOP =
      MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
    unsigned JTI = JTOP.getIndex();
    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
    assert(MJTI != nullptr);
    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
    assert(JTI < JT.size());
    // Thumb instructions are 2-byte aligned, but JT entries are 4-byte
    // aligned. The assembler / linker may add 2 bytes of padding just before
    // the JT entries. The size does not include this padding; the constant
    // islands pass does separate bookkeeping for it.
    // FIXME: If we know the size of the function is less than (1 << 16) * 2
    // bytes, we can use 16-bit entries instead. Then there won't be an
    // alignment issue.
    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
    unsigned NumEntries = getNumJTEntries(JT, JTI);
    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
      // Make sure the instruction that follows TBB is 2-byte aligned.
      // FIXME: Constant island pass should insert an "ALIGN" instruction
      // instead.
      ++NumEntries;
    return NumEntries * EntrySize + InstSize;
  }
  }
}
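
// Illustrative worked example (assumed numbers, not from the original source):
// a t2TBB_JT with 7 table entries has EntrySize = 1 and InstSize = 4, and the
// odd entry count is rounded up so the following instruction stays 2-byte
// aligned, giving (7 + 1) * 1 + 4 = 12 bytes.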

unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI;
  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += GetInstSizeInBytes(&*I);
  }
  return Size;
}

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
                                    .addReg(SrcReg, getKillRegState(KillSrc))));
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc = 0;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq;

  if (Opc) {
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::VORRq)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    AddDefaultPred(MIB);
    return;
  }

  // Handle register classes that require multiple instructions.
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;
  int Spacing = 1;

  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VORRq;
    BeginIdx = ARM::qsub_0;
    SubRegs = 2;
  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VORRq;
    BeginIdx = ARM::qsub_0;
    SubRegs = 4;
  // Fall back to VMOVD.
  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
    BeginIdx = ARM::gsub_0;
    SubRegs = 2;
  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
    Spacing = 2;
  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
    Spacing = 2;
  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
    Spacing = 2;
  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
    Opc = ARM::VMOVS;
    BeginIdx = ARM::ssub_0;
    SubRegs = 2;
  }

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
    Spacing = -Spacing;
  }
#ifndef NDEBUG
  SmallSet<unsigned, 4> DstRegs;
#endif
  for (unsigned i = 0; i != SubRegs; ++i) {
    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
    unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
    // VORR takes two source operands.
    if (Opc == ARM::VORRq)
      Mov.addReg(Src);
    Mov = AddDefaultPred(Mov);
    // MOVr can set CC.
    if (Opc == ARM::MOVr)
      Mov = AddDefaultCC(Mov);
  }
  // Add implicit super-register defs and kills to the last instruction.
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
}
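
// Illustrative note (assumed example, not from the original source): when the
// source and destination tuples overlap, e.g. copying Q0_Q1 into Q1_Q2, a
// forward walk would clobber Q1 before reading it, so the check above flips
// the direction:
//
//   VORR q2, q1, q1    ; high half first
//   VORR q1, q0, q0    ; then the low half
//
// The implicit super-register def (and the kill of SrcReg) is attached to the
// last instruction emitted.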

const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOStore,
                            MFI.getObjectSize(FI),
                            Align);

  switch (RC->getSize()) {
    case 4:
      if (ARM::GPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 8:
      if (ARM::DPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
        if (Subtarget.hasV5TEOps()) {
          MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
          MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

          AddDefaultPred(MIB);
        } else {
          // Fallback to STM instruction, which has existed since the dawn of
          // time.
          MachineInstrBuilder MIB =
            AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
                             .addFrameIndex(FI).addMemOperand(MMO));
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 16:
      if (ARM::DPairRegClass.hasSubClassEq(RC)) {
        // Use aligned spills if the stack can be realigned.
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI)
                     .addMemOperand(MMO));
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 24:
      if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
        // Use aligned spills if the stack can be realigned.
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          MachineInstrBuilder MIB =
            AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                               .addFrameIndex(FI))
              .addMemOperand(MMO);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
          AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 32:
      if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          // FIXME: It's possible to only store part of the QQ register if the
          // spilled def has a sub-register index.
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          MachineInstrBuilder MIB =
            AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                               .addFrameIndex(FI))
              .addMemOperand(MMO);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
          AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 64:
      if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                             .addFrameIndex(FI))
            .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    default:
      llvm_unreachable("Unknown reg class!");
  }
}
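
// Illustrative note (added commentary, not part of the original source): the
// spill opcode is chosen purely from register-class size and slot alignment.
// A 16-byte DPair spill into a 16-byte-aligned, realignable slot becomes a
// single aligned VST1q64, while the same spill into a 4-byte-aligned slot
// falls back to VSTMQIA through the frame index.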

unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                     int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSTRspi:
  case ARM::VSTRD:
  case ARM::VSTRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64:
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64QPseudo:
    if (MI->getOperand(0).isFI() &&
        MI->getOperand(2).getSubReg() == 0) {
      FrameIndex = MI->getOperand(0).getIndex();
      return MI->getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                                    int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}

void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOLoad,
                            MFI.getObjectSize(FI),
                            Align);

  switch (RC->getSize()) {
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));

    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

        AddDefaultPred(MIB);
      } else {
        // Fallback to LDM instruction, which has existed since the dawn of
        // time.
        MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
                                 .addFrameIndex(FI).addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      }

      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
                       .addFrameIndex(FI)
                       .addMemOperand(MMO));
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                         .addFrameIndex(FI)
                         .addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                             .addFrameIndex(FI))
            .addMemOperand(MMO);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                           .addFrameIndex(FI))
          .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                      int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tLDRspi:
  case ARM::VLDRD:
  case ARM::VLDRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLD1q64:
  case ARM::VLD1d64TPseudo:
  case ARM::VLD1d64QPseudo:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                             int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}

bool
ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineFunction &MF = *MI->getParent()->getParent();
  Reloc::Model RM = MF.getTarget().getRelocationModel();

  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
    assert(getSubtarget().getTargetTriple().getObjectFormat() ==
           Triple::MachO &&
           "LOAD_STACK_GUARD currently supported only for MachO.");
    expandLoadStackGuard(MI, RM);
    MI->getParent()->erase(MI);
    return true;
  }

  // This hook gets to expand COPY instructions before they become
  // copyPhysReg() calls.  Look for VMOVS instructions that can legally be
  // widened to VMOVD.  We prefer the VMOVD when possible because it may be
  // changed into a VORR that can go down the NEON pipeline.
  if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() ||
      Subtarget.isFPOnlySP())
    return false;

  // Look for a copy between even S-registers.  That is where we keep floats
  // when using NEON v2f32 instructions for f32 arithmetic.
  unsigned DstRegS = MI->getOperand(0).getReg();
  unsigned SrcRegS = MI->getOperand(1).getReg();
  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  if (!DstRegD || !SrcRegD)
    return false;

  // We want to widen this into a DstRegD = VMOVD SrcRegD copy.  This is only
  // legal if the COPY already defines the full DstRegD, and it isn't a
  // sub-register insertion.
  if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
    return false;

  // A dead copy shouldn't show up here, but reject it just in case.
  if (MI->getOperand(0).isDead())
    return false;

  // All clear, widen the COPY.
  DEBUG(dbgs() << "widening:    " << *MI);
  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);

  // Get rid of the old <imp-def> of DstRegD.  Leave it if it defines a Q-reg
  // or some other super-register.
  int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
  if (ImpDefIdx != -1)
    MI->RemoveOperand(ImpDefIdx);

  // Change the opcode and operands.
  MI->setDesc(get(ARM::VMOVD));
  MI->getOperand(0).setReg(DstRegD);
  MI->getOperand(1).setReg(SrcRegD);
  AddDefaultPred(MIB);

  // We are now reading SrcRegD instead of SrcRegS.  This may upset the
  // register scavenger and machine verifier, so we need to indicate that we
  // are reading an undefined value from SrcRegD, but a proper value from
  // SrcRegS.
  MI->getOperand(1).setIsUndef();
  MIB.addReg(SrcRegS, RegState::Implicit);

  // SrcRegD may actually contain an unrelated value in the ssub_1
  // sub-register.  Don't kill it.  Only kill the ssub_0 sub-register.
  if (MI->getOperand(1).isKill()) {
    MI->getOperand(1).setIsKill(false);
    MI->addRegisterKilled(SrcRegS, TRI, true);
  }

  DEBUG(dbgs() << "replaced by: " << *MI);
  return true;
}
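
// Illustrative sketch (assumed register numbers, not from the original
// source): the widening above rewrites a copy between even S-registers,
//
//   %S0 = COPY %S2
//
// into roughly
//
//   %D0 = VMOVD undef %D1, pred:AL, implicit %S2
//
// where the undef flag on D1 and the implicit read of S2 tell the register
// scavenger and machine verifier that only the ssub_0 half carries a value.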

/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);

  unsigned PCLabelId = AFI->createPICLabelUId();
  ARMConstantPoolValue *NewCPV = nullptr;

  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  if (ACPV->isGlobalValue())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
             ARMCP::CPValue, 4);
  else if (ACPV->isExtSymbol())
    NewCPV = ARMConstantPoolSymbol::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
             ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
                                             ARMCP::CPLSDA, 4);
  else if (ACPV->isMachineBasicBlock())
    NewCPV = ARMConstantPoolMBB::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
  return PCLabelId;
}
01345 
01346 void ARMBaseInstrInfo::
01347 reMaterialize(MachineBasicBlock &MBB,
01348               MachineBasicBlock::iterator I,
01349               unsigned DestReg, unsigned SubIdx,
01350               const MachineInstr *Orig,
01351               const TargetRegisterInfo &TRI) const {
01352   unsigned Opcode = Orig->getOpcode();
01353   switch (Opcode) {
01354   default: {
01355     MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
01356     MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
01357     MBB.insert(I, MI);
01358     break;
01359   }
01360   case ARM::tLDRpci_pic:
01361   case ARM::t2LDRpci_pic: {
01362     MachineFunction &MF = *MBB.getParent();
01363     unsigned CPI = Orig->getOperand(1).getIndex();
01364     unsigned PCLabelId = duplicateCPV(MF, CPI);
01365     MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
01366                                       DestReg)
01367       .addConstantPoolIndex(CPI).addImm(PCLabelId);
01368     MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
01369     break;
01370   }
01371   }
01372 }
01373 
01374 MachineInstr *
01375 ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
01376   MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
01377   switch(Orig->getOpcode()) {
01378   case ARM::tLDRpci_pic:
01379   case ARM::t2LDRpci_pic: {
01380     unsigned CPI = Orig->getOperand(1).getIndex();
01381     unsigned PCLabelId = duplicateCPV(MF, CPI);
01382     Orig->getOperand(1).setIndex(CPI);
01383     Orig->getOperand(2).setImm(PCLabelId);
01384     break;
01385   }
01386   }
01387   return MI;
01388 }
01389 
01390 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
01391                                         const MachineInstr *MI1,
01392                                         const MachineRegisterInfo *MRI) const {
01393   int Opcode = MI0->getOpcode();
01394   if (Opcode == ARM::t2LDRpci ||
01395       Opcode == ARM::t2LDRpci_pic ||
01396       Opcode == ARM::tLDRpci ||
01397       Opcode == ARM::tLDRpci_pic ||
01398       Opcode == ARM::LDRLIT_ga_pcrel ||
01399       Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
01400       Opcode == ARM::tLDRLIT_ga_pcrel ||
01401       Opcode == ARM::MOV_ga_pcrel ||
01402       Opcode == ARM::MOV_ga_pcrel_ldr ||
01403       Opcode == ARM::t2MOV_ga_pcrel) {
01404     if (MI1->getOpcode() != Opcode)
01405       return false;
01406     if (MI0->getNumOperands() != MI1->getNumOperands())
01407       return false;
01408 
01409     const MachineOperand &MO0 = MI0->getOperand(1);
01410     const MachineOperand &MO1 = MI1->getOperand(1);
01411     if (MO0.getOffset() != MO1.getOffset())
01412       return false;
01413 
01414     if (Opcode == ARM::LDRLIT_ga_pcrel ||
01415         Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
01416         Opcode == ARM::tLDRLIT_ga_pcrel ||
01417         Opcode == ARM::MOV_ga_pcrel ||
01418         Opcode == ARM::MOV_ga_pcrel_ldr ||
01419         Opcode == ARM::t2MOV_ga_pcrel)
01420       // Ignore the PC labels.
01421       return MO0.getGlobal() == MO1.getGlobal();
01422 
01423     const MachineFunction *MF = MI0->getParent()->getParent();
01424     const MachineConstantPool *MCP = MF->getConstantPool();
01425     int CPI0 = MO0.getIndex();
01426     int CPI1 = MO1.getIndex();
01427     const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
01428     const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
01429     bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
01430     bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
01431     if (isARMCP0 && isARMCP1) {
01432       ARMConstantPoolValue *ACPV0 =
01433         static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
01434       ARMConstantPoolValue *ACPV1 =
01435         static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
01436       return ACPV0->hasSameValue(ACPV1);
01437     } else if (!isARMCP0 && !isARMCP1) {
01438       return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
01439     }
01440     return false;
01441   } else if (Opcode == ARM::PICLDR) {
01442     if (MI1->getOpcode() != Opcode)
01443       return false;
01444     if (MI0->getNumOperands() != MI1->getNumOperands())
01445       return false;
01446 
01447     unsigned Addr0 = MI0->getOperand(1).getReg();
01448     unsigned Addr1 = MI1->getOperand(1).getReg();
01449     if (Addr0 != Addr1) {
01450       if (!MRI ||
01451           !TargetRegisterInfo::isVirtualRegister(Addr0) ||
01452           !TargetRegisterInfo::isVirtualRegister(Addr1))
01453         return false;
01454 
01455       // This assumes SSA form.
01456       MachineInstr *Def0 = MRI->getVRegDef(Addr0);
01457       MachineInstr *Def1 = MRI->getVRegDef(Addr1);
01458       // Check if the loaded values, e.g. a constantpool or a global address,
01459       // are the same.
01460       if (!produceSameValue(Def0, Def1, MRI))
01461         return false;
01462     }
01463 
01464     for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
01465       // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
01466       const MachineOperand &MO0 = MI0->getOperand(i);
01467       const MachineOperand &MO1 = MI1->getOperand(i);
01468       if (!MO0.isIdenticalTo(MO1))
01469         return false;
01470     }
01471     return true;
01472   }
01473 
01474   return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
01475 }
01476 
01477 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
01478 /// determine if two loads are loading from the same base address. It should
01479 /// only return true if the base pointers are the same and the only difference
01480 /// between the two addresses is the offset. It also returns the offsets by
01481 /// reference.
01482 ///
01483 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
01484 /// is permanently disabled.
01485 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
01486                                                int64_t &Offset1,
01487                                                int64_t &Offset2) const {
01488   // Don't worry about Thumb: just ARM and Thumb2.
01489   if (Subtarget.isThumb1Only()) return false;
01490 
01491   if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
01492     return false;
01493 
01494   switch (Load1->getMachineOpcode()) {
01495   default:
01496     return false;
01497   case ARM::LDRi12:
01498   case ARM::LDRBi12:
01499   case ARM::LDRD:
01500   case ARM::LDRH:
01501   case ARM::LDRSB:
01502   case ARM::LDRSH:
01503   case ARM::VLDRD:
01504   case ARM::VLDRS:
01505   case ARM::t2LDRi8:
01506   case ARM::t2LDRBi8:
01507   case ARM::t2LDRDi8:
01508   case ARM::t2LDRSHi8:
01509   case ARM::t2LDRi12:
01510   case ARM::t2LDRBi12:
01511   case ARM::t2LDRSHi12:
01512     break;
01513   }
01514 
01515   switch (Load2->getMachineOpcode()) {
01516   default:
01517     return false;
01518   case ARM::LDRi12:
01519   case ARM::LDRBi12:
01520   case ARM::LDRD:
01521   case ARM::LDRH:
01522   case ARM::LDRSB:
01523   case ARM::LDRSH:
01524   case ARM::VLDRD:
01525   case ARM::VLDRS:
01526   case ARM::t2LDRi8:
01527   case ARM::t2LDRBi8:
01528   case ARM::t2LDRSHi8:
01529   case ARM::t2LDRi12:
01530   case ARM::t2LDRBi12:
01531   case ARM::t2LDRSHi12:
01532     break;
01533   }
01534 
01535   // Check if base addresses and chain operands match.
01536   if (Load1->getOperand(0) != Load2->getOperand(0) ||
01537       Load1->getOperand(4) != Load2->getOperand(4))
01538     return false;
01539 
01540   // Index should be Reg0.
01541   if (Load1->getOperand(3) != Load2->getOperand(3))
01542     return false;
01543 
01544   // Determine the offsets.
01545   if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
01546       isa<ConstantSDNode>(Load2->getOperand(1))) {
01547     Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
01548     Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
01549     return true;
01550   }
01551 
01552   return false;
01553 }
01554 
01555 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
01556 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
01557 /// be scheduled together. On some targets, if two loads are loading from
01558 /// addresses in the same cache line, it's better if they are scheduled
01559 /// together. This function takes two integers that represent the load offsets
01560 /// from the common base address. It returns true if it decides it's desirable
01561 /// to schedule the two loads together. "NumLoads" is the number of loads that
01562 /// have already been scheduled after Load1.
01563 ///
01564 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
01565 /// is permanently disabled.
01566 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
01567                                                int64_t Offset1, int64_t Offset2,
01568                                                unsigned NumLoads) const {
01569   // Don't worry about Thumb: just ARM and Thumb2.
01570   if (Subtarget.isThumb1Only()) return false;
01571 
01572   assert(Offset2 > Offset1);
01573 
01574   if ((Offset2 - Offset1) / 8 > 64)
01575     return false;
01576 
01577   // Check if the machine opcodes are different. If they are different then
01578   // we consider them to not be of the same base address, EXCEPT in the case
01579   // of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. In that
01580   // case they are considered the same because they are merely different
01581   // encoding forms of the same basic instruction.
01582   if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
01583       !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
01584          Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
01585         (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
01586          Load2->getMachineOpcode() == ARM::t2LDRBi8)))
01587     return false;  // FIXME: overly conservative?
01588 
01589   // Four loads in a row should be sufficient.
01590   if (NumLoads >= 3)
01591     return false;
01592 
01593   return true;
01594 }
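
// Worked example of the distance check above: offsets 0 and 512 give
// (512 - 0) / 8 == 64, which is not greater than 64, so the pair stays a
// candidate; offsets 0 and 520 give 65 and are rejected. The effective
// cutoff is therefore a gap of 520 bytes between the two loads.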
01595 
01596 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
01597                                             const MachineBasicBlock *MBB,
01598                                             const MachineFunction &MF) const {
01599   // Debug info is never a scheduling boundary. It's necessary to be explicit
01600   // due to the special treatment of IT instructions below, otherwise a
01601   // dbg_value followed by an IT will result in the IT instruction being
01602   // considered a scheduling hazard, which is wrong. It should be the actual
01603   // instruction preceding the dbg_value instruction(s), just like it is
01604   // when debug info is not present.
01605   if (MI->isDebugValue())
01606     return false;
01607 
01608   // Terminators and labels can't be scheduled around.
01609   if (MI->isTerminator() || MI->isPosition())
01610     return true;
01611 
01612   // Treat the start of the IT block as a scheduling boundary, but schedule
01613   // t2IT along with all instructions following it.
01614   // FIXME: This is a big hammer. But the alternative is to add all potential
01615   // true and anti dependencies to IT block instructions as implicit operands
01616   // to the t2IT instruction. The added compile time and complexity does not
01617   // seem worth it.
01618   MachineBasicBlock::const_iterator I = MI;
01619   // Make sure to skip any dbg_value instructions.
01620   while (++I != MBB->end() && I->isDebugValue())
01621     ;
01622   if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
01623     return true;
01624 
01625   // Don't attempt to schedule around any instruction that defines
01626   // a stack-oriented pointer, as it's unlikely to be profitable. This
01627   // saves compile time, because it doesn't require every single
01628   // stack slot reference to depend on the instruction that does the
01629   // modification.
01630   // Calls don't actually change the stack pointer, even if they have imp-defs.
01631   // No ARM calling conventions change the stack pointer. (X86 calling
01632   // conventions sometimes do).
01633   if (!MI->isCall() && MI->definesRegister(ARM::SP))
01634     return true;
01635 
01636   return false;
01637 }
01638 
01639 bool ARMBaseInstrInfo::
01640 isProfitableToIfCvt(MachineBasicBlock &MBB,
01641                     unsigned NumCycles, unsigned ExtraPredCycles,
01642                     const BranchProbability &Probability) const {
01643   if (!NumCycles)
01644     return false;
01645 
01646   // Attempt to estimate the relative costs of predication versus branching.
01647   unsigned UnpredCost = Probability.getNumerator() * NumCycles;
01648   UnpredCost /= Probability.getDenominator();
01649   UnpredCost += 1; // The branch itself
01650   UnpredCost += Subtarget.getMispredictionPenalty() / 10;
01651 
01652   return (NumCycles + ExtraPredCycles) <= UnpredCost;
01653 }
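
// Worked example of the cost model above (numbers assumed): with branch
// probability 4/5, NumCycles = 4, ExtraPredCycles = 1 and a misprediction
// penalty of 10 cycles,
//   UnpredCost = (4 * 4) / 5 + 1 + 10 / 10 = 3 + 1 + 1 = 5,
// and the predicated cost 4 + 1 = 5 <= 5, so if-conversion is deemed
// profitable. At probability 1/2 the unpredicated cost drops to 4 and the
// conversion is rejected.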
01654 
01655 bool ARMBaseInstrInfo::
01656 isProfitableToIfCvt(MachineBasicBlock &TMBB,
01657                     unsigned TCycles, unsigned TExtra,
01658                     MachineBasicBlock &FMBB,
01659                     unsigned FCycles, unsigned FExtra,
01660                     const BranchProbability &Probability) const {
01661   if (!TCycles || !FCycles)
01662     return false;
01663 
01664   // Attempt to estimate the relative costs of predication versus branching.
01665   unsigned TUnpredCost = Probability.getNumerator() * TCycles;
01666   TUnpredCost /= Probability.getDenominator();
01667 
01668   uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
01669   unsigned FUnpredCost = Comp * FCycles;
01670   FUnpredCost /= Probability.getDenominator();
01671 
01672   unsigned UnpredCost = TUnpredCost + FUnpredCost;
01673   UnpredCost += 1; // The branch itself
01674   UnpredCost += Subtarget.getMispredictionPenalty() / 10;
01675 
01676   return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
01677 }
01678 
01679 bool
01680 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
01681                                             MachineBasicBlock &FMBB) const {
01682   // Reduce false anti-dependencies to let Swift's out-of-order execution
01683   // engine do its thing.
01684   return Subtarget.isSwift();
01685 }
01686 
01687 /// getInstrPredicate - If instruction is predicated, returns its predicate
01688 /// condition, otherwise returns AL. It also returns the condition code
01689 /// register by reference.
01690 ARMCC::CondCodes
01691 llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
01692   int PIdx = MI->findFirstPredOperandIdx();
01693   if (PIdx == -1) {
01694     PredReg = 0;
01695     return ARMCC::AL;
01696   }
01697 
01698   PredReg = MI->getOperand(PIdx+1).getReg();
01699   return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
01700 }
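
// Typical use (a sketch; commuteInstruction below follows the same pattern):
//
//   unsigned PredReg = 0;
//   if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
//     return false; // MI is predicated; be conservative.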
01701 
01702 
01703 int llvm::getMatchingCondBranchOpcode(int Opc) {
01704   if (Opc == ARM::B)
01705     return ARM::Bcc;
01706   if (Opc == ARM::tB)
01707     return ARM::tBcc;
01708   if (Opc == ARM::t2B)
01709     return ARM::t2Bcc;
01710 
01711   llvm_unreachable("Unknown unconditional branch opcode!");
01712 }
01713 
01714 /// commuteInstruction - Handle commutable instructions.
01715 MachineInstr *
01716 ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
01717   switch (MI->getOpcode()) {
01718   case ARM::MOVCCr:
01719   case ARM::t2MOVCCr: {
01720     // MOVCC can be commuted by inverting the condition.
01721     unsigned PredReg = 0;
01722     ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
01723     // MOVCC AL can't be inverted. Shouldn't happen.
01724     if (CC == ARMCC::AL || PredReg != ARM::CPSR)
01725       return nullptr;
01726     MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
01727     if (!MI)
01728       return nullptr;
01729     // After swapping the MOVCC operands, also invert the condition.
01730     MI->getOperand(MI->findFirstPredOperandIdx())
01731       .setImm(ARMCC::getOppositeCondition(CC));
01732     return MI;
01733   }
01734   }
01735   return TargetInstrInfo::commuteInstruction(MI, NewMI);
01736 }
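
// Illustration (sketch): commuting a MOVCC swaps its two register uses and
// inverts the condition, so the selected value is unchanged:
//   %dst = MOVCCr %a, %b, pred:EQ, pred:%CPSR
// becomes
//   %dst = MOVCCr %b, %a, pred:NE, pred:%CPSR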
01737 
01738 /// Identify instructions that can be folded into a MOVCC instruction, and
01739 /// return the defining instruction.
01740 static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
01741                                       const MachineRegisterInfo &MRI,
01742                                       const TargetInstrInfo *TII) {
01743   if (!TargetRegisterInfo::isVirtualRegister(Reg))
01744     return nullptr;
01745   if (!MRI.hasOneNonDBGUse(Reg))
01746     return nullptr;
01747   MachineInstr *MI = MRI.getVRegDef(Reg);
01748   if (!MI)
01749     return nullptr;
01750   // MI is folded into the MOVCC by predicating it.
01751   if (!MI->isPredicable())
01752     return nullptr;
01753   // Check if MI has any non-dead defs or physreg uses. This also detects
01754   // predicated instructions which will be reading CPSR.
01755   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
01756     const MachineOperand &MO = MI->getOperand(i);
01757     // Reject frame index operands, PEI can't handle the predicated pseudos.
01758     if (MO.isFI() || MO.isCPI() || MO.isJTI())
01759       return nullptr;
01760     if (!MO.isReg())
01761       continue;
01762     // MI can't have any tied operands, that would conflict with predication.
01763     if (MO.isTied())
01764       return nullptr;
01765     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
01766       return nullptr;
01767     if (MO.isDef() && !MO.isDead())
01768       return nullptr;
01769   }
01770   bool DontMoveAcrossStores = true;
01771   if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr,
01772                         DontMoveAcrossStores))
01773     return nullptr;
01774   return MI;
01775 }
01776 
01777 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
01778                                      SmallVectorImpl<MachineOperand> &Cond,
01779                                      unsigned &TrueOp, unsigned &FalseOp,
01780                                      bool &Optimizable) const {
01781   assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
01782          "Unknown select instruction");
01783   // MOVCC operands:
01784   // 0: Def.
01785   // 1: True use.
01786   // 2: False use.
01787   // 3: Condition code.
01788   // 4: CPSR use.
01789   TrueOp = 1;
01790   FalseOp = 2;
01791   Cond.push_back(MI->getOperand(3));
01792   Cond.push_back(MI->getOperand(4));
01793   // We can always fold a def.
01794   Optimizable = true;
01795   return false;
01796 }
01797 
01798 MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
01799                                                bool PreferFalse) const {
01800   assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
01801          "Unknown select instruction");
01802   MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
01803   MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
01804   bool Invert = !DefMI;
01805   if (!DefMI)
01806     DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
01807   if (!DefMI)
01808     return nullptr;
01809 
01810   // Find new register class to use.
01811   MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
01812   unsigned       DestReg  = MI->getOperand(0).getReg();
01813   const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
01814   if (!MRI.constrainRegClass(DestReg, PreviousClass))
01815     return nullptr;
01816 
01817   // Create a new predicated version of DefMI.
01818   // Rfalse is the first use.
01819   MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
01820                                       DefMI->getDesc(), DestReg);
01821 
01822   // Copy all the DefMI operands, excluding its (null) predicate.
01823   const MCInstrDesc &DefDesc = DefMI->getDesc();
01824   for (unsigned i = 1, e = DefDesc.getNumOperands();
01825        i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
01826     NewMI.addOperand(DefMI->getOperand(i));
01827 
01828   unsigned CondCode = MI->getOperand(3).getImm();
01829   if (Invert)
01830     NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
01831   else
01832     NewMI.addImm(CondCode);
01833   NewMI.addOperand(MI->getOperand(4));
01834 
01835   // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
01836   if (NewMI->hasOptionalDef())
01837     AddDefaultCC(NewMI);
01838 
01839   // The output register value when the predicate is false is an implicit
01840   // register operand tied to the first def.
01841   // The tie makes the register allocator ensure the FalseReg is allocated the
01842   // same register as operand 0.
01843   FalseReg.setImplicit();
01844   NewMI.addOperand(FalseReg);
01845   NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
01846 
01847   // The caller will erase MI, but not DefMI.
01848   DefMI->eraseFromParent();
01849   return NewMI;
01850 }
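
// Illustration (virtual register numbers assumed): given a foldable def with
// a single non-debug use,
//   %1 = ADDri %0, 1, pred:14, pred:%noreg
//   %2 = MOVCCr %3, %1, pred:GT, pred:%CPSR
// optimizeSelect emits a predicated add whose false value is tied to the
// result, and DefMI is erased:
//   %2 = ADDri %0, 1, pred:GT, pred:%CPSR, opt:%noreg, implicit %3 (tied)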
01851 
01852 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
01853 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
01854 /// def operand.
01855 ///
01856 /// This will go away once we can teach tblgen how to set the optional CPSR def
01857 /// operand itself.
01858 struct AddSubFlagsOpcodePair {
01859   uint16_t PseudoOpc;
01860   uint16_t MachineOpc;
01861 };
01862 
01863 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
01864   {ARM::ADDSri, ARM::ADDri},
01865   {ARM::ADDSrr, ARM::ADDrr},
01866   {ARM::ADDSrsi, ARM::ADDrsi},
01867   {ARM::ADDSrsr, ARM::ADDrsr},
01868 
01869   {ARM::SUBSri, ARM::SUBri},
01870   {ARM::SUBSrr, ARM::SUBrr},
01871   {ARM::SUBSrsi, ARM::SUBrsi},
01872   {ARM::SUBSrsr, ARM::SUBrsr},
01873 
01874   {ARM::RSBSri, ARM::RSBri},
01875   {ARM::RSBSrsi, ARM::RSBrsi},
01876   {ARM::RSBSrsr, ARM::RSBrsr},
01877 
01878   {ARM::t2ADDSri, ARM::t2ADDri},
01879   {ARM::t2ADDSrr, ARM::t2ADDrr},
01880   {ARM::t2ADDSrs, ARM::t2ADDrs},
01881 
01882   {ARM::t2SUBSri, ARM::t2SUBri},
01883   {ARM::t2SUBSrr, ARM::t2SUBrr},
01884   {ARM::t2SUBSrs, ARM::t2SUBrs},
01885 
01886   {ARM::t2RSBSri, ARM::t2RSBri},
01887   {ARM::t2RSBSrs, ARM::t2RSBrs},
01888 };
01889 
01890 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
01891   for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
01892     if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
01893       return AddSubFlagsOpcodeMap[i].MachineOpc;
01894   return 0;
01895 }
01896 
01897 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
01898                                MachineBasicBlock::iterator &MBBI, DebugLoc dl,
01899                                unsigned DestReg, unsigned BaseReg, int NumBytes,
01900                                ARMCC::CondCodes Pred, unsigned PredReg,
01901                                const ARMBaseInstrInfo &TII, unsigned MIFlags) {
01902   if (NumBytes == 0 && DestReg != BaseReg) {
01903     BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
01904       .addReg(BaseReg, RegState::Kill)
01905       .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
01906       .setMIFlags(MIFlags);
01907     return;
01908   }
01909 
01910   bool isSub = NumBytes < 0;
01911   if (isSub) NumBytes = -NumBytes;
01912 
01913   while (NumBytes) {
01914     unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
01915     unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
01916     assert(ThisVal && "Didn't extract field correctly");
01917 
01918     // We will handle these bits from the offset; clear them.
01919     NumBytes &= ~ThisVal;
01920 
01921     assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
01922 
01923     // Build the new ADD / SUB.
01924     unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
01925     BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
01926       .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
01927       .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
01928       .setMIFlags(MIFlags);
01929     BaseReg = DestReg;
01930   }
01931 }
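
// The loop above peels one shifter-operand immediate chunk (an 8-bit value
// rotated right by an even amount) off NumBytes per emitted ADD/SUB. The
// following is a minimal standalone sketch of that decomposition;
// decomposeSOImm is illustrative only, not part of this file (it assumes
// <cstdint>/<cstdio> and a GCC/Clang builtin), and the real
// ARM_AM::getSOImmValRotate is smarter, e.g. it also handles chunks that
// wrap around bit 31.
static void decomposeSOImm(uint32_t Imm) {
  while (Imm) {
    // Lowest set bit, rounded down to an even position.
    unsigned RotBase = __builtin_ctz(Imm) & ~1u;
    // Take the 8-bit window starting there; always a valid so_imm chunk.
    uint32_t Chunk = Imm & (0xFFu << RotBase);
    printf("chunk: 0x%08x\n", Chunk);
    Imm &= ~Chunk; // These bits are handled; clear them.
  }
}
// decomposeSOImm(0x12345678) prints 0x00000278, 0x00005400, 0x02340000 and
// 0x10000000, i.e. four ADD/SUB instructions for that offset.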
01932 
01933 static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI,
01934                             MachineInstr *MI) {
01935   for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true);
01936        Subreg.isValid(); ++Subreg)
01937     if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) !=
01938         MachineBasicBlock::LQR_Dead)
01939       return true;
01940   return false;
01941 }

01942 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
01943                                       MachineFunction &MF, MachineInstr *MI,
01944                                       unsigned NumBytes) {
01945   // This optimisation potentially adds lots of load and store
01946   // micro-operations, so it's only really a benefit for code size.
01947   if (!MF.getFunction()->getAttributes().hasAttribute(
01948           AttributeSet::FunctionIndex, Attribute::MinSize))
01949     return false;
01950 
01951   // If only one register is pushed/popped, LLVM can use an LDR/STR
01952   // instead. We can't modify those so make sure we're dealing with an
01953   // instruction we understand.
01954   bool IsPop = isPopOpcode(MI->getOpcode());
01955   bool IsPush = isPushOpcode(MI->getOpcode());
01956   if (!IsPush && !IsPop)
01957     return false;
01958 
01959   bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
01960                       MI->getOpcode() == ARM::VLDMDIA_UPD;
01961   bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
01962                      MI->getOpcode() == ARM::tPOP ||
01963                      MI->getOpcode() == ARM::tPOP_RET;
01964 
01965   assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
01966                           MI->getOperand(1).getReg() == ARM::SP)) &&
01967          "trying to fold sp update into non-sp-updating push/pop");
01968 
01969   // The VFP push & pop act on D-registers, so we can only fold in an
01970   // adjustment that is a multiple of 8 bytes. Similarly, each rN is 4 bytes.
01971   // Don't try if this is violated.
01972   if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
01973     return false;
01974 
01975   // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
01976   // pred) so the list starts at 4. Thumb1 starts after the predicate.
01977   int RegListIdx = IsT1PushPop ? 2 : 4;
01978 
01979   // Calculate the space we'll need in terms of registers.
01980   unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
01981   unsigned RD0Reg, RegsNeeded;
01982   if (IsVFPPushPop) {
01983     RD0Reg = ARM::D0;
01984     RegsNeeded = NumBytes / 8;
01985   } else {
01986     RD0Reg = ARM::R0;
01987     RegsNeeded = NumBytes / 4;
01988   }
01989 
01990   // We're going to have to strip all list operands off before
01991   // re-adding them since the order matters, so save the existing ones
01992   // for later.
01993   SmallVector<MachineOperand, 4> RegList;
01994   for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
01995     RegList.push_back(MI->getOperand(i));
01996 
01997   const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
01998   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
01999 
02000   // Now try to find enough space in the reglist to allocate NumBytes.
02001   for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
02002        --CurReg) {
02003     if (!IsPop) {
02004       // Pushing any register is completely harmless; mark the register
02005       // involved as undef since we don't care about its value in the
02006       // slightest.
02007       RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
02008                                                   false, false, true));
02009       --RegsNeeded;
02010       continue;
02011     }
02012 
02013     // However, we can only pop an extra register if it's not live. For
02014     // registers live within the function we might clobber a return value
02015     // register; the other way a register can be live here is if it's
02016     // callee-saved.
02017     // TODO: Currently, computeRegisterLiveness() does not report "live" if a
02018     // sub reg is live. When computeRegisterLiveness() works for sub reg, it
02019     // can replace isAnySubRegLive().
02020     if (isCalleeSavedRegister(CurReg, CSRegs) ||
02021         isAnySubRegLive(CurReg, TRI, MI)) {
02022       // VFP pops don't allow holes in the register list, so any skip is fatal
02023       // for our transformation. GPR pops do, so we should just keep looking.
02024       if (IsVFPPushPop)
02025         return false;
02026       else
02027         continue;
02028     }
02029 
02030     // Mark the unimportant registers as <def,dead> in the POP.
02031     RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
02032                                                 true));
02033     --RegsNeeded;
02034   }
02035 
02036   if (RegsNeeded > 0)
02037     return false;
02038 
02039   // Finally we know we can profitably perform the optimisation so go
02040   // ahead: strip all existing registers off and add them back again
02041   // in the right order.
02042   for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
02043     MI->RemoveOperand(i);
02044 
02045   // Add the complete list back in.
02046   MachineInstrBuilder MIB(MF, &*MI);
02047   for (int i = RegList.size() - 1; i >= 0; --i)
02048     MIB.addOperand(RegList[i]);
02049 
02050   return true;
02051 }
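
// Illustration (register choices assumed): in a minsize function, the
// prologue pair
//   push {r4, r5, lr}
//   sub sp, sp, #8
// folds into
//   push {r2, r3, r4, r5, lr}   ; r2/r3 pushed as undef scratch
// and, symmetrically, an sp increment before a pop is absorbed by popping
// extra dead registers below the first register in the list.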
02052 
02053 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
02054                                 unsigned FrameReg, int &Offset,
02055                                 const ARMBaseInstrInfo &TII) {
02056   unsigned Opcode = MI.getOpcode();
02057   const MCInstrDesc &Desc = MI.getDesc();
02058   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
02059   bool isSub = false;
02060 
02061   // Memory operands in inline assembly always use AddrMode2.
02062   if (Opcode == ARM::INLINEASM)
02063     AddrMode = ARMII::AddrMode2;
02064 
02065   if (Opcode == ARM::ADDri) {
02066     Offset += MI.getOperand(FrameRegIdx+1).getImm();
02067     if (Offset == 0) {
02068       // Turn it into a move.
02069       MI.setDesc(TII.get(ARM::MOVr));
02070       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
02071       MI.RemoveOperand(FrameRegIdx+1);
02072       Offset = 0;
02073       return true;
02074     } else if (Offset < 0) {
02075       Offset = -Offset;
02076       isSub = true;
02077       MI.setDesc(TII.get(ARM::SUBri));
02078     }
02079 
02080     // Common case: small offset, fits into instruction.
02081     if (ARM_AM::getSOImmVal(Offset) != -1) {
02082       // Replace the FrameIndex with sp / fp
02083       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
02084       MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
02085       Offset = 0;
02086       return true;
02087     }
02088 
02089     // Otherwise, pull as much of the immediate into this ADDri/SUBri
02090     // as possible.
02091     unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
02092     unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
02093 
02094     // We will handle these bits from the offset; clear them.
02095     Offset &= ~ThisImmVal;
02096 
02097     // Get the properly encoded SOImmVal field.
02098     assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
02099            "Bit extraction didn't work?");
02100     MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
02101   } else {
02102     unsigned ImmIdx = 0;
02103     int InstrOffs = 0;
02104     unsigned NumBits = 0;
02105     unsigned Scale = 1;
02106     switch (AddrMode) {
02107     case ARMII::AddrMode_i12: {
02108       ImmIdx = FrameRegIdx + 1;
02109       InstrOffs = MI.getOperand(ImmIdx).getImm();
02110       NumBits = 12;
02111       break;
02112     }
02113     case ARMII::AddrMode2: {
02114       ImmIdx = FrameRegIdx+2;
02115       InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
02116       if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
02117         InstrOffs *= -1;
02118       NumBits = 12;
02119       break;
02120     }
02121     case ARMII::AddrMode3: {
02122       ImmIdx = FrameRegIdx+2;
02123       InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
02124       if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
02125         InstrOffs *= -1;
02126       NumBits = 8;
02127       break;
02128     }
02129     case ARMII::AddrMode4:
02130     case ARMII::AddrMode6:
02131       // Can't fold any offset even if it's zero.
02132       return false;
02133     case ARMII::AddrMode5: {
02134       ImmIdx = FrameRegIdx+1;
02135       InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
02136       if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
02137         InstrOffs *= -1;
02138       NumBits = 8;
02139       Scale = 4;
02140       break;
02141     }
02142     default:
02143       llvm_unreachable("Unsupported addressing mode!");
02144     }
02145 
02146     Offset += InstrOffs * Scale;
02147     assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
02148     if (Offset < 0) {
02149       Offset = -Offset;
02150       isSub = true;
02151     }
02152 
02153     // Attempt to fold the address computation if the opcode has offset bits.
02154     if (NumBits > 0) {
02155       // Common case: small offset, fits into instruction.
02156       MachineOperand &ImmOp = MI.getOperand(ImmIdx);
02157       int ImmedOffset = Offset / Scale;
02158       unsigned Mask = (1 << NumBits) - 1;
02159       if ((unsigned)Offset <= Mask * Scale) {
02160         // Replace the FrameIndex with sp
02161         MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
02162         // FIXME: When addrmode2 goes away, this will simplify (like the
02163         // T2 version), as the LDR.i12 versions don't need the encoding
02164         // tricks for the offset value.
02165         if (isSub) {
02166           if (AddrMode == ARMII::AddrMode_i12)
02167             ImmedOffset = -ImmedOffset;
02168           else
02169             ImmedOffset |= 1 << NumBits;
02170         }
02171         ImmOp.ChangeToImmediate(ImmedOffset);
02172         Offset = 0;
02173         return true;
02174       }
02175 
02176       // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
02177       ImmedOffset = ImmedOffset & Mask;
02178       if (isSub) {
02179         if (AddrMode == ARMII::AddrMode_i12)
02180           ImmedOffset = -ImmedOffset;
02181         else
02182           ImmedOffset |= 1 << NumBits;
02183       }
02184       ImmOp.ChangeToImmediate(ImmedOffset);
02185       Offset &= ~(Mask*Scale);
02186     }
02187   }
02188 
02189   Offset = (isSub) ? -Offset : Offset;
02190   return Offset == 0;
02191 }
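
// A condensed sketch of the reachability test applied above for the
// immediate-offset addressing modes; offsetFits is illustrative only and
// not part of this file.
static bool offsetFits(int Offset, unsigned NumBits, unsigned Scale) {
  unsigned Mask = (1u << NumBits) - 1;
  if (Offset < 0)
    Offset = -Offset; // The sign becomes the add/sub bit, not immediate bits.
  return Offset % Scale == 0 && (unsigned)Offset <= Mask * Scale;
}
// e.g. AddrMode5 (NumBits = 8, Scale = 4) reaches multiples of 4 up to 1020
// bytes, while AddrMode3 (NumBits = 8, Scale = 1) reaches up to 255.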
02192 
02193 /// analyzeCompare - For a comparison instruction, return the source registers
02194 /// in SrcReg and SrcReg2 if having two register operands, and the value it
02195 /// compares against in CmpValue. Return true if the comparison instruction
02196 /// can be analyzed.
02197 bool ARMBaseInstrInfo::
02198 analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
02199                int &CmpMask, int &CmpValue) const {
02200   switch (MI->getOpcode()) {
02201   default: break;
02202   case ARM::CMPri:
02203   case ARM::t2CMPri:
02204     SrcReg = MI->getOperand(0).getReg();
02205     SrcReg2 = 0;
02206     CmpMask = ~0;
02207     CmpValue = MI->getOperand(1).getImm();
02208     return true;
02209   case ARM::CMPrr:
02210   case ARM::t2CMPrr:
02211     SrcReg = MI->getOperand(0).getReg();
02212     SrcReg2 = MI->getOperand(1).getReg();
02213     CmpMask = ~0;
02214     CmpValue = 0;
02215     return true;
02216   case ARM::TSTri:
02217   case ARM::t2TSTri:
02218     SrcReg = MI->getOperand(0).getReg();
02219     SrcReg2 = 0;
02220     CmpMask = MI->getOperand(1).getImm();
02221     CmpValue = 0;
02222     return true;
02223   }
02224 
02225   return false;
02226 }
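
// For example, analyzing "CMPri %r0, 42" yields SrcReg = %r0, SrcReg2 = 0,
// CmpMask = ~0 and CmpValue = 42, while "CMPrr %r0, %r1" returns both source
// registers with a CmpValue of 0.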
02227 
02228 /// isSuitableForMask - Identify a suitable 'and' instruction that
02229 /// operates on the given source register and applies the same mask
02230 /// as a 'tst' instruction. Provide a limited look-through for copies.
02231 /// When successful, MI will hold the found instruction.
02232 static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
02233                               int CmpMask, bool CommonUse) {
02234   switch (MI->getOpcode()) {
02235     case ARM::ANDri:
02236     case ARM::t2ANDri:
02237       if (CmpMask != MI->getOperand(2).getImm())
02238         return false;
02239       if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
02240         return true;
02241       break;
02242     case ARM::COPY: {
02243       // Walk down one instruction which is potentially an 'and'.
02244       const MachineInstr &Copy = *MI;
02245       MachineBasicBlock::iterator AND(
02246         std::next(MachineBasicBlock::iterator(MI)));
02247       if (AND == MI->getParent()->end()) return false;
02248       MI = AND;
02249       return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
02250                                CmpMask, true);
02251     }
02252   }
02253 
02254   return false;
02255 }
02256 
02257 /// getSwappedCondition - assuming the flags are set by MI(a,b), return the
02258 /// condition code to use if we modify the instructions such that the flags
02259 /// are instead set by MI(b,a).
02260 inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
02261   switch (CC) {
02262   default: return ARMCC::AL;
02263   case ARMCC::EQ: return ARMCC::EQ;
02264   case ARMCC::NE: return ARMCC::NE;
02265   case ARMCC::HS: return ARMCC::LS;
02266   case ARMCC::LO: return ARMCC::HI;
02267   case ARMCC::HI: return ARMCC::LO;
02268   case ARMCC::LS: return ARMCC::HS;
02269   case ARMCC::GE: return ARMCC::LE;
02270   case ARMCC::LT: return ARMCC::GT;
02271   case ARMCC::GT: return ARMCC::LT;
02272   case ARMCC::LE: return ARMCC::GE;
02273   }
02274 }
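
// For example, if the flags end up being set by "SUBS r3, r2, r1" instead of
// "CMP r1, r2" (operands swapped), a user branching on GE must branch on LE
// instead; EQ and NE are unaffected. There is no swapped form for the
// overflow-sensitive codes such as VS, so AL is returned to signal failure.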
02275 
02276 /// isRedundantFlagInstr - check whether the first instruction, whose only
02277 /// purpose is to update flags, can be made redundant.
02278 /// CMPrr can be made redundant by SUBrr if the operands are the same.
02279 /// CMPri can be made redundant by SUBri if the operands are the same.
02280 /// This function can be extended later on.
02281 inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
02282                                         unsigned SrcReg2, int ImmValue,
02283                                         MachineInstr *OI) {
02284   if ((CmpI->getOpcode() == ARM::CMPrr ||
02285        CmpI->getOpcode() == ARM::t2CMPrr) &&
02286       (OI->getOpcode() == ARM::SUBrr ||
02287        OI->getOpcode() == ARM::t2SUBrr) &&
02288       ((OI->getOperand(1).getReg() == SrcReg &&
02289         OI->getOperand(2).getReg() == SrcReg2) ||
02290        (OI->getOperand(1).getReg() == SrcReg2 &&
02291         OI->getOperand(2).getReg() == SrcReg)))
02292     return true;
02293 
02294   if ((CmpI->getOpcode() == ARM::CMPri ||
02295        CmpI->getOpcode() == ARM::t2CMPri) &&
02296       (OI->getOpcode() == ARM::SUBri ||
02297        OI->getOpcode() == ARM::t2SUBri) &&
02298       OI->getOperand(1).getReg() == SrcReg &&
02299       OI->getOperand(2).getImm() == ImmValue)
02300     return true;
02301   return false;
02302 }
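
// Illustration (virtual registers assumed):
//   %2 = SUBrr %0, %1
//   CMPrr %0, %1   ; redundant: letting the SUB also define CPSR sets the
//                  ; same flags, so the CMP can be erased (see below).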
02303 
02304 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
02305 /// comparison into one that sets the zero bit in the flags register, and
02306 /// remove a redundant compare instruction if an earlier instruction can set
02307 /// the flags in the same way as the compare.
02308 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
02309 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
02310 /// condition code of instructions which use the flags.
02311 bool ARMBaseInstrInfo::
02312 optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
02313                      int CmpMask, int CmpValue,
02314                      const MachineRegisterInfo *MRI) const {
02315   // Get the unique definition of SrcReg.
02316   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
02317   if (!MI) return false;
02318 
02319   // Masked compares sometimes use the same register as the corresponding 'and'.
02320   if (CmpMask != ~0) {
02321     if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
02322       MI = nullptr;
02323       for (MachineRegisterInfo::use_instr_iterator
02324            UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
02325            UI != UE; ++UI) {
02326         if (UI->getParent() != CmpInstr->getParent()) continue;
02327         MachineInstr *PotentialAND = &*UI;
02328         if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
02329             isPredicated(PotentialAND))
02330           continue;
02331         MI = PotentialAND;
02332         break;
02333       }
02334       if (!MI) return false;
02335     }
02336   }
02337 
02338   // Get ready to iterate backward from CmpInstr.
02339   MachineBasicBlock::iterator I = CmpInstr, E = MI,
02340                               B = CmpInstr->getParent()->begin();
02341 
02342   // Early exit if CmpInstr is at the beginning of the BB.
02343   if (I == B) return false;
02344 
02345   // There are two possible candidates which can be changed to set CPSR:
02346   // One is MI, the other is a SUB instruction.
02347   // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
02348   // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
02349   MachineInstr *Sub = nullptr;
02350   if (SrcReg2 != 0)
02351     // MI is not a candidate for CMPrr.
02352     MI = nullptr;
02353   else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
02354     // Conservatively refuse to convert an instruction which isn't in the same
02355     // BB as the comparison.
02356     // For CMPri, we need to check Sub, thus we can't return here.
02357     if (CmpInstr->getOpcode() == ARM::CMPri ||
02358        CmpInstr->getOpcode() == ARM::t2CMPri)
02359       MI = nullptr;
02360     else
02361       return false;
02362   }
02363 
02364   // Check that CPSR isn't set between the comparison instruction and the one we
02365   // want to change. At the same time, search for Sub.
02366   const TargetRegisterInfo *TRI = &getRegisterInfo();
02367   --I;
02368   for (; I != E; --I) {
02369     const MachineInstr &Instr = *I;
02370 
02371     if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
02372         Instr.readsRegister(ARM::CPSR, TRI))
02373       // This instruction modifies or uses CPSR after the one we want to
02374       // change. We can't do this transformation.
02375       return false;
02376 
02377     // Check whether CmpInstr can be made redundant by the current instruction.
02378     if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
02379       Sub = &*I;
02380       break;
02381     }
02382 
02383     if (I == B)
02384       // The 'and' is below the comparison instruction.
02385       return false;
02386   }
02387 
02388   // Return false if no candidates exist.
02389   if (!MI && !Sub)
02390     return false;
02391 
02392   // The single candidate is called MI.
02393   if (!MI) MI = Sub;
02394 
02395   // We can't use a predicated instruction - it doesn't always write the flags.
02396   if (isPredicated(MI))
02397     return false;
02398 
02399   switch (MI->getOpcode()) {
02400   default: break;
02401   case ARM::RSBrr:
02402   case ARM::RSBri:
02403   case ARM::RSCrr:
02404   case ARM::RSCri:
02405   case ARM::ADDrr:
02406   case ARM::ADDri:
02407   case ARM::ADCrr:
02408   case ARM::ADCri:
02409   case ARM::SUBrr:
02410   case ARM::SUBri:
02411   case ARM::SBCrr:
02412   case ARM::SBCri:
02413   case ARM::t2RSBri:
02414   case ARM::t2ADDrr:
02415   case ARM::t2ADDri:
02416   case ARM::t2ADCrr:
02417   case ARM::t2ADCri:
02418   case ARM::t2SUBrr:
02419   case ARM::t2SUBri:
02420   case ARM::t2SBCrr:
02421   case ARM::t2SBCri:
02422   case ARM::ANDrr:
02423   case ARM::ANDri:
02424   case ARM::t2ANDrr:
02425   case ARM::t2ANDri:
02426   case ARM::ORRrr:
02427   case ARM::ORRri:
02428   case ARM::t2ORRrr:
02429   case ARM::t2ORRri:
02430   case ARM::EORrr:
02431   case ARM::EORri:
02432   case ARM::t2EORrr:
02433   case ARM::t2EORri: {
02434     // Scan forward for uses of CPSR.
02435     // When checking against MI: if a user's condition code requires checking
02436     // of the V bit, then this is not safe to do.
02437     // It is safe to remove CmpInstr if CPSR is redefined or killed.
02438     // If we are done with the basic block, we need to check whether CPSR is
02439     // live-out.
02440     SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
02441         OperandsToUpdate;
02442     bool isSafe = false;
02443     I = CmpInstr;
02444     E = CmpInstr->getParent()->end();
02445     while (!isSafe && ++I != E) {
02446       const MachineInstr &Instr = *I;
02447       for (unsigned IO = 0, EO = Instr.getNumOperands();
02448            !isSafe && IO != EO; ++IO) {
02449         const MachineOperand &MO = Instr.getOperand(IO);
02450         if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
02451           isSafe = true;
02452           break;
02453         }
02454         if (!MO.isReg() || MO.getReg() != ARM::CPSR)
02455           continue;
02456         if (MO.isDef()) {
02457           isSafe = true;
02458           break;
02459         }
02460         // The condition code operand immediately precedes CPSR, except for VSELs.
02461         ARMCC::CondCodes CC;
02462         bool IsInstrVSel = true;
02463         switch (Instr.getOpcode()) {
02464         default:
02465           IsInstrVSel = false;
02466           CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
02467           break;
02468         case ARM::VSELEQD:
02469         case ARM::VSELEQS:
02470           CC = ARMCC::EQ;
02471           break;
02472         case ARM::VSELGTD:
02473         case ARM::VSELGTS:
02474           CC = ARMCC::GT;
02475           break;
02476         case ARM::VSELGED:
02477         case ARM::VSELGES:
02478           CC = ARMCC::GE;
02479           break;
02480         case ARM::VSELVSS:
02481         case ARM::VSELVSD:
02482           CC = ARMCC::VS;
02483           break;
02484         }
02485 
02486         if (Sub) {
02487           ARMCC::CondCodes NewCC = getSwappedCondition(CC);
02488           if (NewCC == ARMCC::AL)
02489             return false;
02490           // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
02491           // on CMP needs to be updated to be based on SUB.
02492           // Push the condition code operands to OperandsToUpdate.
02493           // If it is safe to remove CmpInstr, the condition code of these
02494           // operands will be modified.
02495           if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
02496               Sub->getOperand(2).getReg() == SrcReg) {
02497             // VSel doesn't support condition code update.
02498             if (IsInstrVSel)
02499               return false;
02500             OperandsToUpdate.push_back(
02501                 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
02502           }
02503         } else
02504           switch (CC) {
02505           default:
02506             // CPSR can be used multiple times; we should continue.
02507             break;
02508           case ARMCC::VS:
02509           case ARMCC::VC:
02510           case ARMCC::GE:
02511           case ARMCC::LT:
02512           case ARMCC::GT:
02513           case ARMCC::LE:
02514             return false;
02515           }
02516       }
02517     }
02518 
02519     // If CPSR is neither killed nor re-defined, we should check whether it is
02520     // live-out. If it is live-out, do not optimize.
02521     if (!isSafe) {
02522       MachineBasicBlock *MBB = CmpInstr->getParent();
02523       for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
02524                SE = MBB->succ_end(); SI != SE; ++SI)
02525         if ((*SI)->isLiveIn(ARM::CPSR))
02526           return false;
02527     }
02528 
02529     // Toggle the optional operand to CPSR.
02530     MI->getOperand(5).setReg(ARM::CPSR);
02531     MI->getOperand(5).setIsDef(true);
02532     assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
02533     CmpInstr->eraseFromParent();
02534 
02535     // Modify the condition code of operands in OperandsToUpdate.
02536     // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
02537     // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
02538     for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
02539       OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
02540     return true;
02541   }
02542   }
02543 
02544   return false;
02545 }
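
// Putting it together (a sketch; virtual registers and operand layout
// assumed): for
//   %1 = SUBri %0, 10, pred:14, pred:%noreg, opt:%noreg
//   CMPri %0, 10
// the optional def of the SUB is toggled to %CPSR and the CMP is erased; in
// the swapped-operand CMPrr case, the condition codes of all CPSR users are
// first rewritten via getSwappedCondition.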
02546 
02547 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
02548                                      MachineInstr *DefMI, unsigned Reg,
02549                                      MachineRegisterInfo *MRI) const {
02550   // Fold large immediates into add, sub, or, xor.
02551   unsigned DefOpc = DefMI->getOpcode();
02552   if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
02553     return false;
02554   if (!DefMI->getOperand(1).isImm())
02555     // Could be t2MOVi32imm <ga:xx>
02556     return false;
02557 
02558   if (!MRI->hasOneNonDBGUse(Reg))
02559     return false;
02560 
02561   const MCInstrDesc &DefMCID = DefMI->getDesc();
02562   if (DefMCID.hasOptionalDef()) {
02563     unsigned NumOps = DefMCID.getNumOperands();
02564     const MachineOperand &MO = DefMI->getOperand(NumOps-1);
02565     if (MO.getReg() == ARM::CPSR && !MO.isDead())
02566       // If DefMI defines CPSR and it is not dead, it's obviously not safe
02567       // to delete DefMI.
02568       return false;
02569   }
02570 
02571   const MCInstrDesc &UseMCID = UseMI->getDesc();
02572   if (UseMCID.hasOptionalDef()) {
02573     unsigned NumOps = UseMCID.getNumOperands();
02574     if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
02575       // If the instruction sets the flag, do not attempt this optimization
02576       // since it may change the semantics of the code.
02577       return false;
02578   }
02579 
02580   unsigned UseOpc = UseMI->getOpcode();
02581   unsigned NewUseOpc = 0;
02582   uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
02583   uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
02584   bool Commute = false;
02585   switch (UseOpc) {
02586   default: return false;
02587   case ARM::SUBrr:
02588   case ARM::ADDrr:
02589   case ARM::ORRrr:
02590   case ARM::EORrr:
02591   case ARM::t2SUBrr:
02592   case ARM::t2ADDrr:
02593   case ARM::t2ORRrr:
02594   case ARM::t2EORrr: {
02595     Commute = UseMI->getOperand(2).getReg() != Reg;
02596     switch (UseOpc) {
02597     default: break;
02598     case ARM::SUBrr: {
02599       if (Commute)
02600         return false;
02601       ImmVal = -ImmVal;
02602       NewUseOpc = ARM::SUBri;
02603       // Fallthrough
02604     }
02605     case ARM::ADDrr:
02606     case ARM::ORRrr:
02607     case ARM::EORrr: {
02608       if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
02609         return false;
02610       SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
02611       SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
02612       switch (UseOpc) {
02613       default: break;
02614       case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
02615       case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
02616       case ARM::EORrr: NewUseOpc = ARM::EORri; break;
02617       }
02618       break;
02619     }
02620     case ARM::t2SUBrr: {
02621       if (Commute)
02622         return false;
02623       ImmVal = -ImmVal;
02624       NewUseOpc = ARM::t2SUBri;
02625       // Fallthrough
02626     }
02627     case ARM::t2ADDrr:
02628     case ARM::t2ORRrr:
02629     case ARM::t2EORrr: {
02630       if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
02631         return false;
02632       SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
02633       SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
02634       switch (UseOpc) {
02635       default: break;
02636       case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
02637       case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
02638       case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
02639       }
02640       break;
02641     }
02642     }
02643   }
02644   }
02645 
02646   unsigned OpIdx = Commute ? 2 : 1;
02647   unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
02648   bool isKill = UseMI->getOperand(OpIdx).isKill();
02649   unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
02650   AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
02651                                       UseMI, UseMI->getDebugLoc(),
02652                                       get(NewUseOpc), NewReg)
02653                               .addReg(Reg1, getKillRegState(isKill))
02654                               .addImm(SOImmValV1)));
02655   UseMI->setDesc(get(NewUseOpc));
02656   UseMI->getOperand(1).setReg(NewReg);
02657   UseMI->getOperand(1).setIsKill();
02658   UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
02659   DefMI->eraseFromParent();
02660   return true;
02661 }
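
// Worked example (virtual registers assumed): for
//   %1 = MOVi32imm 0x00F000F0
//   %2 = ADDrr %0, %1
// the constant is a two-part shifter-operand value, so the fold produces
//   %3 = ADDri %0, 0x000000F0
//   %2 = ADDri %3, 0x00F00000
// and the MOVi32imm is erased.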
02662 
02663 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
02664                                         const MachineInstr *MI) {
02665   switch (MI->getOpcode()) {
02666   default: {
02667     const MCInstrDesc &Desc = MI->getDesc();
02668     int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
02669     assert(UOps >= 0 && "bad # UOps");
02670     return UOps;
02671   }
02672 
02673   case ARM::LDRrs:
02674   case ARM::LDRBrs:
02675   case ARM::STRrs:
02676   case ARM::STRBrs: {
02677     unsigned ShOpVal = MI->getOperand(3).getImm();
02678     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
02679     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
02680     if (!isSub &&
02681         (ShImm == 0 ||
02682          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
02683           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
02684       return 1;
02685     return 2;
02686   }
02687 
02688   case ARM::LDRH:
02689   case ARM::STRH: {
02690     if (!MI->getOperand(2).getReg())
02691       return 1;
02692 
02693     unsigned ShOpVal = MI->getOperand(3).getImm();
02694     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
02695     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
02696     if (!isSub &&
02697         (ShImm == 0 ||
02698          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
02699           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
02700       return 1;
02701     return 2;
02702   }
02703 
02704   case ARM::LDRSB:
02705   case ARM::LDRSH:
02706     return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
02707 
02708   case ARM::LDRSB_POST:
02709   case ARM::LDRSH_POST: {
02710     unsigned Rt = MI->getOperand(0).getReg();
02711     unsigned Rm = MI->getOperand(3).getReg();
02712     return (Rt == Rm) ? 4 : 3;
02713   }
02714 
02715   case ARM::LDR_PRE_REG:
02716   case ARM::LDRB_PRE_REG: {
02717     unsigned Rt = MI->getOperand(0).getReg();
02718     unsigned Rm = MI->getOperand(3).getReg();
02719     if (Rt == Rm)
02720       return 3;
02721     unsigned ShOpVal = MI->getOperand(4).getImm();
02722     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
02723     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
02724     if (!isSub &&
02725         (ShImm == 0 ||
02726          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
02727           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
02728       return 2;
02729     return 3;
02730   }
02731 
02732   case ARM::STR_PRE_REG:
02733   case ARM::STRB_PRE_REG: {
02734     unsigned ShOpVal = MI->getOperand(4).getImm();
02735     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
02736     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
02737     if (!isSub &&
02738         (ShImm == 0 ||
02739          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
02740           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
02741       return 2;
02742     return 3;
02743   }
02744 
02745   case ARM::LDRH_PRE:
02746   case ARM::STRH_PRE: {
02747     unsigned Rt = MI->getOperand(0).getReg();
02748     unsigned Rm = MI->getOperand(3).getReg();
02749     if (!Rm)
02750       return 2;
02751     if (Rt == Rm)
02752       return 3;
02753     return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
02754       ? 3 : 2;
02755   }
02756 
02757   case ARM::LDR_POST_REG:
02758   case ARM::LDRB_POST_REG:
02759   case ARM::LDRH_POST: {
02760     unsigned Rt = MI->getOperand(0).getReg();
02761     unsigned Rm = MI->getOperand(3).getReg();
02762     return (Rt == Rm) ? 3 : 2;
02763   }
02764 
02765   case ARM::LDR_PRE_IMM:
02766   case ARM::LDRB_PRE_IMM:
02767   case ARM::LDR_POST_IMM:
02768   case ARM::LDRB_POST_IMM:
02769   case ARM::STRB_POST_IMM:
02770   case ARM::STRB_POST_REG:
02771   case ARM::STRB_PRE_IMM:
02772   case ARM::STRH_POST:
02773   case ARM::STR_POST_IMM:
02774   case ARM::STR_POST_REG:
02775   case ARM::STR_PRE_IMM:
02776     return 2;
02777 
02778   case ARM::LDRSB_PRE:
02779   case ARM::LDRSH_PRE: {
02780     unsigned Rm = MI->getOperand(3).getReg();
02781     if (Rm == 0)
02782       return 3;
02783     unsigned Rt = MI->getOperand(0).getReg();
02784     if (Rt == Rm)
02785       return 4;
02786     unsigned ShOpVal = MI->getOperand(4).getImm();
02787     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
02788     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
02789     if (!isSub &&
02790         (ShImm == 0 ||
02791          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
02792           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
02793       return 3;
02794     return 4;
02795   }
02796 
02797   case ARM::LDRD: {
02798     unsigned Rt = MI->getOperand(0).getReg();
02799     unsigned Rn = MI->getOperand(2).getReg();
02800     unsigned Rm = MI->getOperand(3).getReg();
02801     if (Rm)
02802       return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
02803     return (Rt == Rn) ? 3 : 2;
02804   }
02805 
02806   case ARM::STRD: {
02807     unsigned Rm = MI->getOperand(3).getReg();
02808     if (Rm)
02809       return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
02810     return 2;
02811   }
02812 
02813   case ARM::LDRD_POST:
02814   case ARM::t2LDRD_POST:
02815     return 3;
02816 
02817   case ARM::STRD_POST:
02818   case ARM::t2STRD_POST:
02819     return 4;
02820 
02821   case ARM::LDRD_PRE: {
02822     unsigned Rt = MI->getOperand(0).getReg();
02823     unsigned Rn = MI->getOperand(3).getReg();
02824     unsigned Rm = MI->getOperand(4).getReg();
02825     if (Rm)
02826       return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
02827     return (Rt == Rn) ? 4 : 3;
02828   }
02829 
02830   case ARM::t2LDRD_PRE: {
02831     unsigned Rt = MI->getOperand(0).getReg();
02832     unsigned Rn = MI->getOperand(3).getReg();
02833     return (Rt == Rn) ? 4 : 3;
02834   }
02835 
02836   case ARM::STRD_PRE: {
02837     unsigned Rm = MI->getOperand(4).getReg();
02838     if (Rm)
02839       return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
02840     return 3;
02841   }
02842 
02843   case ARM::t2STRD_PRE:
02844     return 3;
02845 
02846   case ARM::t2LDR_POST:
02847   case ARM::t2LDRB_POST:
02848   case ARM::t2LDRB_PRE:
02849   case ARM::t2LDRSBi12:
02850   case ARM::t2LDRSBi8:
02851   case ARM::t2LDRSBpci:
02852   case ARM::t2LDRSBs:
02853   case ARM::t2LDRH_POST:
02854   case ARM::t2LDRH_PRE:
02855   case ARM::t2LDRSBT:
02856   case ARM::t2LDRSB_POST:
02857   case ARM::t2LDRSB_PRE:
02858   case ARM::t2LDRSH_POST:
02859   case ARM::t2LDRSH_PRE:
02860   case ARM::t2LDRSHi12:
02861   case ARM::t2LDRSHi8:
02862   case ARM::t2LDRSHpci:
02863   case ARM::t2LDRSHs:
02864     return 2;
02865 
02866   case ARM::t2LDRDi8: {
02867     unsigned Rt = MI->getOperand(0).getReg();
02868     unsigned Rn = MI->getOperand(2).getReg();
02869     return (Rt == Rn) ? 3 : 2;
02870   }
02871 
02872   case ARM::t2STRB_POST:
02873   case ARM::t2STRB_PRE:
02874   case ARM::t2STRBs:
02875   case ARM::t2STRDi8:
02876   case ARM::t2STRH_POST:
02877   case ARM::t2STRH_PRE:
02878   case ARM::t2STRHs:
02879   case ARM::t2STR_POST:
02880   case ARM::t2STR_PRE:
02881   case ARM::t2STRs:
02882     return 2;
02883   }
02884 }
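// Editorial note: the cases above repeat one addressing-mode test. A
// register-offset form gets the minimum uop count only when the offset is
// added (not subtracted) and is either unshifted or shifted by lsl #1-#3.
// A minimal sketch of that recurring predicate, factored out purely for
// illustration (isCheapAM2Shift is not a helper in this file):
static bool isCheapAM2Shift(unsigned ShOpVal) {
  // Subtracted offsets always take the slower path.
  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
  // Cheap forms: no shift at all, or lsl #1 / #2 / #3.
  return !isSub &&
         (ShImm == 0 ||
          (ShImm >= 1 && ShImm <= 3 &&
           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl));
}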
02885 
02886 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
02887 // can't be easily determined, return 0 (missing MachineMemOperand).
02888 //
02889 // FIXME: The current MachineInstr design does not support relying on machine
02890 // mem operands to determine the width of a memory access. Instead, we expect
02891 // the target to provide this information based on the instruction opcode and
02892 // operands. However, using MachineMemOperand is the best solution now for
02893 // two reasons:
02894 //
02895 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
02896 // operands. This is much more dangerous than using the MachineMemOperand
02897 // sizes because CodeGen passes can insert/remove optional machine operands. In
02898 // fact, it's totally incorrect for preRA passes and appears to be wrong for
02899 // postRA passes as well.
02900 //
02901 // 2) getNumLDMAddresses is only used by the scheduling machine model, and any
02902 // machine model that calls this should handle the unknown (zero size) case.
02903 //
02904 // Long term, we should require a target hook that verifies MachineMemOperand
02905 // sizes during MC lowering. That target hook should be local to MC lowering
02906 // because we can't ensure that it is aware of other MI forms. Doing this will
02907 // ensure that MachineMemOperands are correctly propagated through all passes.
02908 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
02909   unsigned Size = 0;
02910   for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
02911          E = MI->memoperands_end(); I != E; ++I) {
02912     Size += (*I)->getSize();
02913   }
02914   return Size / 4;
02915 }
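// Editorial note: to make the word-count arithmetic concrete, here is the
// same accumulation over plain byte sizes. An LDM carrying two 8-byte
// memoperands returns 16 / 4 = 4 addresses; with no memoperands attached it
// returns 0, the "unknown" value the machine model must tolerate. A sketch
// only; numWordsFromSizes is not part of this file.
static unsigned numWordsFromSizes(const unsigned *ByteSizes, unsigned N) {
  unsigned Size = 0;
  for (unsigned i = 0; i != N; ++i)
    Size += ByteSizes[i]; // total bytes covered by all memoperands
  return Size / 4;        // 32-bit words; 0 when nothing was recorded
}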
02916 
02917 unsigned
02918 ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
02919                                  const MachineInstr *MI) const {
02920   if (!ItinData || ItinData->isEmpty())
02921     return 1;
02922 
02923   const MCInstrDesc &Desc = MI->getDesc();
02924   unsigned Class = Desc.getSchedClass();
02925   int ItinUOps = ItinData->getNumMicroOps(Class);
02926   if (ItinUOps >= 0) {
02927     if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
02928       return getNumMicroOpsSwiftLdSt(ItinData, MI);
02929 
02930     return ItinUOps;
02931   }
02932 
02933   unsigned Opc = MI->getOpcode();
02934   switch (Opc) {
02935   default:
02936     llvm_unreachable("Unexpected multi-uops instruction!");
02937   case ARM::VLDMQIA:
02938   case ARM::VSTMQIA:
02939     return 2;
02940 
02941   // The number of uOps for load / store multiple is determined by the
02942   // number of registers.
02943   //
02944   // On Cortex-A8, each pair of register loads / stores can be scheduled on the
02945   // same cycle. The scheduling for the first load / store must be done
02946   // separately by assuming the address is not 64-bit aligned.
02947   //
02948   // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
02949   // is not 64-bit aligned, then AGU would take an extra cycle.  For VFP / NEON
02950   // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
02951   case ARM::VLDMDIA:
02952   case ARM::VLDMDIA_UPD:
02953   case ARM::VLDMDDB_UPD:
02954   case ARM::VLDMSIA:
02955   case ARM::VLDMSIA_UPD:
02956   case ARM::VLDMSDB_UPD:
02957   case ARM::VSTMDIA:
02958   case ARM::VSTMDIA_UPD:
02959   case ARM::VSTMDDB_UPD:
02960   case ARM::VSTMSIA:
02961   case ARM::VSTMSIA_UPD:
02962   case ARM::VSTMSDB_UPD: {
02963     unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
02964     return (NumRegs / 2) + (NumRegs % 2) + 1;
02965   }
02966 
02967   case ARM::LDMIA_RET:
02968   case ARM::LDMIA:
02969   case ARM::LDMDA:
02970   case ARM::LDMDB:
02971   case ARM::LDMIB:
02972   case ARM::LDMIA_UPD:
02973   case ARM::LDMDA_UPD:
02974   case ARM::LDMDB_UPD:
02975   case ARM::LDMIB_UPD:
02976   case ARM::STMIA:
02977   case ARM::STMDA:
02978   case ARM::STMDB:
02979   case ARM::STMIB:
02980   case ARM::STMIA_UPD:
02981   case ARM::STMDA_UPD:
02982   case ARM::STMDB_UPD:
02983   case ARM::STMIB_UPD:
02984   case ARM::tLDMIA:
02985   case ARM::tLDMIA_UPD:
02986   case ARM::tSTMIA_UPD:
02987   case ARM::tPOP_RET:
02988   case ARM::tPOP:
02989   case ARM::tPUSH:
02990   case ARM::t2LDMIA_RET:
02991   case ARM::t2LDMIA:
02992   case ARM::t2LDMDB:
02993   case ARM::t2LDMIA_UPD:
02994   case ARM::t2LDMDB_UPD:
02995   case ARM::t2STMIA:
02996   case ARM::t2STMDB:
02997   case ARM::t2STMIA_UPD:
02998   case ARM::t2STMDB_UPD: {
02999     unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
03000     if (Subtarget.isSwift()) {
03001       int UOps = 1 + NumRegs;  // One for address computation, one for each ld / st.
03002       switch (Opc) {
03003       default: break;
03004       case ARM::VLDMDIA_UPD:
03005       case ARM::VLDMDDB_UPD:
03006       case ARM::VLDMSIA_UPD:
03007       case ARM::VLDMSDB_UPD:
03008       case ARM::VSTMDIA_UPD:
03009       case ARM::VSTMDDB_UPD:
03010       case ARM::VSTMSIA_UPD:
03011       case ARM::VSTMSDB_UPD:
03012       case ARM::LDMIA_UPD:
03013       case ARM::LDMDA_UPD:
03014       case ARM::LDMDB_UPD:
03015       case ARM::LDMIB_UPD:
03016       case ARM::STMIA_UPD:
03017       case ARM::STMDA_UPD:
03018       case ARM::STMDB_UPD:
03019       case ARM::STMIB_UPD:
03020       case ARM::tLDMIA_UPD:
03021       case ARM::tSTMIA_UPD:
03022       case ARM::t2LDMIA_UPD:
03023       case ARM::t2LDMDB_UPD:
03024       case ARM::t2STMIA_UPD:
03025       case ARM::t2STMDB_UPD:
03026         ++UOps; // One for base register writeback.
03027         break;
03028       case ARM::LDMIA_RET:
03029       case ARM::tPOP_RET:
03030       case ARM::t2LDMIA_RET:
03031         UOps += 2; // One for base reg wb, one for write to pc.
03032         break;
03033       }
03034       return UOps;
03035     } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
03036       if (NumRegs < 4)
03037         return 2;
03038       // 4 registers would be issued: 2, 2.
03039       // 5 registers would be issued: 2, 2, 1.
03040       int A8UOps = (NumRegs / 2);
03041       if (NumRegs % 2)
03042         ++A8UOps;
03043       return A8UOps;
03044     } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
03045       int A9UOps = (NumRegs / 2);
03046       // If there is an odd number of registers, or if the address is not
03047       // 64-bit aligned, it takes an extra AGU (Address Generation Unit) cycle.
03048       if ((NumRegs % 2) ||
03049           !MI->hasOneMemOperand() ||
03050           (*MI->memoperands_begin())->getAlignment() < 8)
03051         ++A9UOps;
03052       return A9UOps;
03053     } else {
03054       // Assume the worst.
03055       return NumRegs;
03056     }
03057   }
03058   }
03059 }
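// Editorial note: a worked example of the Cortex-A9 style branch above,
// restated in isolation (a9LdStMultipleUOps is illustrative, not part of
// this file). Five registers with a 64-bit aligned address cost
// 5 / 2 = 2 uops plus one for the odd register, i.e. 3 in total.
static unsigned a9LdStMultipleUOps(unsigned NumRegs, bool Aligned64) {
  unsigned UOps = NumRegs / 2;    // registers move in pairs
  if ((NumRegs % 2) || !Aligned64)
    ++UOps;                       // odd count or misalignment costs an AGU cycle
  return UOps;                    // e.g. 5 regs, aligned: 2 + 1 = 3
}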
03060 
03061 int
03062 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
03063                                   const MCInstrDesc &DefMCID,
03064                                   unsigned DefClass,
03065                                   unsigned DefIdx, unsigned DefAlign) const {
03066   int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
03067   if (RegNo <= 0)
03068     // Def is the address writeback.
03069     return ItinData->getOperandCycle(DefClass, DefIdx);
03070 
03071   int DefCycle;
03072   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
03073     // (regno / 2) + (regno % 2) + 1
03074     DefCycle = RegNo / 2 + 1;
03075     if (RegNo % 2)
03076       ++DefCycle;
03077   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
03078     DefCycle = RegNo;
03079     bool isSLoad = false;
03080 
03081     switch (DefMCID.getOpcode()) {
03082     default: break;
03083     case ARM::VLDMSIA:
03084     case ARM::VLDMSIA_UPD:
03085     case ARM::VLDMSDB_UPD:
03086       isSLoad = true;
03087       break;
03088     }
03089 
03090     // If there is an odd number of 'S' registers, or if the address is not
03091     // 64-bit aligned, it takes an extra cycle.
03092     if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
03093       ++DefCycle;
03094   } else {
03095     // Assume the worst.
03096     DefCycle = RegNo + 2;
03097   }
03098 
03099   return DefCycle;
03100 }
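// Editorial note: the Cortex-A8 branch above computes the commented formula
// (regno / 2) + (regno % 2) + 1. The same arithmetic as a standalone sketch
// (a8VLDMDefCycle is not a helper in this file):
static int a8VLDMDefCycle(int RegNo) {
  int DefCycle = RegNo / 2 + 1; // base issue cycle for this register's pair
  if (RegNo % 2)
    ++DefCycle;                 // odd registers complete one cycle later
  return DefCycle;              // RegNo 3 -> 3, RegNo 4 -> 3, RegNo 5 -> 4
}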
03101 
03102 int
03103 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
03104                                  const MCInstrDesc &DefMCID,
03105                                  unsigned DefClass,
03106                                  unsigned DefIdx, unsigned DefAlign) const {
03107   int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
03108   if (RegNo <= 0)
03109     // Def is the address writeback.
03110     return ItinData->getOperandCycle(DefClass, DefIdx);
03111 
03112   int DefCycle;
03113   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
03114     // 4 registers would be issued: 1, 2, 1.
03115     // 5 registers would be issued: 1, 2, 2.
03116     DefCycle = RegNo / 2;
03117     if (DefCycle < 1)
03118       DefCycle = 1;
03119     // Result latency is issue cycle + 2: E2.
03120     DefCycle += 2;
03121   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
03122     DefCycle = (RegNo / 2);
03123     // If there is an odd number of registers, or if the address is not
03124     // 64-bit aligned, it takes an extra AGU (Address Generation Unit) cycle.
03125     if ((RegNo % 2) || DefAlign < 8)
03126       ++DefCycle;
03127     // Result latency is AGU cycles + 2.
03128     DefCycle += 2;
03129   } else {
03130     // Assume the worst.
03131     DefCycle = RegNo + 2;
03132   }
03133 
03134   return DefCycle;
03135 }
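// Editorial note: similarly for the Cortex-A8 LDM case above, the issue
// cycle is max(RegNo / 2, 1) and the loaded value is ready two stages later
// (E2). A hedged sketch (a8LDMDefCycle is not a helper in this file):
static int a8LDMDefCycle(int RegNo) {
  int DefCycle = RegNo / 2;
  if (DefCycle < 1)
    DefCycle = 1;      // the first register still needs a full issue cycle
  return DefCycle + 2; // ready in E2: RegNo 1 -> 3, RegNo 4 -> 4, RegNo 5 -> 4
}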
03136 
03137 int
03138 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
03139                                   const MCInstrDesc &UseMCID,
03140                                   unsigned UseClass,
03141                                   unsigned UseIdx, unsigned UseAlign) const {
03142   int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
03143   if (RegNo <= 0)
03144     return ItinData->getOperandCycle(UseClass, UseIdx);
03145 
03146   int UseCycle;
03147   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
03148     // (regno / 2) + (regno % 2) + 1
03149     UseCycle = RegNo / 2 + 1;
03150     if (RegNo % 2)
03151       ++UseCycle;
03152   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
03153     UseCycle = RegNo;
03154     bool isSStore = false;
03155 
03156     switch (UseMCID.getOpcode()) {
03157     default: break;
03158     case ARM::VSTMSIA:
03159     case ARM::VSTMSIA_UPD:
03160     case ARM::VSTMSDB_UPD:
03161       isSStore = true;
03162       break;
03163     }
03164 
03165     // If there is an odd number of 'S' registers, or if the address is not
03166     // 64-bit aligned, it takes an extra cycle.
03167     if ((isSStore && (RegNo % 2)) || UseAlign < 8)
03168       ++UseCycle;
03169   } else {
03170     // Assume the worst.
03171     UseCycle = RegNo + 2;
03172   }
03173 
03174   return UseCycle;
03175 }
03176 
03177 int
03178 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
03179                                  const MCInstrDesc &UseMCID,
03180                                  unsigned UseClass,
03181                                  unsigned UseIdx, unsigned UseAlign) const {
03182   int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
03183   if (RegNo <= 0)
03184     return ItinData->getOperandCycle(UseClass, UseIdx);
03185 
03186   int UseCycle;
03187   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
03188     UseCycle = RegNo / 2;
03189     if (UseCycle < 2)
03190       UseCycle = 2;
03191     // Read in E3.
03192     UseCycle += 2;
03193   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
03194     UseCycle = (RegNo / 2);
03195     // If there is an odd number of registers, or if the address is not
03196     // 64-bit aligned, it takes an extra AGU (Address Generation Unit) cycle.
03197     if ((RegNo % 2) || UseAlign < 8)
03198       ++UseCycle;
03199   } else {
03200     // Assume the worst.
03201     UseCycle = 1;
03202   }
03203   return UseCycle;
03204 }
03205 
03206 int
03207 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
03208                                     const MCInstrDesc &DefMCID,
03209                                     unsigned DefIdx, unsigned DefAlign,
03210                                     const MCInstrDesc &UseMCID,
03211                                     unsigned UseIdx, unsigned UseAlign) const {
03212   unsigned DefClass = DefMCID.getSchedClass();
03213   unsigned UseClass = UseMCID.getSchedClass();
03214 
03215   if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
03216     return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
03217 
03218   // This may be a def / use of a variable_ops instruction, in which case the
03219   // operand latency may only be determinable dynamically. Let the target try
03220   // to figure it out.
03221   int DefCycle = -1;
03222   bool LdmBypass = false;
03223   switch (DefMCID.getOpcode()) {
03224   default:
03225     DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
03226     break;
03227 
03228   case ARM::VLDMDIA:
03229   case ARM::VLDMDIA_UPD:
03230   case ARM::VLDMDDB_UPD:
03231   case ARM::VLDMSIA:
03232   case ARM::VLDMSIA_UPD:
03233   case ARM::VLDMSDB_UPD:
03234     DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
03235     break;
03236 
03237   case ARM::LDMIA_RET:
03238   case ARM::LDMIA:
03239   case ARM::LDMDA:
03240   case ARM::LDMDB:
03241   case ARM::LDMIB:
03242   case ARM::LDMIA_UPD:
03243   case ARM::LDMDA_UPD:
03244   case ARM::LDMDB_UPD:
03245   case ARM::LDMIB_UPD:
03246   case ARM::tLDMIA:
03247   case ARM::tLDMIA_UPD:
03248   case ARM::tPUSH:
03249   case ARM::t2LDMIA_RET:
03250   case ARM::t2LDMIA:
03251   case ARM::t2LDMDB:
03252   case ARM::t2LDMIA_UPD:
03253   case ARM::t2LDMDB_UPD:
03254     LdmBypass = true;
03255     DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
03256     break;
03257   }
03258 
03259   if (DefCycle == -1)
03260     // We can't determine the result latency of the def; assume it's 2.
03261     DefCycle = 2;
03262 
03263   int UseCycle = -1;
03264   switch (UseMCID.getOpcode()) {
03265   default:
03266     UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
03267     break;
03268 
03269   case ARM::VSTMDIA:
03270   case ARM::VSTMDIA_UPD:
03271   case ARM::VSTMDDB_UPD:
03272   case ARM::VSTMSIA:
03273   case ARM::VSTMSIA_UPD:
03274   case ARM::VSTMSDB_UPD:
03275     UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
03276     break;
03277 
03278   case ARM::STMIA:
03279   case ARM::STMDA:
03280   case ARM::STMDB:
03281   case ARM::STMIB:
03282   case ARM::STMIA_UPD:
03283   case ARM::STMDA_UPD:
03284   case ARM::STMDB_UPD:
03285   case ARM::STMIB_UPD:
03286   case ARM::tSTMIA_UPD:
03287   case ARM::tPOP_RET:
03288   case ARM::tPOP:
03289   case ARM::t2STMIA:
03290   case ARM::t2STMDB:
03291   case ARM::t2STMIA_UPD:
03292   case ARM::t2STMDB_UPD:
03293     UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
03294     break;
03295   }
03296 
03297   if (UseCycle == -1)
03298     // Assume it's read in the first stage.
03299     UseCycle = 1;
03300 
03301   UseCycle = DefCycle - UseCycle + 1;
03302   if (UseCycle > 0) {
03303     if (LdmBypass) {
03304       // It's a variable_ops instruction, so we can't use DefIdx here. Just use
03305       // the first def operand.
03306       if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
03307                                           UseClass, UseIdx))
03308         --UseCycle;
03309     } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
03310                                                UseClass, UseIdx)) {
03311       --UseCycle;
03312     }
03313   }
03314 
03315   return UseCycle;
03316 }
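// Editorial note: the final step above combines the two cycle numbers as
// DefCycle - UseCycle + 1 and then subtracts one when the pipeline forwards
// the result. A compact restatement under those assumptions
// (combineDefUseCycles is illustrative, not part of this file):
static int combineDefUseCycles(int DefCycle, int UseCycle, bool Forwarding) {
  int Latency = DefCycle - UseCycle + 1; // def-ready cycle vs. read cycle
  if (Latency > 0 && Forwarding)
    --Latency; // e.g. def ready at 4, read at 2: 3 cycles, or 2 with bypass
  return Latency;
}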
03317 
03318 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
03319                                            const MachineInstr *MI, unsigned Reg,
03320                                            unsigned &DefIdx, unsigned &Dist) {
03321   Dist = 0;
03322 
03323   MachineBasicBlock::const_iterator I = MI; ++I;
03324   MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
03325   assert(II->isInsideBundle() && "Empty bundle?");
03326 
03327   int Idx = -1;
03328   while (II->isInsideBundle()) {
03329     Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
03330     if (Idx != -1)
03331       break;
03332     --II;
03333     ++Dist;
03334   }
03335 
03336   assert(Idx != -1 && "Cannot find bundled definition!");
03337   DefIdx = Idx;
03338   return II;
03339 }
03340 
03341 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
03342                                            const MachineInstr *MI, unsigned Reg,
03343                                            unsigned &UseIdx, unsigned &Dist) {
03344   Dist = 0;
03345 
03346   MachineBasicBlock::const_instr_iterator II = MI; ++II;
03347   assert(II->isInsideBundle() && "Empty bundle?");
03348   MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
03349 
03350   // FIXME: This doesn't properly handle multiple uses.
03351   int Idx = -1;
03352   while (II != E && II->isInsideBundle()) {
03353     Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
03354     if (Idx != -1)
03355       break;
03356     if (II->getOpcode() != ARM::t2IT)
03357       ++Dist;
03358     ++II;
03359   }
03360 
03361   if (Idx == -1) {
03362     Dist = 0;
03363     return nullptr;
03364   }
03365 
03366   UseIdx = Idx;
03367   return II;
03368 }
03369 
03370 /// Return the number of cycles to add to (or subtract from) the static
03371 /// itinerary based on the def opcode and alignment. The caller will ensure
03372 /// that the adjusted latency is at least one cycle.
03373 static int adjustDefLatency(const ARMSubtarget &Subtarget,
03374                             const MachineInstr *DefMI,
03375                             const MCInstrDesc *DefMCID, unsigned DefAlign) {
03376   int Adjust = 0;
03377   if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
03378     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
03379     // variants are one cycle cheaper.
03380     switch (DefMCID->getOpcode()) {
03381     default: break;
03382     case ARM::LDRrs:
03383     case ARM::LDRBrs: {
03384       unsigned ShOpVal = DefMI->getOperand(3).getImm();
03385       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
03386       if (ShImm == 0 ||
03387           (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
03388         --Adjust;
03389       break;
03390     }
03391     case ARM::t2LDRs:
03392     case ARM::t2LDRBs:
03393     case ARM::t2LDRHs:
03394     case ARM::t2LDRSHs: {
03395       // Thumb2 mode: lsl only.
03396       unsigned ShAmt = DefMI->getOperand(3).getImm();
03397       if (ShAmt == 0 || ShAmt == 2)
03398         --Adjust;
03399       break;
03400     }
03401     }
03402   } else if (Subtarget.isSwift()) {
03403     // FIXME: Properly handle all of the latency adjustments for address
03404     // writeback.
03405     switch (DefMCID->getOpcode()) {
03406     default: break;
03407     case ARM::LDRrs:
03408     case ARM::LDRBrs: {
03409       unsigned ShOpVal = DefMI->getOperand(3).getImm();
03410       bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
03411       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
03412       if (!isSub &&
03413           (ShImm == 0 ||
03414            ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
03415             ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
03416         Adjust -= 2;
03417       else if (!isSub &&
03418                ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
03419         --Adjust;
03420       break;
03421     }
03422     case ARM::t2LDRs:
03423     case ARM::t2LDRBs:
03424     case ARM::t2LDRHs:
03425     case ARM::t2LDRSHs: {
03426       // Thumb2 mode: lsl only.
03427       unsigned ShAmt = DefMI->getOperand(3).getImm();
03428       if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
03429         Adjust -= 2;
03430       break;
03431     }
03432     }
03433   }
03434 
03435   if (DefAlign < 8 && Subtarget.isLikeA9()) {
03436     switch (DefMCID->getOpcode()) {
03437     default: break;
03438     case ARM::VLD1q8:
03439     case ARM::VLD1q16:
03440     case ARM::VLD1q32:
03441     case ARM::VLD1q64:
03442     case ARM::VLD1q8wb_fixed:
03443     case ARM::VLD1q16wb_fixed:
03444     case ARM::VLD1q32wb_fixed:
03445     case ARM::VLD1q64wb_fixed:
03446     case ARM::VLD1q8wb_register:
03447     case ARM::VLD1q16wb_register:
03448     case ARM::VLD1q32wb_register:
03449     case ARM::VLD1q64wb_register:
03450     case ARM::VLD2d8:
03451     case ARM::VLD2d16:
03452     case ARM::VLD2d32:
03453     case ARM::VLD2q8:
03454     case ARM::VLD2q16:
03455     case ARM::VLD2q32:
03456     case ARM::VLD2d8wb_fixed:
03457     case ARM::VLD2d16wb_fixed:
03458     case ARM::VLD2d32wb_fixed:
03459     case ARM::VLD2q8wb_fixed:
03460     case ARM::VLD2q16wb_fixed:
03461     case ARM::VLD2q32wb_fixed:
03462     case ARM::VLD2d8wb_register:
03463     case ARM::VLD2d16wb_register:
03464     case ARM::VLD2d32wb_register:
03465     case ARM::VLD2q8wb_register:
03466     case ARM::VLD2q16wb_register:
03467     case ARM::VLD2q32wb_register:
03468     case ARM::VLD3d8:
03469     case ARM::VLD3d16:
03470     case ARM::VLD3d32:
03471     case ARM::VLD1d64T:
03472     case ARM::VLD3d8_UPD:
03473     case ARM::VLD3d16_UPD:
03474     case ARM::VLD3d32_UPD:
03475     case ARM::VLD1d64Twb_fixed:
03476     case ARM::VLD1d64Twb_register:
03477     case ARM::VLD3q8_UPD:
03478     case ARM::VLD3q16_UPD:
03479     case ARM::VLD3q32_UPD:
03480     case ARM::VLD4d8:
03481     case ARM::VLD4d16:
03482     case ARM::VLD4d32:
03483     case ARM::VLD1d64Q:
03484     case ARM::VLD4d8_UPD:
03485     case ARM::VLD4d16_UPD:
03486     case ARM::VLD4d32_UPD:
03487     case ARM::VLD1d64Qwb_fixed:
03488     case ARM::VLD1d64Qwb_register:
03489     case ARM::VLD4q8_UPD:
03490     case ARM::VLD4q16_UPD:
03491     case ARM::VLD4q32_UPD:
03492     case ARM::VLD1DUPq8:
03493     case ARM::VLD1DUPq16:
03494     case ARM::VLD1DUPq32:
03495     case ARM::VLD1DUPq8wb_fixed:
03496     case ARM::VLD1DUPq16wb_fixed:
03497     case ARM::VLD1DUPq32wb_fixed:
03498     case ARM::VLD1DUPq8wb_register:
03499     case ARM::VLD1DUPq16wb_register:
03500     case ARM::VLD1DUPq32wb_register:
03501     case ARM::VLD2DUPd8:
03502     case ARM::VLD2DUPd16:
03503     case ARM::VLD2DUPd32:
03504     case ARM::VLD2DUPd8wb_fixed:
03505     case ARM::VLD2DUPd16wb_fixed:
03506     case ARM::VLD2DUPd32wb_fixed:
03507     case ARM::VLD2DUPd8wb_register:
03508     case ARM::VLD2DUPd16wb_register:
03509     case ARM::VLD2DUPd32wb_register:
03510     case ARM::VLD4DUPd8:
03511     case ARM::VLD4DUPd16:
03512     case ARM::VLD4DUPd32:
03513     case ARM::VLD4DUPd8_UPD:
03514     case ARM::VLD4DUPd16_UPD:
03515     case ARM::VLD4DUPd32_UPD:
03516     case ARM::VLD1LNd8:
03517     case ARM::VLD1LNd16:
03518     case ARM::VLD1LNd32:
03519     case ARM::VLD1LNd8_UPD:
03520     case ARM::VLD1LNd16_UPD:
03521     case ARM::VLD1LNd32_UPD:
03522     case ARM::VLD2LNd8:
03523     case ARM::VLD2LNd16:
03524     case ARM::VLD2LNd32:
03525     case ARM::VLD2LNq16:
03526     case ARM::VLD2LNq32:
03527     case ARM::VLD2LNd8_UPD:
03528     case ARM::VLD2LNd16_UPD:
03529     case ARM::VLD2LNd32_UPD:
03530     case ARM::VLD2LNq16_UPD:
03531     case ARM::VLD2LNq32_UPD:
03532     case ARM::VLD4LNd8:
03533     case ARM::VLD4LNd16:
03534     case ARM::VLD4LNd32:
03535     case ARM::VLD4LNq16:
03536     case ARM::VLD4LNq32:
03537     case ARM::VLD4LNd8_UPD:
03538     case ARM::VLD4LNd16_UPD:
03539     case ARM::VLD4LNd32_UPD:
03540     case ARM::VLD4LNq16_UPD:
03541     case ARM::VLD4LNq32_UPD:
03542       // If the address is not 64-bit aligned, the latencies of these
03543       // instructions increase by one.
03544       ++Adjust;
03545       break;
03546     }
03547   }
03548   return Adjust;
03549 }
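// Editorial note: for Swift, the LDRrs / LDRBrs case above gives the cheap
// addressing forms a two-cycle discount and the lsr #1 form a one-cycle
// discount. The decision in isolation, as a sketch (swiftLdShiftAdjust is
// not a helper in this file):
static int swiftLdShiftAdjust(unsigned ShOpVal) {
  bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
  unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
  if (!isSub && (ShImm == 0 ||
                 (ShImm >= 1 && ShImm <= 3 &&
                  ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
    return -2; // no shift, or lsl #1-#3: two cycles cheaper
  if (!isSub && ShImm == 1 &&
      ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
    return -1; // lsr #1: one cycle cheaper
  return 0;
}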
03550 
03551 
03552 
03553 int
03554 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
03555                                     const MachineInstr *DefMI, unsigned DefIdx,
03556                                     const MachineInstr *UseMI,
03557                                     unsigned UseIdx) const {
03558   // No operand latency. The caller may fall back to getInstrLatency.
03559   if (!ItinData || ItinData->isEmpty())
03560     return -1;
03561 
03562   const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
03563   unsigned Reg = DefMO.getReg();
03564   const MCInstrDesc *DefMCID = &DefMI->getDesc();
03565   const MCInstrDesc *UseMCID = &UseMI->getDesc();
03566 
03567   unsigned DefAdj = 0;
03568   if (DefMI->isBundle()) {
03569     DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
03570     DefMCID = &DefMI->getDesc();
03571   }
03572   if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
03573       DefMI->isRegSequence() || DefMI->isImplicitDef()) {
03574     return 1;
03575   }
03576 
03577   unsigned UseAdj = 0;
03578   if (UseMI->isBundle()) {
03579     unsigned NewUseIdx;
03580     const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
03581                                                    Reg, NewUseIdx, UseAdj);
03582     if (!NewUseMI)
03583       return -1;
03584 
03585     UseMI = NewUseMI;
03586     UseIdx = NewUseIdx;
03587     UseMCID = &UseMI->getDesc();
03588   }
03589 
03590   if (Reg == ARM::CPSR) {
03591     if (DefMI->getOpcode() == ARM::FMSTAT) {
03592       // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
03593       return Subtarget.isLikeA9() ? 1 : 20;
03594     }
03595 
03596     // CPSR set and branch can be paired in the same cycle.
03597     if (UseMI->isBranch())
03598       return 0;
03599 
03600     // Otherwise it takes the instruction latency (generally one).
03601     unsigned Latency = getInstrLatency(ItinData, DefMI);
03602 
03603     // For Thumb2 and -Os, prefer scheduling a CPSR-setting instruction close
03604     // to its uses. Instructions scheduled between them may otherwise incur a
03605     // code size penalty (they cannot use the CPSR-setting 16-bit
03606     // instructions).
03607     if (Latency > 0 && Subtarget.isThumb2()) {
03608       const MachineFunction *MF = DefMI->getParent()->getParent();
03609       if (MF->getFunction()->getAttributes().
03610             hasAttribute(AttributeSet::FunctionIndex,
03611                          Attribute::OptimizeForSize))
03612         --Latency;
03613     }
03614     return Latency;
03615   }
03616 
03617   if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
03618     return -1;
03619 
03620   unsigned DefAlign = DefMI->hasOneMemOperand()
03621     ? (*DefMI->memoperands_begin())->getAlignment() : 0;
03622   unsigned UseAlign = UseMI->hasOneMemOperand()
03623     ? (*UseMI->memoperands_begin())->getAlignment() : 0;
03624 
03625   // Get the itinerary's latency if possible, and handle variable_ops.
03626   int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
03627                                   *UseMCID, UseIdx, UseAlign);
03628   // Unable to find operand latency. The caller may resort to getInstrLatency.
03629   if (Latency < 0)
03630     return Latency;
03631 
03632   // Adjust for IT block position.
03633   int Adj = DefAdj + UseAdj;
03634 
03635   // Adjust for dynamic def-side opcode variants not captured by the itinerary.
03636   Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
03637   if (Adj >= 0 || (int)Latency > -Adj) {
03638     return Latency + Adj;
03639   }
03640   // Return the itinerary latency, which may be zero but not less than zero.
03641   return Latency;
03642 }
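// Editorial note: the tail of the function above applies the adjustment
// only while the reported latency stays non-negative; the condition
// "Adj >= 0 || Latency > -Adj" is exactly that guard. A sketch of the same
// clamping (applyLatencyAdjust is illustrative, not part of this file):
static int applyLatencyAdjust(int Latency, int Adj) {
  if (Adj >= 0 || Latency > -Adj)
    return Latency + Adj; // the adjustment keeps the result non-negative
  return Latency;         // otherwise report the unadjusted itinerary value
}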
03643 
03644 int
03645 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
03646                                     SDNode *DefNode, unsigned DefIdx,
03647                                     SDNode *UseNode, unsigned UseIdx) const {
03648   if (!DefNode->isMachineOpcode())
03649     return 1;
03650 
03651   const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
03652 
03653   if (isZeroCost(DefMCID.Opcode))
03654     return 0;
03655 
03656   if (!ItinData || ItinData->isEmpty())
03657     return DefMCID.mayLoad() ? 3 : 1;
03658 
03659   if (!UseNode->isMachineOpcode()) {
03660     int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
03661     if (Subtarget.isLikeA9() || Subtarget.isSwift())
03662       return Latency <= 2 ? 1 : Latency - 1;
03663     else
03664       return Latency <= 3 ? 1 : Latency - 2;
03665   }
03666 
03667   const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
03668   const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
03669   unsigned DefAlign = !DefMN->memoperands_empty()
03670     ? (*DefMN->memoperands_begin())->getAlignment() : 0;
03671   const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
03672   unsigned UseAlign = !UseMN->memoperands_empty()
03673     ? (*UseMN->memoperands_begin())->getAlignment() : 0;
03674   int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
03675                                   UseMCID, UseIdx, UseAlign);
03676 
03677   if (Latency > 1 &&
03678       (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
03679        Subtarget.isCortexA7())) {
03680     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
03681     // variants are one cycle cheaper.
03682     switch (DefMCID.getOpcode()) {
03683     default: break;
03684     case ARM::LDRrs:
03685     case ARM::LDRBrs: {
03686       unsigned ShOpVal =
03687         cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
03688       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
03689       if (ShImm == 0 ||
03690           (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
03691         --Latency;
03692       break;
03693     }
03694     case ARM::t2LDRs:
03695     case ARM::t2LDRBs:
03696     case ARM::t2LDRHs:
03697     case ARM::t2LDRSHs: {
03698       // Thumb2 mode: lsl only.
03699       unsigned ShAmt =
03700         cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
03701       if (ShAmt == 0 || ShAmt == 2)
03702         --Latency;
03703       break;
03704     }
03705     }
03706   } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
03707     // FIXME: Properly handle all of the latency adjustments for address
03708     // writeback.
03709     switch (DefMCID.getOpcode()) {
03710     default: break;
03711     case ARM::LDRrs:
03712     case ARM::LDRBrs: {
03713       unsigned ShOpVal =
03714         cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
03715       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
03716       if (ShImm == 0 ||
03717           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
03718            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
03719         Latency -= 2;
03720       else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
03721         --Latency;
03722       break;
03723     }
03724     case ARM::t2LDRs:
03725     case ARM::t2LDRBs:
03726     case ARM::t2LDRHs:
03727     case ARM::t2LDRSHs: {
03728       // Thumb2 mode: lsl 0-3 only.
03729       Latency -= 2;
03730       break;
03731     }
03732     }
03733   }
03734 
03735   if (DefAlign < 8 && Subtarget.isLikeA9())
03736     switch (DefMCID.getOpcode()) {
03737     default: break;
03738     case ARM::VLD1q8:
03739     case ARM::VLD1q16:
03740     case ARM::VLD1q32:
03741     case ARM::VLD1q64:
03742     case ARM::VLD1q8wb_register:
03743     case ARM::VLD1q16wb_register:
03744     case ARM::VLD1q32wb_register:
03745     case ARM::VLD1q64wb_register:
03746     case ARM::VLD1q8wb_fixed:
03747     case ARM::VLD1q16wb_fixed:
03748     case ARM::VLD1q32wb_fixed:
03749     case ARM::VLD1q64wb_fixed:
03750     case ARM::VLD2d8:
03751     case ARM::VLD2d16:
03752     case ARM::VLD2d32:
03753     case ARM::VLD2q8Pseudo:
03754     case ARM::VLD2q16Pseudo:
03755     case ARM::VLD2q32Pseudo:
03756     case ARM::VLD2d8wb_fixed:
03757     case ARM::VLD2d16wb_fixed:
03758     case ARM::VLD2d32wb_fixed:
03759     case ARM::VLD2q8PseudoWB_fixed:
03760     case ARM::VLD2q16PseudoWB_fixed:
03761     case ARM::VLD2q32PseudoWB_fixed:
03762     case ARM::VLD2d8wb_register:
03763     case ARM::VLD2d16wb_register:
03764     case ARM::VLD2d32wb_register:
03765     case ARM::VLD2q8PseudoWB_register:
03766     case ARM::VLD2q16PseudoWB_register:
03767     case ARM::VLD2q32PseudoWB_register:
03768     case ARM::VLD3d8Pseudo:
03769     case ARM::VLD3d16Pseudo:
03770     case ARM::VLD3d32Pseudo:
03771     case ARM::VLD1d64TPseudo:
03772     case ARM::VLD1d64TPseudoWB_fixed:
03773     case ARM::VLD3d8Pseudo_UPD:
03774     case ARM::VLD3d16Pseudo_UPD:
03775     case ARM::VLD3d32Pseudo_UPD:
03776     case ARM::VLD3q8Pseudo_UPD:
03777     case ARM::VLD3q16Pseudo_UPD:
03778     case ARM::VLD3q32Pseudo_UPD:
03779     case ARM::VLD3q8oddPseudo:
03780     case ARM::VLD3q16oddPseudo:
03781     case ARM::VLD3q32oddPseudo:
03782     case ARM::VLD3q8oddPseudo_UPD:
03783     case ARM::VLD3q16oddPseudo_UPD:
03784     case ARM::VLD3q32oddPseudo_UPD:
03785     case ARM::VLD4d8Pseudo:
03786     case ARM::VLD4d16Pseudo:
03787     case ARM::VLD4d32Pseudo:
03788     case ARM::VLD1d64QPseudo:
03789     case ARM::VLD1d64QPseudoWB_fixed:
03790     case ARM::VLD4d8Pseudo_UPD:
03791     case ARM::VLD4d16Pseudo_UPD:
03792     case ARM::VLD4d32Pseudo_UPD:
03793     case ARM::VLD4q8Pseudo_UPD:
03794     case ARM::VLD4q16Pseudo_UPD:
03795     case ARM::VLD4q32Pseudo_UPD:
03796     case ARM::VLD4q8oddPseudo:
03797     case ARM::VLD4q16oddPseudo:
03798     case ARM::VLD4q32oddPseudo:
03799     case ARM::VLD4q8oddPseudo_UPD:
03800     case ARM::VLD4q16oddPseudo_UPD:
03801     case ARM::VLD4q32oddPseudo_UPD:
03802     case ARM::VLD1DUPq8:
03803     case ARM::VLD1DUPq16:
03804     case ARM::VLD1DUPq32:
03805     case ARM::VLD1DUPq8wb_fixed:
03806     case ARM::VLD1DUPq16wb_fixed:
03807     case ARM::VLD1DUPq32wb_fixed:
03808     case ARM::VLD1DUPq8wb_register:
03809     case ARM::VLD1DUPq16wb_register:
03810     case ARM::VLD1DUPq32wb_register:
03811     case ARM::VLD2DUPd8:
03812     case ARM::VLD2DUPd16:
03813     case ARM::VLD2DUPd32:
03814     case ARM::VLD2DUPd8wb_fixed:
03815     case ARM::VLD2DUPd16wb_fixed:
03816     case ARM::VLD2DUPd32wb_fixed:
03817     case ARM::VLD2DUPd8wb_register:
03818     case ARM::VLD2DUPd16wb_register:
03819     case ARM::VLD2DUPd32wb_register:
03820     case ARM::VLD4DUPd8Pseudo:
03821     case ARM::VLD4DUPd16Pseudo:
03822     case ARM::VLD4DUPd32Pseudo:
03823     case ARM::VLD4DUPd8Pseudo_UPD:
03824     case ARM::VLD4DUPd16Pseudo_UPD:
03825     case ARM::VLD4DUPd32Pseudo_UPD:
03826     case ARM::VLD1LNq8Pseudo:
03827     case ARM::VLD1LNq16Pseudo:
03828     case ARM::VLD1LNq32Pseudo:
03829     case ARM::VLD1LNq8Pseudo_UPD:
03830     case ARM::VLD1LNq16Pseudo_UPD:
03831     case ARM::VLD1LNq32Pseudo_UPD:
03832     case ARM::VLD2LNd8Pseudo:
03833     case ARM::VLD2LNd16Pseudo:
03834     case ARM::VLD2LNd32Pseudo:
03835     case ARM::VLD2LNq16Pseudo:
03836     case ARM::VLD2LNq32Pseudo:
03837     case ARM::VLD2LNd8Pseudo_UPD:
03838     case ARM::VLD2LNd16Pseudo_UPD:
03839     case ARM::VLD2LNd32Pseudo_UPD:
03840     case ARM::VLD2LNq16Pseudo_UPD:
03841     case ARM::VLD2LNq32Pseudo_UPD:
03842     case ARM::VLD4LNd8Pseudo:
03843     case ARM::VLD4LNd16Pseudo:
03844     case ARM::VLD4LNd32Pseudo:
03845     case ARM::VLD4LNq16Pseudo:
03846     case ARM::VLD4LNq32Pseudo:
03847     case ARM::VLD4LNd8Pseudo_UPD:
03848     case ARM::VLD4LNd16Pseudo_UPD:
03849     case ARM::VLD4LNd32Pseudo_UPD:
03850     case ARM::VLD4LNq16Pseudo_UPD:
03851     case ARM::VLD4LNq32Pseudo_UPD:
03852       // If the address is not 64-bit aligned, the latencies of these
03853       // instructions increase by one.
03854       ++Latency;
03855       break;
03856     }
03857 
03858   return Latency;
03859 }
03860 
03861 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const {
03862   if (MI->isCopyLike() || MI->isInsertSubreg() ||
03863       MI->isRegSequence() || MI->isImplicitDef())
03864     return 0;
03865 
03866   if (MI->isBundle())
03867     return 0;
03868 
03869   const MCInstrDesc &MCID = MI->getDesc();
03870 
03871   if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
03872     // When predicated, CPSR is an additional source operand for CPSR-updating
03873     // instructions; this apparently increases their latencies.
03874     return 1;
03875   }
03876   return 0;
03877 }
03878 
03879 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
03880                                            const MachineInstr *MI,
03881                                            unsigned *PredCost) const {
03882   if (MI->isCopyLike() || MI->isInsertSubreg() ||
03883       MI->isRegSequence() || MI->isImplicitDef())
03884     return 1;
03885 
03886   // An instruction scheduler typically runs on unbundled instructions;
03887   // however, other passes may query the latency of a bundled instruction.
03888   if (MI->isBundle()) {
03889     unsigned Latency = 0;
03890     MachineBasicBlock::const_instr_iterator I = MI;
03891     MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
03892     while (++I != E && I->isInsideBundle()) {
03893       if (I->getOpcode() != ARM::t2IT)
03894         Latency += getInstrLatency(ItinData, I, PredCost);
03895     }
03896     return Latency;
03897   }
03898 
03899   const MCInstrDesc &MCID = MI->getDesc();
03900   if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
03901     // When predicated, CPSR is an additional source operand for CPSR-updating
03902     // instructions; this apparently increases their latencies.
03903     *PredCost = 1;
03904   }
03905   // Be sure to call getStageLatency for an empty itinerary in case it has a
03906   // valid MinLatency property.
03907   if (!ItinData)
03908     return MI->mayLoad() ? 3 : 1;
03909 
03910   unsigned Class = MCID.getSchedClass();
03911 
03912   // For instructions with variable uops, use uops as latency.
03913   if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
03914     return getNumMicroOps(ItinData, MI);
03915 
03916   // For the common case, fall back on the itinerary's latency.
03917   unsigned Latency = ItinData->getStageLatency(Class);
03918 
03919   // Adjust for dynamic def-side opcode variants not captured by the itinerary.
03920   unsigned DefAlign = MI->hasOneMemOperand()
03921     ? (*MI->memoperands_begin())->getAlignment() : 0;
03922   int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
03923   if (Adj >= 0 || (int)Latency > -Adj) {
03924     return Latency + Adj;
03925   }
03926   return Latency;
03927 }
03928 
03929 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
03930                                       SDNode *Node) const {
03931   if (!Node->isMachineOpcode())
03932     return 1;
03933 
03934   if (!ItinData || ItinData->isEmpty())
03935     return 1;
03936 
03937   unsigned Opcode = Node->getMachineOpcode();
03938   switch (Opcode) {
03939   default:
03940     return ItinData->getStageLatency(get(Opcode).getSchedClass());
03941   case ARM::VLDMQIA:
03942   case ARM::VSTMQIA:
03943     return 2;
03944   }
03945 }
03946 
03947 bool ARMBaseInstrInfo::
03948 hasHighOperandLatency(const InstrItineraryData *ItinData,
03949                       const MachineRegisterInfo *MRI,
03950                       const MachineInstr *DefMI, unsigned DefIdx,
03951                       const MachineInstr *UseMI, unsigned UseIdx) const {
03952   unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
03953   unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
03954   if (Subtarget.isCortexA8() &&
03955       (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
03956     // CortexA8 VFP instructions are not pipelined.
03957     return true;
03958 
03959   // Hoist VFP / NEON instructions with 4 or higher latency.
03960   int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
03961   if (Latency < 0)
03962     Latency = getInstrLatency(ItinData, DefMI);
03963   if (Latency <= 3)
03964     return false;
03965   return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
03966          UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
03967 }
03968 
03969 bool ARMBaseInstrInfo::
03970 hasLowDefLatency(const InstrItineraryData *ItinData,
03971                  const MachineInstr *DefMI, unsigned DefIdx) const {
03972   if (!ItinData || ItinData->isEmpty())
03973     return false;
03974 
03975   unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
03976   if (DDomain == ARMII::DomainGeneral) {
03977     unsigned DefClass = DefMI->getDesc().getSchedClass();
03978     int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
03979     return (DefCycle != -1 && DefCycle <= 2);
03980   }
03981   return false;
03982 }
03983 
03984 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
03985                                          StringRef &ErrInfo) const {
03986   if (convertAddSubFlagsOpcode(MI->getOpcode())) {
03987     ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
03988     return false;
03989   }
03990   return true;
03991 }
03992 
03993 // LoadStackGuard has so far only been implemented for MachO. A different
03994 // code sequence is needed for other targets.
03995 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
03996                                                 unsigned LoadImmOpc,
03997                                                 unsigned LoadOpc,
03998                                                 Reloc::Model RM) const {
03999   MachineBasicBlock &MBB = *MI->getParent();
04000   DebugLoc DL = MI->getDebugLoc();
04001   unsigned Reg = MI->getOperand(0).getReg();
04002   const GlobalValue *GV =
04003       cast<GlobalValue>((*MI->memoperands_begin())->getValue());
04004   MachineInstrBuilder MIB;
04005 
04006   BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
04007       .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
04008 
04009   if (Subtarget.GVIsIndirectSymbol(GV, RM)) {
04010     MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
04011     MIB.addReg(Reg, RegState::Kill).addImm(0);
04012     unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
04013     MachineMemOperand *MMO = MBB.getParent()->
04014         getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4);
04015     MIB.addMemOperand(MMO);
04016     AddDefaultPred(MIB);
04017   }
04018 
04019   MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
04020   MIB.addReg(Reg, RegState::Kill).addImm(0);
04021   MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
04022   AddDefaultPred(MIB);
04023 }
04024 
04025 bool
04026 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
04027                                      unsigned &AddSubOpc,
04028                                      bool &NegAcc, bool &HasLane) const {
04029   DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
04030   if (I == MLxEntryMap.end())
04031     return false;
04032 
04033   const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
04034   MulOpc = Entry.MulOpc;
04035   AddSubOpc = Entry.AddSubOpc;
04036   NegAcc = Entry.NegAcc;
04037   HasLane = Entry.HasLane;
04038   return true;
04039 }
04040 
04041 //===----------------------------------------------------------------------===//
04042 // Execution domains.
04043 //===----------------------------------------------------------------------===//
04044 //
04045 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
04046 // and some can go down both.  The vmov instructions go down the VFP pipeline,
04047 // but they can be changed to vorr equivalents that are executed by the NEON
04048 // pipeline.
04049 //
04050 // We use the following execution domain numbering:
04051 //
04052 enum ARMExeDomain {
04053   ExeGeneric = 0,
04054   ExeVFP = 1,
04055   ExeNEON = 2
04056 };
04057 //
04058 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
04059 //
04060 std::pair<uint16_t, uint16_t>
04061 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
04062   // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
04063   // if they are not predicated.
04064   if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
04065     return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
04066 
04067   // CortexA9 is particularly picky about mixing the two and wants these
04068   // converted.
04069   if (Subtarget.isCortexA9() && !isPredicated(MI) &&
04070       (MI->getOpcode() == ARM::VMOVRS ||
04071        MI->getOpcode() == ARM::VMOVSR ||
04072        MI->getOpcode() == ARM::VMOVS))
04073     return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
04074 
04075   // No other instructions can be swizzled, so just determine their domain.
04076   unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
04077 
04078   if (Domain & ARMII::DomainNEON)
04079     return std::make_pair(ExeNEON, 0);
04080 
04081   // Certain instructions can go either way on Cortex-A8.
04082   // Treat them as NEON instructions.
04083   if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
04084     return std::make_pair(ExeNEON, 0);
04085 
04086   if (Domain & ARMII::DomainVFP)
04087     return std::make_pair(ExeVFP, 0);
04088 
04089   return std::make_pair(ExeGeneric, 0);
04090 }
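// Editorial note: the returned pair is (current domain, bitmask of domains
// the instruction could be rewritten for); an unpredicated VMOVD, for
// example, reports ExeVFP with mask (1 << ExeVFP) | (1 << ExeNEON). A
// hedged sketch of how a caller might decode the pair (canExecuteInDomain
// is not an API in this file):
static bool canExecuteInDomain(std::pair<uint16_t, uint16_t> DomainPair,
                               unsigned Domain) {
  if (DomainPair.second == 0)          // zero mask: the instruction cannot be
    return Domain == DomainPair.first; // swizzled out of its current domain
  return (DomainPair.second & (1u << Domain)) != 0;
}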
04091 
04092 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
04093                                             unsigned SReg, unsigned &Lane) {
04094   unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
04095   Lane = 0;
04096 
04097   if (DReg != ARM::NoRegister)
04098     return DReg;
04099 
04100   Lane = 1;
04101   DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
04102 
04103   assert(DReg && "S-register with no D super-register?");
04104   return DReg;
04105 }
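// Editorial note: S0 and S1 both live inside D0, so querying S0 yields D0
// with Lane == 0 and S1 yields D0 with Lane == 1. A usage sketch
// (exampleDRegLookup is illustrative, not part of this file):
static void exampleDRegLookup(const TargetRegisterInfo *TRI) {
  unsigned Lane;
  unsigned DReg = getCorrespondingDRegAndLane(TRI, ARM::S1, Lane);
  (void)DReg; // expected: the D register containing S1 (D0), with Lane == 1
}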
04106 
04107 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, set
04108 /// ImplicitSReg to the register that must be marked as an implicit use, or
04109 /// to zero if no implicit use needs to be added.
04110 ///
04111 /// If the function cannot determine whether an SPR should be marked as an
04112 /// implicit use, it returns false.
04113 ///
04114 /// This function handles cases where an instruction is being modified from
04115 /// taking an SPR operand to taking DPR[Lane]. A use of the DPR is being added,
04116 /// which may conflict with an earlier def of an SPR corresponding to
04117 /// DPR[Lane^1] (i.e. the other lane of the DPR).
04118 ///
04119 /// If that other SPR is defined, an implicit use of it should be added.
04120 /// Otherwise (including the case where the DPR itself is defined), it should not.
04121 ///
04122 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
04123                                        MachineInstr *MI,
04124                                        unsigned DReg, unsigned Lane,
04125                                        unsigned &ImplicitSReg) {
04126   // If the DPR is defined or used already, the other SPR lane will be chained
04127   // correctly, so there is nothing to be done.
04128   if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
04129     ImplicitSReg = 0;
04130     return true;
04131   }
04132 
04133   // Otherwise we need to go searching to see if the SPR is set explicitly.
04134   ImplicitSReg = TRI->getSubReg(DReg,
04135                                 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
04136   MachineBasicBlock::LivenessQueryResult LQR =
04137     MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
04138 
04139   if (LQR == MachineBasicBlock::LQR_Live)
04140     return true;
04141   else if (LQR == MachineBasicBlock::LQR_Unknown)
04142     return false;
04143 
04144   // If the register is known not to be live, there is no need to add an
04145   // implicit-use.
04146   ImplicitSReg = 0;
04147   return true;
04148 }
04149 
04150 void
04151 ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
04152   unsigned DstReg, SrcReg, DReg;
04153   unsigned Lane;
04154   MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
04155   const TargetRegisterInfo *TRI = &getRegisterInfo();
04156   switch (MI->getOpcode()) {
04157     default:
04158       llvm_unreachable("cannot handle opcode!");
04159       break;
04160     case ARM::VMOVD:
04161       if (Domain != ExeNEON)
04162         break;
04163 
04164       // Zap the predicate operands.
04165       assert(!isPredicated(MI) && "Cannot predicate a VORRd");
04166 
04167       // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
04168       DstReg = MI->getOperand(0).getReg();
04169       SrcReg = MI->getOperand(1).getReg();
04170 
04171       for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
04172         MI->RemoveOperand(i-1);
04173 
04174       // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
04175       MI->setDesc(get(ARM::VORRd));
04176       AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
04177                         .addReg(SrcReg)
04178                         .addReg(SrcReg));
04179       break;
04180     case ARM::VMOVRS:
04181       if (Domain != ExeNEON)
04182         break;
04183       assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
04184 
04185       // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
04186       DstReg = MI->getOperand(0).getReg();
04187       SrcReg = MI->getOperand(1).getReg();
04188 
04189       for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
04190         MI->RemoveOperand(i-1);
04191 
04192       DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
04193 
04194       // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
04195       // Note that DSrc has been widened and the other lane may be undef, which
04196       // contaminates the entire register.
04197       MI->setDesc(get(ARM::VGETLNi32));
04198       AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
04199                         .addReg(DReg, RegState::Undef)
04200                         .addImm(Lane));
04201 
04202       // The old source should be an implicit use, otherwise we might think it
04203       // was dead before here.
04204       MIB.addReg(SrcReg, RegState::Implicit);
04205       break;
04206     case ARM::VMOVSR: {
04207       if (Domain != ExeNEON)
04208         break;
04209       assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
04210 
04211       // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
04212       DstReg = MI->getOperand(0).getReg();
04213       SrcReg = MI->getOperand(1).getReg();
04214 
04215       DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
04216 
04217       unsigned ImplicitSReg;
04218       if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
04219         break;
04220 
04221       for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
04222         MI->RemoveOperand(i-1);
04223 
04224       // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
04225       // Again DDst may be undefined at the beginning of this instruction.
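      // For example (illustrative registers):
      //     %s0 = VMOVSR %r1  ->  %d0 = VSETLNi32 %d0, %r1, 0, 14, %noreg
      // since %s0 is lane 0 of %d0.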
04226       MI->setDesc(get(ARM::VSETLNi32));
04227       MIB.addReg(DReg, RegState::Define)
04228          .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
04229          .addReg(SrcReg)
04230          .addImm(Lane);
04231       AddDefaultPred(MIB);
04232 
04233       // The narrower destination must be marked as set to keep previous chains
04234       // in place.
04235       MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
04236       if (ImplicitSReg != 0)
04237         MIB.addReg(ImplicitSReg, RegState::Implicit);
04238       break;
04239     }
04240     case ARM::VMOVS: {
04241       if (Domain != ExeNEON)
04242         break;
04243 
04244       // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
04245       DstReg = MI->getOperand(0).getReg();
04246       SrcReg = MI->getOperand(1).getReg();
04247 
04248       unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
04249       DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
04250       DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
04251 
04252       unsigned ImplicitSReg;
04253       if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
04254         break;
04255 
04256       for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
04257         MI->RemoveOperand(i-1);
04258 
04259       if (DSrc == DDst) {
04260         // Destination can be:
04261         //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
04262         MI->setDesc(get(ARM::VDUPLN32d));
04263         MIB.addReg(DDst, RegState::Define)
04264            .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
04265            .addImm(SrcLane);
04266         AddDefaultPred(MIB);
04267 
        // Neither the source nor the destination is naturally represented any
        // more, so add them in manually.
04270         MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
04271         MIB.addReg(SrcReg, RegState::Implicit);
04272         if (ImplicitSReg != 0)
04273           MIB.addReg(ImplicitSReg, RegState::Implicit);
04274         break;
04275       }
04276 
04277       // In general there's no single instruction that can perform an S <-> S
04278       // move in NEON space, but a pair of VEXT instructions *can* do the
04279       // job. It turns out that the VEXTs needed will only use DSrc once, with
04280       // the position based purely on the combination of lane-0 and lane-1
04281       // involved. For example
04282       //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
04283       //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
04284       //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
04285       //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
04286       //
04287       // Pattern of the MachineInstrs is:
04288       //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
04289       MachineInstrBuilder NewMIB;
04290       NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
04291                        get(ARM::VEXTd32), DDst);
04292 
      // On the first instruction, both DSrc and DDst may be <undef> if
      // present, specifically when the original instruction didn't have them
      // as an <imp-use>.
04296       unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
04297       bool CurUndef = !MI->readsRegister(CurReg, TRI);
04298       NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
04299 
04300       CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
04301       CurUndef = !MI->readsRegister(CurReg, TRI);
04302       NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
04303 
04304       NewMIB.addImm(1);
04305       AddDefaultPred(NewMIB);
04306 
04307       if (SrcLane == DstLane)
04308         NewMIB.addReg(SrcReg, RegState::Implicit);
04309 
04310       MI->setDesc(get(ARM::VEXTd32));
04311       MIB.addReg(DDst, RegState::Define);
04312 
04313       // On the second instruction, DDst has definitely been defined above, so
04314       // it is not <undef>. DSrc, if present, can be <undef> as above.
04315       CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
04316       CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
04317       MIB.addReg(CurReg, getUndefRegState(CurUndef));
04318 
04319       CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
04320       CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
04321       MIB.addReg(CurReg, getUndefRegState(CurUndef));
04322 
04323       MIB.addImm(1);
04324       AddDefaultPred(MIB);
04325 
04326       if (SrcLane != DstLane)
04327         MIB.addReg(SrcReg, RegState::Implicit);
04328 
04329       // As before, the original destination is no longer represented, add it
04330       // implicitly.
04331       MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
04332       if (ImplicitSReg != 0)
04333         MIB.addReg(ImplicitSReg, RegState::Implicit);
04334       break;
04335     }
04336   }
}
04339 
04340 //===----------------------------------------------------------------------===//
04341 // Partial register updates
04342 //===----------------------------------------------------------------------===//
04343 //
04344 // Swift renames NEON registers with 64-bit granularity.  That means any
04345 // instruction writing an S-reg implicitly reads the containing D-reg.  The
04346 // problem is mostly avoided by translating f32 operations to v2f32 operations
04347 // on D-registers, but f32 loads are still a problem.
04348 //
04349 // These instructions can load an f32 into a NEON register:
04350 //
04351 // VLDRS - Only writes S, partial D update.
04352 // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
04353 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
04354 //
04355 // FCONSTD can be used as a dependency-breaking instruction.
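//
// For example (illustrative registers), a load such as
//     vldr s0, [r0]      @ writes S0 only - a partial update of D0
// can be preceded by
//     vmov.f64 d0, #0.5  @ FCONSTD - clobbers all of D0
// so that the load no longer depends on the old value of D0.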
04356 unsigned ARMBaseInstrInfo::
04357 getPartialRegUpdateClearance(const MachineInstr *MI,
04358                              unsigned OpNum,
04359                              const TargetRegisterInfo *TRI) const {
04360   if (!SwiftPartialUpdateClearance ||
04361       !(Subtarget.isSwift() || Subtarget.isCortexA15()))
04362     return 0;
04363 
04364   assert(TRI && "Need TRI instance");
04365 
04366   const MachineOperand &MO = MI->getOperand(OpNum);
04367   if (MO.readsReg())
04368     return 0;
04369   unsigned Reg = MO.getReg();
04370   int UseOp = -1;
04371 
04372   switch(MI->getOpcode()) {
04373     // Normal instructions writing only an S-register.
04374   case ARM::VLDRS:
04375   case ARM::FCONSTS:
04376   case ARM::VMOVSR:
04377   case ARM::VMOVv8i8:
04378   case ARM::VMOVv4i16:
04379   case ARM::VMOVv2i32:
04380   case ARM::VMOVv2f32:
04381   case ARM::VMOVv1i64:
04382     UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
04383     break;
04384 
04385     // Explicitly reads the dependency.
04386   case ARM::VLD1LNd32:
04387     UseOp = 3;
04388     break;
04389   default:
04390     return 0;
04391   }
04392 
04393   // If this instruction actually reads a value from Reg, there is no unwanted
04394   // dependency.
04395   if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
04396     return 0;
04397 
04398   // We must be able to clobber the whole D-reg.
04399   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
04400     // Virtual register must be a foo:ssub_0<def,undef> operand.
04401     if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
04402       return 0;
04403   } else if (ARM::SPRRegClass.contains(Reg)) {
04404     // Physical register: MI must define the full D-reg.
04405     unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
04406                                              &ARM::DPRRegClass);
04407     if (!DReg || !MI->definesRegister(DReg, TRI))
04408       return 0;
04409   }
04410 
04411   // MI has an unwanted D-register dependency.
  // Avoid defs in the previous N instructions.
04413   return SwiftPartialUpdateClearance;
04414 }
04415 
04416 // Break a partial register dependency after getPartialRegUpdateClearance
04417 // returned non-zero.
04418 void ARMBaseInstrInfo::
04419 breakPartialRegDependency(MachineBasicBlock::iterator MI,
04420                           unsigned OpNum,
04421                           const TargetRegisterInfo *TRI) const {
04422   assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
04423   assert(TRI && "Need TRI instance");
04424 
04425   const MachineOperand &MO = MI->getOperand(OpNum);
04426   unsigned Reg = MO.getReg();
04427   assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
04428          "Can't break virtual register dependencies.");
04429   unsigned DReg = Reg;
04430 
04431   // If MI defines an S-reg, find the corresponding D super-register.
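  // Adjacent S-registers pair up into D-registers (S0/S1 -> D0, S2/S3 -> D1,
  // and so on), which the register-enum arithmetic below relies on.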
04432   if (ARM::SPRRegClass.contains(Reg)) {
04433     DReg = ARM::D0 + (Reg - ARM::S0) / 2;
04434     assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
04435   }
04436 
04437   assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
04438   assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
04439 
04440   // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
04441   // the full D-register by loading the same value to both lanes.  The
04442   // instruction is micro-coded with 2 uops, so don't do this until we can
  // properly schedule micro-coded instructions.  The resulting dispatcher
  // stalls cause regressions that are too large.
04445 
04446   // Insert the dependency-breaking FCONSTD before MI.
04447   // 96 is the encoding of 0.5, but the actual value doesn't matter here.
04448   AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
04449                          get(ARM::FCONSTD), DReg).addImm(96));
04450   MI->addRegisterKilled(DReg, TRI, true);
04451 }
04452 
04453 void ARMBaseInstrInfo::getUnconditionalBranch(
04454     MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
  // Check isThumb2() before isThumb(): isThumb() is also true in Thumb2 mode,
  // so testing it first would make the t2B case unreachable.
  if (Subtarget.isThumb2())
    Branch.setOpcode(ARM::t2B);
  else if (Subtarget.isThumb())
    Branch.setOpcode(ARM::tB);
  else
    Branch.setOpcode(ARM::Bcc);
04461 
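  // All three opcodes take the branch target followed by a predicate; AL with
  // no CPSR register makes the branch unconditional.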
04462   Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
04463   Branch.addOperand(MCOperand::CreateImm(ARMCC::AL));
04464   Branch.addOperand(MCOperand::CreateReg(0));
04465 }
04466 
04467 void ARMBaseInstrInfo::getTrap(MCInst &MI) const {
04468   if (Subtarget.isThumb())
04469     MI.setOpcode(ARM::tTRAP);
04470   else if (Subtarget.useNaClTrap())
04471     MI.setOpcode(ARM::TRAPNaCl);
04472   else
04473     MI.setOpcode(ARM::TRAP);
04474 }
04475 
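// The architected NOP hint was introduced in ARMv6T2; older cores typically
// encode a NOP as 'mov r0, r0' instead.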
04476 bool ARMBaseInstrInfo::hasNOP() const {
04477   return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
04478 }
04479 
04480 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
04481   if (MI->getNumOperands() < 4)
04482     return true;
04483   unsigned ShOpVal = MI->getOperand(3).getImm();
04484   unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
04485   // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
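  // For example, 'add r0, r1, r2, lsl #2' keeps the fast path, while a shift
  // such as 'lsl #3' does not.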
04486   if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
04487       ((ShImm == 1 || ShImm == 2) &&
04488        ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
04489     return true;
04490 
04491   return false;
04492 }
04493 
04494 bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
04495     const MachineInstr &MI, unsigned DefIdx,
04496     SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
04497   assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
04498   assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
04499 
04500   switch (MI.getOpcode()) {
04501   case ARM::VMOVDRR:
04502     // dX = VMOVDRR rY, rZ
04503     // is the same as:
04504     // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
04505     // Populate the InputRegs accordingly.
04506     // rY
04507     const MachineOperand *MOReg = &MI.getOperand(1);
04508     InputRegs.push_back(
04509         RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0));
04510     // rZ
04511     MOReg = &MI.getOperand(2);
04512     InputRegs.push_back(
04513         RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1));
04514     return true;
04515   }
04516   llvm_unreachable("Target dependent opcode missing");
04517 }
04518 
04519 bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
04520     const MachineInstr &MI, unsigned DefIdx,
04521     RegSubRegPairAndIdx &InputReg) const {
04522   assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
04523   assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
04524 
04525   switch (MI.getOpcode()) {
04526   case ARM::VMOVRRD:
04527     // rX, rY = VMOVRRD dZ
04528     // is the same as:
04529     // rX = EXTRACT_SUBREG dZ, ssub_0
04530     // rY = EXTRACT_SUBREG dZ, ssub_1
04531     const MachineOperand &MOReg = MI.getOperand(2);
04532     InputReg.Reg = MOReg.getReg();
04533     InputReg.SubReg = MOReg.getSubReg();
04534     InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
04535     return true;
04536   }
04537   llvm_unreachable("Target dependent opcode missing");
04538 }
04539 
04540 bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
04541     const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
04542     RegSubRegPairAndIdx &InsertedReg) const {
04543   assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
04544   assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
04545 
04546   switch (MI.getOpcode()) {
04547   case ARM::VSETLNi32:
04548     // dX = VSETLNi32 dY, rZ, imm
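    // which is the same as:
    // dX = INSERT_SUBREG dY, rZ, (imm == 0 ? ssub_0 : ssub_1)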
04549     const MachineOperand &MOBaseReg = MI.getOperand(1);
04550     const MachineOperand &MOInsertedReg = MI.getOperand(2);
04551     const MachineOperand &MOIndex = MI.getOperand(3);
04552     BaseReg.Reg = MOBaseReg.getReg();
04553     BaseReg.SubReg = MOBaseReg.getSubReg();
04554 
04555     InsertedReg.Reg = MOInsertedReg.getReg();
04556     InsertedReg.SubReg = MOInsertedReg.getSubReg();
04557     InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
04558     return true;
04559   }
04560   llvm_unreachable("Target dependent opcode missing");
04561 }