LLVM API Documentation

ARMFrameLowering.cpp
Go to the documentation of this file.
00001 //===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains the ARM implementation of TargetFrameLowering class.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "ARMFrameLowering.h"
00015 #include "ARMBaseInstrInfo.h"
00016 #include "ARMBaseRegisterInfo.h"
00017 #include "ARMConstantPoolValue.h"
00018 #include "ARMMachineFunctionInfo.h"
00019 #include "MCTargetDesc/ARMAddressingModes.h"
00020 #include "llvm/CodeGen/MachineFrameInfo.h"
00021 #include "llvm/CodeGen/MachineFunction.h"
00022 #include "llvm/CodeGen/MachineInstrBuilder.h"
00023 #include "llvm/CodeGen/MachineModuleInfo.h"
00024 #include "llvm/CodeGen/MachineRegisterInfo.h"
00025 #include "llvm/CodeGen/RegisterScavenging.h"
00026 #include "llvm/IR/CallingConv.h"
00027 #include "llvm/IR/Function.h"
00028 #include "llvm/MC/MCContext.h"
00029 #include "llvm/Support/CommandLine.h"
00030 #include "llvm/Target/TargetOptions.h"
00031 
00032 using namespace llvm;
00033 
00034 static cl::opt<bool>
      // Hidden boolean command-line option "align-neon-spills"; it defaults to true.
      // NOTE(review): no use of this flag is visible in this part of the file.
00035 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
00036                      cl::desc("Align ARM NEON spills in prolog and epilog"));
00037 
00038 static MachineBasicBlock::iterator
      // Forward declaration.  emitPrologue calls this to move its insertion point
      // past the aligned DPRCS2 spill code; the definition is not visible in this
      // part of the file.
00039 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
00040                         unsigned NumAlignedDPRCS2Regs);
00041 
00042 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
      // The stack grows down and its alignment is taken from the subtarget.
      // NOTE(review): the trailing 0 and 4 are presumably the local-area offset
      // and the transient stack alignment -- confirm against TargetFrameLowering.
00043     : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
00044       STI(sti) {}
00045 
00046 /// hasFP - Decide whether this function needs a dedicated frame pointer
00047 /// register.  One is needed on iOS, when frame pointer elimination is disabled
00048 /// in a function that makes calls, when the stack must be realigned, when there
00049 /// are variable sized objects, or when the frame address is taken.
00050 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
00051   // iOS requires FP not to be clobbered for backtracing purpose.
00052   if (STI.isTargetIOS())
00053     return true;
00054 
00055   // With FP elimination disabled, only functions that make calls keep the FP.
00056   const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
00057   if (MF.getTarget().Options.DisableFramePointerElim(MF) &&
00058       FrameInfo->hasCalls())
00059     return true;
00060   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
00061   return TRI->needsStackRealignment(MF) || FrameInfo->hasVarSizedObjects() ||
00062          FrameInfo->isFrameAddressTaken();
00063 }
00064 
00065 /// hasReservedCallFrame - Report whether the call frame is included as part of
00066 /// the stack frame, i.e. argument space for call sites is reserved once on
00067 /// entry so that no add/sub sp brackets are needed around call sites.
00068 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
00069   // ARM (especially Thumb) has small immediate offsets for addressing the
00070   // stack frame, so a large call frame can cause poor codegen and may even
00071   // make it impossible to scavenge a register.  Do not reserve the call frame
00072   // once it reaches half of the imm12 range.
00073   const unsigned HalfImm12 = ((1 << 12) - 1) / 2;
00074   const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
00075   if (FrameInfo->getMaxCallFrameSize() >= HalfImm12)
00076     return false;
00077 
00078   // Variable sized objects also rule out a reserved call frame.
00079   return !FrameInfo->hasVarSizedObjects();
00080 }
00082 
00083 /// canSimplifyCallFramePseudos - The call frame pseudos can be simplified when
00084 /// the call frame is reserved or the function has variable sized objects.  A FP
00085 /// alone is not enough: Thumb2 may still reference some objects via SP.
00086 bool
00087 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
00088   const bool HasVarSized = MF.getFrameInfo()->hasVarSizedObjects();
00089   return HasVarSized || hasReservedCallFrame(MF);
00090 }
00091 
00092 /// isCSRestore - Return true if MI restores callee-saved registers: either a
00093 /// pop whose operands from index 5 onwards are all callee-saved registers, or
00094 /// an LDR_POST_IMM / LDR_POST_REG / t2LDR_POST that loads a callee-saved
00095 /// register (operand 0) and has SP as operand 1.  TII is not used.
00096 static bool isCSRestore(MachineInstr *MI, const ARMBaseInstrInfo &TII,
00097                         const MCPhysReg *CSRegs) {
00098   const auto Opc = MI->getOpcode();
00099   // Integer spill area is handled with "pop".
00100   if (isPopOpcode(Opc)) {
00101     int Idx = 5, End = MI->getNumOperands();
00102     while (Idx != End &&
00103            isCalleeSavedRegister(MI->getOperand(Idx).getReg(), CSRegs))
00104       ++Idx;
00105     return Idx == End;
00106   }
00107   const bool IsPostLoad = Opc == ARM::LDR_POST_IMM ||
00108                           Opc == ARM::LDR_POST_REG || Opc == ARM::t2LDR_POST;
00109   return IsPostLoad &&
00110          isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
00111          MI->getOperand(1).getReg() == ARM::SP;
00112 }
00113 
00114 static void emitRegPlusImmediate(
00115     bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
00116     DebugLoc dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg,
00117     int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
00118     ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
00119   // Pure dispatch: isARM selects the ARM emitter, otherwise the T2 one is used.
00120   if (!isARM) {
00121     emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
00122                            Pred, PredReg, TII, MIFlags);
00123     return;
00124   }
00125   emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
00126                           Pred, PredReg, TII, MIFlags);
00127 }
00128 
00129 static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
00130                          MachineBasicBlock::iterator &MBBI, DebugLoc dl,
00131                          const ARMBaseInstrInfo &TII, int NumBytes,
00132                          unsigned MIFlags = MachineInstr::NoFlags,
00133                          ARMCC::CondCodes Pred = ARMCC::AL,
00134                          unsigned PredReg = 0) {
        // Convenience wrapper: forwards to emitRegPlusImmediate with ARM::SP as
        // both the destination and the source register and NumBytes as the
        // immediate.  All other arguments are passed through unchanged.
00135   emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
00136                        MIFlags, Pred, PredReg);
00137 }
00138 
00139 // Size in bytes accounted for by a VSTMDDB_UPD: 8 for every operand at index
00140 // 4 or above.  ARM and Thumb2 push/pop insts have explicit "sp, sp" operands
00141 // (+ pred) so the list starts at 4.
00142 static int sizeOfSPAdjustment(const MachineInstr *MI) {
00143   assert(MI->getOpcode() == ARM::VSTMDDB_UPD);
00144   const int NumListOps = static_cast<int>(MI->getNumOperands()) - 4;
00145   return NumListOps > 0 ? NumListOps * 8 : 0;
00146 }
00148 
00149 // A stack probe is required once the stack size reaches 4096 bytes, or 4080
00150 // bytes when the frame's stack protector index is greater than zero.
00151 static bool WindowsRequiresStackProbe(const MachineFunction &MF,
00152                                       size_t StackSizeInBytes) {
00153   const bool HasProtector = MF.getFrameInfo()->getStackProtectorIndex() > 0;
00154   return StackSizeInBytes >= (HasProtector ? 4080 : 4096);
00155 }
00156 
00157 void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
        // Emits the frame setup for MF into its first block (MF.front()).  The
        // callee-save push instructions are expected to be at the top of the
        // block already: this function only steps over them and then adds, around
        // them, the SP adjustments, the CFI instructions, the frame pointer and
        // base pointer setup, the Windows __chkstk probe and the dynamic stack
        // realignment.  Thumb1 functions are not supported (asserted below) and
        // GHC calling convention functions get no prologue at all.
00158   MachineBasicBlock &MBB = MF.front();
00159   MachineBasicBlock::iterator MBBI = MBB.begin();
00160   MachineFrameInfo  *MFI = MF.getFrameInfo();
00161   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
00162   MachineModuleInfo &MMI = MF.getMMI();
00163   MCContext &Context = MMI.getContext();
00164   const TargetMachine &TM = MF.getTarget();
00165   const MCRegisterInfo *MRI = Context.getRegisterInfo();
00166   const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
00167       TM.getSubtargetImpl()->getRegisterInfo());
00168   const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
00169                                     TM.getSubtargetImpl()->getInstrInfo());
00170   assert(!AFI->isThumb1OnlyFunction() &&
00171          "This emitPrologue does not support Thumb1!");
00172   bool isARM = !AFI->isThumbFunction();
00173   unsigned Align =
00174       TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
00175   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
00176   unsigned NumBytes = MFI->getStackSize();
00177   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
00178   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
00179   unsigned FramePtr = RegInfo->getFrameRegister(MF);
        // Running CFA offset; it is decremented for every SP adjustment that gets
        // described by a CFI instruction below.
00180   int CFAOffset = 0;
00181 
00182   // Determine the sizes of each callee-save spill areas and record which frame
00183   // belongs to which callee-save spill areas.
00184   unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
00185   int FramePtrSpillFI = 0;
00186   int D8SpillFI = 0;
00187 
00188   // All calls are tail calls in GHC calling conv, and functions have no
00189   // prologue/epilogue.
00190   if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
00191     return;
00192 
00193   // Allocate the vararg register save area.
00194   if (ArgRegsSaveSize) {
00195     emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
00196                  MachineInstr::FrameSetup);
00197     CFAOffset -= ArgRegsSaveSize;
00198     unsigned CFIIndex = MMI.addFrameInst(
00199         MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
00200     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
00201         .addCFIIndex(CFIIndex);
00202   }
00203 
        // Without a stack frame (and when no Windows stack probe is needed) the
        // only thing left to do is to allocate the remaining bytes and describe
        // that adjustment in the CFI.
00204   if (!AFI->hasStackFrame() &&
00205       (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
00206     if (NumBytes - ArgRegsSaveSize != 0) {
00207       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
00208                    MachineInstr::FrameSetup);
00209       CFAOffset -= NumBytes - ArgRegsSaveSize;
00210       unsigned CFIIndex = MMI.addFrameInst(
00211           MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
00212       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
00213           .addCFIIndex(CFIIndex);
00214     }
00215     return;
00216   }
00217 
00218   // Determine spill area sizes.
        // On Darwin R8-R12 are counted in area 2; on every other target they fall
        // through and are counted in area 1 together with R0-R7 and LR.  Any other
        // register is treated as a DPR.
00219   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
00220     unsigned Reg = CSI[i].getReg();
00221     int FI = CSI[i].getFrameIdx();
00222     switch (Reg) {
00223     case ARM::R8:
00224     case ARM::R9:
00225     case ARM::R10:
00226     case ARM::R11:
00227     case ARM::R12:
00228       if (STI.isTargetDarwin()) {
00229         GPRCS2Size += 4;
00230         break;
00231       }
00232       // fallthrough
00233     case ARM::R0:
00234     case ARM::R1:
00235     case ARM::R2:
00236     case ARM::R3:
00237     case ARM::R4:
00238     case ARM::R5:
00239     case ARM::R6:
00240     case ARM::R7:
00241     case ARM::LR:
00242       if (Reg == FramePtr)
00243         FramePtrSpillFI = FI;
00244       GPRCS1Size += 4;
00245       break;
00246     default:
00247       // This is a DPR. Exclude the aligned DPRCS2 spills.
00248       if (Reg == ARM::D8)
00249         D8SpillFI = FI;
00250       if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
00251         DPRCSSize += 8;
00252     }
00253   }
00254 
00255   // Move past area 1.
        // LastPush tracks the most recent push that was stepped over; it stays at
        // MBB.end() when no area has any registers.  The per-area iterators are
        // only assigned when the corresponding area is non-empty.
00256   MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
00257       DPRCSPush;
00258   if (GPRCS1Size > 0)
00259     GPRCS1Push = LastPush = MBBI++;
00260 
00261   // Determine starting offsets of spill areas.
00262   bool HasFP = hasFP(MF);
00263   unsigned DPRCSOffset  = NumBytes - (ArgRegsSaveSize + GPRCS1Size
00264                                       + GPRCS2Size + DPRCSSize);
00265   unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
00266   unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
00267   int FramePtrOffsetInPush = 0;
00268   if (HasFP) {
00269     FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI)
00270                            + GPRCS1Size + ArgRegsSaveSize;
00271     AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
00272                                 NumBytes);
00273   }
00274   AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
00275   AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
00276   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
00277 
00278   // Move past area 2.
00279   if (GPRCS2Size > 0)
00280     GPRCS2Push = LastPush = MBBI++;
00281 
00282   // Move past area 3.
00283   if (DPRCSSize > 0) {
00284     DPRCSPush = MBBI;
00285     // Since vpush register list cannot have gaps, there may be multiple vpush
00286     // instructions in the prologue.
          // NOTE(review): MBBI is dereferenced here without a check against
          // MBB.end(); this relies on some non-vpush instruction following the
          // vpush sequence in the block -- confirm.
00287     while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
00288       LastPush = MBBI++;
00289   }
00290 
00291   // Move past the aligned DPRCS2 area.
00292   if (AFI->getNumAlignedDPRCS2Regs() > 0) {
00293     MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
00294     // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
00295     // leaves the stack pointer pointing to the DPRCS2 area.
00296     //
00297     // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
00298     NumBytes += MFI->getObjectOffset(D8SpillFI);
00299   } else
00300     NumBytes = DPRCSOffset;
00301 
        // Windows stack probe: R4 is loaded with the allocation size in words
        // (NumBytes >> 2), __chkstk is called (directly, or through R12 for the
        // Large and JITDefault code models), and R4 is then subtracted from SP.
        // NumBytes is cleared afterwards so no further SP update is emitted.
00302   if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
00303     uint32_t NumWords = NumBytes >> 2;
00304 
00305     if (NumWords < 65536)
00306       AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
00307                      .addImm(NumWords)
00308                      .setMIFlags(MachineInstr::FrameSetup));
00309     else
00310       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
00311         .addImm(NumWords)
00312         .setMIFlags(MachineInstr::FrameSetup);
00313 
00314     switch (TM.getCodeModel()) {
00315     case CodeModel::Small:
00316     case CodeModel::Medium:
00317     case CodeModel::Default:
00318     case CodeModel::Kernel:
00319       BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
00320         .addImm((unsigned)ARMCC::AL).addReg(0)
00321         .addExternalSymbol("__chkstk")
00322         .addReg(ARM::R4, RegState::Implicit)
00323         .setMIFlags(MachineInstr::FrameSetup);
00324       break;
00325     case CodeModel::Large:
00326     case CodeModel::JITDefault:
00327       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
00328         .addExternalSymbol("__chkstk")
00329         .setMIFlags(MachineInstr::FrameSetup);
00330 
00331       BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
00332         .addImm((unsigned)ARMCC::AL).addReg(0)
00333         .addReg(ARM::R12, RegState::Kill)
00334         .addReg(ARM::R4, RegState::Implicit)
00335         .setMIFlags(MachineInstr::FrameSetup);
00336       break;
00337     }
00338 
00339     AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr),
00340                                         ARM::SP)
00341                                 .addReg(ARM::SP, RegState::Define)
00342                                 .addReg(ARM::R4, RegState::Kill)
00343                                 .setMIFlags(MachineInstr::FrameSetup)));
00344     NumBytes = 0;
00345   }
00346 
        // adjustedGPRCS1Size is GPRCS1Size plus any SP update that gets folded
        // into the area 1 push; it is what the CFI for that push must describe.
00347   unsigned adjustedGPRCS1Size = GPRCS1Size;
00348   if (NumBytes) {
00349     // Adjust SP after all the callee-save spills.
00350     if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) {
00351       if (LastPush == GPRCS1Push) {
00352         FramePtrOffsetInPush += NumBytes;
00353         adjustedGPRCS1Size += NumBytes;
00354         NumBytes = 0;
00355       }
00356     } else
00357       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
00358                    MachineInstr::FrameSetup);
00359 
00360     if (HasFP && isARM)
00361       // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
00362       // Note it's not safe to do this in Thumb2 mode because it would have
00363       // taken two instructions:
00364       // mov sp, r7
00365       // sub sp, #24
00366       // If an interrupt is taken between the two instructions, then sp is in
00367       // an inconsistent state (pointing to the middle of callee-saved area).
00368       // The interrupt handler can end up clobbering the registers.
00369       AFI->setShouldRestoreSPFromFP(true);
00370   }
00371 
        // CFI for spill area 1: the new CFA offset followed by one offset entry
        // for every register that was counted in area 1 above.
00372   if (adjustedGPRCS1Size > 0) {
00373     CFAOffset -= adjustedGPRCS1Size;
00374     unsigned CFIIndex = MMI.addFrameInst(
00375         MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
          // Note that ++GPRCS1Push advances GPRCS1Push itself, so the frame
          // pointer setup further down is also inserted after the area 1 push.
00376     MachineBasicBlock::iterator Pos = ++GPRCS1Push;
00377     BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
00378         .addCFIIndex(CFIIndex);
00379     for (const auto &Entry : CSI) {
00380       unsigned Reg = Entry.getReg();
00381       int FI = Entry.getFrameIdx();
00382       switch (Reg) {
00383       case ARM::R8:
00384       case ARM::R9:
00385       case ARM::R10:
00386       case ARM::R11:
00387       case ARM::R12:
00388         if (STI.isTargetDarwin())
00389           break;
00390         // fallthrough
00391       case ARM::R0:
00392       case ARM::R1:
00393       case ARM::R2:
00394       case ARM::R3:
00395       case ARM::R4:
00396       case ARM::R5:
00397       case ARM::R6:
00398       case ARM::R7:
00399       case ARM::LR:
00400         CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
00401             nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
00402         BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
00403             .addCFIIndex(CFIIndex);
00404         break;
00405       }
00406     }
00407   }
00408 
00409   // Set FP to point to the stack slot that contains the previous FP.
00410   // For iOS, FP is R7, which has now been stored in spill area 1.
00411   // Otherwise, if this is not iOS, all the callee-saved registers go
00412   // into spill area 1, including the FP in R11.  In either case, it
00413   // is in area one and the adjustment needs to take place just after
00414   // that push.
        // NOTE(review): GPRCS1Push is only assigned when GPRCS1Size > 0; this
        // block assumes that HasFP implies a non-empty area 1 -- confirm.
00415   if (HasFP) {
00416     emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, GPRCS1Push, dl, TII,
00417                          FramePtr, ARM::SP, FramePtrOffsetInPush,
00418                          MachineInstr::FrameSetup);
00419     if (FramePtrOffsetInPush) {
00420       CFAOffset += FramePtrOffsetInPush;
00421       unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
00422           nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
00423       BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
00424           .addCFIIndex(CFIIndex);
00425 
00426     } else {
00427       unsigned CFIIndex =
00428           MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
00429               nullptr, MRI->getDwarfRegNum(FramePtr, true)));
00430       BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
00431           .addCFIIndex(CFIIndex);
00432     }
00433   }
00434 
        // CFI for spill area 2.  The CFA offset is only updated when there is no
        // frame pointer; the register offsets are emitted for R8-R12 on Darwin.
00435   if (GPRCS2Size > 0) {
00436     MachineBasicBlock::iterator Pos = ++GPRCS2Push;
00437     if (!HasFP) {
00438       CFAOffset -= GPRCS2Size;
00439       unsigned CFIIndex = MMI.addFrameInst(
00440           MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
00441       BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
00442           .addCFIIndex(CFIIndex);
00443     }
00444     for (const auto &Entry : CSI) {
00445       unsigned Reg = Entry.getReg();
00446       int FI = Entry.getFrameIdx();
00447       switch (Reg) {
00448       case ARM::R8:
00449       case ARM::R9:
00450       case ARM::R10:
00451       case ARM::R11:
00452       case ARM::R12:
00453         if (STI.isTargetDarwin()) {
00454           unsigned DwarfReg =  MRI->getDwarfRegNum(Reg, true);
00455           unsigned Offset = MFI->getObjectOffset(FI);
00456           unsigned CFIIndex = MMI.addFrameInst(
00457               MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
00458           BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
00459               .addCFIIndex(CFIIndex);
00460         }
00461         break;
00462       }
00463     }
00464   }
00465 
        // CFI for the DPR spill area: one CFA offset update per vpush (only when
        // there is no frame pointer), then the offsets of the D registers that are
        // not part of the aligned DPRCS2 set.
00466   if (DPRCSSize > 0) {
00467     // Since vpush register list cannot have gaps, there may be multiple vpush
00468     // instructions in the prologue.
00469     do {
00470       MachineBasicBlock::iterator Push = DPRCSPush++;
00471       if (!HasFP) {
00472         CFAOffset -= sizeOfSPAdjustment(Push);
00473         unsigned CFIIndex = MMI.addFrameInst(
00474             MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
00475         BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
00476             .addCFIIndex(CFIIndex);
00477       }
00478     } while (DPRCSPush->getOpcode() == ARM::VSTMDDB_UPD);
00479 
00480     for (const auto &Entry : CSI) {
00481       unsigned Reg = Entry.getReg();
00482       int FI = Entry.getFrameIdx();
00483       if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
00484           (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
00485         unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
00486         unsigned Offset = MFI->getObjectOffset(FI);
00487         unsigned CFIIndex = MMI.addFrameInst(
00488             MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
00489         BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
00490             .addCFIIndex(CFIIndex);
00491       }
00492     }
00493   }
00494 
        // CFI for whatever is left in NumBytes at this point, again only when
        // there is no frame pointer.
00495   if (NumBytes) {
00496     if (!HasFP) {
00497       CFAOffset -= NumBytes;
00498       unsigned CFIIndex = MMI.addFrameInst(
00499           MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
00500       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
00501           .addCFIIndex(CFIIndex);
00502     }
00503   }
00504 
00505   if (STI.isTargetELF() && hasFP(MF))
00506     MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
00507                              AFI->getFramePtrSpillOffset());
00508 
00509   AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
00510   AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
00511   AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
00512 
00513   // If we need dynamic stack realignment, do it here. Be paranoid and make
00514   // sure if we also have VLAs, we have a base pointer for frame access.
00515   // If aligned NEON registers were spilled, the stack has already been
00516   // realigned.
00517   if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
00518     unsigned MaxAlign = MFI->getMaxAlignment();
00519     assert (!AFI->isThumb1OnlyFunction());
00520     if (!AFI->isThumbFunction()) {
00521       // Emit bic sp, sp, MaxAlign-1
00522       AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
00523                                           TII.get(ARM::BICri), ARM::SP)
00524                                   .addReg(ARM::SP, RegState::Kill)
00525                                   .addImm(MaxAlign-1)));
00526     } else {
00527       // We cannot use sp as source/dest register here, thus we're emitting the
00528       // following sequence:
00529       // mov r4, sp
00530       // bic r4, r4, MaxAlign-1
00531       // mov sp, r4
00532       // FIXME: It will be better just to find spare register here.
00533       AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
00534         .addReg(ARM::SP, RegState::Kill));
00535       AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
00536                                           TII.get(ARM::t2BICri), ARM::R4)
00537                                   .addReg(ARM::R4, RegState::Kill)
00538                                   .addImm(MaxAlign-1)));
00539       AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
00540         .addReg(ARM::R4, RegState::Kill));
00541     }
00542 
00543     AFI->setShouldRestoreSPFromFP(true);
00544   }
00545 
00546   // If we need a base pointer, set it up here. It's whatever the value
00547   // of the stack pointer is at this point. Any variable size objects
00548   // will be allocated after this, so we can still use the base pointer
00549   // to reference locals.
00550   // FIXME: Clarify FrameSetup flags here.
00551   if (RegInfo->hasBasePointer(MF)) {
00552     if (isARM)
00553       BuildMI(MBB, MBBI, dl,
00554               TII.get(ARM::MOVr), RegInfo->getBaseRegister())
00555         .addReg(ARM::SP)
00556         .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
00557     else
00558       AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
00559                              RegInfo->getBaseRegister())
00560         .addReg(ARM::SP));
00561   }
00562 
00563   // If the frame has variable sized objects then the epilogue must restore
00564   // the sp from fp. We can assume there's an FP here since hasFP already
00565   // checks for hasVarSizedObjects.
00566   if (MFI->hasVarSizedObjects())
00567     AFI->setShouldRestoreSPFromFP(true);
00568 }
00569 
00570 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
00571                                     MachineBasicBlock &MBB) const {
        // Emits the frame teardown in front of the return instruction that ends
        // MBB (asserted below).  SP is moved back to the start of the callee-save
        // areas (from FP when AFI->shouldRestoreSPFromFP() is set), the existing
        // callee-save restore instructions are stepped over, TCRETURNdi and
        // TCRETURNri pseudos are replaced by real tail jumps, and finally the
        // vararg register save area is released.  Thumb1 is not supported.
00572   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
00573   assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
00574   unsigned RetOpcode = MBBI->getOpcode();
00575   DebugLoc dl = MBBI->getDebugLoc();
00576   MachineFrameInfo *MFI = MF.getFrameInfo();
00577   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
00578   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
00579   const ARMBaseInstrInfo &TII =
00580       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
00581   assert(!AFI->isThumb1OnlyFunction() &&
00582          "This emitEpilogue does not support Thumb1!");
00583   bool isARM = !AFI->isThumbFunction();
00584 
00585   unsigned Align = MF.getTarget()
00586                        .getSubtargetImpl()
00587                        ->getFrameLowering()
00588                        ->getStackAlignment();
00589   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
00590   int NumBytes = (int)MFI->getStackSize();
00591   unsigned FramePtr = RegInfo->getFrameRegister(MF);
00592 
00593   // All calls are tail calls in GHC calling conv, and functions have no
00594   // prologue/epilogue.
00595   if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
00596     return;
00597 
        // Without a stack frame only the plain SP adjustment has to be undone.
00598   if (!AFI->hasStackFrame()) {
00599     if (NumBytes - ArgRegsSaveSize != 0)
00600       emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
00601   } else {
00602     // Unwind MBBI to point to first LDR / VLDRD.
          // Walks backwards from the return over every instruction accepted by
          // isCSRestore and leaves MBBI on the first of them.
00603     const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
00604     if (MBBI != MBB.begin()) {
00605       do {
00606         --MBBI;
00607       } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
00608       if (!isCSRestore(MBBI, TII, CSRegs))
00609         ++MBBI;
00610     }
00611 
00612     // Move SP to start of FP callee save spill area.
00613     NumBytes -= (ArgRegsSaveSize +
00614                  AFI->getGPRCalleeSavedArea1Size() +
00615                  AFI->getGPRCalleeSavedArea2Size() +
00616                  AFI->getDPRCalleeSavedAreaSize());
00617 
00618     // Reset SP based on frame pointer only if the stack frame extends beyond
00619     // frame pointer stack slot or target is ELF and the function has FP.
00620     if (AFI->shouldRestoreSPFromFP()) {
00621       NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
00622       if (NumBytes) {
00623         if (isARM)
00624           emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
00625                                   ARMCC::AL, 0, TII);
00626         else {
00627           // It's not possible to restore SP from FP in a single instruction.
00628           // For iOS, this looks like:
00629           // mov sp, r7
00630           // sub sp, #24
00631           // This is bad, if an interrupt is taken after the mov, sp is in an
00632           // inconsistent state.
00633           // Use the first callee-saved register as a scratch register.
00634           assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
00635                  "No scratch register to restore SP from FP!");
00636           emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
00637                                  ARMCC::AL, 0, TII);
00638           AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
00639                                  ARM::SP)
00640             .addReg(ARM::R4));
00641         }
00642       } else {
00643         // Thumb2 or ARM.
00644         if (isARM)
00645           BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
00646             .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
00647         else
00648           AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
00649                                  ARM::SP)
00650             .addReg(FramePtr));
00651       }
00652     } else if (NumBytes &&
00653                !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes))
00654         emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
00655 
00656     // Increment past our save areas.
00657     if (AFI->getDPRCalleeSavedAreaSize()) {
00658       MBBI++;
00659       // Since vpop register list cannot have gaps, there may be multiple vpop
00660       // instructions in the epilogue.
            // NOTE(review): MBBI is dereferenced without a check against
            // MBB.end(); this relies on a non-vpop instruction (such as the
            // return) following the vpop sequence -- confirm.
00661       while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
00662         MBBI++;
00663     }
00664     if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
00665     if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
00666   }
00667 
00668   if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) {
00669     // Tail call return: adjust the stack pointer and jump to callee.
00670     MBBI = MBB.getLastNonDebugInstr();
00671     MachineOperand &JumpTarget = MBBI->getOperand(0);
00672 
00673     // Jump to label or value in register.
00674     if (RetOpcode == ARM::TCRETURNdi) {
00675       unsigned TCOpcode = STI.isThumb() ?
00676                (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
00677                ARM::TAILJMPd;
00678       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
00679       if (JumpTarget.isGlobal())
00680         MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
00681                              JumpTarget.getTargetFlags());
00682       else {
00683         assert(JumpTarget.isSymbol());
00684         MIB.addExternalSymbol(JumpTarget.getSymbolName(),
00685                               JumpTarget.getTargetFlags());
00686       }
00687 
00688       // Add the default predicate in Thumb mode.
00689       if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
00690     } else if (RetOpcode == ARM::TCRETURNri) {
00691       BuildMI(MBB, MBBI, dl,
00692               TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
00693         addReg(JumpTarget.getReg(), RegState::Kill);
00694     }
00695 
          // The new jump was inserted right before the pseudo.  Copy every
          // operand of the pseudo except operand 0 (the jump target) onto it.
00696     MachineInstr *NewMI = std::prev(MBBI);
00697     for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
00698       NewMI->addOperand(MBBI->getOperand(i));
00699 
00700     // Delete the pseudo instruction TCRETURN.
00701     MBB.erase(MBBI);
00702     MBBI = NewMI;
00703   }
00704 
        // Release the vararg register save area, if there is one.
00705   if (ArgRegsSaveSize)
00706     emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
00707 }
00708 
00709 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
00710 /// debug info.  It's the same as what we use for resolving the code-gen
00711 /// references for now.  FIXME: This can go wrong when references are
00712 /// SP-relative and simple call frames aren't used.
      /// Implemented by calling ResolveFrameIndexReference with an SP adjustment
      /// of 0: the base register is stored into FrameReg and the offset is the
      /// return value.
00713 int
00714 ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
00715                                          unsigned &FrameReg) const {
00716   return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
00717 }
00718 
      /// ResolveFrameIndexReference - Pick a base register for frame index FI and
      /// return the offset of the slot from that register.
      ///
      /// FrameReg is set to SP, the frame register, or the base register.  SPAdj is
      /// folded into the offset that is returned together with SP or the base
      /// register; the frame-register-relative offset (FPOffset) is computed before
      /// SPAdj is applied and never includes it.
00719 int
00720 ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
00721                                              int FI, unsigned &FrameReg,
00722                                              int SPAdj) const {
00723   const MachineFrameInfo *MFI = MF.getFrameInfo();
00724   const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
00725       MF.getSubtarget().getRegisterInfo());
00726   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
        // Offset is the object offset plus the total stack size; FPOffset is the
        // same quantity rebased by the frame pointer spill offset.
00727   int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
00728   int FPOffset = Offset - AFI->getFramePtrSpillOffset();
00729   bool isFixed = MFI->isFixedObjectIndex(FI);
00730 
        // Default answer: SP plus Offset (including the caller-supplied SPAdj).
00731   FrameReg = ARM::SP;
00732   Offset += SPAdj;
00733 
00734   // SP can move around if there are allocas.  We may also lose track of SP
00735   // when emergency spilling inside a non-reserved call frame setup.
00736   bool hasMovingSP = !hasReservedCallFrame(MF);
00737 
00738   // When dynamically realigning the stack, use the frame pointer for
00739   // parameters, and the stack/base pointer for locals.
00740   if (RegInfo->needsStackRealignment(MF)) {
00741     assert (hasFP(MF) && "dynamic stack realignment without a FP!");
00742     if (isFixed) {
00743       FrameReg = RegInfo->getFrameRegister(MF);
00744       Offset = FPOffset;
00745     } else if (hasMovingSP) {
00746       assert(RegInfo->hasBasePointer(MF) &&
00747              "VLAs and dynamic stack alignment, but missing base pointer!");
00748       FrameReg = RegInfo->getBaseRegister();
00749     }
          // Non-fixed objects with a stable SP fall through here with FrameReg
          // still set to SP.
00750     return Offset;
00751   }
00752 
00753   // If there is a frame pointer, use it when we can.
00754   if (hasFP(MF) && AFI->hasStackFrame()) {
00755     // Use frame pointer to reference fixed objects. Use it for locals if
00756     // there are VLAs (and thus the SP isn't reliable as a base).
00757     if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
00758       FrameReg = RegInfo->getFrameRegister(MF);
00759       return FPOffset;
00760     } else if (hasMovingSP) {
00761       assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
00762       if (AFI->isThumb2Function()) {
00763         // Try to use the frame pointer if we can, else use the base pointer
00764         // since it's available. This is handy for the emergency spill slot, in
00765         // particular.
00766         if (FPOffset >= -255 && FPOffset < 0) {
00767           FrameReg = RegInfo->getFrameRegister(MF);
00768           return FPOffset;
00769         }
00770       }
00771     } else if (AFI->isThumb2Function()) {
00772       // Use  add <rd>, sp, #<imm8>
00773       //      ldr <rd>, [sp, #<imm8>]
00774       // if at all possible to save space.
            // Accepted only for non-negative, 4-byte-multiple offsets up to 1020.
00775       if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
00776         return Offset;
00777       // In Thumb2 mode, the negative offset is very limited. Try to avoid
00778       // out of range references. ldr <rt>,[<rn>, #-<imm8>]
00779       if (FPOffset >= -255 && FPOffset < 0) {
00780         FrameReg = RegInfo->getFrameRegister(MF);
00781         return FPOffset;
00782       }
00783     } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
00784       // Otherwise, use SP or FP, whichever is closer to the stack slot.
            // The comparison is Offset against the absolute value of FPOffset.
00785       FrameReg = RegInfo->getFrameRegister(MF);
00786       return FPOffset;
00787     }
00788   }
00789   // Use the base pointer if we have one.
        // Otherwise FrameReg keeps its default of SP.
00790   if (RegInfo->hasBasePointer(MF))
00791     FrameReg = RegInfo->getBaseRegister();
00792   return Offset;
00793 }
00794 
      /// getFrameIndexOffset - Return the offset computed by getFrameIndexReference
      /// for FI.  The base register that the offset is relative to is discarded.
00795 int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
00796                                           int FI) const {
00797   unsigned FrameReg;
00798   return getFrameIndexReference(MF, FI, FrameReg);
00799 }
00800 
      /// emitPushInst - Insert store instructions before MI for the callee-saved
      /// registers in CSI that Func accepts.
      ///
      /// CSI is walked from its last entry to its first.  Registers in the range
      /// [D8, D8 + NumAlignedDPRCS2Regs) are skipped.  Each batch of collected
      /// registers is emitted as a single StmOpc instruction, except that a batch
      /// of exactly one register uses StrOpc when StrOpc is non-zero.  When NoGap
      /// is set, a batch ends as soon as the next register is not numbered exactly
      /// one above the previously collected one.  MIFlags is set on every emitted
      /// instruction.
00801 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
00802                                     MachineBasicBlock::iterator MI,
00803                                     const std::vector<CalleeSavedInfo> &CSI,
00804                                     unsigned StmOpc, unsigned StrOpc,
00805                                     bool NoGap,
00806                                     bool(*Func)(unsigned, bool),
00807                                     unsigned NumAlignedDPRCS2Regs,
00808                                     unsigned MIFlags) const {
00809   MachineFunction &MF = *MBB.getParent();
00810   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
00811 
        // Use the debug location of the insertion point, if there is one.
00812   DebugLoc DL;
00813   if (MI != MBB.end()) DL = MI->getDebugLoc();
00814 
        // Each entry is (register, kill flag).
00815   SmallVector<std::pair<unsigned,bool>, 4> Regs;
00816   unsigned i = CSI.size();
        // Outer loop: one iteration per emitted instruction.  Inner loop: collect
        // the registers for that instruction.
00817   while (i != 0) {
00818     unsigned LastReg = 0;
00819     for (; i != 0; --i) {
00820       unsigned Reg = CSI[i-1].getReg();
00821       if (!(Func)(Reg, STI.isTargetDarwin())) continue;
00822 
00823       // D-registers in the aligned area DPRCS2 are NOT spilled here.
00824       if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
00825         continue;
00826 
00827       // Add the callee-saved register as live-in unless it's LR and
00828       // @llvm.returnaddress is called. If LR is returned for
00829       // @llvm.returnaddress then it's already added to the function and
00830       // entry block live-in sets.
00831       bool isKill = true;
00832       if (Reg == ARM::LR) {
00833         if (MF.getFrameInfo()->isReturnAddressTaken() &&
00834             MF.getRegInfo().isLiveIn(Reg))
00835           isKill = false;
00836       }
00837 
00838       if (isKill)
00839         MBB.addLiveIn(Reg);
00840 
00841       // If NoGap is true, push consecutive registers and then leave the rest
00842       // for other instructions. e.g.
00843       // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
            // The break skips the loop's --i, so the register that ended the run
            // is visited again on the next outer iteration.
00844       if (NoGap && LastReg && LastReg != Reg-1)
00845         break;
00846       LastReg = Reg;
00847       Regs.push_back(std::make_pair(Reg, isKill));
00848     }
00849 
00850     if (Regs.empty())
00851       continue;
          // Multi-register store with SP writeback; also used for a single
          // register when no single-register opcode was supplied.
00852     if (Regs.size() > 1 || StrOpc== 0) {
00853       MachineInstrBuilder MIB =
00854         AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
00855                        .addReg(ARM::SP).setMIFlags(MIFlags));
00856       for (unsigned i = 0, e = Regs.size(); i < e; ++i)
00857         MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
00858     } else if (Regs.size() == 1) {
            // Single register: StrOpc defining SP, with an immediate of -4.
00859       MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
00860                                         ARM::SP)
00861         .addReg(Regs[0].first, getKillRegState(Regs[0].second))
00862         .addReg(ARM::SP).setMIFlags(MIFlags)
00863         .addImm(-4);
00864       AddDefaultPred(MIB);
00865     }
00866     Regs.clear();
00867 
00868     // Put any subsequent vpush instructions before this one: they will refer to
00869     // higher register numbers so need to be pushed first in order to preserve
00870     // monotonicity.
00871     --MI;
00872   }
00873 }
00874 
      /// emitPopInst - Insert load instructions at MI that reload the callee-saved
      /// registers in CSI that Func accepts.
      ///
      /// CSI is walked from its last entry to its first.  Registers in the range
      /// [D8, D8 + NumAlignedDPRCS2Regs) are skipped.  Each batch of collected
      /// registers is emitted as a single LdmOpc instruction, except that a batch
      /// of exactly one register uses LdrOpc when LdrOpc is non-zero.  When NoGap
      /// is set, a batch ends as soon as the next register is not numbered exactly
      /// one above the previously collected one.  When LR is reloaded and the
      /// block does not end in a tail call or interrupt return, the function is
      /// not vararg and the subtarget has v5T ops, LR is replaced by PC so the
      /// load also performs the return.
      ///
      /// MI is dereferenced unconditionally, so it must refer to an instruction.
00875 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
00876                                    MachineBasicBlock::iterator MI,
00877                                    const std::vector<CalleeSavedInfo> &CSI,
00878                                    unsigned LdmOpc, unsigned LdrOpc,
00879                                    bool isVarArg, bool NoGap,
00880                                    bool(*Func)(unsigned, bool),
00881                                    unsigned NumAlignedDPRCS2Regs) const {
00882   MachineFunction &MF = *MBB.getParent();
00883   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
00884   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
00885   DebugLoc DL = MI->getDebugLoc();
        // Classify the instruction at the insertion point by its opcode.
00886   unsigned RetOpcode = MI->getOpcode();
00887   bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
00888                      RetOpcode == ARM::TCRETURNri);
00889   bool isInterrupt =
00890       RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
00891 
00892   SmallVector<unsigned, 4> Regs;
00893   unsigned i = CSI.size();
        // Outer loop: one iteration per emitted instruction.  Inner loop: collect
        // the registers for that instruction.
00894   while (i != 0) {
00895     unsigned LastReg = 0;
00896     bool DeleteRet = false;
00897     for (; i != 0; --i) {
00898       unsigned Reg = CSI[i-1].getReg();
00899       if (!(Func)(Reg, STI.isTargetDarwin())) continue;
00900 
00901       // The aligned reloads from area DPRCS2 are not inserted here.
00902       if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
00903         continue;
00904 
            // Pop directly into PC instead of LR and switch to the returning LDM
            // opcode.  Note that LdmOpc stays changed for later batches.
00905       if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
00906           STI.hasV5TOps()) {
00907         Reg = ARM::PC;
00908         LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
00909         // Fold the return instruction into the LDM.
00910         DeleteRet = true;
00911       }
00912 
00913       // If NoGap is true, pop consecutive registers and then leave the rest
00914       // for other instructions. e.g.
00915       // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
            // The break skips the loop's --i, so the register that ended the run
            // is visited again on the next outer iteration.
00916       if (NoGap && LastReg && LastReg != Reg-1)
00917         break;
00918 
00919       LastReg = Reg;
00920       Regs.push_back(Reg);
00921     }
00922 
00923     if (Regs.empty())
00924       continue;
          // Multi-register load with SP writeback; also used for a single
          // register when no single-register opcode was supplied.
00925     if (Regs.size() > 1 || LdrOpc == 0) {
00926       MachineInstrBuilder MIB =
00927         AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
00928                        .addReg(ARM::SP));
00929       for (unsigned i = 0, e = Regs.size(); i < e; ++i)
00930         MIB.addReg(Regs[i], getDefRegState(true));
            // The load now returns: carry over the old return's implicit operands
            // and remove it.
00931       if (DeleteRet) {
00932         MIB.copyImplicitOps(&*MI);
00933         MI->eraseFromParent();
00934       }
            // Continue from the newly built instruction.
00935       MI = MIB;
00936     } else if (Regs.size() == 1) {
00937       // If we adjusted the reg to PC from LR above, switch it back here. We
00938       // only do that for LDM.
            // NOTE(review): DeleteRet is not consulted in this branch, so the
            // existing return instruction is left in place.
00939       if (Regs[0] == ARM::PC)
00940         Regs[0] = ARM::LR;
00941       MachineInstrBuilder MIB =
00942         BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
00943           .addReg(ARM::SP, RegState::Define)
00944           .addReg(ARM::SP);
00945       // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
00946       // that refactoring is complete (eventually).
00947       if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
00948         MIB.addReg(0);
00949         MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
00950       } else
00951         MIB.addImm(4);
00952       AddDefaultPred(MIB);
00953     }
00954     Regs.clear();
00955 
00956     // Put any subsequent vpop instructions after this one: they will refer to
00957     // higher register numbers so need to be popped afterwards.
00958     ++MI;
00959   }
00960 }
00961 
00962 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
00963 /// starting from d8.  Also insert stack realignment code and leave the stack
00964 /// pointer pointing to the d8 spill slot.
      ///
      /// All instructions are inserted before MI, which is dereferenced for its
      /// debug location.  r4 is used as the scratch address register.  The frame
      /// objects in CSI that belong to d8..d15 have their alignment updated, and
      /// the function is flagged so that SP is restored from FP.
00965 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
00966                                     MachineBasicBlock::iterator MI,
00967                                     unsigned NumAlignedDPRCS2Regs,
00968                                     const std::vector<CalleeSavedInfo> &CSI,
00969                                     const TargetRegisterInfo *TRI) {
00970   MachineFunction &MF = *MBB.getParent();
00971   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
00972   DebugLoc DL = MI->getDebugLoc();
00973   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
00974   MachineFrameInfo &MFI = *MF.getFrameInfo();
00975 
00976   // Mark the D-register spill slots as properly aligned.  Since MFI computes
00977   // stack slot layout backwards, this can actually mean that the d-reg stack
00978   // slot offsets can be wrong. The offset for d8 will always be correct.
00979   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
          // Unsigned subtraction: registers below D8 wrap to a large value and
          // are rejected by the same test as registers above D15.
00980     unsigned DNum = CSI[i].getReg() - ARM::D8;
00981     if (DNum >= 8)
00982       continue;
00983     int FI = CSI[i].getFrameIdx();
00984     // The even-numbered registers will be 16-byte aligned, the odd-numbered
00985     // registers will be 8-byte aligned.
00986     MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
00987 
00988     // The stack slot for D8 needs to be maximally aligned because this is
00989     // actually the point where we align the stack pointer.  MachineFrameInfo
00990     // computes all offsets relative to the incoming stack pointer which is a
00991     // bit weird when realigning the stack.  Any extra padding for this
00992     // over-alignment is not realized because the code inserted below adjusts
00993     // the stack pointer by numregs * 8 before aligning the stack pointer.
00994     if (DNum == 0)
00995       MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
00996   }
00997 
00998   // Move the stack pointer to the d8 spill slot, and align it at the same
00999   // time. Leave the stack slot address in the scratch register r4.
01000   //
01001   //   sub r4, sp, #numregs * 8
01002   //   bic r4, r4, #align - 1
01003   //   mov sp, r4
01004   //
01005   bool isThumb = AFI->isThumbFunction();
01006   assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
01007   AFI->setShouldRestoreSPFromFP(true);
01008 
01009   // sub r4, sp, #numregs * 8
01010   // The immediate is <= 64, so it doesn't need any special encoding.
01011   unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
01012   AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
01013                               .addReg(ARM::SP)
01014                               .addImm(8 * NumAlignedDPRCS2Regs)));
01015 
01016   // bic r4, r4, #align-1
01017   Opc = isThumb ? ARM::t2BICri : ARM::BICri;
01018   unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
01019   AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
01020                               .addReg(ARM::R4, RegState::Kill)
01021                               .addImm(MaxAlign - 1)));
01022 
01023   // mov sp, r4
01024   // The stack pointer must be adjusted before spilling anything, otherwise
01025   // the stack slots could be clobbered by an interrupt handler.
01026   // Leave r4 live, it is used below.
01027   Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
01028   MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
01029                             .addReg(ARM::R4);
01030   MIB = AddDefaultPred(MIB);
        // Only the ARM-mode MOVr gets the default CC operand added here.
01031   if (!isThumb)
01032     AddDefaultCC(MIB);
01033 
01034   // Now spill NumAlignedDPRCS2Regs registers starting from d8.
01035   // r4 holds the stack slot address.
01036   unsigned NextReg = ARM::D8;
01037 
01038   // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
01039   // The writeback is only needed when emitting two vst1.64 instructions.
01040   if (NumAlignedDPRCS2Regs >= 6) {
01041     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
01042                                                &ARM::QQPRRegClass);
01043     MBB.addLiveIn(SupReg);
01044     AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
01045                            ARM::R4)
01046                    .addReg(ARM::R4, RegState::Kill).addImm(16)
01047                    .addReg(NextReg)
01048                    .addReg(SupReg, RegState::ImplicitKill));
01049     NextReg += 4;
01050     NumAlignedDPRCS2Regs -= 4;
01051   }
01052 
01053   // We won't modify r4 beyond this point.  It currently points to the next
01054   // register to be spilled.
01055   unsigned R4BaseReg = NextReg;
01056 
01057   // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
01058   if (NumAlignedDPRCS2Regs >= 4) {
01059     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
01060                                                &ARM::QQPRRegClass);
01061     MBB.addLiveIn(SupReg);
01062     AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
01063                    .addReg(ARM::R4).addImm(16).addReg(NextReg)
01064                    .addReg(SupReg, RegState::ImplicitKill));
01065     NextReg += 4;
01066     NumAlignedDPRCS2Regs -= 4;
01067   }
01068 
01069   // 16-byte aligned vst1.64 with 2 d-regs.
01070   if (NumAlignedDPRCS2Regs >= 2) {
01071     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
01072                                                &ARM::QPRRegClass);
01073     MBB.addLiveIn(SupReg);
01074     AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
01075                    .addReg(ARM::R4).addImm(16).addReg(SupReg));
01076     NextReg += 2;
01077     NumAlignedDPRCS2Regs -= 2;
01078   }
01079 
01080   // Finally, use a vanilla vstr.64 for the odd last register.
01081   if (NumAlignedDPRCS2Regs) {
01082     MBB.addLiveIn(NextReg);
01083     // vstr.64 uses addrmode5 which has an offset scale of 4.
          // Each d-register slot is 8 bytes, i.e. 2 offset units per register
          // past the one r4 points at.
01084     AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
01085                    .addReg(NextReg)
01086                    .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
01087   }
01088 
01089   // The last spill instruction inserted should kill the scratch register r4.
01090   std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
01091 }
01092 
01093 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
01094 /// iterator to the following instruction.
      ///
      /// MI must point at the first of the three realignment instructions.  The
      /// number of store instructions that follow depends on
      /// NumAlignedDPRCS2Regs: three for 7, one for 1, 2 and 4, and two for every
      /// other count.
01095 static MachineBasicBlock::iterator
01096 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
01097                         unsigned NumAlignedDPRCS2Regs) {
01098   //   sub r4, sp, #numregs * 8
01099   //   bic r4, r4, #align - 1
01100   //   mov sp, r4
01101   ++MI; ++MI; ++MI;
01102   assert(MI->mayStore() && "Expecting spill instruction");
01103 
01104   // These switches all fall through.
        // Each label advances to the next store; execution then continues into
        // the labels below it, ending by stepping past the last store.
01105   switch(NumAlignedDPRCS2Regs) {
01106   case 7:
01107     ++MI;
01108     assert(MI->mayStore() && "Expecting spill instruction");
01109   default:
01110     ++MI;
01111     assert(MI->mayStore() && "Expecting spill instruction");
01112   case 1:
01113   case 2:
01114   case 4:
          // MI is now at the last store, which is expected to kill r4.
01115     assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
01116     ++MI;
01117   }
01118   return MI;
01119 }
01120 
01121 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
01122 /// starting from d8.  These instructions are assumed to execute while the
01123 /// stack is still aligned, unlike the code inserted by emitPopInst.
      ///
      /// All instructions are inserted before MI, which is dereferenced for its
      /// debug location.  r4 is used as the scratch address register and is
      /// loaded with the address of the d8 spill slot via a frame index operand.
01124 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
01125                                       MachineBasicBlock::iterator MI,
01126                                       unsigned NumAlignedDPRCS2Regs,
01127                                       const std::vector<CalleeSavedInfo> &CSI,
01128                                       const TargetRegisterInfo *TRI) {
01129   MachineFunction &MF = *MBB.getParent();
01130   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
01131   DebugLoc DL = MI->getDebugLoc();
01132   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
01133 
01134   // Find the frame index assigned to d8.
        // D8SpillFI stays 0 if CSI contains no entry for d8.
01135   int D8SpillFI = 0;
01136   for (unsigned i = 0, e = CSI.size(); i != e; ++i)
01137     if (CSI[i].getReg() == ARM::D8) {
01138       D8SpillFI = CSI[i].getFrameIdx();
01139       break;
01140     }
01141 
01142   // Materialize the address of the d8 spill slot into the scratch register r4.
01143   // This can be fairly complicated if the stack frame is large, so just use
01144   // the normal frame index elimination mechanism to do it.  This code runs as
01145   // the initial part of the epilog where the stack and base pointers haven't
01146   // been changed yet.
01147   bool isThumb = AFI->isThumbFunction();
01148   assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
01149 
01150   unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
01151   AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
01152                               .addFrameIndex(D8SpillFI).addImm(0)));
01153 
01154   // Now restore NumAlignedDPRCS2Regs registers starting from d8.
01155   unsigned NextReg = ARM::D8;
01156 
01157   // 16-byte aligned vld1.64 with 4 d-regs and writeback.
01158   if (NumAlignedDPRCS2Regs >= 6) {
01159     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
01160                                                &ARM::QQPRRegClass);
01161     AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
01162                    .addReg(ARM::R4, RegState::Define)
01163                    .addReg(ARM::R4, RegState::Kill).addImm(16)
01164                    .addReg(SupReg, RegState::ImplicitDefine));
01165     NextReg += 4;
01166     NumAlignedDPRCS2Regs -= 4;
01167   }
01168 
01169   // We won't modify r4 beyond this point.  It currently points to the slot of
01170   // the next register to be reloaded.
01171   unsigned R4BaseReg = NextReg;
01172 
01173   // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
01174   if (NumAlignedDPRCS2Regs >= 4) {
01175     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
01176                                                &ARM::QQPRRegClass);
01177     AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
01178                    .addReg(ARM::R4).addImm(16)
01179                    .addReg(SupReg, RegState::ImplicitDefine));
01180     NextReg += 4;
01181     NumAlignedDPRCS2Regs -= 4;
01182   }
01183 
01184   // 16-byte aligned vld1.64 with 2 d-regs.
01185   if (NumAlignedDPRCS2Regs >= 2) {
01186     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
01187                                                &ARM::QPRRegClass);
01188     AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
01189                    .addReg(ARM::R4).addImm(16));
01190     NextReg += 2;
01191     NumAlignedDPRCS2Regs -= 2;
01192   }
01193 
01194   // Finally, use a vanilla vldr.64 for the remaining odd register.
        // The immediate counts 2 units per register past the one r4 points at.
01195   if (NumAlignedDPRCS2Regs)
01196     AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
01197                    .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
01198 
01199   // The last reload instruction inserted kills the scratch register r4.
01200   std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
01201 }
01202 
      /// spillCalleeSavedRegisters - Insert the callee-saved register spills for
      /// CSI before MI.
      ///
      /// Registers are pushed per area, in the order area 1, area 2, area 3,
      /// followed by the aligned DPRCS2 spills when any are requested.  Returns
      /// false without inserting anything when CSI is empty, true otherwise.
01203 bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
01204                                         MachineBasicBlock::iterator MI,
01205                                         const std::vector<CalleeSavedInfo> &CSI,
01206                                         const TargetRegisterInfo *TRI) const {
01207   if (CSI.empty())
01208     return false;
01209 
01210   MachineFunction &MF = *MBB.getParent();
01211   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
01212 
        // Pick Thumb2 or ARM opcodes for the multi-register and single-register
        // pushes; the D-register push opcode is the same for both.
01213   unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
01214   unsigned PushOneOpc = AFI->isThumbFunction() ?
01215     ARM::t2STR_PRE : ARM::STR_PRE_IMM;
01216   unsigned FltOpc = ARM::VSTMDDB_UPD;
01217   unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
        // Areas 1 and 2 may contain gaps; area 3 is pushed with NoGap set, has no
        // single-register opcode, and leaves out the aligned DPRCS2 registers.
01218   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
01219                MachineInstr::FrameSetup);
01220   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
01221                MachineInstr::FrameSetup);
01222   emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
01223                NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
01224 
01225   // The code above does not insert spill code for the aligned DPRCS2 registers.
01226   // The stack realignment code will be inserted between the push instructions
01227   // and these spills.
01228   if (NumAlignedDPRCS2Regs)
01229     emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
01230 
01231   return true;
01232 }
01233 
      /// restoreCalleeSavedRegisters - Insert the callee-saved register reloads for
      /// CSI at MI.
      ///
      /// The aligned DPRCS2 reloads, if any, are emitted first, followed by pops
      /// for area 3, area 2 and area 1 in that order.  Returns false without
      /// inserting anything when CSI is empty, true otherwise.
01234 bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
01235                                         MachineBasicBlock::iterator MI,
01236                                         const std::vector<CalleeSavedInfo> &CSI,
01237                                         const TargetRegisterInfo *TRI) const {
01238   if (CSI.empty())
01239     return false;
01240 
01241   MachineFunction &MF = *MBB.getParent();
01242   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
        // A non-zero argument-register save area is treated as "vararg" here.
01243   bool isVarArg = AFI->getArgRegsSaveSize() > 0;
01244   unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
01245 
01246   // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
01247   // registers. Do that here instead.
01248   if (NumAlignedDPRCS2Regs)
01249     emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
01250 
        // Pick Thumb2 or ARM opcodes for the multi-register and single-register
        // pops; the D-register pop opcode is the same for both.
01251   unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
01252   unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
01253   unsigned FltOpc = ARM::VLDMDIA_UPD;
        // Area 3 is popped with NoGap set and no single-register opcode.
01254   emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
01255               NumAlignedDPRCS2Regs);
01256   emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
01257               &isARMArea2Register, 0);
01258   emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
01259               &isARMArea1Register, 0);
01260 
01261   return true;
01262 }
01263 
      /// GetFunctionSizeInBytes - Return the sum of TII.GetInstSizeInBytes over
      /// every instruction in every basic block of MF.
01264 // FIXME: Make generic?
01265 static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
01266                                        const ARMBaseInstrInfo &TII) {
01267   unsigned FnSize = 0;
01268   for (auto &MBB : MF) {
01269     for (auto &MI : MBB)
01270       FnSize += TII.GetInstSizeInBytes(&MI);
01271   }
01272   return FnSize;
01273 }
01274 
01275 /// estimateRSStackSizeLimit - Look at each instruction that references stack
01276 /// frames and return the stack size limit beyond which some of these
01277 /// instructions will require a scratch register during their expansion later.
      ///
      /// The result starts at 4095 and is lowered to the smallest limit implied by
      /// any instruction with a frame index operand.  It is 0 as soon as one such
      /// instruction uses addressing mode 4 or 6.
01278 // FIXME: Move to TII?
01279 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
01280                                          const TargetFrameLowering *TFI) {
01281   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
01282   unsigned Limit = (1 << 12) - 1;
01283   for (auto &MBB : MF) {
01284     for (auto &MI : MBB) {
            // Find the first frame index operand, if any, and classify the
            // instruction once.
01285       for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
01286         if (!MI.getOperand(i).isFI())
01287           continue;
01288 
01289         // When using ADDri to get the address of a stack object, 255 is the
01290         // largest offset guaranteed to fit in the immediate offset.
01291         if (MI.getOpcode() == ARM::ADDri) {
01292           Limit = std::min(Limit, (1U << 8) - 1);
01293           break;
01294         }
01295 
01296         // Otherwise check the addressing mode.
01297         switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
01298         case ARMII::AddrMode3:
01299         case ARMII::AddrModeT2_i8:
                // Limit of 255.
01300           Limit = std::min(Limit, (1U << 8) - 1);
01301           break;
01302         case ARMII::AddrMode5:
01303         case ARMII::AddrModeT2_i8s4:
                // Limit of 255 * 4 = 1020.
01304           Limit = std::min(Limit, ((1U << 8) - 1) * 4);
01305           break;
01306         case ARMII::AddrModeT2_i12:
01307           // i12 supports only positive offset so these will be converted to
01308           // i8 opcodes. See llvm::rewriteT2FrameIndex.
01309           if (TFI->hasFP(MF) && AFI->hasStackFrame())
01310             Limit = std::min(Limit, (1U << 8) - 1);
01311           break;
01312         case ARMII::AddrMode4:
01313         case ARMII::AddrMode6:
01314           // Addressing modes 4 & 6 (load/store) instructions can't encode an
01315           // immediate offset for stack references.
01316           return 0;
01317         default:
01318           break;
01319         }
01320         break; // At most one FI per instruction
01321       }
01322     }
01323   }
01324 
01325   return Limit;
01326 }
01327 
01328 // In functions that realign the stack, it can be an advantage to spill the
01329 // callee-saved vector registers after realigning the stack. The vst1 and vld1
01330 // instructions take alignment hints that can improve performance.
01331 //
      // This records in ARMFunctionInfo how many D-registers, starting from d8,
      // should be spilled that way.  The count is reset to 0 first and stays 0
      // whenever one of the checks below rejects the function.  When a non-zero
      // count is recorded, R4 is also marked as used.
01332 static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
01333   MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
        // Controlled by the hidden -align-neon-spills option.
01334   if (!SpillAlignedNEONRegs)
01335     return;
01336 
01337   // Naked functions don't spill callee-saved registers.
01338   if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
01339                                                      Attribute::Naked))
01340     return;
01341 
01342   // We are planning to use NEON instructions vst1 / vld1.
01343   if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON())
01344     return;
01345 
01346   // Don't bother if the default stack alignment is sufficiently high.
01347   if (MF.getTarget()
01348           .getSubtargetImpl()
01349           ->getFrameLowering()
01350           ->getStackAlignment() >= 8)
01351     return;
01352 
01353   // Aligned spills require stack realignment.
01354   const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
01355       MF.getSubtarget().getRegisterInfo());
01356   if (!RegInfo->canRealignStack(MF))
01357     return;
01358 
01359   // We always spill contiguous d-registers starting from d8. Count how many
01360   // needs spilling.  The register allocator will almost always use the
01361   // callee-saved registers in order, but it can happen that there are holes in
01362   // the range.  Registers above the hole will be spilled to the standard DPRCS
01363   // area.
01364   MachineRegisterInfo &MRI = MF.getRegInfo();
01365   unsigned NumSpills = 0;
        // Stop at the first unused register; at most 8 registers are counted.
01366   for (; NumSpills < 8; ++NumSpills)
01367     if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills))
01368       break;
01369 
01370   // Don't do this for just one d-register. It's not worth it.
01371   if (NumSpills < 2)
01372     return;
01373 
01374   // Spill the first NumSpills D-registers after realigning the stack.
01375   MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
01376 
01377   // A scratch register is required for the vst1 / vld1 instructions.
01378   MF.getRegInfo().setPhysRegUsed(ARM::R4);
01379 }
01380 
01381 void
01382 ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
01383                                                        RegScavenger *RS) const {
        // Marks extra physical registers as used (R4, LR, the base pointer, the
        // frame pointer and spare callee-saved GPRs) so that they get spilled,
        // decides whether the function needs a stack frame, and, for frames that
        // may need a scratch register to form stack offsets, either marks spare
        // callee-saved GPRs as used or registers a scavenging frame index with
        // RS.
        //
01384   // This tells PEI to spill the FP as if it is any other callee-save register
01385   // to take advantage of the eliminateFrameIndex machinery. This also ensures
01386   // it is spilled in the order specified by getCalleeSavedRegs() to make it
01387   // easier to combine multiple loads / stores.
01388   bool CanEliminateFrame = true;
01389   bool CS1Spilled = false;
01390   bool LRSpilled = false;
01391   unsigned NumGPRSpills = 0;
01392   SmallVector<unsigned, 4> UnspilledCS1GPRs;
01393   SmallVector<unsigned, 4> UnspilledCS2GPRs;
01394   const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
01395       MF.getSubtarget().getRegisterInfo());
01396   const ARMBaseInstrInfo &TII =
01397       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
01398   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
01399   MachineFrameInfo *MFI = MF.getFrameInfo();
01400   MachineRegisterInfo &MRI = MF.getRegInfo();
01401   unsigned FramePtr = RegInfo->getFrameRegister(MF);
01402 
01403   // Spill R4 if Thumb2 function requires stack realignment - it will be used as
01404   // scratch register. Also spill R4 if Thumb2 function has varsized objects,
01405   // since it's not always possible to restore sp from fp in a single
01406   // instruction.
01407   // FIXME: It will be better just to find spare register here.
01408   if (AFI->isThumb2Function() &&
01409       (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
01410     MRI.setPhysRegUsed(ARM::R4);
01411 
01412   if (AFI->isThumb1OnlyFunction()) {
01413     // Spill LR if Thumb1 function uses variable length argument lists.
01414     if (AFI->getArgRegsSaveSize() > 0)
01415       MRI.setPhysRegUsed(ARM::LR);
01416 
01417     // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
01418     // for sure what the stack size will be, but for this, an estimate is good
01419     // enough. If anything changes it, it'll be a spill, which implies
01420     // we've used all the registers and so R4 is already used, so not marking
01421     // it here will be OK.
01422     // FIXME: It will be better just to find spare register here.
01423     unsigned StackSize = MFI->estimateStackSize(MF);
01424     if (MFI->hasVarSizedObjects() || StackSize > 508)
01425       MRI.setPhysRegUsed(ARM::R4);
01426   }
01427 
01428   // See if we can spill vector registers to aligned stack.
01429   checkNumAlignedDPRCS2Regs(MF);
01430 
01431   // Spill the BasePtr if it's used.
01432   if (RegInfo->hasBasePointer(MF))
01433     MRI.setPhysRegUsed(RegInfo->getBaseRegister());
01434 
01435   // Don't spill FP if the frame can be eliminated. This is determined
01436   // by scanning the callee-save registers to see if any is used.
        // The same scan counts the spilled GPRs and sorts the unspilled ones
        // into the CS1 / CS2 lists used further down.
01437   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
01438   for (unsigned i = 0; CSRegs[i]; ++i) {
01439     unsigned Reg = CSRegs[i];
01440     bool Spilled = false;
01441     if (MRI.isPhysRegUsed(Reg)) {
01442       Spilled = true;
01443       CanEliminateFrame = false;
01444     }
01445 
          // Only GPRs take part in the bookkeeping below.
01446     if (!ARM::GPRRegClass.contains(Reg))
01447       continue;
01448 
01449     if (Spilled) {
01450       NumGPRSpills++;
01451 
            // On non-Darwin targets every spilled GPR counts towards CS1.
01452       if (!STI.isTargetDarwin()) {
01453         if (Reg == ARM::LR)
01454           LRSpilled = true;
01455         CS1Spilled = true;
01456         continue;
01457       }
01458 
01459       // Keep track of whether LR or any low register (R0-R7) is spilled.
01460       switch (Reg) {
01461       case ARM::LR:
01462         LRSpilled = true;
01463         // Fallthrough
01464       case ARM::R0: case ARM::R1:
01465       case ARM::R2: case ARM::R3:
01466       case ARM::R4: case ARM::R5:
01467       case ARM::R6: case ARM::R7:
01468         CS1Spilled = true;
01469         break;
01470       default:
01471         break;
01472       }
01473     } else {
            // On non-Darwin targets every unspilled GPR goes into the CS1 list.
01474       if (!STI.isTargetDarwin()) {
01475         UnspilledCS1GPRs.push_back(Reg);
01476         continue;
01477       }
01478 
            // Darwin: R0-R7 and LR go to CS1, every other GPR to CS2.
01479       switch (Reg) {
01480       case ARM::R0: case ARM::R1:
01481       case ARM::R2: case ARM::R3:
01482       case ARM::R4: case ARM::R5:
01483       case ARM::R6: case ARM::R7:
01484       case ARM::LR:
01485         UnspilledCS1GPRs.push_back(Reg);
01486         break;
01487       default:
01488         UnspilledCS2GPRs.push_back(Reg);
01489         break;
01490       }
01491     }
01492   }
01493 
01494   bool ForceLRSpill = false;
01495   if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
01496     unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
01497     // Force LR to be spilled if the Thumb function size is >= 2048. This
01498     // enables use of BL to implement far jump. If it turns out that it's not
01499     // needed then the branch fix up path will undo it.
01500     if (FnSize >= (1 << 11)) {
01501       CanEliminateFrame = false;
01502       ForceLRSpill = true;
01503     }
01504   }
01505 
01506   // If any of the stack slot references may be out of range of an immediate
01507   // offset, make sure a register (or a spill slot) is available for the
01508   // register scavenger. Note that if we're indexing off the frame pointer, the
01509   // effective stack size is 4 bytes larger since the FP points to the stack
01510   // slot of the previous FP. Also, if we have variable sized objects in the
01511   // function, stack slot references will often be negative, and some of
01512   // our instructions are positive-offset only, so conservatively consider
01513   // that case to want a spill slot (or register) as well. Similarly, if
01514   // the function adjusts the stack pointer during execution and the
01515   // adjustments aren't already part of our stack size estimate, our offset
01516   // calculations may be off, so be conservative.
01517   // FIXME: We could add logic to be more precise about negative offsets
01518   //        and which instructions will need a scratch register for them. Is it
01519   //        worth the effort and added fragility?
01520   bool BigStack =
01521     (RS &&
01522      (MFI->estimateStackSize(MF) +
01523       ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
01524       estimateRSStackSizeLimit(MF, this)))
01525     || MFI->hasVarSizedObjects()
01526     || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
01527 
        // Set once an extra, non-reserved callee-saved register has been marked
        // used; such a register makes the scavenger provisions below unnecessary.
01528   bool ExtraCSSpill = false;
01529   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
01530     AFI->setHasStackFrame(true);
01531 
01532     // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
01533     // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
01534     if (!LRSpilled && CS1Spilled) {
01535       MRI.setPhysRegUsed(ARM::LR);
01536       NumGPRSpills++;
01537       SmallVectorImpl<unsigned>::iterator LRPos;
01538       LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
01539                         (unsigned)ARM::LR);
01540       if (LRPos != UnspilledCS1GPRs.end())
01541         UnspilledCS1GPRs.erase(LRPos);
01542 
            // LR is now spilled anyway, so the far-jump forcing is not needed.
01543       ForceLRSpill = false;
01544       ExtraCSSpill = true;
01545     }
01546 
          // A frame pointer is always spilled; drop it from the unspilled list.
          // NOTE(review): NumGPRSpills is incremented unconditionally here, even
          // if FramePtr was already counted by the scan above - confirm that
          // cannot happen.
01547     if (hasFP(MF)) {
01548       MRI.setPhysRegUsed(FramePtr);
01549       auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
01550                              FramePtr);
01551       if (FPPos != UnspilledCS1GPRs.end())
01552         UnspilledCS1GPRs.erase(FPPos);
01553       NumGPRSpills++;
01554     }
01555 
01556     // If stack and double are 8-byte aligned and we are spilling an odd number
01557     // of GPRs, spill one extra callee save GPR so we won't have to pad between
01558     // the integer and double callee save areas.
01559     unsigned TargetAlign = getStackAlignment();
01560     if (TargetAlign == 8 && (NumGPRSpills & 1)) {
01561       if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
01562         for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
01563           unsigned Reg = UnspilledCS1GPRs[i];
01564           // Don't spill high register if the function is thumb1
01565           if (!AFI->isThumb1OnlyFunction() ||
01566               isARMLowRegister(Reg) || Reg == ARM::LR) {
01567             MRI.setPhysRegUsed(Reg);
                  // A reserved register does not count as an extra spill.
01568             if (!MRI.isReserved(Reg))
01569               ExtraCSSpill = true;
01570             break;
01571           }
01572         }
01573       } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
01574         unsigned Reg = UnspilledCS2GPRs.front();
01575         MRI.setPhysRegUsed(Reg);
01576         if (!MRI.isReserved(Reg))
01577           ExtraCSSpill = true;
01578       }
01579     }
01580 
01581     // Estimate if we might need to scavenge a register at some point in order
01582     // to materialize a stack offset. If so, either spill one additional
01583     // callee-saved register or reserve a special spill slot to facilitate
01584     // register scavenging. Thumb1 needs a spill slot for stack pointer
01585     // adjustments also, even when the frame itself is small.
01586     if (BigStack && !ExtraCSSpill) {
01587       // If any non-reserved CS register isn't spilled, just spill one or two
01588       // extra. That should take care of it!
01589       unsigned NumExtras = TargetAlign / 4;
01590       SmallVector<unsigned, 2> Extras;
01591       while (NumExtras && !UnspilledCS1GPRs.empty()) {
01592         unsigned Reg = UnspilledCS1GPRs.back();
01593         UnspilledCS1GPRs.pop_back();
01594         if (!MRI.isReserved(Reg) &&
01595             (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
01596              Reg == ARM::LR)) {
01597           Extras.push_back(Reg);
01598           NumExtras--;
01599         }
01600       }
01601       // For non-Thumb1 functions, also check for hi-reg CS registers
01602       if (!AFI->isThumb1OnlyFunction()) {
01603         while (NumExtras && !UnspilledCS2GPRs.empty()) {
01604           unsigned Reg = UnspilledCS2GPRs.back();
01605           UnspilledCS2GPRs.pop_back();
01606           if (!MRI.isReserved(Reg)) {
01607             Extras.push_back(Reg);
01608             NumExtras--;
01609           }
01610         }
01611       }
            // The extras are only used when the full count was found; a partial
            // set is discarded in favour of a scavenging slot.
01612       if (Extras.size() && NumExtras == 0) {
01613         for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
01614           MRI.setPhysRegUsed(Extras[i]);
01615         }
01616       } else if (!AFI->isThumb1OnlyFunction()) {
01617         // note: Thumb1 functions spill to R12, not the stack.  Reserve a slot
01618         // closest to SP or frame pointer.
              // NOTE(review): BigStack can be true through the
              // hasVarSizedObjects() / adjustsStack() terms even when RS is
              // null, and RS is dereferenced here without a check - confirm
              // callers always pass a scavenger on this path.
01619         const TargetRegisterClass *RC = &ARM::GPRRegClass;
01620         RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
01621                                                            RC->getAlignment(),
01622                                                            false));
01623       }
01624     }
01625   }
01626 
        // Still pending only if LR was not spilled for any other reason above.
01627   if (ForceLRSpill) {
01628     MRI.setPhysRegUsed(ARM::LR);
01629     AFI->setLRIsSpilledForFarJump(true);
01630   }
01631 }
01632 
01633 
01634 void ARMFrameLowering::
01635 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
01636                               MachineBasicBlock::iterator I) const {
        // Removes the call-frame pseudo instruction at I. When the call frame is
        // not reserved and the adjustment is non-zero, an explicit SP update
        // (carrying the pseudo's predicate) is emitted at I first; in every
        // case the pseudo itself is erased from MBB.
01637   const ARMBaseInstrInfo &TII =
01638       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
01639   if (!hasReservedCallFrame(MF)) {
01640     // If we have alloca, convert as follows:
01641     // ADJCALLSTACKDOWN -> sub, sp, sp, amount
01642     // ADJCALLSTACKUP   -> add, sp, sp, amount
01643     MachineInstr *Old = I;
01644     DebugLoc dl = Old->getDebugLoc();
01645     unsigned Amount = Old->getOperand(0).getImm();
01646     if (Amount != 0) {
01647       // We need to keep the stack aligned properly.  To do this, we round the
01648       // amount of space needed for the outgoing arguments up to the next
01649       // alignment boundary.
01650       unsigned Align = getStackAlignment();
01651       Amount = (Amount+Align-1)/Align*Align;
01652 
01653       ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
01654       assert(!AFI->isThumb1OnlyFunction() &&
01655              "This eliminateCallFramePseudoInstr does not support Thumb1!");
01656       bool isARM = !AFI->isThumbFunction();
01657 
01658       // Replace the pseudo instruction with a new instruction...
01659       unsigned Opc = Old->getOpcode();
            // Reuse the pseudo's condition code; default to "always" when it
            // carries no predicate operand.
01660       int PIdx = Old->findFirstPredOperandIdx();
01661       ARMCC::CondCodes Pred = (PIdx == -1)
01662         ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
01663       if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
01664         // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
01665         unsigned PredReg = Old->getOperand(2).getReg();
              // Amount is unsigned, so -Amount wraps; presumably emitSPUpdate
              // takes a signed byte count and sees a negative adjustment -
              // confirm against its declaration.
01666         emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
01667                      Pred, PredReg);
01668       } else {
01669         // Note: PredReg is operand 3 for ADJCALLSTACKUP.
01670         unsigned PredReg = Old->getOperand(3).getReg();
              // NOTE(review): the opcode is only asserted after operand 3 has
              // already been read.
01671         assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
01672         emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
01673                      Pred, PredReg);
01674       }
01675     }
01676   }
01677   MBB.erase(I);
01678 }
01679 
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
///
/// Only encodings whose 8-bit field does not wrap around bit 0 are considered,
/// so for an input such as 0xF000000F (itself encodable through a wrapping
/// rotation) the result is still a valid constant, but not the smallest one.
///
/// \param Value  the value to round up. It must not exceed 0xFF000000, the
///               largest encodable constant.
/// \returns 0 when Value is 0, otherwise Value rounded up to an encodable
///          constant.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Nothing above 0xFF000000 is encodable. Rounding such a value up would carry
  // out of the 32-bit word and silently produce 0, so reject it explicitly.
  assert(Value <= 0xFF000000u && "No ARM constant is >= Value");

  // Normalize: shift left two bits at a time (rotations are even) until the
  // highest set bit lands in bit 31 or bit 30, remembering the distance.
  unsigned Shifted = 0;
  while (!(Value & 0xC0000000)) {
    Value <<= 2;
    Shifted += 2;
  }

  // Keep the top byte and round it up if any of the dropped low bits was set.
  // The result lies in [0x40, 0x100]; 0x100 is a single bit and therefore
  // still encodable once shifted back into place.
  const bool Carry = (Value & 0x00FFFFFF) != 0;
  Value = (Value >> 24) + Carry;

  // Undo the normalization so the byte returns to its original position.
  if (Shifted > 24)
    Value >>= Shifted - 24;
  else
    Value <<= 24 - Shifted;

  return Value;
}
01708 
01709 // The stack limit in the TCB is set to this many bytes above the actual
01710 // stack limit.
      // adjustForSegmentedStacks compares the stack pointer itself against the
      // limit, without first subtracting the frame size, when the aligned frame
      // size is below this value.
01711 static const uint64_t kSplitStackAvailable = 256;
01712 
01713 // Adjust the function prologue to enable split stacks. This currently only
01714 // supports android and linux.
01715 //
01716 // The ABI of the segmented stack prologue is a little arbitrarily chosen, but
01717 // must be well defined in order to allow for consistent implementations of the
01718 // __morestack helper function. The ABI is also not a normal ABI in that it
01719 // doesn't follow the normal calling conventions because this allows the
01720 // prologue of each function to be optimized further.
01721 //
01722 // Currently, the ABI looks like (when calling __morestack)
01723 //
01724 //  * r4 holds the minimum stack size requested for this function call
01725 //  * r5 holds the stack size of the arguments to the function
01726 //  * the beginning of the function is 3 instructions after the call to
01727 //    __morestack
01728 //
01729 // Implementations of __morestack should use r4 to allocate a new stack, r5 to
01730 // place the arguments on to the new stack, and the 3-instruction knowledge to
01731 // jump directly to the body of the function when working on the new stack.
01732 //
01733 // An old (and possibly no longer compatible) implementation of __morestack for
01734 // ARM can be found at [1].
01735 //
01736 // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
      //
      // Five new blocks are placed in front of the original entry block, in
      // this order:
      //   PrevStackMBB - push the two scratch registers
      //   McrMBB       - compute SP (minus the frame size for large frames)
      //   GetMBB       - load the stack limit, compare, branch to PostStackMBB
      //   AllocMBB     - call __morestack, restore registers, return
      //   PostStackMBB - pop the scratch registers, fall into the old prologue
01737 void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
01738   unsigned Opcode;
01739   unsigned CFIIndex;
01740   const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>();
01741   bool Thumb = ST->isThumb();
01742 
01743   // Sadly, this currently doesn't support varargs or platforms other than
01744   // android/linux. Note that thumb1/thumb2 are supported for android/linux.
01745   if (MF.getFunction()->isVarArg())
01746     report_fatal_error("Segmented stacks do not support vararg functions.");
01747   if (!ST->isTargetAndroid() && !ST->isTargetLinux())
01748     report_fatal_error("Segmented stacks not supported on this platform.");
01749 
01750   MachineBasicBlock &prologueMBB = MF.front();
01751   MachineFrameInfo *MFI = MF.getFrameInfo();
01752   MachineModuleInfo &MMI = MF.getMMI();
01753   MCContext &Context = MMI.getContext();
01754   const MCRegisterInfo *MRI = Context.getRegisterInfo();
01755   const ARMBaseInstrInfo &TII =
01756       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
01757   ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
01758   DebugLoc DL;
01759 
01760   uint64_t StackSize = MFI->getStackSize();
01761 
01762   // Do not generate a prologue for functions with a stack of size zero
01763   if (StackSize == 0)
01764     return;
01765 
01766   // Use R4 and R5 as scratch registers.
01767   // We save R4 and R5 before use and restore them before leaving the function.
01768   unsigned ScratchReg0 = ARM::R4;
01769   unsigned ScratchReg1 = ARM::R5;
01770   uint64_t AlignedStackSize;
01771 
01772   MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
01773   MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
01774   MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
01775   MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
01776   MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
01777 
        // Every new block inherits the live-ins of the original entry block.
01778   for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
01779                                           e = prologueMBB.livein_end();
01780        i != e; ++i) {
01781     AllocMBB->addLiveIn(*i);
01782     GetMBB->addLiveIn(*i);
01783     McrMBB->addLiveIn(*i);
01784     PrevStackMBB->addLiveIn(*i);
01785     PostStackMBB->addLiveIn(*i);
01786   }
01787 
        // Each push_front puts its block first, so the blocks are inserted in
        // reverse of their final layout order.
01788   MF.push_front(PostStackMBB);
01789   MF.push_front(AllocMBB);
01790   MF.push_front(GetMBB);
01791   MF.push_front(McrMBB);
01792   MF.push_front(PrevStackMBB);
01793 
01794   // The required stack size that is aligned to ARM constant criterion.
        // alignToARMConstant takes a uint32_t, so the 64-bit StackSize is
        // narrowed at this call.
01795   AlignedStackSize = alignToARMConstant(StackSize);
01796 
01797   // When the frame size is less than 256 we just compare the stack
01798   // boundary directly to the value of the stack pointer, per gcc.
01799   bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
01800 
01801   // We will use two of the callee save registers as scratch registers so we
01802   // need to save those registers onto the stack.
01803   // We will use SR0 to hold stack limit and SR1 to hold the stack size
01804   // requested and arguments for __morestack().
01805   // SR0: Scratch Register #0
01806   // SR1: Scratch Register #1
01807   // push {SR0, SR1}
01808   if (Thumb) {
01809     AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)))
01810         .addReg(ScratchReg0).addReg(ScratchReg1);
01811   } else {
01812     AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
01813                    .addReg(ARM::SP, RegState::Define).addReg(ARM::SP))
01814         .addReg(ScratchReg0).addReg(ScratchReg1);
01815   }
01816 
01817   // Emit the relevant DWARF information about the change in stack pointer as
01818   // well as where to find both r4 and r5 (the callee-save registers)
01819   CFIIndex =
01820       MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
01821   BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
01822       .addCFIIndex(CFIIndex);
01823   CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
01824       nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
01825   BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
01826       .addCFIIndex(CFIIndex);
01827   CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
01828       nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
01829   BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
01830       .addCFIIndex(CFIIndex);
01831 
01832   // mov SR1, sp
        // Thumb always copies SP into SR1 first; ARM only needs the copy when
        // SP itself is compared, because its subtract below reads SP directly.
01833   if (Thumb) {
01834     AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
01835                       .addReg(ARM::SP));
01836   } else if (CompareStackPointer) {
01837     AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
01838                       .addReg(ARM::SP)).addReg(0);
01839   }
01840 
01841   // sub SR1, sp, #StackSize
        // (Thumb subtracts from the SR1 copy made above instead.)
        // NOTE(review): tSUBi8 is given AlignedStackSize as its immediate;
        // presumably that operand is limited to 8 bits - confirm frames of 256
        // bytes or more are handled correctly in Thumb mode.
01842   if (!CompareStackPointer && Thumb) {
01843     AddDefaultPred(
01844         AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1))
01845             .addReg(ScratchReg1).addImm(AlignedStackSize));
01846   } else if (!CompareStackPointer) {
01847     AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
01848                       .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
01849   }
01850 
        // Thumb1 reads the limit through the __STACK_LIMIT symbol placed in the
        // constant pool; every other configuration reads it from thread-local
        // storage.
01851   if (Thumb && ST->isThumb1Only()) {
01852     unsigned PCLabelId = ARMFI->createPICLabelUId();
01853     ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
01854         MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0);
01855     MachineConstantPool *MCP = MF.getConstantPool();
01856     unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment());
01857 
01858     // ldr SR0, [pc, offset(STACK_LIMIT)]
01859     AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
01860                       .addConstantPoolIndex(CPI));
01861 
01862     // ldr SR0, [SR0]
01863     AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
01864                       .addReg(ScratchReg0).addImm(0));
01865   } else {
          // NOTE(review): this branch is also taken for Thumb2 (Thumb is true
          // and isThumb1Only() is false), yet it uses the ARM::MRC and
          // ARM::LDRi12 opcodes unconditionally - confirm they are valid for
          // Thumb2 functions.
01866     // Get TLS base address from the coprocessor
01867     // mrc p15, #0, SR0, c13, c0, #3
01868     AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
01869                      .addImm(15)
01870                      .addImm(0)
01871                      .addImm(13)
01872                      .addImm(0)
01873                      .addImm(3));
01874 
01875     // Use the last tls slot on android and a private field of the TCB on linux.
01876     assert(ST->isTargetAndroid() || ST->isTargetLinux());
01877     unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
01878 
01879     // Get the stack limit from the right offset
01880     // ldr SR0, [sr0, #4 * TlsOffset]
01881     AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
01882                       .addReg(ScratchReg0).addImm(4 * TlsOffset));
01883   }
01884 
01885   // Compare stack limit with stack size requested.
01886   // cmp SR0, SR1
01887   Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
01888   AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode))
01889                     .addReg(ScratchReg0)
01890                     .addReg(ScratchReg1));
01891 
01892   // This jump is taken if StackLimit < SP - stack required.
01893   Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
01894   BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
01895        .addImm(ARMCC::LO)
01896        .addReg(ARM::CPSR);
01897 
01898 
01899   // Calling __morestack(StackSize, Size of stack arguments).
01900   // __morestack knows that the stack size requested is in SR0 (r4)
01901   // and the size of the stack arguments is in SR1 (r5).
01902 
01903   // Pass first argument for the __morestack by Scratch Register #0.
01904   //   The amount of stack required
        // NOTE(review): tMOVi8 is given AlignedStackSize as its immediate;
        // presumably limited to 8 bits - confirm for large frames.
01905   if (Thumb) {
01906     AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8),
01907                                         ScratchReg0)).addImm(AlignedStackSize));
01908   } else {
01909     AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
01910                       .addImm(AlignedStackSize)).addReg(0);
01911   }
01912   // Pass second argument for the __morestack by Scratch Register #1.
01913   //   The amount of stack consumed by the function arguments.
01914   if (Thumb) {
01915     AddDefaultPred(
01916         AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1))
01917             .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())));
01918   } else {
01919     AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
01920                    .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())))
01921                    .addReg(0);
01922   }
01923 
01924   // push {lr} - Save return address of this function.
01925   if (Thumb) {
01926     AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)))
01927         .addReg(ARM::LR);
01928   } else {
01929     AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
01930                    .addReg(ARM::SP, RegState::Define)
01931                    .addReg(ARM::SP))
01932         .addReg(ARM::LR);
01933   }
01934 
01935   // Emit the DWARF info about the change in stack as well as where to find the
01936   // previous link register
01937   CFIIndex =
01938       MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
01939   BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
01940       .addCFIIndex(CFIIndex);
01941   CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
01942         nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
01943   BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
01944       .addCFIIndex(CFIIndex);
01945 
01946   // Call __morestack().
01947   if (Thumb) {
01948     AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL)))
01949         .addExternalSymbol("__morestack");
01950   } else {
01951     BuildMI(AllocMBB, DL, TII.get(ARM::BL))
01952         .addExternalSymbol("__morestack");
01953   }
01954 
01955   // pop {lr} - Restore return address of this original function.
01956   if (Thumb) {
01957     if (ST->isThumb1Only()) {
            // Thumb1: pop the saved value into SR0, then copy it to LR.
            // NOTE(review): this is two instructions where ARM/Thumb2 emit
            // one, so one more instruction follows the __morestack call than
            // on the other paths - confirm this matches the "3 instructions"
            // contract described in the header comment.
01958       AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
01959                      .addReg(ScratchReg0);
01960       AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
01961                      .addReg(ScratchReg0));
01962     } else {
01963       AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
01964                      .addReg(ARM::LR, RegState::Define)
01965                      .addReg(ARM::SP, RegState::Define)
01966                      .addReg(ARM::SP)
01967                      .addImm(4));
01968     }
01969   } else {
01970     AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
01971                    .addReg(ARM::SP, RegState::Define)
01972                    .addReg(ARM::SP))
01973       .addReg(ARM::LR);
01974   }
01975 
01976   // Restore SR0 and SR1 in case __morestack() was called.
01977   // __morestack() will skip PostStackMBB block so we need to restore
01978   // scratch registers from here.
01979   // pop {SR0, SR1}
01980   if (Thumb) {
01981     AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
01982       .addReg(ScratchReg0)
01983       .addReg(ScratchReg1);
01984   } else {
01985     AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
01986                    .addReg(ARM::SP, RegState::Define)
01987                    .addReg(ARM::SP))
01988       .addReg(ScratchReg0)
01989       .addReg(ScratchReg1);
01990   }
01991 
01992   // Update the CFA offset now that we've popped
01993   CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
01994   BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
01995       .addCFIIndex(CFIIndex);
01996 
01997   // bx lr - Return from this function.
01998   Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET;
01999   AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode)));
02000 
02001   // Restore SR0 and SR1 in case __morestack() was not called.
02002   // pop {SR0, SR1}
02003   if (Thumb) {
02004     AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)))
02005       .addReg(ScratchReg0)
02006       .addReg(ScratchReg1);
02007   } else {
02008     AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
02009                    .addReg(ARM::SP, RegState::Define)
02010                    .addReg(ARM::SP))
02011       .addReg(ScratchReg0)
02012       .addReg(ScratchReg1);
02013   }
02014 
02015   // Update the CFA offset now that we've popped
02016   CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
02017   BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
02018       .addCFIIndex(CFIIndex);
02019 
02020   // Tell debuggers that r4 and r5 are now the same as they were in the
02021   // previous function, that they're the "Same Value".
02022   CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
02023       nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
02024   BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
02025       .addCFIIndex(CFIIndex);
02026   CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
02027       nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
02028   BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
02029       .addCFIIndex(CFIIndex);
02030 
02031   // Organizing MBB lists
        // Record the control-flow edges between the new blocks and the
        // original entry block.
02032   PostStackMBB->addSuccessor(&prologueMBB);
02033 
02034   AllocMBB->addSuccessor(PostStackMBB);
02035 
02036   GetMBB->addSuccessor(PostStackMBB);
02037   GetMBB->addSuccessor(AllocMBB);
02038 
02039   McrMBB->addSuccessor(GetMBB);
02040 
02041   PrevStackMBB->addSuccessor(McrMBB);
02042 
02043 #ifdef XDEBUG
02044   MF.verify();
02045 #endif
02046 }