LLVM API Documentation

AArch64FrameLowering.cpp
Go to the documentation of this file.
00001 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains the AArch64 implementation of TargetFrameLowering class.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "AArch64FrameLowering.h"
00015 #include "AArch64InstrInfo.h"
00016 #include "AArch64MachineFunctionInfo.h"
00017 #include "AArch64Subtarget.h"
00018 #include "AArch64TargetMachine.h"
00019 #include "llvm/ADT/Statistic.h"
00020 #include "llvm/CodeGen/MachineFrameInfo.h"
00021 #include "llvm/CodeGen/MachineFunction.h"
00022 #include "llvm/CodeGen/MachineInstrBuilder.h"
00023 #include "llvm/CodeGen/MachineModuleInfo.h"
00024 #include "llvm/CodeGen/MachineRegisterInfo.h"
00025 #include "llvm/CodeGen/RegisterScavenging.h"
00026 #include "llvm/IR/DataLayout.h"
00027 #include "llvm/IR/Function.h"
00028 #include "llvm/Support/CommandLine.h"
00029 #include "llvm/Support/Debug.h"
00030 #include "llvm/Support/raw_ostream.h"
00031 
00032 using namespace llvm;
00033 
00034 #define DEBUG_TYPE "frame-info"
00035 
// Command-line flag "aarch64-redzone": gates red-zone usage in
// canUseRedZone(). It is initialised to false and declared cl::Hidden.
00036 static cl::opt<bool> EnableRedZone("aarch64-redzone",
00037                                    cl::desc("enable use of redzone on AArch64"),
00038                                    cl::init(false), cl::Hidden);
00039 
// Statistic bumped by emitPrologue() for each frameless function whose
// non-zero local area is left in the red zone instead of being allocated.
00040 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
00041 
00042 static unsigned estimateStackSize(MachineFunction &MF) {
00043   const MachineFrameInfo *FFI = MF.getFrameInfo();
00044   int Offset = 0;
00045   for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
00046     int FixedOff = -FFI->getObjectOffset(i);
00047     if (FixedOff > Offset)
00048       Offset = FixedOff;
00049   }
00050   for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
00051     if (FFI->isDeadObjectIndex(i))
00052       continue;
00053     Offset += FFI->getObjectSize(i);
00054     unsigned Align = FFI->getObjectAlignment(i);
00055     // Adjust to alignment boundary
00056     Offset = (Offset + Align - 1) / Align * Align;
00057   }
00058   // This does not include the 16 bytes used for fp and lr.
00059   return (unsigned)Offset;
00060 }
00061 
00062 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
00063   if (!EnableRedZone)
00064     return false;
00065   // Don't use the red zone if the function explicitly asks us not to.
00066   // This is typically used for kernel code.
00067   if (MF.getFunction()->getAttributes().hasAttribute(
00068           AttributeSet::FunctionIndex, Attribute::NoRedZone))
00069     return false;
00070 
00071   const MachineFrameInfo *MFI = MF.getFrameInfo();
00072   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
00073   unsigned NumBytes = AFI->getLocalStackSize();
00074 
00075   // Note: currently hasFP() is always true for hasCalls(), but that's an
00076   // implementation detail of the current code, not a strict requirement,
00077   // so stay safe here and check both.
00078   if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
00079     return false;
00080   return true;
00081 }
00082 
00083 /// hasFP - Return true if the specified function should have a dedicated frame
00084 /// pointer register.
00085 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
00086   const MachineFrameInfo *MFI = MF.getFrameInfo();
00087 
00088 #ifndef NDEBUG
00089   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
00090   assert(!RegInfo->needsStackRealignment(MF) &&
00091          "No stack realignment on AArch64!");
00092 #endif
00093 
00094   return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
00095           MFI->isFrameAddressTaken());
00096 }
00097 
00098 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
00099 /// not required, we reserve argument space for call sites in the function
00100 /// immediately on entry to the current function.  This eliminates the need for
00101 /// add/sub sp brackets around call sites.  Returns true if the call frame is
00102 /// included as part of the stack frame.
00103 bool
00104 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
00105   return !MF.getFrameInfo()->hasVarSizedObjects();
00106 }
00107 
00108 void AArch64FrameLowering::eliminateCallFramePseudoInstr(
00109     MachineFunction &MF, MachineBasicBlock &MBB,
00110     MachineBasicBlock::iterator I) const {
00111   const AArch64InstrInfo *TII =
00112       static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
00113   DebugLoc DL = I->getDebugLoc();
00114   int Opc = I->getOpcode();
00115   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
00116   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
00117 
00118   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
00119   if (!TFI->hasReservedCallFrame(MF)) {
00120     unsigned Align = getStackAlignment();
00121 
00122     int64_t Amount = I->getOperand(0).getImm();
00123     Amount = RoundUpToAlignment(Amount, Align);
00124     if (!IsDestroy)
00125       Amount = -Amount;
00126 
00127     // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
00128     // doesn't have to pop anything), then the first operand will be zero too so
00129     // this adjustment is a no-op.
00130     if (CalleePopAmount == 0) {
00131       // FIXME: in-function stack adjustment for calls is limited to 24-bits
00132       // because there's no guaranteed temporary register available.
00133       //
00134       // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
00135       // 1) For offset <= 12-bit, we use LSL #0
00136       // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
00137       // LSL #0, and the other uses LSL #12.
00138       //
00139       // Mostly call frames will be allocated at the start of a function so
00140       // this is OK, but it is a limitation that needs dealing with.
00141       assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
00142       emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
00143     }
00144   } else if (CalleePopAmount != 0) {
00145     // If the calling convention demands that the callee pops arguments from the
00146     // stack, we want to add it back if we have a reserved call frame.
00147     assert(CalleePopAmount < 0xffffff && "call frame too large");
00148     emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
00149                     TII);
00150   }
00151   MBB.erase(I);
00152 }
00153 
00154 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
00155     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
00156     unsigned FramePtr) const {
00157   MachineFunction &MF = *MBB.getParent();
00158   MachineFrameInfo *MFI = MF.getFrameInfo();
00159   MachineModuleInfo &MMI = MF.getMMI();
00160   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
00161   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
00162   DebugLoc DL = MBB.findDebugLoc(MBBI);
00163 
00164   // Add callee saved registers to move list.
00165   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
00166   if (CSI.empty())
00167     return;
00168 
00169   const DataLayout *TD = MF.getSubtarget().getDataLayout();
00170   bool HasFP = hasFP(MF);
00171 
00172   // Calculate amount of bytes used for return address storing.
00173   int stackGrowth = -TD->getPointerSize(0);
00174 
00175   // Calculate offsets.
00176   int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
00177   unsigned TotalSkipped = 0;
00178   for (const auto &Info : CSI) {
00179     unsigned Reg = Info.getReg();
00180     int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) -
00181                      getOffsetOfLocalArea() + saveAreaOffset;
00182 
00183     // Don't output a new CFI directive if we're re-saving the frame pointer or
00184     // link register. This happens when the PrologEpilogInserter has inserted an
00185     // extra "STP" of the frame pointer and link register -- the "emitPrologue"
00186     // method automatically generates the directives when frame pointers are
00187     // used. If we generate CFI directives for the extra "STP"s, the linker will
00188     // lose track of the correct values for the frame pointer and link register.
00189     if (HasFP && (FramePtr == Reg || Reg == AArch64::LR)) {
00190       TotalSkipped += stackGrowth;
00191       continue;
00192     }
00193 
00194     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
00195     unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
00196         nullptr, DwarfReg, Offset - TotalSkipped));
00197     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
00198         .addCFIIndex(CFIIndex);
00199   }
00200 }
00201 
00202 void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
00203   MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
00204   MachineBasicBlock::iterator MBBI = MBB.begin();
00205   const MachineFrameInfo *MFI = MF.getFrameInfo();
00206   const Function *Fn = MF.getFunction();
00207   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
00208       MF.getSubtarget().getRegisterInfo());
00209   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
00210   MachineModuleInfo &MMI = MF.getMMI();
00211   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
00212   bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
00213   bool HasFP = hasFP(MF);
00214   DebugLoc DL = MBB.findDebugLoc(MBBI);
00215 
00216   int NumBytes = (int)MFI->getStackSize();
00217   if (!AFI->hasStackFrame()) {
00218     assert(!HasFP && "unexpected function without stack frame but with FP");
00219 
00220     // All of the stack allocation is for locals.
00221     AFI->setLocalStackSize(NumBytes);
00222 
00223     // Label used to tie together the PROLOG_LABEL and the MachineMoves.
00224     MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
00225 
00226     // REDZONE: If the stack size is less than 128 bytes, we don't need
00227     // to actually allocate.
00228     if (NumBytes && !canUseRedZone(MF)) {
00229       emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
00230                       MachineInstr::FrameSetup);
00231 
00232       // Encode the stack size of the leaf function.
00233       unsigned CFIIndex = MMI.addFrameInst(
00234           MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
00235       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
00236           .addCFIIndex(CFIIndex);
00237     } else if (NumBytes) {
00238       ++NumRedZoneFunctions;
00239     }
00240 
00241     return;
00242   }
00243 
00244   // Only set up FP if we actually need to.
00245   int FPOffset = 0;
00246   if (HasFP) {
00247     // First instruction must a) allocate the stack  and b) have an immediate
00248     // that is a multiple of -2.
00249     assert((MBBI->getOpcode() == AArch64::STPXpre ||
00250             MBBI->getOpcode() == AArch64::STPDpre) &&
00251            MBBI->getOperand(3).getReg() == AArch64::SP &&
00252            MBBI->getOperand(4).getImm() < 0 &&
00253            (MBBI->getOperand(4).getImm() & 1) == 0);
00254 
00255     // Frame pointer is fp = sp - 16. Since the  STPXpre subtracts the space
00256     // required for the callee saved register area we get the frame pointer
00257     // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
00258     FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
00259     assert(FPOffset >= 0 && "Bad Framepointer Offset");
00260   }
00261 
00262   // Move past the saves of the callee-saved registers.
00263   while (MBBI->getOpcode() == AArch64::STPXi ||
00264          MBBI->getOpcode() == AArch64::STPDi ||
00265          MBBI->getOpcode() == AArch64::STPXpre ||
00266          MBBI->getOpcode() == AArch64::STPDpre) {
00267     ++MBBI;
00268     NumBytes -= 16;
00269   }
00270   assert(NumBytes >= 0 && "Negative stack allocation size!?");
00271   if (HasFP) {
00272     // Issue    sub fp, sp, FPOffset or
00273     //          mov fp,sp          when FPOffset is zero.
00274     // Note: All stores of callee-saved registers are marked as "FrameSetup".
00275     // This code marks the instruction(s) that set the FP also.
00276     emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
00277                     MachineInstr::FrameSetup);
00278   }
00279 
00280   // All of the remaining stack allocations are for locals.
00281   AFI->setLocalStackSize(NumBytes);
00282 
00283   // Allocate space for the rest of the frame.
00284   if (NumBytes) {
00285     // If we're a leaf function, try using the red zone.
00286     if (!canUseRedZone(MF))
00287       emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
00288                       MachineInstr::FrameSetup);
00289   }
00290 
00291   // If we need a base pointer, set it up here. It's whatever the value of the
00292   // stack pointer is at this point. Any variable size objects will be allocated
00293   // after this, so we can still use the base pointer to reference locals.
00294   //
00295   // FIXME: Clarify FrameSetup flags here.
00296   // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
00297   // needed.
00298   //
00299   if (RegInfo->hasBasePointer(MF))
00300     TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
00301 
00302   if (needsFrameMoves) {
00303     const DataLayout *TD = MF.getSubtarget().getDataLayout();
00304     const int StackGrowth = -TD->getPointerSize(0);
00305     unsigned FramePtr = RegInfo->getFrameRegister(MF);
00306 
00307     // An example of the prologue:
00308     //
00309     //     .globl __foo
00310     //     .align 2
00311     //  __foo:
00312     // Ltmp0:
00313     //     .cfi_startproc
00314     //     .cfi_personality 155, ___gxx_personality_v0
00315     // Leh_func_begin:
00316     //     .cfi_lsda 16, Lexception33
00317     //
00318     //     stp  xa,bx, [sp, -#offset]!
00319     //     ...
00320     //     stp  x28, x27, [sp, #offset-32]
00321     //     stp  fp, lr, [sp, #offset-16]
00322     //     add  fp, sp, #offset - 16
00323     //     sub  sp, sp, #1360
00324     //
00325     // The Stack:
00326     //       +-------------------------------------------+
00327     // 10000 | ........ | ........ | ........ | ........ |
00328     // 10004 | ........ | ........ | ........ | ........ |
00329     //       +-------------------------------------------+
00330     // 10008 | ........ | ........ | ........ | ........ |
00331     // 1000c | ........ | ........ | ........ | ........ |
00332     //       +===========================================+
00333     // 10010 |                X28 Register               |
00334     // 10014 |                X28 Register               |
00335     //       +-------------------------------------------+
00336     // 10018 |                X27 Register               |
00337     // 1001c |                X27 Register               |
00338     //       +===========================================+
00339     // 10020 |                Frame Pointer              |
00340     // 10024 |                Frame Pointer              |
00341     //       +-------------------------------------------+
00342     // 10028 |                Link Register              |
00343     // 1002c |                Link Register              |
00344     //       +===========================================+
00345     // 10030 | ........ | ........ | ........ | ........ |
00346     // 10034 | ........ | ........ | ........ | ........ |
00347     //       +-------------------------------------------+
00348     // 10038 | ........ | ........ | ........ | ........ |
00349     // 1003c | ........ | ........ | ........ | ........ |
00350     //       +-------------------------------------------+
00351     //
00352     //     [sp] = 10030        ::    >>initial value<<
00353     //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
00354     //     fp = sp == 10020    ::  mov fp, sp
00355     //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
00356     //     sp == 10010         ::    >>final value<<
00357     //
00358     // The frame pointer (w29) points to address 10020. If we use an offset of
00359     // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
00360     // for w27, and -32 for w28:
00361     //
00362     //  Ltmp1:
00363     //     .cfi_def_cfa w29, 16
00364     //  Ltmp2:
00365     //     .cfi_offset w30, -8
00366     //  Ltmp3:
00367     //     .cfi_offset w29, -16
00368     //  Ltmp4:
00369     //     .cfi_offset w27, -24
00370     //  Ltmp5:
00371     //     .cfi_offset w28, -32
00372 
00373     if (HasFP) {
00374       // Define the current CFA rule to use the provided FP.
00375       unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
00376       unsigned CFIIndex = MMI.addFrameInst(
00377           MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
00378       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
00379           .addCFIIndex(CFIIndex);
00380 
00381       // Record the location of the stored LR
00382       unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
00383       CFIIndex = MMI.addFrameInst(
00384           MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
00385       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
00386           .addCFIIndex(CFIIndex);
00387 
00388       // Record the location of the stored FP
00389       CFIIndex = MMI.addFrameInst(
00390           MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
00391       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
00392           .addCFIIndex(CFIIndex);
00393     } else {
00394       // Encode the stack size of the leaf function.
00395       unsigned CFIIndex = MMI.addFrameInst(
00396           MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
00397       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
00398           .addCFIIndex(CFIIndex);
00399     }
00400 
00401     // Now emit the moves for whatever callee saved regs we have.
00402     emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
00403   }
00404 }
00405 
00406 static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) {
00407   for (unsigned i = 0; CSRegs[i]; ++i)
00408     if (Reg == CSRegs[i])
00409       return true;
00410   return false;
00411 }
00412 
00413 static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
00414   unsigned RtIdx = 0;
00415   if (MI->getOpcode() == AArch64::LDPXpost ||
00416       MI->getOpcode() == AArch64::LDPDpost)
00417     RtIdx = 1;
00418 
00419   if (MI->getOpcode() == AArch64::LDPXpost ||
00420       MI->getOpcode() == AArch64::LDPDpost ||
00421       MI->getOpcode() == AArch64::LDPXi || MI->getOpcode() == AArch64::LDPDi) {
00422     if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) ||
00423         !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) ||
00424         MI->getOperand(RtIdx + 2).getReg() != AArch64::SP)
00425       return false;
00426     return true;
00427   }
00428 
00429   return false;
00430 }
00431 
00432 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
00433                                         MachineBasicBlock &MBB) const {
00434   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
00435   assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
00436   MachineFrameInfo *MFI = MF.getFrameInfo();
00437   const AArch64InstrInfo *TII =
00438       static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
00439   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
00440       MF.getSubtarget().getRegisterInfo());
00441   DebugLoc DL = MBBI->getDebugLoc();
00442   unsigned RetOpcode = MBBI->getOpcode();
00443 
00444   int NumBytes = MFI->getStackSize();
00445   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
00446 
00447   // Initial and residual are named for consitency with the prologue. Note that
00448   // in the epilogue, the residual adjustment is executed first.
00449   uint64_t ArgumentPopSize = 0;
00450   if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
00451     MachineOperand &StackAdjust = MBBI->getOperand(1);
00452 
00453     // For a tail-call in a callee-pops-arguments environment, some or all of
00454     // the stack may actually be in use for the call's arguments, this is
00455     // calculated during LowerCall and consumed here...
00456     ArgumentPopSize = StackAdjust.getImm();
00457   } else {
00458     // ... otherwise the amount to pop is *all* of the argument space,
00459     // conveniently stored in the MachineFunctionInfo by
00460     // LowerFormalArguments. This will, of course, be zero for the C calling
00461     // convention.
00462     ArgumentPopSize = AFI->getArgumentStackToRestore();
00463   }
00464 
00465   // The stack frame should be like below,
00466   //
00467   //      ----------------------                     ---
00468   //      |                    |                      |
00469   //      | BytesInStackArgArea|              CalleeArgStackSize
00470   //      | (NumReusableBytes) |                (of tail call)
00471   //      |                    |                     ---
00472   //      |                    |                      |
00473   //      ---------------------|        ---           |
00474   //      |                    |         |            |
00475   //      |   CalleeSavedReg   |         |            |
00476   //      | (NumRestores * 16) |         |            |
00477   //      |                    |         |            |
00478   //      ---------------------|         |         NumBytes
00479   //      |                    |     StackSize  (StackAdjustUp)
00480   //      |   LocalStackSize   |         |            |
00481   //      | (covering callee   |         |            |
00482   //      |       args)        |         |            |
00483   //      |                    |         |            |
00484   //      ----------------------        ---          ---
00485   //
00486   // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
00487   //             = StackSize + ArgumentPopSize
00488   //
00489   // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
00490   // it as the 2nd argument of AArch64ISD::TC_RETURN.
00491   NumBytes += ArgumentPopSize;
00492 
00493   unsigned NumRestores = 0;
00494   // Move past the restores of the callee-saved registers.
00495   MachineBasicBlock::iterator LastPopI = MBBI;
00496   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
00497   if (LastPopI != MBB.begin()) {
00498     do {
00499       ++NumRestores;
00500       --LastPopI;
00501     } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
00502     if (!isCSRestore(LastPopI, CSRegs)) {
00503       ++LastPopI;
00504       --NumRestores;
00505     }
00506   }
00507   NumBytes -= NumRestores * 16;
00508   assert(NumBytes >= 0 && "Negative stack allocation size!?");
00509 
00510   if (!hasFP(MF)) {
00511     // If this was a redzone leaf function, we don't need to restore the
00512     // stack pointer.
00513     if (!canUseRedZone(MF))
00514       emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
00515                       TII);
00516     return;
00517   }
00518 
00519   // Restore the original stack pointer.
00520   // FIXME: Rather than doing the math here, we should instead just use
00521   // non-post-indexed loads for the restores if we aren't actually going to
00522   // be able to save any instructions.
00523   if (NumBytes || MFI->hasVarSizedObjects())
00524     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
00525                     -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
00526 }
00527 
00528 /// getFrameIndexOffset - Returns the displacement from the frame register to
00529 /// the stack frame of the specified index.
00530 int AArch64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
00531                                               int FI) const {
00532   unsigned FrameReg;
00533   return getFrameIndexReference(MF, FI, FrameReg);
00534 }
00535 
00536 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
00537 /// debug info.  It's the same as what we use for resolving the code-gen
00538 /// references for now.  FIXME: This can go wrong when references are
00539 /// SP-relative and simple call frames aren't used.
00540 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
00541                                                  int FI,
00542                                                  unsigned &FrameReg) const {
00543   return resolveFrameIndexReference(MF, FI, FrameReg);
00544 }
00545 
00546 int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
00547                                                      int FI, unsigned &FrameReg,
00548                                                      bool PreferFP) const {
00549   const MachineFrameInfo *MFI = MF.getFrameInfo();
00550   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
00551       MF.getSubtarget().getRegisterInfo());
00552   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
00553   int FPOffset = MFI->getObjectOffset(FI) + 16;
00554   int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
00555   bool isFixed = MFI->isFixedObjectIndex(FI);
00556 
00557   // Use frame pointer to reference fixed objects. Use it for locals if
00558   // there are VLAs (and thus the SP isn't reliable as a base).
00559   // Make sure useFPForScavengingIndex() does the right thing for the emergency
00560   // spill slot.
00561   bool UseFP = false;
00562   if (AFI->hasStackFrame()) {
00563     // Note: Keeping the following as multiple 'if' statements rather than
00564     // merging to a single expression for readability.
00565     //
00566     // Argument access should always use the FP.
00567     if (isFixed) {
00568       UseFP = hasFP(MF);
00569     } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
00570       // Use SP or FP, whichever gives us the best chance of the offset
00571       // being in range for direct access. If the FPOffset is positive,
00572       // that'll always be best, as the SP will be even further away.
00573       // If the FPOffset is negative, we have to keep in mind that the
00574       // available offset range for negative offsets is smaller than for
00575       // positive ones. If we have variable sized objects, we're stuck with
00576       // using the FP regardless, though, as the SP offset is unknown
00577       // and we don't have a base pointer available. If an offset is
00578       // available via the FP and the SP, use whichever is closest.
00579       if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
00580           (FPOffset >= -256 && Offset > -FPOffset))
00581         UseFP = true;
00582     }
00583   }
00584 
00585   if (UseFP) {
00586     FrameReg = RegInfo->getFrameRegister(MF);
00587     return FPOffset;
00588   }
00589 
00590   // Use the base pointer if we have one.
00591   if (RegInfo->hasBasePointer(MF))
00592     FrameReg = RegInfo->getBaseRegister();
00593   else {
00594     FrameReg = AArch64::SP;
00595     // If we're using the red zone for this function, the SP won't actually
00596     // be adjusted, so the offsets will be negative. They're also all
00597     // within range of the signed 9-bit immediate instructions.
00598     if (canUseRedZone(MF))
00599       Offset -= AFI->getLocalStackSize();
00600   }
00601 
00602   return Offset;
00603 }
00604 
00605 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
00606   if (Reg != AArch64::LR)
00607     return getKillRegState(true);
00608 
00609   // LR maybe referred to later by an @llvm.returnaddress intrinsic.
00610   bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR);
00611   bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
00612   return getKillRegState(LRKill);
00613 }
00614 
00615 bool AArch64FrameLowering::spillCalleeSavedRegisters(
00616     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
00617     const std::vector<CalleeSavedInfo> &CSI,
00618     const TargetRegisterInfo *TRI) const {
00619   MachineFunction &MF = *MBB.getParent();
00620   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
00621   unsigned Count = CSI.size();
00622   DebugLoc DL;
00623   assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
00624 
00625   if (MI != MBB.end())
00626     DL = MI->getDebugLoc();
00627 
00628   for (unsigned i = 0; i < Count; i += 2) {
00629     unsigned idx = Count - i - 2;
00630     unsigned Reg1 = CSI[idx].getReg();
00631     unsigned Reg2 = CSI[idx + 1].getReg();
00632     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
00633     // list to come in sorted by frame index so that we can issue the store
00634     // pair instructions directly. Assert if we see anything otherwise.
00635     //
00636     // The order of the registers in the list is controlled by
00637     // getCalleeSavedRegs(), so they will always be in-order, as well.
00638     assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
00639            "Out of order callee saved regs!");
00640     unsigned StrOpc;
00641     assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
00642     assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
00643     // Issue sequence of non-sp increment and pi sp spills for cs regs. The
00644     // first spill is a pre-increment that allocates the stack.
00645     // For example:
00646     //    stp     x22, x21, [sp, #-48]!   // addImm(-6)
00647     //    stp     x20, x19, [sp, #16]    // addImm(+2)
00648     //    stp     fp, lr, [sp, #32]      // addImm(+4)
00649     // Rationale: This sequence saves uop updates compared to a sequence of
00650     // pre-increment spills like stp xi,xj,[sp,#-16]!
00651     // Note: Similar rational and sequence for restores in epilog.
00652     if (AArch64::GPR64RegClass.contains(Reg1)) {
00653       assert(AArch64::GPR64RegClass.contains(Reg2) &&
00654              "Expected GPR64 callee-saved register pair!");
00655       // For first spill use pre-increment store.
00656       if (i == 0)
00657         StrOpc = AArch64::STPXpre;
00658       else
00659         StrOpc = AArch64::STPXi;
00660     } else if (AArch64::FPR64RegClass.contains(Reg1)) {
00661       assert(AArch64::FPR64RegClass.contains(Reg2) &&
00662              "Expected FPR64 callee-saved register pair!");
00663       // For first spill use pre-increment store.
00664       if (i == 0)
00665         StrOpc = AArch64::STPDpre;
00666       else
00667         StrOpc = AArch64::STPDi;
00668     } else
00669       llvm_unreachable("Unexpected callee saved register!");
00670     DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
00671                  << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
00672                  << ", " << CSI[idx + 1].getFrameIdx() << ")\n");
00673     // Compute offset: i = 0 => offset = -Count;
00674     //                 i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
00675     const int Offset = (i == 0) ? -Count : i;
00676     assert((Offset >= -64 && Offset <= 63) &&
00677            "Offset out of bounds for STP immediate");
00678     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
00679     if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
00680       MIB.addReg(AArch64::SP, RegState::Define);
00681 
00682     MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
00683         .addReg(Reg1, getPrologueDeath(MF, Reg1))
00684         .addReg(AArch64::SP)
00685         .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
00686         .setMIFlag(MachineInstr::FrameSetup);
00687   }
00688   return true;
00689 }
00690 
00691 bool AArch64FrameLowering::restoreCalleeSavedRegisters(
00692     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
00693     const std::vector<CalleeSavedInfo> &CSI,
00694     const TargetRegisterInfo *TRI) const {
00695   MachineFunction &MF = *MBB.getParent();
00696   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
00697   unsigned Count = CSI.size();
00698   DebugLoc DL;
00699   assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
00700 
00701   if (MI != MBB.end())
00702     DL = MI->getDebugLoc();
00703 
00704   for (unsigned i = 0; i < Count; i += 2) {
00705     unsigned Reg1 = CSI[i].getReg();
00706     unsigned Reg2 = CSI[i + 1].getReg();
00707     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
00708     // list to come in sorted by frame index so that we can issue the store
00709     // pair instructions directly. Assert if we see anything otherwise.
00710     assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
00711            "Out of order callee saved regs!");
00712     // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
00713     // the last load is sp-pi post-increment and de-allocates the stack:
00714     // For example:
00715     //    ldp     fp, lr, [sp, #32]       // addImm(+4)
00716     //    ldp     x20, x19, [sp, #16]     // addImm(+2)
00717     //    ldp     x22, x21, [sp], #48     // addImm(+6)
00718     // Note: see comment in spillCalleeSavedRegisters()
00719     unsigned LdrOpc;
00720 
00721     assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
00722     assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
00723     if (AArch64::GPR64RegClass.contains(Reg1)) {
00724       assert(AArch64::GPR64RegClass.contains(Reg2) &&
00725              "Expected GPR64 callee-saved register pair!");
00726       if (i == Count - 2)
00727         LdrOpc = AArch64::LDPXpost;
00728       else
00729         LdrOpc = AArch64::LDPXi;
00730     } else if (AArch64::FPR64RegClass.contains(Reg1)) {
00731       assert(AArch64::FPR64RegClass.contains(Reg2) &&
00732              "Expected FPR64 callee-saved register pair!");
00733       if (i == Count - 2)
00734         LdrOpc = AArch64::LDPDpost;
00735       else
00736         LdrOpc = AArch64::LDPDi;
00737     } else
00738       llvm_unreachable("Unexpected callee saved register!");
00739     DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
00740                  << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
00741                  << ", " << CSI[i + 1].getFrameIdx() << ")\n");
00742 
00743     // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4;
00744     // etc.
00745     const int Offset = (i == Count - 2) ? Count : Count - i - 2;
00746     assert((Offset >= -64 && Offset <= 63) &&
00747            "Offset out of bounds for LDP immediate");
00748     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
00749     if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost)
00750       MIB.addReg(AArch64::SP, RegState::Define);
00751 
00752     MIB.addReg(Reg2, getDefRegState(true))
00753         .addReg(Reg1, getDefRegState(true))
00754         .addReg(AArch64::SP)
00755         .addImm(Offset); // [sp], #offset * 8  or [sp, #offset * 8]
00756                          // where the factor * 8 is implicit
00757   }
00758   return true;
00759 }
00760 
00761 void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
00762     MachineFunction &MF, RegScavenger *RS) const {
00763   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
00764       MF.getSubtarget().getRegisterInfo());
00765   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
00766   MachineRegisterInfo *MRI = &MF.getRegInfo();
00767   SmallVector<unsigned, 4> UnspilledCSGPRs;
00768   SmallVector<unsigned, 4> UnspilledCSFPRs;
00769 
00770   // The frame record needs to be created by saving the appropriate registers
00771   if (hasFP(MF)) {
00772     MRI->setPhysRegUsed(AArch64::FP);
00773     MRI->setPhysRegUsed(AArch64::LR);
00774   }
00775 
00776   // Spill the BasePtr if it's used. Do this first thing so that the
00777   // getCalleeSavedRegs() below will get the right answer.
00778   if (RegInfo->hasBasePointer(MF))
00779     MRI->setPhysRegUsed(RegInfo->getBaseRegister());
00780 
00781   // If any callee-saved registers are used, the frame cannot be eliminated.
00782   unsigned NumGPRSpilled = 0;
00783   unsigned NumFPRSpilled = 0;
00784   bool ExtraCSSpill = false;
00785   bool CanEliminateFrame = true;
00786   DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
00787   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
00788 
00789   // Check pairs of consecutive callee-saved registers.
00790   for (unsigned i = 0; CSRegs[i]; i += 2) {
00791     assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
00792 
00793     const unsigned OddReg = CSRegs[i];
00794     const unsigned EvenReg = CSRegs[i + 1];
00795     assert((AArch64::GPR64RegClass.contains(OddReg) &&
00796             AArch64::GPR64RegClass.contains(EvenReg)) ^
00797                (AArch64::FPR64RegClass.contains(OddReg) &&
00798                 AArch64::FPR64RegClass.contains(EvenReg)) &&
00799            "Register class mismatch!");
00800 
00801     const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
00802     const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);
00803 
00804     // Early exit if none of the registers in the register pair is actually
00805     // used.
00806     if (!OddRegUsed && !EvenRegUsed) {
00807       if (AArch64::GPR64RegClass.contains(OddReg)) {
00808         UnspilledCSGPRs.push_back(OddReg);
00809         UnspilledCSGPRs.push_back(EvenReg);
00810       } else {
00811         UnspilledCSFPRs.push_back(OddReg);
00812         UnspilledCSFPRs.push_back(EvenReg);
00813       }
00814       continue;
00815     }
00816 
00817     unsigned Reg = AArch64::NoRegister;
00818     // If only one of the registers of the register pair is used, make sure to
00819     // mark the other one as used as well.
00820     if (OddRegUsed ^ EvenRegUsed) {
00821       // Find out which register is the additional spill.
00822       Reg = OddRegUsed ? EvenReg : OddReg;
00823       MRI->setPhysRegUsed(Reg);
00824     }
00825 
00826     DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
00827     DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
00828 
00829     assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
00830             (RegInfo->getEncodingValue(OddReg) + 1 ==
00831              RegInfo->getEncodingValue(EvenReg))) &&
00832            "Register pair of non-adjacent registers!");
00833     if (AArch64::GPR64RegClass.contains(OddReg)) {
00834       NumGPRSpilled += 2;
00835       // If it's not a reserved register, we can use it in lieu of an
00836       // emergency spill slot for the register scavenger.
00837       // FIXME: It would be better to instead keep looking and choose another
00838       // unspilled register that isn't reserved, if there is one.
00839       if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
00840         ExtraCSSpill = true;
00841     } else
00842       NumFPRSpilled += 2;
00843 
00844     CanEliminateFrame = false;
00845   }
00846 
00847   // FIXME: Set BigStack if any stack slot references may be out of range.
00848   // For now, just conservatively guestimate based on unscaled indexing
00849   // range. We'll end up allocating an unnecessary spill slot a lot, but
00850   // realistically that's not a big deal at this stage of the game.
00851   // The CSR spill slots have not been allocated yet, so estimateStackSize
00852   // won't include them.
00853   MachineFrameInfo *MFI = MF.getFrameInfo();
00854   unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
00855   DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
00856   bool BigStack = (CFSize >= 256);
00857   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
00858     AFI->setHasStackFrame(true);
00859 
00860   // Estimate if we might need to scavenge a register at some point in order
00861   // to materialize a stack offset. If so, either spill one additional
00862   // callee-saved register or reserve a special spill slot to facilitate
00863   // register scavenging. If we already spilled an extra callee-saved register
00864   // above to keep the number of spills even, we don't need to do anything else
00865   // here.
00866   if (BigStack && !ExtraCSSpill) {
00867 
00868     // If we're adding a register to spill here, we have to add two of them
00869     // to keep the number of regs to spill even.
00870     assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
00871     unsigned Count = 0;
00872     while (!UnspilledCSGPRs.empty() && Count < 2) {
00873       unsigned Reg = UnspilledCSGPRs.back();
00874       UnspilledCSGPRs.pop_back();
00875       DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
00876                    << " to get a scratch register.\n");
00877       MRI->setPhysRegUsed(Reg);
00878       ExtraCSSpill = true;
00879       ++Count;
00880     }
00881 
00882     // If we didn't find an extra callee-saved register to spill, create
00883     // an emergency spill slot.
00884     if (!ExtraCSSpill) {
00885       const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
00886       int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
00887       RS->addScavengingFrameIndex(FI);
00888       DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
00889                    << " as the emergency spill slot.\n");
00890     }
00891   }
00892 }