//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

// FIXME: completely move here.
extern cl::opt<bool> ForceStackAlign;

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.  This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const MachineModuleInfo &MMI = MF.getMMI();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();

  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          RegInfo->needsStackRealignment(MF) ||
          MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MMI.callsUnwindInit() || MMI.callsEHReturn());
}

static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::SUB64ri8;
    return X86::SUB64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::SUB32ri8;
    return X86::SUB32ri;
  }
}

static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::ADD64ri8;
    return X86::ADD64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::ADD32ri8;
    return X86::ADD32ri;
  }
}

static unsigned getLEArOpcode(unsigned IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
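
// For example, getSUBriOpcode(/*IsLP64=*/true, 8) yields X86::SUB64ri8 since
// the immediate fits in a signed byte, whereas getSUBriOpcode(true, 4096)
// yields X86::SUB64ri32. Preferring the ri8 forms keeps the encoding of the
// stack adjustment as small as possible.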

/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worrying about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator &MBBI,
                                       const TargetRegisterInfo &TRI,
                                       bool Is64Bit) {
  const MachineFunction *MF = MBB.getParent();
  const Function *F = MF->getFunction();
  if (!F || MF->getMMI().callsEHReturn())
    return 0;

  static const uint16_t CallerSavedRegs32Bit[] = {
    X86::EAX, X86::EDX, X86::ECX, 0
  };

  static const uint16_t CallerSavedRegs64Bit[] = {
    X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
    X86::R8,  X86::R9,  X86::R10, X86::R11, 0
  };

  unsigned Opc = MBBI->getOpcode();
  switch (Opc) {
  default: return 0;
  case X86::RETL:
  case X86::RETQ:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
    for (; *CS; ++CS)
      if (!Uses.count(*CS))
        return *CS;
  }
  }

  return 0;
}
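
// For example, at a plain 64-bit return (X86::RETQ) with no register uses on
// the instruction, the scan records no conflicts and the first candidate,
// X86::RAX, is returned. At X86::TCRETURNri64 the register holding the jump
// target (and all of its aliases) is marked as used, so the search falls
// through to the next free caller-saved register.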

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                  unsigned StackPtr, int64_t NumBytes,
                  bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA,
                  const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  unsigned Opc;
  if (UseLEA)
    Opc = getLEArOpcode(Is64BitStackPtr);
  else
    Opc = isSub
      ? getSUBriOpcode(Is64BitStackPtr, Offset)
      : getADDriOpcode(Is64BitStackPtr, Offset);

  uint64_t Chunk = (1LL << 31) - 1;
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  while (Offset) {
    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
    if (ThisVal == (Is64BitTarget ? 8 : 4)) {
      // Use push / pop instead.
      unsigned Reg = isSub
        ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
      if (Reg) {
        Opc = isSub
          ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r)
          : (Is64BitTarget ? X86::POP64r  : X86::POP32r);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
          .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
        if (isSub)
          MI->setFlag(MachineInstr::FrameSetup);
        Offset -= ThisVal;
        continue;
      }
    }

    MachineInstr *MI = nullptr;

    if (UseLEA) {
      MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                        StackPtr, false, isSub ? -ThisVal : ThisVal);
    } else {
      MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
            .addReg(StackPtr)
            .addImm(ThisVal);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
    }

    if (isSub)
      MI->setFlag(MachineInstr::FrameSetup);

    Offset -= ThisVal;
  }
}
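
// For example, emitSPUpdate(..., NumBytes = -16, ...) on x86-64 with
// UseLEA == false emits a single "subq $16, %rsp" (SUB64ri8), while
// NumBytes = -8 matches the slot size and is emitted as the shorter
// "pushq %rax" (the pushed value is never read). Adjustments larger than
// 2^31 - 1 bytes are split into chunk-sized instructions by the loop.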

/// mergeSPUpdatesUp - Merge a stack pointer adjustment into the
/// stack-manipulating instruction immediately above the iterator.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                      unsigned StackPtr, uint64_t *NumBytes = nullptr) {
  if (MBBI == MBB.begin()) return;

  MachineBasicBlock::iterator PI = std::prev(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += PI->getOperand(2).getImm();
    MBB.erase(PI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= PI->getOperand(2).getImm();
    MBB.erase(PI);
  }
}

/// mergeSPUpdatesDown - Merge a stack pointer adjustment into the
/// stack-manipulating instruction immediately below the iterator.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator &MBBI,
                        unsigned StackPtr, uint64_t *NumBytes = nullptr) {
  // FIXME:  THIS ISN'T RUN!!!
  return;

  if (MBBI == MBB.end()) return;

  MachineBasicBlock::iterator NI = std::next(MBBI);
  if (NI == MBB.end()) return;

  unsigned Opc = NI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  }
}

/// mergeSPUpdates - Check the instruction before/after the passed
/// instruction. If it is an ADD/SUB/LEA instruction it is deleted, and the
/// stack adjustment is returned as a positive value for ADD/LEA and a
/// negative one for SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI, unsigned StackPtr,
                          bool doMergeWithPrevious) {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
                                                       : std::next(MBBI);
  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr) {
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}
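
// For example, if the instruction before MBBI is "subq $32, %rsp", then
// mergeSPUpdates(MBB, MBBI, StackPtr, /*doMergeWithPrevious=*/true) erases
// that SUB and returns -32; the caller folds the -32 into the adjustment it
// is about to emit, so only a single RSP update remains in the block.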

static bool isEAXLiveIn(MachineFunction &MF) {
  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
    unsigned Reg = II->first;

    if (Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

void
X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            DebugLoc DL) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  // Calculate offsets.
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();

    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex =
        MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
                                                        Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }
}

/// usesTheStack - This function checks whether any user of EFLAGS is a COPY
/// instruction. We know that the code that lowers COPY of EFLAGS has to use
/// the stack, and if we don't adjust the stack we clobber the first frame
/// index.
/// See X86InstrInfo::copyPhysReg.
static bool usesTheStack(const MachineFunction &MF) {
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (MachineRegisterInfo::reg_instr_iterator
       ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end();
       ri != re; ++ri)
    if (ri->isCopy())
      return true;

  return false;
}

void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI,
                                             unsigned &CallOp,
                                             const char *&Symbol) {
  CallOp = STI.is64Bit() ? X86::W64ALLOCA : X86::CALLpcrel32;

  if (STI.is64Bit()) {
    if (STI.isTargetCygMing()) {
      Symbol = "___chkstk_ms";
    } else {
      Symbol = "__chkstk";
    }
  } else if (STI.isTargetCygMing())
    Symbol = "_alloca";
  else
    Symbol = "_chkstk";
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
         .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
              ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const X86RegisterInfo *RegInfo =
      static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
  bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
  bool Is64Bit = STI.is64Bit();
  // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  bool IsWin64 = STI.isTargetWin64();
  bool IsWinEH =
      MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() ==
      ExceptionHandling::WinEH; // Not necessarily synonymous with IsWin64.
  bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
  bool NeedsDwarfCFI =
      !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
  bool UseLEA = STI.useLeaForSP();
  unsigned StackAlign = getStackAlignment();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  const unsigned MachineFramePtr = STI.isTarget64BitILP32() ?
                 getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
  unsigned StackPtr = RegInfo->getStackRegister();
  unsigned BasePtr = RegInfo->getBaseRegister();
  DebugLoc DL;

  // If we're forcing a stack realignment we can't rely on just the frame
  // info; we need to know the ABI stack alignment as well in case we
  // have a call out.  Otherwise just make sure we have some alignment - we'll
  // go with the minimum SlotSize.
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = SlotSize;
  }

  // Add RETADDR move area to callee saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMacho());

  // On x86-64, if the Red Zone is not disabled and this is a leaf function
  // that uses at most 128 bytes of stack space and has no frame pointer,
  // calls, or dynamic allocas, then we do not need to adjust the stack
  // pointer (we fit in the Red Zone). We also check that we don't push and
  // pop from the stack.
  if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                                   Attribute::NoRedZone) &&
      !RegInfo->needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() &&                     // No dynamic alloca.
      !MFI->adjustsStack() &&                           // No calls.
      !IsWin64 &&                                       // Win64 has no Red Zone
      !usesTheStack(MF) &&                              // Don't push and pop.
      !MF.shouldSplitStack()) {                         // Regular stack
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  }
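
  // For example, a leaf function with 40 bytes of locals and no pushed
  // callee-saved registers ends up with StackSize == 0 here: no SUB is
  // emitted and the locals are addressed at negative offsets from %rsp,
  // inside the 128-byte red zone below the stack pointer.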

  // Insert stack pointer adjustment for later moving of return addr.  Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)),
              StackPtr)
        .addReg(StackPtr)
        .addImm(-TailCallReturnAddrDelta)
        .setMIFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  uint64_t NumBytes = 0;
  int stackGrowth = -SlotSize;

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (RegInfo->needsStackRealignment(MF)) {
      // Callee-saved registers are pushed on stack before the stack
      // is realigned.
      FrameSize -= X86FI->getCalleeSavedFrameSize();
      NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
    } else {
      NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
    }

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(MachineFramePtr, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);

    if (NeedsDwarfCFI) {
      // Mark the place where EBP/RBP was saved.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Change the rule for the FramePtr to be an "offset" rule.
      unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr,
                                         DwarfFramePtr, 2 * stackGrowth));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (NeedsWinEH) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
          .addImm(FramePtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    // Update EBP with the new base value.
    BuildMI(MBB, MBBI, DL,
            TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr)
        .addReg(StackPtr)
        .setMIFlag(MachineInstr::FrameSetup);

    if (NeedsDwarfCFI) {
      // Mark effective beginning of when frame pointer becomes valid.
      // Define the current CFA to use the EBP/RBP register.
      unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Mark the FramePtr as live-in in every block.
    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
      I->addLiveIn(MachineFramePtr);
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    unsigned Reg = MBBI->getOperand(0).getReg();
    ++MBBI;

    if (!HasFP && NeedsDwarfCFI) {
      // Mark callee-saved push instruction.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
      StackOffset += stackGrowth;
    }

    if (NeedsWinEH) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
          MachineInstr::FrameSetup);
    }
  }

  // Realign stack after we pushed callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  if (RegInfo->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(Uses64BitFramePtr ? X86::AND64ri32 : X86::AND32ri), StackPtr)
      .addReg(StackPtr)
      .addImm(-MaxAlign)
      .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
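
  // For example, with MaxAlign == 32 this emits "andq $-32, %rsp", which
  // rounds the stack pointer down to the next 32-byte boundary; the bytes
  // skipped over form the "re-alignment gap" mentioned in the gist above.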

  // If there is a SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
  NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

  // If there is an ADD32ri or SUB32ri of ESP immediately after this
  // instruction, merge the two instructions.
  mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);

  // Adjust stack pointer: ESP -= numbytes.

  static const size_t PageSize = 4096;

  // Windows and cygwin/mingw require a prologue helper routine when allocating
  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
  // stack and adjust the stack pointer in one go.  The 64-bit version of
  // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
  // responsible for adjusting the stack pointer.  Touching the stack at 4K
  // increments is necessary to ensure that the guard pages used by the OS
  // virtual memory manager are allocated in the correct sequence.
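  //
  // For example, a 64-bit Windows function allocating 0x5000 bytes gets:
  //   mov   $0x5000, %rax
  //   call  __chkstk          ; ___chkstk_ms for cygwin/mingw
  //   sub   %rax, %rsp
  // whereas on 32-bit targets the probe routine itself adjusts %esp.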
  if (NumBytes >= PageSize && UseStackProbe) {
    const char *StackProbeSymbol;
    unsigned CallOp;

    getStackProbeFunction(STI, CallOp, StackProbeSymbol);

    // Check whether EAX is live-in for this function.
    bool isEAXAlive = isEAXLiveIn(MF);

    if (isEAXAlive) {
      // EAX is live-in; sanity check that this only happens for 32-bit
      // targets, where we can save and restore it around the probe call.
      assert(!Is64Bit && "EAX is livein in x64 case!");

      // Save EAX
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    if (Is64Bit) {
      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
      // Function prologue is responsible for adjusting the stack pointer.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
    } else {
      // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
      // We'll also use 4 already allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    BuildMI(MBB, MBBI, DL,
            TII.get(CallOp))
      .addExternalSymbol(StackProbeSymbol)
      .addReg(StackPtr,    RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
      .setMIFlag(MachineInstr::FrameSetup);

    if (Is64Bit) {
      // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
      // themselves. They also do not clobber %rax, so we can reuse it when
      // adjusting %rsp.
      BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr)
        .addReg(StackPtr)
        .addReg(X86::RAX)
        .setMIFlag(MachineInstr::FrameSetup);
    }
    if (isEAXAlive) {
      // Restore EAX
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MI->setFlag(MachineInstr::FrameSetup);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes) {
    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr,
                 UseLEA, TII, *RegInfo);
  }

  int SEHFrameOffset = 0;
  if (NeedsWinEH) {
    if (HasFP) {
      // We need to set the frame base offset low enough that all saved
      // register offsets are positive relative to it, but we can't just use
      // NumBytes, because the .seh_setframe offset must be <= 240. So we
      // pretend to have only allocated enough space to spill the
      // non-volatile registers.
      // We don't care about the rest of the stack allocation, because the
      // unwinder will restore SP to (BP - SEHFrameOffset).
      for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
        int offset = MFI->getObjectOffset(Info.getFrameIdx());
        SEHFrameOffset = std::max(SEHFrameOffset, abs(offset));
      }
      SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment

      // This only needs to account for XMM spill slots; GPR slots
      // are covered by the .seh_pushreg's emitted above.
      unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize();
      if (Size) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
            .addImm(Size)
            .setMIFlag(MachineInstr::FrameSetup);
      }

      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
          .addImm(FramePtr)
          .addImm(SEHFrameOffset)
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      // SP will be the base register for restoring XMMs
      if (NumBytes) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    }
  }

  // Skip the rest of the register spilling code.
  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;

  // Emit SEH info for non-GPRs
  if (NeedsWinEH) {
    for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
      unsigned Reg = Info.getReg();
      if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
        continue;
      assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class");

      int Offset = getFrameIndexOffset(MF, Info.getFrameIdx());
      Offset += SEHFrameOffset;

      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
          .addImm(Reg)
          .addImm(Offset)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (RegInfo->hasBasePointer(MF)) {
    // Update the base pointer with the current stack pointer.
    unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
    // Mark end of stack pointer adjustment.
    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr,
                                               -StackSize + stackGrowth));

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MBB, MBBI, DL);
  }
}

void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  const X86RegisterInfo *RegInfo =
      static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI != MBB.end() && "Returning block has no instructions");
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
  bool Is64Bit = STI.is64Bit();
  // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  const bool Is64BitILP32 = STI.isTarget64BitILP32();
  bool UseLEA = STI.useLeaForSP();
  unsigned StackAlign = getStackAlignment();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned MachineFramePtr = Is64BitILP32 ?
             getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
  unsigned StackPtr = RegInfo->getStackRegister();

  bool IsWinEH =
      MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() ==
      ExceptionHandling::WinEH;
  bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();

  switch (RetOpcode) {
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  case X86::RETQ:
  case X86::RETL:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
    break;  // These are ok
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign  = MFI->getMaxAlignment();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  // If we're forcing a stack realignment we can't rely on just the frame
  // info; we need to know the ABI stack alignment as well in case we
  // have a call out.  Otherwise just make sure we have some alignment - we'll
  // go with the minimum.
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else
      MaxAlign = MaxAlign ? MaxAlign : 4;
  }

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (RegInfo->needsStackRealignment(MF)) {
      // Callee-saved registers were pushed on stack before the stack
      // was realigned.
      FrameSize -= CSSize;
      NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
    } else {
      NumBytes = FrameSize - CSSize;
    }

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }

  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = std::prev(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
        !PI->isTerminator())
      break;

    --MBBI;
  }
  MachineBasicBlock::iterator FirstCSPop = MBBI;

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic allocas are used, reset ESP to point to the last callee-saved
  // slot before popping the callee-saved registers off. The same applies when
  // the stack was realigned.
  if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
    if (RegInfo->needsStackRealignment(MF))
      MBBI = FirstCSPop;
    if (CSSize != 0) {
      unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                   FramePtr, false, -CSSize);
      --MBBI;
    } else {
      unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(FramePtr);
      --MBBI;
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr, UseLEA,
                 TII, *RegInfo);
    --MBBI;
  }

  // The Windows unwinder will not invoke a function's exception handler if IP
  // is either in the prologue or in the epilogue.  This behavior causes a
  // problem when a call immediately precedes an epilogue, because the return
  // address points into the epilogue.  To cope with that, we insert an
  // epilogue marker here, then replace it with a 'nop' if it ends up
  // immediately after a CALL in the final emitted code.
  if (NeedsWinEH)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));

  // We're returning from the function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &DestAddr  = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNmi ||
             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
             RetOpcode == X86::TCRETURNmi64) {
    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
                   UseLEA, TII, *RegInfo);
    }

    // Jump to label or value in register.
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
                                       ? X86::TAILJMPd : X86::TAILJMPd64));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
                                       ? X86::TAILJMPm : X86::TAILJMPm64));
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (RetOpcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr *NewMI = std::prev(MBBI);
    NewMI->copyImplicitOps(MF, MBBI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RETQ || RetOpcode == X86::RETL ||
              RetOpcode == X86::RETIQ || RetOpcode == X86::RETIL) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1 * X86FI->getTCReturnAddrDelta();
    MBBI = MBB.getLastNonDebugInstr();

    // Check for possible merge with preceding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr, UseLEA, TII,
                 *RegInfo);
  }
}

int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
                                          int FI) const {
  const X86RegisterInfo *RegInfo =
      static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
  uint64_t StackSize = MFI->getStackSize();

  if (RegInfo->hasBasePointer(MF)) {
    assert(hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + RegInfo->getSlotSize();
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
  } else if (RegInfo->needsStackRealignment(MF)) {
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + RegInfo->getSlotSize();
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
    // FIXME: Support tail calls
  } else {
    if (!hasFP(MF))
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += RegInfo->getSlotSize();

    // Skip the RETADDR move area
    const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;
  }

  return Offset;
}

int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             unsigned &FrameReg) const {
  const X86RegisterInfo *RegInfo =
      static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  // We can't calculate offset from frame pointer if the stack is realigned,
  // so enforce usage of stack/base pointer.  The base pointer is used when we
  // have dynamic allocas in addition to dynamic realignment.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else if (RegInfo->needsStackRealignment(MF))
    FrameReg = RegInfo->getStackRegister();
  else
    FrameReg = RegInfo->getFrameRegister(MF);
  return getFrameIndexOffset(MF, FI);
}
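
// For example, in a function with both dynamic allocas and stack realignment,
// RegInfo->hasBasePointer(MF) is true and locals are addressed off the base
// pointer (the gist in emitPrologue shows it being set up as %rbx on x86-64);
// a realigned function without allocas uses %rsp, and everything else uses
// the frame register.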

bool X86FrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86RegisterInfo *RegInfo =
      static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  unsigned SlotSize = RegInfo->getSlotSize();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  unsigned CalleeSavedFrameSize = 0;
  int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();

  if (hasFP(MF)) {
    // emitPrologue always spills the frame register first.
    SpillSlotOffset -= SlotSize;
    MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);

    // Since emitPrologue and emitEpilogue will handle spilling and restoring
    // of the frame register, we can delete it from the CSI list and not have
    // to worry about avoiding it later.
    unsigned FPReg = RegInfo->getFrameRegister(MF);
    for (unsigned i = 0; i < CSI.size(); ++i) {
      if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
        CSI.erase(CSI.begin() + i);
        break;
      }
    }
  }

  // Assign slots for GPRs. This increases the frame size.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;

    SpillSlotOffset -= SlotSize;
    CalleeSavedFrameSize += SlotSize;

    int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
    CSI[i - 1].setFrameIdx(SlotIndex);
  }

  X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);

  // Assign slots for XMMs.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;

    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
    // Ensure alignment.
    SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment();
    // Spill into slot.
    SpillSlotOffset -= RC->getSize();
    int SlotIndex =
        MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
    CSI[i - 1].setFrameIdx(SlotIndex);
    MFI->ensureMaxAlignment(RC->getAlignment());
  }

  return true;
}
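
// Illustrative layout (assuming an x86-64 function with a frame pointer, a
// zero tail-call delta, callee-saved registers {%rbx, %xmm6}, and
// getOffsetOfLocalArea() == -8, its usual x86-64 value): SpillSlotOffset
// starts at -8, so the fixed slots come out as
//   -16: frame register slot (spilled/restored by emitPrologue/emitEpilogue)
//   -24: %rbx  (one SlotSize-byte GPR slot)
//   -48: %xmm6 (offset first rounded down for 16-byte alignment, then
//        decremented by the 16-byte register size)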

bool X86FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();

  // Push GPRs. This increases the frame size.
  unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);

    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  // Spill XMM registers. X86 has no push/pop instructions for XMMs, so spill
  // them to the stack frame with ordinary stores instead.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();
    if (X86::GR64RegClass.contains(Reg) ||
        X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
                            TRI);
    --MI;
    MI->setFlag(MachineInstr::FrameSetup);
    ++MI;
  }

  return true;
}

bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                          const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();

  // Reload XMMs from stack frame.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (X86::GR64RegClass.contains(Reg) ||
        X86::GR32RegClass.contains(Reg))
      continue;

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
  }

  // POP GPRs.
  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (!X86::GR64RegClass.contains(Reg) &&
        !X86::GR32RegClass.contains(Reg))
      continue;

    BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
  }
  return true;
}

void
X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86RegisterInfo *RegInfo =
      static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  unsigned SlotSize = RegInfo->getSlotSize();

  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();

  if (TailCallReturnAddrDelta < 0) {
    // create RETURNADDR area
    //   arg
    //   arg
    //   RETADDR
    //   { ...
    //     RETADDR area
    //     ...
    //   }
    //   [EBP]
    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
                           TailCallReturnAddrDelta - SlotSize, true);
  }

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
}

static bool
HasNestArgument(const MachineFunction *MF) {
  const Function *F = MF->getFunction();
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; I++) {
    if (I->hasNestAttr())
      return true;
  }
  return false;
}
01312 
01313 /// GetScratchRegister - Get a temp register for performing work in the
01314 /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
01315 /// and the properties of the function either one or two registers will be
01316 /// needed. Set primary to true for the first register, false for the second.
01317 static unsigned
01318 GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
01319   CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
01320 
01321   // The HiPE (Erlang) calling convention uses dedicated scratch registers.
01322   if (CallingConvention == CallingConv::HiPE) {
01323     if (Is64Bit)
01324       return Primary ? X86::R14 : X86::R13;
01325     else
01326       return Primary ? X86::EBX : X86::EDI;
01327   }
01328 
01329   if (Is64Bit)
01330     return Primary ? X86::R11 : X86::R12;
01331 
01332   bool IsNested = HasNestArgument(&MF);
01333 
01334   if (CallingConvention == CallingConv::X86_FastCall ||
01335       CallingConvention == CallingConv::Fast) {
01336     if (IsNested)
01337       report_fatal_error("Segmented stacks do not support fastcall with "
01338                          "nested functions.");
01339     return Primary ? X86::EAX : X86::ECX;
01340   }
01341   if (IsNested)
01342     return Primary ? X86::EDX : X86::EAX;
01343   return Primary ? X86::ECX : X86::EAX;
01344 }
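// For instance, GetScratchRegister(/*Is64Bit=*/true, MF, /*Primary=*/true)
// returns R11 for an ordinary 64-bit function, while a 32-bit fastcc
// function gets EAX as the primary and ECX as the secondary scratch.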
01345 
01346 // The stack limit in the TCB is set to this many bytes above the actual stack
01347 // limit.
01348 static const uint64_t kSplitStackAvailable = 256;
01349 
01350 void
01351 X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
01352   MachineBasicBlock &prologueMBB = MF.front();
01353   MachineFrameInfo *MFI = MF.getFrameInfo();
01354   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
01355   uint64_t StackSize;
01356   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
01357   bool Is64Bit = STI.is64Bit();
01358   unsigned TlsReg, TlsOffset;
01359   DebugLoc DL;
01360 
01361   unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
01362   assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
01363          "Scratch register is live-in");
01364 
01365   if (MF.getFunction()->isVarArg())
01366     report_fatal_error("Segmented stacks do not support vararg functions.");
01367   if (!STI.isTargetLinux() && !STI.isTargetDarwin() &&
01368       !STI.isTargetWin32() && !STI.isTargetWin64() && !STI.isTargetFreeBSD())
01369     report_fatal_error("Segmented stacks not supported on this platform.");
01370 
01371   // Eventually StackSize will be calculated by a link-time pass, which will
01372   // also decide whether checking code needs to be injected into this particular
01373   // prologue.
01374   StackSize = MFI->getStackSize();
01375 
01376   // Do not generate a prologue for functions with a stack frame of size zero.
01377   if (StackSize == 0)
01378     return;
01379 
01380   MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
01381   MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
01382   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
01383   bool IsNested = false;
01384 
01385   // We only need to know whether the function has a nest argument in 64-bit mode.
01386   if (Is64Bit)
01387     IsNested = HasNestArgument(&MF);
01388 
01389   // The MOV R10, RAX needs to be in a different block, since the RET we emit in
01390   // allocMBB needs to be the last (terminating) instruction.
01391 
01392   for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
01393          e = prologueMBB.livein_end(); i != e; ++i) {
01394     allocMBB->addLiveIn(*i);
01395     checkMBB->addLiveIn(*i);
01396   }
01397 
01398   if (IsNested)
01399     allocMBB->addLiveIn(X86::R10);
01400 
01401   MF.push_front(allocMBB);
01402   MF.push_front(checkMBB);
01403 
01404   // When the frame size is less than 256 we just compare the stack
01405   // boundary directly to the value of the stack pointer, per gcc.
01406   bool CompareStackPointer = StackSize < kSplitStackAvailable;
01407 
01408   // Read the limit of the current stacklet from the stack_guard location.
01409   if (Is64Bit) {
01410     if (STI.isTargetLinux()) {
01411       TlsReg = X86::FS;
01412       TlsOffset = 0x70;
01413     } else if (STI.isTargetDarwin()) {
01414       TlsReg = X86::GS;
01415       TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
01416     } else if (STI.isTargetWin64()) {
01417       TlsReg = X86::GS;
01418       TlsOffset = 0x28; // pvArbitrary, reserved for application use
01419     } else if (STI.isTargetFreeBSD()) {
01420       TlsReg = X86::FS;
01421       TlsOffset = 0x18;
01422     } else {
01423       report_fatal_error("Segmented stacks not supported on this platform.");
01424     }
01425 
01426     if (CompareStackPointer)
01427       ScratchReg = X86::RSP;
01428     else
01429       BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
01430         .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
01431 
01432     BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
01433       .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
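    // The check built above comes out roughly as (Linux x86-64 shown, with
    // R11 as the default scratch register and a hypothetical StackSize):
    //   leaq -StackSize(%rsp), %r11  # omitted when %rsp is compared directly
    //   cmpq %fs:0x70, %r11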
01434   } else {
01435     if (STI.isTargetLinux()) {
01436       TlsReg = X86::GS;
01437       TlsOffset = 0x30;
01438     } else if (STI.isTargetDarwin()) {
01439       TlsReg = X86::GS;
01440       TlsOffset = 0x48 + 90*4;
01441     } else if (STI.isTargetWin32()) {
01442       TlsReg = X86::FS;
01443       TlsOffset = 0x14; // pvArbitrary, reserved for application use
01444     } else if (STI.isTargetFreeBSD()) {
01445       report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
01446     } else {
01447       report_fatal_error("Segmented stacks not supported on this platform.");
01448     }
01449 
01450     if (CompareStackPointer)
01451       ScratchReg = X86::ESP;
01452     else
01453       BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
01454         .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
01455 
01456     if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64()) {
01457       BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
01458         .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
01459     } else if (STI.isTargetDarwin()) {
01460 
01461       // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
01462       unsigned ScratchReg2;
01463       bool SaveScratch2;
01464       if (CompareStackPointer) {
01465         // The primary scratch register is available for holding the TLS offset.
01466         ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
01467         SaveScratch2 = false;
01468       } else {
01469         // Need to use a second register to hold the TLS offset
01470         ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
01471 
01472         // Unfortunately, with fastcc the second scratch register may hold an
01473         // argument.
01474         SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
01475       }
01476 
01477       // If Scratch2 is live-in then it needs to be saved.
01478       assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
01479              "Scratch register is live-in and not saved");
01480 
01481       if (SaveScratch2)
01482         BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
01483           .addReg(ScratchReg2, RegState::Kill);
01484 
01485       BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
01486         .addImm(TlsOffset);
01487       BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
01488         .addReg(ScratchReg)
01489         .addReg(ScratchReg2).addImm(1).addReg(0)
01490         .addImm(0)
01491         .addReg(TlsReg);
01492 
01493       if (SaveScratch2)
01494         BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
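      // On Darwin/i386 the sequence above is therefore roughly (with ECX as
      // the primary scratch and EAX as the hypothetical second scratch):
      //   pushl %eax                  # only if EAX was live-in
      //   movl  $TlsOffset, %eax
      //   cmpl  %gs:(%eax), %ecx
      //   popl  %eax                  # only if pushed above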
01495     }
01496   }
01497 
01498   // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
01499   // It jumps to normal execution of the function body.
01500   BuildMI(checkMBB, DL, TII.get(X86::JA_4)).addMBB(&prologueMBB);
01501 
01502   // On 32-bit we first push the argument size and then the frame size. On
01503   // 64-bit we pass the stack frame size in R10 and the argument size in R11.
01504   if (Is64Bit) {
01505     // Functions with nested arguments use R10, so it needs to be saved
01506     // across the call to __morestack.
01507 
01508     if (IsNested)
01509       BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10);
01510 
01511     BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10)
01512       .addImm(StackSize);
01513     BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11)
01514       .addImm(X86FI->getArgumentStackSize());
01515     MF.getRegInfo().setPhysRegUsed(X86::R10);
01516     MF.getRegInfo().setPhysRegUsed(X86::R11);
01517   } else {
01518     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
01519       .addImm(X86FI->getArgumentStackSize());
01520     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
01521       .addImm(StackSize);
01522   }
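  // E.g. for StackSize == 4096 and 8 bytes of on-stack arguments the 32-bit
  // call sequence is roughly:
  //   pushl $8
  //   pushl $4096
  //   calll __morestack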
01523 
01524   // __morestack is in libgcc
01525   if (Is64Bit)
01526     BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
01527       .addExternalSymbol("__morestack");
01528   else
01529     BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
01530       .addExternalSymbol("__morestack");
01531 
01532   if (IsNested)
01533     BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
01534   else
01535     BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
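  // MORESTACK_RET and MORESTACK_RET_RESTORE_R10 are pseudo instructions that
  // the asm printer later expands to a plain 'ret' (the R10 variant adds a
  // 'movq %rax, %r10' after it, which __morestack skips the 'ret' to reach).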
01536 
01537   allocMBB->addSuccessor(&prologueMBB);
01538 
01539   checkMBB->addSuccessor(allocMBB);
01540   checkMBB->addSuccessor(&prologueMBB);
01541 
01542 #ifdef XDEBUG
01543   MF.verify();
01544 #endif
01545 }
01546 
01547 /// Erlang programs may need a special prologue to handle the stack size they
01548 /// might need at runtime. That is because Erlang/OTP does not implement a C
01549 /// stack but uses a custom hybrid stack/heap architecture instead.
01550 /// (for more information see Eric Stenman's Ph.D. thesis:
01551 /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
01552 ///
01553 /// CheckStack:
01554 ///       temp0 = sp - MaxStack
01555 ///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
01556 /// OldStart:
01557 ///       ...
01558 /// IncStack:
01559 ///       call inc_stack   # doubles the stack space
01560 ///       temp0 = sp - MaxStack
01561 ///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
01562 void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
01563   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
01564   MachineFrameInfo *MFI = MF.getFrameInfo();
01565   const unsigned SlotSize =
01566       static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo())
01567           ->getSlotSize();
01568   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
01569   const bool Is64Bit = STI.is64Bit();
01570   DebugLoc DL;
01571   // HiPE-specific values
01572   const unsigned HipeLeafWords = 24;
01573   const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
01574   const unsigned Guaranteed = HipeLeafWords * SlotSize;
01575   unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
01576                             MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
01577   unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
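  // E.g. on x86-64 (SlotSize == 8) a function with a 40-byte frame and eight
  // arguments has CallerStkArity == 8 - 6 == 2, so MaxStack starts out as
  // 40 + 2*8 + 8 == 64 bytes.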
01578 
01579   assert(STI.isTargetLinux() &&
01580          "HiPE prologue is only supported on Linux operating systems.");
01581 
01582   // Compute the largest frame the caller needs in order to fit its callees'
01583   // frames. This 'MaxStack' is computed from:
01584   //
01585   // a) the fixed frame size, which is the space needed for all spilled temps,
01586   // b) outgoing on-stack parameter areas, and
01587   // c) the minimum stack space this function needs to make available for the
01588   //    functions it calls (a tunable ABI property).
01589   if (MFI->hasCalls()) {
01590     unsigned MoreStackForCalls = 0;
01591 
01592     for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
01593          MBBI != MBBE; ++MBBI)
01594       for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
01595            MI != ME; ++MI) {
01596         if (!MI->isCall())
01597           continue;
01598 
01599         // Get callee operand.
01600         const MachineOperand &MO = MI->getOperand(0);
01601 
01602         // Only take account of global function calls (no closures etc.).
01603         if (!MO.isGlobal())
01604           continue;
01605 
01606         const Function *F = dyn_cast<Function>(MO.getGlobal());
01607         if (!F)
01608           continue;
01609 
01610         // Do not update 'MaxStack' for primitive and built-in functions
01611         // (encoded with names containing "erlang." or "bif_", or with names
01612         // containing neither a "." (as a regular <Module>.<Function>.<Arity>
01613         // name would) nor an "_" (as the BIF "suspend_0" does)), as they are
01614         // executed on another stack.
01615         if (F->getName().find("erlang.") != StringRef::npos ||
01616             F->getName().find("bif_") != StringRef::npos ||
01617             F->getName().find_first_of("._") == StringRef::npos)
01618           continue;
01619 
01620         unsigned CalleeStkArity =
01621           F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
01622         if (HipeLeafWords - 1 > CalleeStkArity)
01623           MoreStackForCalls = std::max(MoreStackForCalls,
01624                                (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
01625       }
01626     MaxStack += MoreStackForCalls;
01627   }
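  // E.g. on x86-64 a call to a qualifying function that takes two stack
  // arguments (a hypothetical callee) contributes
  // (24 - 1 - 2) * 8 == 168 bytes to MoreStackForCalls.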
01628 
01629   // If the stack frame needed is larger than the guaranteed size, runtime
01630   // checks and calls to the "inc_stack_0" BIF are inserted into the prologue.
01631   if (MaxStack > Guaranteed) {
01632     MachineBasicBlock &prologueMBB = MF.front();
01633     MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
01634     MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
01635 
01636     for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
01637            E = prologueMBB.livein_end(); I != E; ++I) {
01638       stackCheckMBB->addLiveIn(*I);
01639       incStackMBB->addLiveIn(*I);
01640     }
01641 
01642     MF.push_front(incStackMBB);
01643     MF.push_front(stackCheckMBB);
01644 
01645     unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
01646     unsigned LEAop, CMPop, CALLop;
01647     if (Is64Bit) {
01648       SPReg = X86::RSP;
01649       PReg  = X86::RBP;
01650       LEAop = X86::LEA64r;
01651       CMPop = X86::CMP64rm;
01652       CALLop = X86::CALL64pcrel32;
01653       SPLimitOffset = 0x90;
01654     } else {
01655       SPReg = X86::ESP;
01656       PReg  = X86::EBP;
01657       LEAop = X86::LEA32r;
01658       CMPop = X86::CMP32rm;
01659       CALLop = X86::CALLpcrel32;
01660       SPLimitOffset = 0x4c;
01661     }
01662 
01663     ScratchReg = GetScratchRegister(Is64Bit, MF, true);
01664     assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
01665            "HiPE prologue scratch register is live-in");
01666 
01667     // Create new MBB for StackCheck:
01668     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
01669                  SPReg, false, -MaxStack);
01670     // SPLimitOffset is in a fixed heap location (pointed to by BP).
01671     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
01672                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
01673     BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB);
01674 
01675     // Create new MBB for IncStack:
01676     BuildMI(incStackMBB, DL, TII.get(CALLop)).
01677       addExternalSymbol("inc_stack_0");
01678     addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
01679                  SPReg, false, -MaxStack);
01680     addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
01681                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
01682     BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB);
01683 
01684     stackCheckMBB->addSuccessor(&prologueMBB, 99);
01685     stackCheckMBB->addSuccessor(incStackMBB, 1);
01686     incStackMBB->addSuccessor(&prologueMBB, 99);
01687     incStackMBB->addSuccessor(incStackMBB, 1);
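    // The emitted prologue check is thus roughly (x86-64 shown, with R14 as
    // the HiPE scratch register and a hypothetical MaxStack):
    // stackCheckMBB:
    //   leaq  -MaxStack(%rsp), %r14
    //   cmpq  0x90(%rbp), %r14
    //   jae   <old start>
    // incStackMBB:
    //   callq inc_stack_0
    //   leaq  -MaxStack(%rsp), %r14
    //   cmpq  0x90(%rbp), %r14
    //   jle   incStackMBB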
01688   }
01689 #ifdef XDEBUG
01690   MF.verify();
01691 #endif
01692 }
01693 
01694 void X86FrameLowering::
01695 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
01696                               MachineBasicBlock::iterator I) const {
01697   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
01698   const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
01699                                        MF.getSubtarget().getRegisterInfo());
01700   unsigned StackPtr = RegInfo.getStackRegister();
01701   bool reserveCallFrame = hasReservedCallFrame(MF);
01702   int Opcode = I->getOpcode();
01703   bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
01704   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
01705   bool IsLP64 = STI.isTarget64BitLP64();
01706   DebugLoc DL = I->getDebugLoc();
01707   uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
01708   uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
01709   I = MBB.erase(I);
01710 
01711   if (!reserveCallFrame) {
01712     // If the stack pointer can be changed after the prologue, turn the
01713     // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
01714     // adjcallstackup instruction into an 'add ESP, <amt>'.
01715     // TODO: consider using push / pop instead of sub + store / add
01716     if (Amount == 0)
01717       return;
01718 
01719     // We need to keep the stack aligned properly.  To do this, we round the
01720     // amount of space needed for the outgoing arguments up to the next
01721     // alignment boundary.
01722     unsigned StackAlign = MF.getTarget()
01723                               .getSubtargetImpl()
01724                               ->getFrameLowering()
01725                               ->getStackAlignment();
01726     Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
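    // E.g. Amount == 20 with a 16-byte stack alignment rounds up to 32.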
01727 
01728     MachineInstr *New = nullptr;
01729     if (Opcode == TII.getCallFrameSetupOpcode()) {
01730       New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
01731                     StackPtr)
01732         .addReg(StackPtr)
01733         .addImm(Amount);
01734     } else {
01735       assert(Opcode == TII.getCallFrameDestroyOpcode());
01736 
01737       // Factor out the amount the callee already popped.
01738       Amount -= CalleeAmt;
01739 
01740       if (Amount) {
01741         unsigned Opc = getADDriOpcode(IsLP64, Amount);
01742         New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
01743           .addReg(StackPtr).addImm(Amount);
01744       }
01745     }
01746 
01747     if (New) {
01748       // The EFLAGS implicit def is dead.
01749       New->getOperand(3).setIsDead();
01750 
01751       // Replace the pseudo instruction with a new instruction.
01752       MBB.insert(I, New);
01753     }
01754 
01755     return;
01756   }
01757 
01758   if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
01759     // If we are performing frame pointer elimination and if the callee pops
01760     // something off the stack pointer, add it back.  We do this until we have
01761     // more advanced stack pointer tracking ability.
01762     unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
01763     MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
01764       .addReg(StackPtr).addImm(CalleeAmt);
01765 
01766     // The EFLAGS implicit def is dead.
01767     New->getOperand(3).setIsDead();
01768 
01769     // We are not tracking the stack pointer adjustment by the callee, so make
01770     // sure we restore the stack pointer immediately after the call; there may
01771     // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
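    // E.g. given "call f; <spill store>; ADJCALLSTACKUP 0, 4" (a hypothetical
    // sequence), the compensating 'sub ESP, 4' lands right after the call,
    // before the spill store.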
01772     MachineBasicBlock::iterator B = MBB.begin();
01773     while (I != B && !std::prev(I)->isCall())
01774       --I;
01775     MBB.insert(I, New);
01776   }
01777 }
01778