LLVM API Documentation
//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

static unsigned estimateStackSize(MachineFunction &MF) {
  const MachineFrameInfo *FFI = MF.getFrameInfo();
  int Offset = 0;
  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
    int FixedOff = -FFI->getObjectOffset(i);
    if (FixedOff > Offset)
      Offset = FixedOff;
  }
  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
    if (FFI->isDeadObjectIndex(i))
      continue;
    Offset += FFI->getObjectSize(i);
    unsigned Align = FFI->getObjectAlignment(i);
    // Adjust to alignment boundary
    Offset = (Offset + Align - 1) / Align * Align;
  }
  // This does not include the 16 bytes used for fp and lr.
  return (unsigned)Offset;
}

bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
  if (!EnableRedZone)
    return false;
  // Don't use the red zone if the function explicitly asks us not to.
  // This is typically used for kernel code.
  if (MF.getFunction()->getAttributes().hasAttribute(
          AttributeSet::FunctionIndex, Attribute::NoRedZone))
    return false;

  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  unsigned NumBytes = AFI->getLocalStackSize();

  // Note: currently hasFP() is always true for hasCalls(), but that's an
  // implementation detail of the current code, not a strict requirement,
  // so stay safe here and check both.
  if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
    return false;
  return true;
}

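// Illustrative sketch, assuming the aarch64-redzone flag is enabled and a leaf
// function whose locals total 96 bytes (no calls, no FP): canUseRedZone()
// returns true, so the prologue skips the SP adjustment entirely and locals
// are addressed below the live SP with negative, unscaled offsets, e.g.
//
//   foo:
//     stur  x0, [sp, #-16]      // red zone access; sp itself never moves
//     ...
//     ret
//
// With the flag left at its default (off), the same function would instead
// bracket its body with "sub sp, sp, #96" / "add sp, sp, #96".
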
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();

#ifndef NDEBUG
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  assert(!RegInfo->needsStackRealignment(MF) &&
         "No stack realignment on AArch64!");
#endif

  return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken());
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}

void AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  DebugLoc DL = I->getDebugLoc();
  int Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  if (!TFI->hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    int64_t Amount = I->getOperand(0).getImm();
    Amount = RoundUpToAlignment(Amount, Align);
    if (!IsDestroy)
      Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too so
    // this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 24-bits
      // because there's no guaranteed temporary register available.
      //
      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      // 1) For offset <= 12-bit, we use LSL #0
      // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
      //    LSL #0, and the other uses LSL #12.
      //
      // Most call frames will be allocated at the start of a function so
      // this is OK, but it is a limitation that needs dealing with.
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
    emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
                    TII);
  }
  MBB.erase(I);
}

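// Worked example for the 24-bit limit noted in the FIXME above: ADD/SUB
// (immediate) takes a 12-bit immediate, optionally shifted left by 12, so a
// large SP adjustment such as 0x45678 bytes can be legalized by
// emitFrameOffset() into (roughly) two instructions:
//
//   sub sp, sp, #0x45, lsl #12    // 0x45000
//   sub sp, sp, #0x678            // 0x45000 + 0x678 == 0x45678
//
// Anything beyond two instructions would need a scratch register to
// materialize the offset, which is why the assert above caps the adjustment
// at 24 bits.
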
void AArch64FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    unsigned FramePtr) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty())
    return;

  const DataLayout *TD = MF.getSubtarget().getDataLayout();
  bool HasFP = hasFP(MF);

  // Calculate amount of bytes used for return address storing.
  int stackGrowth = -TD->getPointerSize(0);

  // Calculate offsets.
  int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
  unsigned TotalSkipped = 0;
  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) -
                     getOffsetOfLocalArea() + saveAreaOffset;

    // Don't output a new CFI directive if we're re-saving the frame pointer or
    // link register. This happens when the PrologEpilogInserter has inserted an
    // extra "STP" of the frame pointer and link register -- the "emitPrologue"
    // method automatically generates the directives when frame pointers are
    // used. If we generate CFI directives for the extra "STP"s, the linker will
    // lose track of the correct values for the frame pointer and link register.
    if (HasFP && (FramePtr == Reg || Reg == AArch64::LR)) {
      TotalSkipped += stackGrowth;
      continue;
    }

    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, DwarfReg, Offset - TotalSkipped));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }
}

void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (NumBytes && !canUseRedZone(MF)) {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    } else if (NumBytes) {
      ++NumRedZoneFunctions;
    }

    return;
  }

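  // Illustrative sketch for the leaf (no stack frame) path above, assuming
  // 32 bytes of locals and the red zone disabled: the only FrameSetup code
  // emitted is
  //
  //   sub sp, sp, #32
  //   .cfi_def_cfa_offset 32
  //
  // With the red zone enabled and NumBytes <= 128, nothing is emitted at all
  // and the function is merely counted in the NumRedZoneFunctions statistic.
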
  // Only set up FP if we actually need to.
  int FPOffset = 0;
  if (HasFP) {
    // First instruction must a) allocate the stack and b) have an immediate
    // that is a multiple of -2.
    assert((MBBI->getOpcode() == AArch64::STPXpre ||
            MBBI->getOpcode() == AArch64::STPDpre) &&
           MBBI->getOperand(3).getReg() == AArch64::SP &&
           MBBI->getOperand(4).getImm() < 0 &&
           (MBBI->getOperand(4).getImm() & 1) == 0);

    // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
    // required for the callee saved register area we get the frame pointer
    // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
    FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
    assert(FPOffset >= 0 && "Bad Framepointer Offset");
  }

  // Move past the saves of the callee-saved registers.
  while (MBBI->getOpcode() == AArch64::STPXi ||
         MBBI->getOpcode() == AArch64::STPDi ||
         MBBI->getOpcode() == AArch64::STPXpre ||
         MBBI->getOpcode() == AArch64::STPDpre) {
    ++MBBI;
    NumBytes -= 16;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  if (HasFP) {
    // Issue   sub fp, sp, FPOffset   or
    //         mov fp, sp             when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  //
  if (RegInfo->hasBasePointer(MF))
    TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);

  if (needsFrameMoves) {
    const DataLayout *TD = MF.getSubtarget().getDataLayout();
    const int StackGrowth = -TD->getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);

    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    //  Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    //  Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |               X28 Register                |
    // 10014 |               X28 Register                |
    //       +-------------------------------------------+
    // 10018 |               X27 Register                |
    // 1001c |               X27 Register                |
    //       +===========================================+
    // 10020 |              Frame Pointer                |
    // 10024 |              Frame Pointer                |
    //       +-------------------------------------------+
    // 10028 |              Link Register                |
    // 1002c |              Link Register                |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Record the location of the stored LR
      unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Record the location of the stored FP
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Now emit the moves for whatever callee saved regs we have.
    emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
  }
}

static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) {
  for (unsigned i = 0; CSRegs[i]; ++i)
    if (Reg == CSRegs[i])
      return true;
  return false;
}

static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
  unsigned RtIdx = 0;
  if (MI->getOpcode() == AArch64::LDPXpost ||
      MI->getOpcode() == AArch64::LDPDpost)
    RtIdx = 1;

  if (MI->getOpcode() == AArch64::LDPXpost ||
      MI->getOpcode() == AArch64::LDPDpost ||
      MI->getOpcode() == AArch64::LDPXi || MI->getOpcode() == AArch64::LDPDi) {
    if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) ||
        !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) ||
        MI->getOperand(RtIdx + 2).getReg() != AArch64::SP)
      return false;
    return true;
  }

  return false;
}

void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  DebugLoc DL = MBBI->getDebugLoc();
  unsigned RetOpcode = MBBI->getOpcode();

  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (NumRestores * 16) |         |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.
  NumBytes += ArgumentPopSize;

  unsigned NumRestores = 0;
  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBBI;
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  if (LastPopI != MBB.begin()) {
    do {
      ++NumRestores;
      --LastPopI;
    } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
    if (!isCSRestore(LastPopI, CSRegs)) {
      ++LastPopI;
      --NumRestores;
    }
  }
  NumBytes -= NumRestores * 16;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
                      TII);
    return;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
}

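// Worked example for the final SP restore above, assuming a function with a
// frame pointer and two callee-saved pairs (NumRestores == 2): the offset is
// -(NumRestores - 1) * 16 == -16, i.e. sp is rewound to fp - 16. In the
// prologue diagram earlier in this file (fp == 10020, callee-save area
// beginning at 10010), that re-points sp at 10010, so the LDP restore
// sequence that follows, ending in a post-indexed LDP, can reload the pairs
// and deallocate the area.
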
/// getFrameIndexOffset - Returns the displacement from the frame register to
/// the stack frame of the specified index.
int AArch64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
                                              int FI) const {
  unsigned FrameReg;
  return getFrameIndexReference(MF, FI, FrameReg);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                 int FI,
                                                 unsigned &FrameReg) const {
  return resolveFrameIndexReference(MF, FI, FrameReg);
}

int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                                     int FI, unsigned &FrameReg,
                                                     bool PreferFP) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  int FPOffset = MFI->getObjectOffset(FI) + 16;
  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
  bool isFixed = MFI->isFixedObjectIndex(FI);

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs (and thus the SP isn't reliable as a base).
  // Make sure useFPForScavengingIndex() does the right thing for the emergency
  // spill slot.
  bool UseFP = false;
  if (AFI->hasStackFrame()) {
    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
    if (isFixed) {
      UseFP = hasFP(MF);
    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
      // Use SP or FP, whichever gives us the best chance of the offset
      // being in range for direct access. If the FPOffset is positive,
      // that'll always be best, as the SP will be even further away.
      // If the FPOffset is negative, we have to keep in mind that the
      // available offset range for negative offsets is smaller than for
      // positive ones. If we have variable sized objects, we're stuck with
      // using the FP regardless, though, as the SP offset is unknown
      // and we don't have a base pointer available. If an offset is
      // available via the FP and the SP, use whichever is closest.
      if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
          (FPOffset >= -256 && Offset > -FPOffset))
        UseFP = true;
    }
  }

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return FPOffset;
  }

  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    FrameReg = AArch64::SP;
    // If we're using the red zone for this function, the SP won't actually
    // be adjusted, so the offsets will be negative. They're also all
    // within range of the signed 9-bit immediate instructions.
    if (canUseRedZone(MF))
      Offset -= AFI->getLocalStackSize();
  }

  return Offset;
}

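// Worked example for resolveFrameIndexReference() above, assuming a local with
// frame-object offset -24 in a function that has an FP, no base pointer, no
// variable-sized objects, and a 48-byte frame: FPOffset = -24 + 16 = -8 and
// the SP-relative Offset = -24 + 48 = 24. Since FPOffset >= -256 and
// 24 > -FPOffset, the FP is chosen as the closer base and the function returns
// -8 with FrameReg set to the frame register; otherwise it would return 24
// against SP.
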
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
  if (Reg != AArch64::LR)
    return getKillRegState(true);

  // LR may be referred to later by an @llvm.returnaddress intrinsic.
  bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR);
  bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
  return getKillRegState(LRKill);
}

bool AArch64FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  unsigned Count = CSI.size();
  DebugLoc DL;
  assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");

  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  for (unsigned i = 0; i < Count; i += 2) {
    unsigned idx = Count - i - 2;
    unsigned Reg1 = CSI[idx].getReg();
    unsigned Reg2 = CSI[idx + 1].getReg();
    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
           "Out of order callee saved regs!");
    unsigned StrOpc;
    assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
    assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
    // Issue sequence of non-sp increment and pi sp spills for cs regs. The
    // first spill is a pre-increment that allocates the stack.
    // For example:
    //    stp     x22, x21, [sp, #-48]!   // addImm(-6)
    //    stp     x20, x19, [sp, #16]     // addImm(+2)
    //    stp     fp, lr, [sp, #32]       // addImm(+4)
    // Rationale: This sequence saves uop updates compared to a sequence of
    // pre-increment spills like stp xi,xj,[sp,#-16]!
    // Note: Similar rationale and sequence for restores in epilog.
    if (AArch64::GPR64RegClass.contains(Reg1)) {
      assert(AArch64::GPR64RegClass.contains(Reg2) &&
             "Expected GPR64 callee-saved register pair!");
      // For first spill use pre-increment store.
      if (i == 0)
        StrOpc = AArch64::STPXpre;
      else
        StrOpc = AArch64::STPXi;
    } else if (AArch64::FPR64RegClass.contains(Reg1)) {
      assert(AArch64::FPR64RegClass.contains(Reg2) &&
             "Expected FPR64 callee-saved register pair!");
      // For first spill use pre-increment store.
      if (i == 0)
        StrOpc = AArch64::STPDpre;
      else
        StrOpc = AArch64::STPDi;
    } else
      llvm_unreachable("Unexpected callee saved register!");
    DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
                 << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
                 << ", " << CSI[idx + 1].getFrameIdx() << ")\n");
    // Compute offset: i = 0 => offset = -Count;
    // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
    const int Offset = (i == 0) ? -Count : i;
    assert((Offset >= -64 && Offset <= 63) &&
           "Offset out of bounds for STP immediate");
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
    if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
      MIB.addReg(AArch64::SP, RegState::Define);

    MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
        .addReg(Reg1, getPrologueDeath(MF, Reg1))
        .addReg(AArch64::SP)
        .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
        .setMIFlag(MachineInstr::FrameSetup);
  }
  return true;
}

bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  unsigned Count = CSI.size();
  DebugLoc DL;
  assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");

  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  for (unsigned i = 0; i < Count; i += 2) {
    unsigned Reg1 = CSI[i].getReg();
    unsigned Reg2 = CSI[i + 1].getReg();
    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
           "Out of order callee saved regs!");
    // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
    // the last load is sp-pi post-increment and de-allocates the stack:
    // For example:
    //    ldp     fp, lr, [sp, #32]       // addImm(+4)
    //    ldp     x20, x19, [sp, #16]     // addImm(+2)
    //    ldp     x22, x21, [sp], #48     // addImm(+6)
    // Note: see comment in spillCalleeSavedRegisters()
    unsigned LdrOpc;

    assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
    assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
    if (AArch64::GPR64RegClass.contains(Reg1)) {
      assert(AArch64::GPR64RegClass.contains(Reg2) &&
             "Expected GPR64 callee-saved register pair!");
      if (i == Count - 2)
        LdrOpc = AArch64::LDPXpost;
      else
        LdrOpc = AArch64::LDPXi;
    } else if (AArch64::FPR64RegClass.contains(Reg1)) {
      assert(AArch64::FPR64RegClass.contains(Reg2) &&
             "Expected FPR64 callee-saved register pair!");
      if (i == Count - 2)
        LdrOpc = AArch64::LDPDpost;
      else
        LdrOpc = AArch64::LDPDi;
    } else
      llvm_unreachable("Unexpected callee saved register!");
    DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
                 << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
                 << ", " << CSI[i + 1].getFrameIdx() << ")\n");

    // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4;
    // etc.
    const int Offset = (i == Count - 2) ? Count : Count - i - 2;
    assert((Offset >= -64 && Offset <= 63) &&
           "Offset out of bounds for LDP immediate");
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost)
      MIB.addReg(AArch64::SP, RegState::Define);

    MIB.addReg(Reg2, getDefRegState(true))
        .addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
        .addImm(Offset); // [sp], #offset * 8  or  [sp, #offset * 8]
                         // where the factor * 8 is implicit
  }
  return true;
}

void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
    MachineFunction &MF, RegScavenger *RS) const {
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  SmallVector<unsigned, 4> UnspilledCSGPRs;
  SmallVector<unsigned, 4> UnspilledCSFPRs;

  // The frame record needs to be created by saving the appropriate registers
  if (hasFP(MF)) {
    MRI->setPhysRegUsed(AArch64::FP);
    MRI->setPhysRegUsed(AArch64::LR);
  }

  // Spill the BasePtr if it's used. Do this first thing so that the
  // getCalleeSavedRegs() below will get the right answer.
  if (RegInfo->hasBasePointer(MF))
    MRI->setPhysRegUsed(RegInfo->getBaseRegister());

  // If any callee-saved registers are used, the frame cannot be eliminated.
  unsigned NumGPRSpilled = 0;
  unsigned NumFPRSpilled = 0;
  bool ExtraCSSpill = false;
  bool CanEliminateFrame = true;
  DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);

  // Check pairs of consecutive callee-saved registers.
  for (unsigned i = 0; CSRegs[i]; i += 2) {
    assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");

    const unsigned OddReg = CSRegs[i];
    const unsigned EvenReg = CSRegs[i + 1];
    assert((AArch64::GPR64RegClass.contains(OddReg) &&
            AArch64::GPR64RegClass.contains(EvenReg)) ^
               (AArch64::FPR64RegClass.contains(OddReg) &&
                AArch64::FPR64RegClass.contains(EvenReg)) &&
           "Register class mismatch!");

    const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
    const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);

    // Early exit if none of the registers in the register pair is actually
    // used.
    if (!OddRegUsed && !EvenRegUsed) {
      if (AArch64::GPR64RegClass.contains(OddReg)) {
        UnspilledCSGPRs.push_back(OddReg);
        UnspilledCSGPRs.push_back(EvenReg);
      } else {
        UnspilledCSFPRs.push_back(OddReg);
        UnspilledCSFPRs.push_back(EvenReg);
      }
      continue;
    }

    unsigned Reg = AArch64::NoRegister;
    // If only one of the registers of the register pair is used, make sure to
    // mark the other one as used as well.
    if (OddRegUsed ^ EvenRegUsed) {
      // Find out which register is the additional spill.
      Reg = OddRegUsed ? EvenReg : OddReg;
      MRI->setPhysRegUsed(Reg);
    }

    DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
    DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));

    assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
            (RegInfo->getEncodingValue(OddReg) + 1 ==
             RegInfo->getEncodingValue(EvenReg))) &&
           "Register pair of non-adjacent registers!");
    if (AArch64::GPR64RegClass.contains(OddReg)) {
      NumGPRSpilled += 2;
      // If it's not a reserved register, we can use it in lieu of an
      // emergency spill slot for the register scavenger.
      // FIXME: It would be better to instead keep looking and choose another
      // unspilled register that isn't reserved, if there is one.
      if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
        ExtraCSSpill = true;
    } else
      NumFPRSpilled += 2;

    CanEliminateFrame = false;
  }

  // FIXME: Set BigStack if any stack slot references may be out of range.
  // For now, just conservatively guestimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
  DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
  bool BigStack = (CFSize >= 256);
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);

  // Estimate if we might need to scavenge a register at some point in order
  // to materialize a stack offset. If so, either spill one additional
  // callee-saved register or reserve a special spill slot to facilitate
  // register scavenging. If we already spilled an extra callee-saved register
  // above to keep the number of spills even, we don't need to do anything else
  // here.
  if (BigStack && !ExtraCSSpill) {

    // If we're adding a register to spill here, we have to add two of them
    // to keep the number of regs to spill even.
    assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
    unsigned Count = 0;
    while (!UnspilledCSGPRs.empty() && Count < 2) {
      unsigned Reg = UnspilledCSGPRs.back();
      UnspilledCSGPRs.pop_back();
      DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
                   << " to get a scratch register.\n");
      MRI->setPhysRegUsed(Reg);
      ExtraCSSpill = true;
      ++Count;
    }

    // If we didn't find an extra callee-saved register to spill, create
    // an emergency spill slot.
    if (!ExtraCSSpill) {
      const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
      int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
      RS->addScavengingFrameIndex(FI);
      DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                   << " as the emergency spill slot.\n");
    }
  }
}
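
// Worked example for the BigStack estimate above, assuming roughly 230 bytes
// of locals and two spilled GPR pairs: CFSize = 230 + 8 * 4 = 262 >= 256, so
// BigStack is set (256 mirrors the roughly +/-256-byte reach of the unscaled
// 9-bit load/store offsets). If neither spilled pair yielded a usable scratch
// register (ExtraCSSpill still false), the code above marks up to two
// unspilled callee-saved GPRs as used or, failing that, creates an 8-byte
// emergency spill slot for the register scavenger.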