//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "ARMISelLowering.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;

#define DEBUG_TYPE "arm-isel"

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");

cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

namespace {
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
               ParmContext PC)
        : CCState(CC, isVarArg, MF, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}

// The APCS parameter registers.
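// These four GPRs (r0-r3) carry the first integer arguments under both the
// APCS and AAPCS calling conventions; further arguments go on the stack.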
static const MCPhysReg GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
                                       MVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT, Promote);
    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);

    setOperationAction(ISD::STORE, VT, Promote);
    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
  }

  MVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  if (ElemTy == MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
  setOperationAction(ISD::SELECT, VT, Expand);
  setOperationAction(ISD::SELECT_CC, VT, Expand);
  setOperationAction(ISD::VSELECT, VT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT, Promote);
    AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    setOperationAction(ISD::OR, VT, Promote);
    AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
    setOperationAction(ISD::XOR, VT, Promote);
    AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT, Expand);
  setOperationAction(ISD::UDIV, VT, Expand);
  setOperationAction(ISD::FDIV, VT, Expand);
  setOperationAction(ISD::SREM, VT, Expand);
  setOperationAction(ISD::UREM, VT, Expand);
  setOperationAction(ISD::FREM, VT, Expand);
}

void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPairRegClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
  if (TT.isOSBinFormatMachO())
    return new TargetLoweringObjectFileMachO();
  if (TT.isOSWindows())
    return new TargetLoweringObjectFileCOFF();
  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getSubtargetImpl()->getRegisterInfo();
  Itins = TM.getSubtargetImpl()->getInstrItineraryData();

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (Subtarget->isTargetMachO()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
        Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32, "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);

      // Double-precision comparisons.
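      // As with the single-precision helpers above, each __*2vfp comparison
      // routine returns a nonzero value when its condition holds, so the
      // condition code registered with setCmpLibcallCC describes how the
      // integer result is tested against zero.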
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64, "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
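  // Clearing a libcall name tells the legalizer that no runtime helper exists,
  // so 128-bit shifts are expanded inline instead of emitting a call.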
  setLibcallName(RTLIB::SHL_I128, nullptr);
  setLibcallName(RTLIB::SRL_I128, nullptr);
  setLibcallName(RTLIB::SRA_I128, nullptr);

  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
      !Subtarget->isTargetWindows()) {
    static const struct {
      const RTLIB::Libcall Op;
      const char * const Name;
      const CallingConv::ID CC;
      const ISD::CondCode Cond;
    } LibraryCalls[] = {
      // Double-precision floating-point arithmetic helper functions
      // RTABI chapter 4.1.2, Table 2
      { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Double-precision floating-point comparison helper functions
      // RTABI chapter 4.1.2, Table 3
      { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
      { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },

      // Single-precision floating-point arithmetic helper functions
      // RTABI chapter 4.1.2, Table 4
      { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Single-precision floating-point comparison helper functions
      // RTABI chapter 4.1.2, Table 5
      { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
      { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },

      // Floating-point to integer conversions.
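      // The 'z' suffix on the conversion helpers below indicates
      // round-toward-zero, matching FP_TO_SINT/FP_TO_UINT semantics.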
      // RTABI chapter 4.1.2, Table 6
      { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Conversions between floating types.
      // RTABI chapter 4.1.2, Table 7
      { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Integer to floating-point conversions.
      // RTABI chapter 4.1.2, Table 8
      { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Long long helper functions
      // RTABI chapter 4.2, Table 9
      { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Integer division functions
      // RTABI chapter 4.3.1
      { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Memory operations
      // RTABI chapter 4.3.4
      { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
    };

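    // Walk the table and register each entry: the helper's name, the AAPCS
    // calling convention it must be invoked with, and, for the comparison
    // helpers, how their boolean result is interpreted.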
    for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
      if (LC.Cond != ISD::SETCC_INVALID)
        setCmpLibcallCC(LC.Op, LC.Cond);
    }
  }

  if (Subtarget->isTargetWindows()) {
    static const struct {
      const RTLIB::Libcall Op;
      const char * const Name;
      const CallingConv::ID CC;
    } LibraryCalls[] = {
      { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
    };

    for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
    }
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
  if (Subtarget->getTargetTriple().isiOS() &&
      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }

  // The half <-> float conversion functions are always soft-float, but are
  // needed for some targets which use a hard-float calling convention by
  // default.
  if (Subtarget->isAAPCS_ABI()) {
    setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
  } else {
    setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
    setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
    setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
  else
    addRegisterClass(MVT::i32, &ARM::GPRRegClass);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    addRegisterClass(MVT::f64, &ARM::DPRRegClass);
  }

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction((MVT::SimpleValueType)VT,
                          (MVT::SimpleValueType)InnerVT, Expand);
    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);

    setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);

    setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
  }

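  // ConstantFP is custom lowered so that, where the subtarget allows it, FP
  // immediates that fit a VMOV encoding can be materialized directly rather
  // than loaded from the constant pool.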
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
    // natively supported for v4f32.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    // FIXME: Code duplication: FDIV and FREM are expanded always, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    // FIXME: Create unittest.
    // In other words, find a case where "copysign" appears in the DAG with
    // vector operands.
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    // FIXME: Code duplication: SETCC has custom operation action, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    // FIXME: Create unittest for FNEG and for FABS.
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
    setOperationAction(ISD::FMA, MVT::v2f64, Expand);

    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);

    // Mark v2f32 intrinsics.
    setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions.
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
    // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with a
    // destination type that is wider than the source, nor does it have a
    // FP_TO_[SU]INT instruction with a narrower destination than source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);

    setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);

    // NEON does not have single instruction CTPOP for vectors with element
    // types wider than 8-bits. However, custom lowering can leverage the
    // v8i8/v16i8 vcnt instruction.
    setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
    setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);

    // NEON only has FMA instructions as of VFP4.
    if (!Subtarget->hasVFP4()) {
      setOperationAction(ISD::FMA, MVT::v2f32, Expand);
      setOperationAction(ISD::FMA, MVT::v4f32, Expand);
    }

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);

    // It is legal to extload from v4i8 to v4i16 or v4i32.
    MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
                  MVT::v4i16, MVT::v2i16,
                  MVT::v2i32};
    for (unsigned i = 0; i < 6; ++i) {
      setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
    }
  }

  // ARM and Thumb2 support UMLAL/SMLAL.
  if (!Subtarget->isThumb1Only())
    setTargetDAGCombine(ISD::ADDC);

  if (Subtarget->isFPOnlySP()) {
    // When targeting a floating-point unit with only single-precision
    // operations, f64 is legal for the few double-precision instructions which
    // are present. However, no double-precision operations other than moves,
    // loads and stores are provided by the hardware.
    setOperationAction(ISD::FADD, MVT::f64, Expand);
    setOperationAction(ISD::FSUB, MVT::f64, Expand);
    setOperationAction(ISD::FMUL, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FDIV, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FNEG, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FLOG, MVT::f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::f64, Expand);
    setOperationAction(ISD::FEXP, MVT::f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
    setOperationAction(ISD::FRINT, MVT::f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
    setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
  }

  computeRegisterProperties();

  // ARM does not have floating-point extending loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);

  // ... or truncating stores.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, MVT::i1, Legal);
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  setOperationAction(ISD::SADDO, MVT::i32, Custom);
  setOperationAction(ISD::UADDO, MVT::i32, Custom);
  setOperationAction(ISD::SSUBO, MVT::i32, Custom);
  setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // i64 operation support.
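  // There is no 64-bit ALU, so i64 multiplies are expanded and wide shifts
  // are split into 32-bit operations (custom lowering below).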
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  if (!Subtarget->isThumb1Only()) {
    // FIXME: We should do this for Thumb1 as well.
    setOperationAction(ISD::ADDC, MVT::i32, Custom);
    setOperationAction(ISD::ADDE, MVT::i32, Custom);
    setOperationAction(ISD::SUBC, MVT::i32, Custom);
    setOperationAction(ISD::SUBE, MVT::i32, Custom);
  }

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // These just redirect to CTTZ and CTLZ on ARM.
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
      !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
    // These are expanded into libcalls if the cpu doesn't have HW divider.
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }

  // FIXME: Also set divmod for SREM on EABI.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  // Register based DivRem for AEABI (RTABI 4.2)
  if (Subtarget->isTargetAEABI()) {
    setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");

    setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);

    setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  }

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  if (!Subtarget->isTargetMachO()) {
    // Non-MachO platforms may return values in these registers via the
    // personality function.
    setExceptionPointerRegister(ARM::R0);
    setExceptionSelectorRegister(ARM::R1);
  }

  if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion. If we are targeting a single threaded system,
  // then set them all for expand so we can lower them later into their
  // non-atomic form.
  if (TM.Options.ThreadModel == ThreadModel::Single)
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
  else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
    // ATOMIC_FENCE needs custom lowering; the others should have been expanded
    // to ldrex/strex loops already.
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

    // On v8, we have particularly efficient implementations of atomic fences
    // if they can be combined with nearby atomic loads and stores.
    if (!Subtarget->hasV8Ops()) {
      // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
      setInsertFencesForAtomic(true);
    }
  } else {
    // If there's anything we can use as a barrier, go through custom lowering
    // for ATOMIC_FENCE.
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
                       Subtarget->hasAnyDataBarrier() ? Custom : Expand);

    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
  }

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  }

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow.
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  if (!Subtarget->hasVFP4()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  }

  // Various VFP goodness.
  if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }

    // v8 adds f64 <-> f16 conversion. Before that it should be expanded.
    if (!Subtarget->hasV8Ops()) {
      setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
      setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    }

    // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
      setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
    }
  }

  // Combine sin / cos into one node or libcall if possible.
  if (Subtarget->hasSinCos()) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
    if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
      // For iOS, we don't want the normal expansion of a libcall to sincos.
      // We want to issue a libcall to __sincos_stret instead.
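      // __sincos_stret returns both results at once, so a paired sin+cos only
      // costs one call.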
      setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
      setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
    }
  }

  // ARMv8 implements a lot of rounding-like FP operations.
  if (Subtarget->hasV8Ops()) {
    static MVT RoundingTypes[] = {MVT::f32, MVT::f64};
    for (const auto Ty : RoundingTypes) {
      setOperationAction(ISD::FFLOOR, Ty, Legal);
      setOperationAction(ISD::FCEIL, Ty, Legal);
      setOperationAction(ISD::FROUND, Ty, Legal);
      setOperationAction(ISD::FTRUNC, Ty, Legal);
      setOperationAction(ISD::FNEARBYINT, Ty, Legal);
      setOperationAction(ISD::FRINT, Ty, Legal);
    }
  }
  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::XOR);

  if (Subtarget->hasV6Ops())
    setTargetDAGCombine(ISD::SRL);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
      !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  MaxStoresPerMemset = 8;
  MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
  MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
  MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  // Prefer likely predicted branches to selects on out-of-order cores.
  PredictableSelectIsExpensive = Subtarget->isLikeA9();

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super-classes'
// representatives transitively. We have not implemented this because of the
// difficulty prior to coalescing of modeling operand register classes due to
// the common occurrence of cross-class copies and subregister insertions and
// extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as the representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = &ARM::DPRRegClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = &ARM::DPRRegClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = &ARM::DPRRegClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return nullptr;
  case ARMISD::Wrapper: return "ARMISD::Wrapper";
  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
  case ARMISD::CALL: return "ARMISD::CALL";
  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL: return "ARMISD::tCALL";
  case ARMISD::BRCOND: return "ARMISD::BRCOND";
  case ARMISD::BR_JT: return "ARMISD::BR_JT";
  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
  case ARMISD::CMP: return "ARMISD::CMP";
  case ARMISD::CMN: return "ARMISD::CMN";
  case ARMISD::CMPZ: return "ARMISD::CMPZ";
  case ARMISD::CMPFP: return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";

  case ARMISD::CMOV: return "ARMISD::CMOV";

  case ARMISD::RBIT: return "ARMISD::RBIT";

  case ARMISD::FTOSI: return "ARMISD::FTOSI";
  case ARMISD::FTOUI: return "ARMISD::FTOUI";
  case ARMISD::SITOF: return "ARMISD::SITOF";
  case ARMISD::UITOF: return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
  case ARMISD::RRX: return "ARMISD::RRX";

  case ARMISD::ADDC: return "ARMISD::ADDC";
  case ARMISD::ADDE: return "ARMISD::ADDE";
  case ARMISD::SUBC: return "ARMISD::SUBC";
  case ARMISD::SUBE: return "ARMISD::SUBE";

  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";

  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";

  case ARMISD::VCEQ: return "ARMISD::VCEQ";
  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
  case ARMISD::VCGE: return "ARMISD::VCGE";
  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
  case ARMISD::VCGEU: return "ARMISD::VCGEU";
  case ARMISD::VCGT: return "ARMISD::VCGT";
  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
  case ARMISD::VCGTU: return "ARMISD::VCGTU";
  case ARMISD::VTST: return "ARMISD::VTST";

  case ARMISD::VSHL: return "ARMISD::VSHL";
  case ARMISD::VSHRs: return "ARMISD::VSHRs";
  case ARMISD::VSHRu: return "ARMISD::VSHRu";
  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
  case ARMISD::VDUP: return "ARMISD::VDUP";
  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
  case ARMISD::VEXT: return "ARMISD::VEXT";
  case ARMISD::VREV64: return "ARMISD::VREV64";
  case ARMISD::VREV32: return "ARMISD::VREV32";
  case ARMISD::VREV16: return "ARMISD::VREV16";
  case ARMISD::VZIP: return "ARMISD::VZIP";
  case ARMISD::VUZP: return "ARMISD::VUZP";
  case ARMISD::VTRN: return "ARMISD::VTRN";
  case ARMISD::VTBL1: return "ARMISD::VTBL1";
  case ARMISD::VTBL2: return "ARMISD::VTBL2";
  case ARMISD::VMULLs: return "ARMISD::VMULLs";
  case ARMISD::VMULLu: return "ARMISD::VMULLu";
  case ARMISD::UMLAL: return "ARMISD::UMLAL";
  case ARMISD::SMLAL: return "ARMISD::SMLAL";
  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
  case ARMISD::FMAX: return "ARMISD::FMAX";
  case ARMISD::FMIN: return "ARMISD::FMIN";
  case ARMISD::VMAXNM: return "ARMISD::VMAXNM";
  case ARMISD::VMINNM: return "ARMISD::VMINNM";
  case ARMISD::BFI: return "ARMISD::BFI";
  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
  case ARMISD::VBSL: return "ARMISD::VBSL";
  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
  }
}

EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector()) return getPointerTy();
  return VT.changeVectorElementTypeToInteger();
}

/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return &ARM::QQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::QQQQPRRegClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  return ARM::createFastISel(funcInfo, libInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary is
  // not available.
  const TargetInstrInfo *TII =
      getTargetMachine().getSubtargetImpl()->getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::ILP;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// getEffectiveCallingConv - Get the effective calling convention, taking into
/// account presence of floating point hardware and calling convention
/// limitations, such as support for variadic functions.
CallingConv::ID
ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
                                           bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::ARM_AAPCS:
  case CallingConv::ARM_APCS:
  case CallingConv::GHC:
    return CC;
  case CallingConv::ARM_AAPCS_VFP:
CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; 01280 case CallingConv::C: 01281 if (!Subtarget->isAAPCS_ABI()) 01282 return CallingConv::ARM_APCS; 01283 else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && 01284 getTargetMachine().Options.FloatABIType == FloatABI::Hard && 01285 !isVarArg) 01286 return CallingConv::ARM_AAPCS_VFP; 01287 else 01288 return CallingConv::ARM_AAPCS; 01289 case CallingConv::Fast: 01290 if (!Subtarget->isAAPCS_ABI()) { 01291 if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) 01292 return CallingConv::Fast; 01293 return CallingConv::ARM_APCS; 01294 } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) 01295 return CallingConv::ARM_AAPCS_VFP; 01296 else 01297 return CallingConv::ARM_AAPCS; 01298 } 01299 } 01300 01301 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given 01302 /// CallingConvention. 01303 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, 01304 bool Return, 01305 bool isVarArg) const { 01306 switch (getEffectiveCallingConv(CC, isVarArg)) { 01307 default: 01308 llvm_unreachable("Unsupported calling convention"); 01309 case CallingConv::ARM_APCS: 01310 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); 01311 case CallingConv::ARM_AAPCS: 01312 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); 01313 case CallingConv::ARM_AAPCS_VFP: 01314 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 01315 case CallingConv::Fast: 01316 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); 01317 case CallingConv::GHC: 01318 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); 01319 } 01320 } 01321 01322 /// LowerCallResult - Lower the result values of a call into the 01323 /// appropriate copies out of appropriate physical registers. 01324 SDValue 01325 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 01326 CallingConv::ID CallConv, bool isVarArg, 01327 const SmallVectorImpl<ISD::InputArg> &Ins, 01328 SDLoc dl, SelectionDAG &DAG, 01329 SmallVectorImpl<SDValue> &InVals, 01330 bool isThisReturn, SDValue ThisVal) const { 01331 01332 // Assign locations to each value returned by this call. 01333 SmallVector<CCValAssign, 16> RVLocs; 01334 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 01335 *DAG.getContext(), Call); 01336 CCInfo.AnalyzeCallResult(Ins, 01337 CCAssignFnForNode(CallConv, /* Return*/ true, 01338 isVarArg)); 01339 01340 // Copy all of the result registers out of their specified physreg. 01341 for (unsigned i = 0; i != RVLocs.size(); ++i) { 01342 CCValAssign VA = RVLocs[i]; 01343 01344 // Pass 'this' value directly from the argument to return value, to avoid 01345 // reg unit interference 01346 if (i == 0 && isThisReturn) { 01347 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && 01348 "unexpected return calling convention register assignment"); 01349 InVals.push_back(ThisVal); 01350 continue; 01351 } 01352 01353 SDValue Val; 01354 if (VA.needsCustom()) { 01355 // Handle f64 or half of a v2f64. 
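// Editorial sketch of the reassembly that follows (added commentary): an f64
// returned in a pair of GPR locations comes back roughly as
//   lo = CopyFromReg(i32); hi = CopyFromReg(i32);   // glued to the call
//   f64 = ARMISD::VMOVDRR(lo, hi)                   // halves swapped on BE
// and a v2f64 result repeats this for each half, inserting the two f64 values
// into the vector with ISD::INSERT_VECTOR_ELT.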
01356 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 01357 InFlag); 01358 Chain = Lo.getValue(1); 01359 InFlag = Lo.getValue(2); 01360 VA = RVLocs[++i]; // skip ahead to next loc 01361 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 01362 InFlag); 01363 Chain = Hi.getValue(1); 01364 InFlag = Hi.getValue(2); 01365 if (!Subtarget->isLittle()) 01366 std::swap (Lo, Hi); 01367 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 01368 01369 if (VA.getLocVT() == MVT::v2f64) { 01370 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 01371 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 01372 DAG.getConstant(0, MVT::i32)); 01373 01374 VA = RVLocs[++i]; // skip ahead to next loc 01375 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 01376 Chain = Lo.getValue(1); 01377 InFlag = Lo.getValue(2); 01378 VA = RVLocs[++i]; // skip ahead to next loc 01379 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 01380 Chain = Hi.getValue(1); 01381 InFlag = Hi.getValue(2); 01382 if (!Subtarget->isLittle()) 01383 std::swap (Lo, Hi); 01384 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 01385 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 01386 DAG.getConstant(1, MVT::i32)); 01387 } 01388 } else { 01389 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), 01390 InFlag); 01391 Chain = Val.getValue(1); 01392 InFlag = Val.getValue(2); 01393 } 01394 01395 switch (VA.getLocInfo()) { 01396 default: llvm_unreachable("Unknown loc info!"); 01397 case CCValAssign::Full: break; 01398 case CCValAssign::BCvt: 01399 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); 01400 break; 01401 } 01402 01403 InVals.push_back(Val); 01404 } 01405 01406 return Chain; 01407 } 01408 01409 /// LowerMemOpCallTo - Store the argument to the stack. 01410 SDValue 01411 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, 01412 SDValue StackPtr, SDValue Arg, 01413 SDLoc dl, SelectionDAG &DAG, 01414 const CCValAssign &VA, 01415 ISD::ArgFlagsTy Flags) const { 01416 unsigned LocMemOffset = VA.getLocMemOffset(); 01417 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 01418 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 01419 return DAG.getStore(Chain, dl, Arg, PtrOff, 01420 MachinePointerInfo::getStack(LocMemOffset), 01421 false, false, 0); 01422 } 01423 01424 void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, 01425 SDValue Chain, SDValue &Arg, 01426 RegsToPassVector &RegsToPass, 01427 CCValAssign &VA, CCValAssign &NextVA, 01428 SDValue &StackPtr, 01429 SmallVectorImpl<SDValue> &MemOpChains, 01430 ISD::ArgFlagsTy Flags) const { 01431 01432 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 01433 DAG.getVTList(MVT::i32, MVT::i32), Arg); 01434 unsigned id = Subtarget->isLittle() ? 0 : 1; 01435 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); 01436 01437 if (NextVA.isRegLoc()) 01438 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); 01439 else { 01440 assert(NextVA.isMemLoc()); 01441 if (!StackPtr.getNode()) 01442 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 01443 01444 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), 01445 dl, DAG, NextVA, 01446 Flags)); 01447 } 01448 } 01449 01450 /// LowerCall - Lowering a call into a callseq_start <- 01451 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter 01452 /// nodes. 
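// Added overview (editorial, not from the original file). A typical non-tail
// call produced by this function is the chain
//   CALLSEQ_START(#NumBytes)
//     -> stores / CopyToReg nodes for the outgoing arguments (incl. byval)
//     -> ARMISD::CALL / CALL_PRED / CALL_NOLINK  (+ arg regs, reg mask, glue)
//   CALLSEQ_END(#NumBytes)
//     -> LowerCallResult, which copies the returned physregs into vregs.
// Sibling calls skip the stack adjustment (NumBytes is forced to 0) and are
// emitted as ARMISD::TC_RETURN instead.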
01453 SDValue 01454 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 01455 SmallVectorImpl<SDValue> &InVals) const { 01456 SelectionDAG &DAG = CLI.DAG; 01457 SDLoc &dl = CLI.DL; 01458 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 01459 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 01460 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 01461 SDValue Chain = CLI.Chain; 01462 SDValue Callee = CLI.Callee; 01463 bool &isTailCall = CLI.IsTailCall; 01464 CallingConv::ID CallConv = CLI.CallConv; 01465 bool doesNotRet = CLI.DoesNotReturn; 01466 bool isVarArg = CLI.IsVarArg; 01467 01468 MachineFunction &MF = DAG.getMachineFunction(); 01469 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); 01470 bool isThisReturn = false; 01471 bool isSibCall = false; 01472 01473 // Disable tail calls if they're not supported. 01474 if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls) 01475 isTailCall = false; 01476 01477 if (isTailCall) { 01478 // Check if it's really possible to do a tail call. 01479 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, 01480 isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), 01481 Outs, OutVals, Ins, DAG); 01482 if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) 01483 report_fatal_error("failed to perform tail call elimination on a call " 01484 "site marked musttail"); 01485 // We don't support GuaranteedTailCallOpt for ARM, only automatically 01486 // detected sibcalls. 01487 if (isTailCall) { 01488 ++NumTailCalls; 01489 isSibCall = true; 01490 } 01491 } 01492 01493 // Analyze operands of the call, assigning locations to each operand. 01494 SmallVector<CCValAssign, 16> ArgLocs; 01495 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 01496 *DAG.getContext(), Call); 01497 CCInfo.AnalyzeCallOperands(Outs, 01498 CCAssignFnForNode(CallConv, /* Return*/ false, 01499 isVarArg)); 01500 01501 // Get a count of how many bytes are to be pushed on the stack. 01502 unsigned NumBytes = CCInfo.getNextStackOffset(); 01503 01504 // For tail calls, memory operands are available in our caller's stack. 01505 if (isSibCall) 01506 NumBytes = 0; 01507 01508 // Adjust the stack pointer for the new arguments... 01509 // These operations are automatically eliminated by the prolog/epilog pass 01510 if (!isSibCall) 01511 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 01512 dl); 01513 01514 SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 01515 01516 RegsToPassVector RegsToPass; 01517 SmallVector<SDValue, 8> MemOpChains; 01518 01519 // Walk the register/memloc assignments, inserting copies/loads. In the case 01520 // of tail call optimization, arguments are handled later. 01521 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 01522 i != e; 01523 ++i, ++realArgIdx) { 01524 CCValAssign &VA = ArgLocs[i]; 01525 SDValue Arg = OutVals[realArgIdx]; 01526 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 01527 bool isByVal = Flags.isByVal(); 01528 01529 // Promote the value if needed. 
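// Added note (editorial): the switch below applies the CCValAssign::LocInfo
// chosen by the calling-convention analysis, e.g. an i8/i16 argument assigned
// to an i32 location is widened with SIGN_EXTEND / ZERO_EXTEND / ANY_EXTEND,
// while an f32 assigned to a GPR location is reinterpreted with ISD::BITCAST.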
01530 switch (VA.getLocInfo()) { 01531 default: llvm_unreachable("Unknown loc info!"); 01532 case CCValAssign::Full: break; 01533 case CCValAssign::SExt: 01534 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 01535 break; 01536 case CCValAssign::ZExt: 01537 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 01538 break; 01539 case CCValAssign::AExt: 01540 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 01541 break; 01542 case CCValAssign::BCvt: 01543 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 01544 break; 01545 } 01546 01547 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces 01548 if (VA.needsCustom()) { 01549 if (VA.getLocVT() == MVT::v2f64) { 01550 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 01551 DAG.getConstant(0, MVT::i32)); 01552 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 01553 DAG.getConstant(1, MVT::i32)); 01554 01555 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, 01556 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 01557 01558 VA = ArgLocs[++i]; // skip ahead to next loc 01559 if (VA.isRegLoc()) { 01560 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, 01561 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 01562 } else { 01563 assert(VA.isMemLoc()); 01564 01565 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, 01566 dl, DAG, VA, Flags)); 01567 } 01568 } else { 01569 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], 01570 StackPtr, MemOpChains, Flags); 01571 } 01572 } else if (VA.isRegLoc()) { 01573 if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) { 01574 assert(VA.getLocVT() == MVT::i32 && 01575 "unexpected calling convention register assignment"); 01576 assert(!Ins.empty() && Ins[0].VT == MVT::i32 && 01577 "unexpected use of 'returned'"); 01578 isThisReturn = true; 01579 } 01580 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 01581 } else if (isByVal) { 01582 assert(VA.isMemLoc()); 01583 unsigned offset = 0; 01584 01585 // True if this byval aggregate will be split between registers 01586 // and memory. 01587 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); 01588 unsigned CurByValIdx = CCInfo.getInRegsParamsProceed(); 01589 01590 if (CurByValIdx < ByValArgsCount) { 01591 01592 unsigned RegBegin, RegEnd; 01593 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); 01594 01595 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 01596 unsigned int i, j; 01597 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { 01598 SDValue Const = DAG.getConstant(4*i, MVT::i32); 01599 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 01600 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 01601 MachinePointerInfo(), 01602 false, false, false, 01603 DAG.InferPtrAlignment(AddArg)); 01604 MemOpChains.push_back(Load.getValue(1)); 01605 RegsToPass.push_back(std::make_pair(j, Load)); 01606 } 01607 01608 // If parameter size outsides register area, "offset" value 01609 // helps us to calculate stack slot for remained part properly. 
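// Worked example (added commentary): for a 20-byte byval argument that was
// given r2 and r3 by HandleByVal, RegBegin = r2 and RegEnd = r4, so the loop
// above emits two word loads into r2/r3 and offset becomes 2.  The remaining
// 20 - 4*2 = 12 bytes are then copied into the outgoing stack area by the
// ARMISD::COPY_STRUCT_BYVAL node built below.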
01610 offset = RegEnd - RegBegin; 01611 01612 CCInfo.nextInRegsParam(); 01613 } 01614 01615 if (Flags.getByValSize() > 4*offset) { 01616 unsigned LocMemOffset = VA.getLocMemOffset(); 01617 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); 01618 SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, 01619 StkPtrOff); 01620 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); 01621 SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); 01622 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, 01623 MVT::i32); 01624 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); 01625 01626 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 01627 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; 01628 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, 01629 Ops)); 01630 } 01631 } else if (!isSibCall) { 01632 assert(VA.isMemLoc()); 01633 01634 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 01635 dl, DAG, VA, Flags)); 01636 } 01637 } 01638 01639 if (!MemOpChains.empty()) 01640 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 01641 01642 // Build a sequence of copy-to-reg nodes chained together with token chain 01643 // and flag operands which copy the outgoing args into the appropriate regs. 01644 SDValue InFlag; 01645 // Tail call byval lowering might overwrite argument registers so in case of 01646 // tail call optimization the copies to registers are lowered later. 01647 if (!isTailCall) 01648 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 01649 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 01650 RegsToPass[i].second, InFlag); 01651 InFlag = Chain.getValue(1); 01652 } 01653 01654 // For tail calls lower the arguments to the 'real' stack slot. 01655 if (isTailCall) { 01656 // Force all the incoming stack arguments to be loaded from the stack 01657 // before any new outgoing arguments are stored to the stack, because the 01658 // outgoing stack slots may alias the incoming argument stack slots, and 01659 // the alias isn't otherwise explicit. This is slightly more conservative 01660 // than necessary, because it means that each store effectively depends 01661 // on every argument instead of just those arguments it would clobber. 01662 01663 // Do not flag preceding copytoreg stuff together with the following stuff. 01664 InFlag = SDValue(); 01665 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 01666 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 01667 RegsToPass[i].second, InFlag); 01668 InFlag = Chain.getValue(1); 01669 } 01670 InFlag = SDValue(); 01671 } 01672 01673 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 01674 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 01675 // node so that legalize doesn't hack it. 01676 bool isDirect = false; 01677 bool isARMFunc = false; 01678 bool isLocalARMFunc = false; 01679 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 01680 01681 if (EnableARMLongCalls) { 01682 assert((Subtarget->isTargetWindows() || 01683 getTargetMachine().getRelocationModel() == Reloc::Static) && 01684 "long-calls with non-static relocation model!"); 01685 // Handle a global address or an external symbol. If it's not one of 01686 // those, the target's already in a register, so we don't need to do 01687 // anything extra. 
01688 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 01689 const GlobalValue *GV = G->getGlobal(); 01690 // Create a constant pool entry for the callee address 01691 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 01692 ARMConstantPoolValue *CPV = 01693 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); 01694 01695 // Get the address of the callee into a register 01696 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 01697 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 01698 Callee = DAG.getLoad(getPointerTy(), dl, 01699 DAG.getEntryNode(), CPAddr, 01700 MachinePointerInfo::getConstantPool(), 01701 false, false, false, 0); 01702 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 01703 const char *Sym = S->getSymbol(); 01704 01705 // Create a constant pool entry for the callee address 01706 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 01707 ARMConstantPoolValue *CPV = 01708 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 01709 ARMPCLabelIndex, 0); 01710 // Get the address of the callee into a register 01711 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 01712 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 01713 Callee = DAG.getLoad(getPointerTy(), dl, 01714 DAG.getEntryNode(), CPAddr, 01715 MachinePointerInfo::getConstantPool(), 01716 false, false, false, 0); 01717 } 01718 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 01719 const GlobalValue *GV = G->getGlobal(); 01720 isDirect = true; 01721 bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); 01722 bool isStub = (isExt && Subtarget->isTargetMachO()) && 01723 getTargetMachine().getRelocationModel() != Reloc::Static; 01724 isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); 01725 // ARM call to a local ARM function is predicable. 01726 isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); 01727 // tBX takes a register source operand. 01728 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 01729 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); 01730 Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), 01731 DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 01732 0, ARMII::MO_NONLAZY)); 01733 Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, 01734 MachinePointerInfo::getGOT(), false, false, true, 0); 01735 } else if (Subtarget->isTargetCOFF()) { 01736 assert(Subtarget->isTargetWindows() && 01737 "Windows is the only supported COFF target"); 01738 unsigned TargetFlags = GV->hasDLLImportStorageClass() 01739 ? 
ARMII::MO_DLLIMPORT 01740 : ARMII::MO_NO_FLAG; 01741 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0, 01742 TargetFlags); 01743 if (GV->hasDLLImportStorageClass()) 01744 Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 01745 DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(), 01746 Callee), MachinePointerInfo::getGOT(), 01747 false, false, false, 0); 01748 } else { 01749 // On ELF targets for PIC code, direct calls should go through the PLT 01750 unsigned OpFlags = 0; 01751 if (Subtarget->isTargetELF() && 01752 getTargetMachine().getRelocationModel() == Reloc::PIC_) 01753 OpFlags = ARMII::MO_PLT; 01754 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); 01755 } 01756 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 01757 isDirect = true; 01758 bool isStub = Subtarget->isTargetMachO() && 01759 getTargetMachine().getRelocationModel() != Reloc::Static; 01760 isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); 01761 // tBX takes a register source operand. 01762 const char *Sym = S->getSymbol(); 01763 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 01764 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 01765 ARMConstantPoolValue *CPV = 01766 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 01767 ARMPCLabelIndex, 4); 01768 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 01769 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 01770 Callee = DAG.getLoad(getPointerTy(), dl, 01771 DAG.getEntryNode(), CPAddr, 01772 MachinePointerInfo::getConstantPool(), 01773 false, false, false, 0); 01774 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 01775 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 01776 getPointerTy(), Callee, PICLabel); 01777 } else { 01778 unsigned OpFlags = 0; 01779 // On ELF targets for PIC code, direct calls should go through the PLT 01780 if (Subtarget->isTargetELF() && 01781 getTargetMachine().getRelocationModel() == Reloc::PIC_) 01782 OpFlags = ARMII::MO_PLT; 01783 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); 01784 } 01785 } 01786 01787 // FIXME: handle tail calls differently. 01788 unsigned CallOpc; 01789 bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute( 01790 AttributeSet::FunctionIndex, Attribute::MinSize); 01791 if (Subtarget->isThumb()) { 01792 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 01793 CallOpc = ARMISD::CALL_NOLINK; 01794 else 01795 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; 01796 } else { 01797 if (!isDirect && !Subtarget->hasV5TOps()) 01798 CallOpc = ARMISD::CALL_NOLINK; 01799 else if (doesNotRet && isDirect && Subtarget->hasRAS() && 01800 // Emit regular call when code size is the priority 01801 !HasMinSizeAttr) 01802 // "mov lr, pc; b _foo" to avoid confusing the RSP 01803 CallOpc = ARMISD::CALL_NOLINK; 01804 else 01805 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; 01806 } 01807 01808 std::vector<SDValue> Ops; 01809 Ops.push_back(Chain); 01810 Ops.push_back(Callee); 01811 01812 // Add argument registers to the end of the list so that they are known live 01813 // into the call. 01814 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 01815 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 01816 RegsToPass[i].second.getValueType())); 01817 01818 // Add a register mask operand representing the call-preserved registers. 
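// Added note (editorial): the mask pushed below tells the register allocator
// which registers survive the call; everything outside the mask is treated as
// clobbered.  For calls using the 'returned' attribute on 'this', an
// R0-preserving variant of the mask is used when the calling convention
// provides one, so the value in r0 can be forwarded straight to the result.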
01819 if (!isTailCall) { 01820 const uint32_t *Mask; 01821 const TargetRegisterInfo *TRI = 01822 getTargetMachine().getSubtargetImpl()->getRegisterInfo(); 01823 const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); 01824 if (isThisReturn) { 01825 // For 'this' returns, use the R0-preserving mask if applicable 01826 Mask = ARI->getThisReturnPreservedMask(CallConv); 01827 if (!Mask) { 01828 // Set isThisReturn to false if the calling convention is not one that 01829 // allows 'returned' to be modeled in this way, so LowerCallResult does 01830 // not try to pass 'this' straight through 01831 isThisReturn = false; 01832 Mask = ARI->getCallPreservedMask(CallConv); 01833 } 01834 } else 01835 Mask = ARI->getCallPreservedMask(CallConv); 01836 01837 assert(Mask && "Missing call preserved mask for calling convention"); 01838 Ops.push_back(DAG.getRegisterMask(Mask)); 01839 } 01840 01841 if (InFlag.getNode()) 01842 Ops.push_back(InFlag); 01843 01844 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 01845 if (isTailCall) 01846 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); 01847 01848 // Returns a chain and a flag for retval copy to use. 01849 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); 01850 InFlag = Chain.getValue(1); 01851 01852 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 01853 DAG.getIntPtrConstant(0, true), InFlag, dl); 01854 if (!Ins.empty()) 01855 InFlag = Chain.getValue(1); 01856 01857 // Handle result values, copying them out of physregs into vregs that we 01858 // return. 01859 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, 01860 InVals, isThisReturn, 01861 isThisReturn ? OutVals[0] : SDValue()); 01862 } 01863 01864 /// HandleByVal - Every parameter *after* a byval parameter is passed 01865 /// on the stack. Remember the next parameter register to allocate, 01866 /// and then confiscate the rest of the parameter registers to insure 01867 /// this. 01868 void 01869 ARMTargetLowering::HandleByVal( 01870 CCState *State, unsigned &size, unsigned Align) const { 01871 unsigned reg = State->AllocateReg(GPRArgRegs, 4); 01872 assert((State->getCallOrPrologue() == Prologue || 01873 State->getCallOrPrologue() == Call) && 01874 "unhandled ParmContext"); 01875 01876 if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { 01877 if (Subtarget->isAAPCS_ABI() && Align > 4) { 01878 unsigned AlignInRegs = Align / 4; 01879 unsigned Waste = (ARM::R4 - reg) % AlignInRegs; 01880 for (unsigned i = 0; i < Waste; ++i) 01881 reg = State->AllocateReg(GPRArgRegs, 4); 01882 } 01883 if (reg != 0) { 01884 unsigned excess = 4 * (ARM::R4 - reg); 01885 01886 // Special case when NSAA != SP and parameter size greater than size of 01887 // all remained GPR regs. In that case we can't split parameter, we must 01888 // send it to stack. We also must set NCRN to R4, so waste all 01889 // remained registers. 01890 const unsigned NSAAOffset = State->getNextStackOffset(); 01891 if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { 01892 while (State->AllocateReg(GPRArgRegs, 4)) 01893 ; 01894 return; 01895 } 01896 01897 // First register for byval parameter is the first register that wasn't 01898 // allocated before this method call, so it would be "reg". 01899 // If parameter is small enough to be saved in range [reg, r4), then 01900 // the end (first after last) register would be reg + param-size-in-regs, 01901 // else parameter would be splitted between registers and stack, 01902 // end register would be r4 in this case. 
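// Worked example (added commentary), assuming register allocation reached r1
// before this byval parameter, i.e. reg == r1 and excess == 4*(r4-r1) == 12:
//   - an 8-byte byval:  size < excess, so ByValRegEnd = r1 + 8/4 = r3;
//     r1 and r2 are confiscated and the in-memory size becomes 0;
//   - a 20-byte byval:  size >= excess, so ByValRegEnd = r4; r1-r3 are
//     confiscated and size is reduced to 20 - 12 = 8 bytes left on the stack.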
01903 unsigned ByValRegBegin = reg; 01904 unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4; 01905 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); 01906 // Note, first register is allocated in the beginning of function already, 01907 // allocate remained amount of registers we need. 01908 for (unsigned i = reg+1; i != ByValRegEnd; ++i) 01909 State->AllocateReg(GPRArgRegs, 4); 01910 // A byval parameter that is split between registers and memory needs its 01911 // size truncated here. 01912 // In the case where the entire structure fits in registers, we set the 01913 // size in memory to zero. 01914 if (size < excess) 01915 size = 0; 01916 else 01917 size -= excess; 01918 } 01919 } 01920 } 01921 01922 /// MatchingStackOffset - Return true if the given stack call argument is 01923 /// already available in the same position (relatively) of the caller's 01924 /// incoming argument stack. 01925 static 01926 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 01927 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 01928 const TargetInstrInfo *TII) { 01929 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 01930 int FI = INT_MAX; 01931 if (Arg.getOpcode() == ISD::CopyFromReg) { 01932 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 01933 if (!TargetRegisterInfo::isVirtualRegister(VR)) 01934 return false; 01935 MachineInstr *Def = MRI->getVRegDef(VR); 01936 if (!Def) 01937 return false; 01938 if (!Flags.isByVal()) { 01939 if (!TII->isLoadFromStackSlot(Def, FI)) 01940 return false; 01941 } else { 01942 return false; 01943 } 01944 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 01945 if (Flags.isByVal()) 01946 // ByVal argument is passed in as a pointer but it's now being 01947 // dereferenced. e.g. 01948 // define @foo(%struct.X* %A) { 01949 // tail call @bar(%struct.X* byval %A) 01950 // } 01951 return false; 01952 SDValue Ptr = Ld->getBasePtr(); 01953 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 01954 if (!FINode) 01955 return false; 01956 FI = FINode->getIndex(); 01957 } else 01958 return false; 01959 01960 assert(FI != INT_MAX); 01961 if (!MFI->isFixedObjectIndex(FI)) 01962 return false; 01963 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 01964 } 01965 01966 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 01967 /// for tail call optimization. Targets which want to do tail call 01968 /// optimization should implement this function. 01969 bool 01970 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 01971 CallingConv::ID CalleeCC, 01972 bool isVarArg, 01973 bool isCalleeStructRet, 01974 bool isCallerStructRet, 01975 const SmallVectorImpl<ISD::OutputArg> &Outs, 01976 const SmallVectorImpl<SDValue> &OutVals, 01977 const SmallVectorImpl<ISD::InputArg> &Ins, 01978 SelectionDAG& DAG) const { 01979 const Function *CallerF = DAG.getMachineFunction().getFunction(); 01980 CallingConv::ID CallerCC = CallerF->getCallingConv(); 01981 bool CCMatch = CallerCC == CalleeCC; 01982 01983 // Look for obvious safe cases to perform tail call optimization that do not 01984 // require ABI changes. This is what gcc calls sibcall. 01985 01986 // Do not sibcall optimize vararg calls unless the call site is not passing 01987 // any arguments. 01988 if (isVarArg && !Outs.empty()) 01989 return false; 01990 01991 // Exception-handling functions need a special set of instructions to indicate 01992 // a return to the hardware. 
Tail-calling another function would probably 01993 // break this. 01994 if (CallerF->hasFnAttribute("interrupt")) 01995 return false; 01996 01997 // Also avoid sibcall optimization if either caller or callee uses struct 01998 // return semantics. 01999 if (isCalleeStructRet || isCallerStructRet) 02000 return false; 02001 02002 // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: 02003 // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as 02004 // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation 02005 // support in the assembler and linker to be used. This would need to be 02006 // fixed to fully support tail calls in Thumb1. 02007 // 02008 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take 02009 // LR. This means if we need to reload LR, it takes an extra instructions, 02010 // which outweighs the value of the tail call; but here we don't know yet 02011 // whether LR is going to be used. Probably the right approach is to 02012 // generate the tail call here and turn it back into CALL/RET in 02013 // emitEpilogue if LR is used. 02014 02015 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 02016 // but we need to make sure there are enough registers; the only valid 02017 // registers are the 4 used for parameters. We don't currently do this 02018 // case. 02019 if (Subtarget->isThumb1Only()) 02020 return false; 02021 02022 // Externally-defined functions with weak linkage should not be 02023 // tail-called on ARM when the OS does not support dynamic 02024 // pre-emption of symbols, as the AAELF spec requires normal calls 02025 // to undefined weak functions to be replaced with a NOP or jump to the 02026 // next instruction. The behaviour of branch instructions in this 02027 // situation (as used for tail calls) is implementation-defined, so we 02028 // cannot rely on the linker replacing the tail call with a return. 02029 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 02030 const GlobalValue *GV = G->getGlobal(); 02031 if (GV->hasExternalWeakLinkage()) 02032 return false; 02033 } 02034 02035 // If the calling conventions do not match, then we'd better make sure the 02036 // results are returned in the same way as what the caller expects. 02037 if (!CCMatch) { 02038 SmallVector<CCValAssign, 16> RVLocs1; 02039 ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1, 02040 *DAG.getContext(), Call); 02041 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 02042 02043 SmallVector<CCValAssign, 16> RVLocs2; 02044 ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2, 02045 *DAG.getContext(), Call); 02046 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 02047 02048 if (RVLocs1.size() != RVLocs2.size()) 02049 return false; 02050 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 02051 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 02052 return false; 02053 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 02054 return false; 02055 if (RVLocs1[i].isRegLoc()) { 02056 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 02057 return false; 02058 } else { 02059 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 02060 return false; 02061 } 02062 } 02063 } 02064 02065 // If Caller's vararg or byval argument has been split between registers and 02066 // stack, do not perform tail call, since part of the argument is in caller's 02067 // local frame. 
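// Added summary of this function's sibcall checks (editorial): a call is
// rejected as a tail call if any of the following hold: a vararg call site
// that passes arguments, a caller with the "interrupt" attribute, an sret
// mismatch between caller and callee, Thumb1, a callee with external weak
// linkage, mismatched calling conventions whose return values land in
// different locations, a caller whose register save area is non-zero (split
// vararg/byval, checked just below), or stack-passed arguments that do not
// already sit in matching fixed objects of the caller's frame.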
02068 const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). 02069 getInfo<ARMFunctionInfo>(); 02070 if (AFI_Caller->getArgRegsSaveSize()) 02071 return false; 02072 02073 // If the callee takes no arguments then go on to check the results of the 02074 // call. 02075 if (!Outs.empty()) { 02076 // Check if stack adjustment is needed. For now, do not do this if any 02077 // argument is passed on the stack. 02078 SmallVector<CCValAssign, 16> ArgLocs; 02079 ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs, 02080 *DAG.getContext(), Call); 02081 CCInfo.AnalyzeCallOperands(Outs, 02082 CCAssignFnForNode(CalleeCC, false, isVarArg)); 02083 if (CCInfo.getNextStackOffset()) { 02084 MachineFunction &MF = DAG.getMachineFunction(); 02085 02086 // Check if the arguments are already laid out in the right way as 02087 // the caller's fixed stack objects. 02088 MachineFrameInfo *MFI = MF.getFrameInfo(); 02089 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 02090 const TargetInstrInfo *TII = 02091 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 02092 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 02093 i != e; 02094 ++i, ++realArgIdx) { 02095 CCValAssign &VA = ArgLocs[i]; 02096 EVT RegVT = VA.getLocVT(); 02097 SDValue Arg = OutVals[realArgIdx]; 02098 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 02099 if (VA.getLocInfo() == CCValAssign::Indirect) 02100 return false; 02101 if (VA.needsCustom()) { 02102 // f64 and vector types are split into multiple registers or 02103 // register/stack-slot combinations. The types will not match 02104 // the registers; give up on memory f64 refs until we figure 02105 // out what to do about this. 02106 if (!VA.isRegLoc()) 02107 return false; 02108 if (!ArgLocs[++i].isRegLoc()) 02109 return false; 02110 if (RegVT == MVT::v2f64) { 02111 if (!ArgLocs[++i].isRegLoc()) 02112 return false; 02113 if (!ArgLocs[++i].isRegLoc()) 02114 return false; 02115 } 02116 } else if (!VA.isRegLoc()) { 02117 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 02118 MFI, MRI, TII)) 02119 return false; 02120 } 02121 } 02122 } 02123 } 02124 02125 return true; 02126 } 02127 02128 bool 02129 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 02130 MachineFunction &MF, bool isVarArg, 02131 const SmallVectorImpl<ISD::OutputArg> &Outs, 02132 LLVMContext &Context) const { 02133 SmallVector<CCValAssign, 16> RVLocs; 02134 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); 02135 return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true, 02136 isVarArg)); 02137 } 02138 02139 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, 02140 SDLoc DL, SelectionDAG &DAG) { 02141 const MachineFunction &MF = DAG.getMachineFunction(); 02142 const Function *F = MF.getFunction(); 02143 02144 StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString(); 02145 02146 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset 02147 // version of the "preferred return address". These offsets affect the return 02148 // instruction if this is a return from PL1 without hypervisor extensions. 02149 // IRQ/FIQ: +4 "subs pc, lr, #4" 02150 // SWI: 0 "subs pc, lr, #0" 02151 // ABORT: +4 "subs pc, lr, #4" 02152 // UNDEF: +4/+2 "subs pc, lr, #0" 02153 // UNDEF varies depending on where the exception came from ARM or Thumb 02154 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0. 
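// Added table of the resulting return sequences (editorial):
//   IRQ / FIQ / ABORT / unspecified kind   ->  "subs pc, lr, #4"
//   SWI / UNDEF                            ->  "subs pc, lr, #0"
// The chosen offset becomes the second operand of the ARMISD::INTRET_FLAG
// node built below, which is later selected to the matching SUBS-to-PC
// return.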
02155 02156 int64_t LROffset; 02157 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || 02158 IntKind == "ABORT") 02159 LROffset = 4; 02160 else if (IntKind == "SWI" || IntKind == "UNDEF") 02161 LROffset = 0; 02162 else 02163 report_fatal_error("Unsupported interrupt attribute. If present, value " 02164 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); 02165 02166 RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false)); 02167 02168 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); 02169 } 02170 02171 SDValue 02172 ARMTargetLowering::LowerReturn(SDValue Chain, 02173 CallingConv::ID CallConv, bool isVarArg, 02174 const SmallVectorImpl<ISD::OutputArg> &Outs, 02175 const SmallVectorImpl<SDValue> &OutVals, 02176 SDLoc dl, SelectionDAG &DAG) const { 02177 02178 // CCValAssign - represent the assignment of the return value to a location. 02179 SmallVector<CCValAssign, 16> RVLocs; 02180 02181 // CCState - Info about the registers and stack slots. 02182 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 02183 *DAG.getContext(), Call); 02184 02185 // Analyze outgoing return values. 02186 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 02187 isVarArg)); 02188 02189 SDValue Flag; 02190 SmallVector<SDValue, 4> RetOps; 02191 RetOps.push_back(Chain); // Operand #0 = Chain (updated below) 02192 bool isLittleEndian = Subtarget->isLittle(); 02193 02194 MachineFunction &MF = DAG.getMachineFunction(); 02195 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02196 AFI->setReturnRegsCount(RVLocs.size()); 02197 02198 // Copy the result values into the output registers. 02199 for (unsigned i = 0, realRVLocIdx = 0; 02200 i != RVLocs.size(); 02201 ++i, ++realRVLocIdx) { 02202 CCValAssign &VA = RVLocs[i]; 02203 assert(VA.isRegLoc() && "Can only return in registers!"); 02204 02205 SDValue Arg = OutVals[realRVLocIdx]; 02206 02207 switch (VA.getLocInfo()) { 02208 default: llvm_unreachable("Unknown loc info!"); 02209 case CCValAssign::Full: break; 02210 case CCValAssign::BCvt: 02211 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 02212 break; 02213 } 02214 02215 if (VA.needsCustom()) { 02216 if (VA.getLocVT() == MVT::v2f64) { 02217 // Extract the first half and return it in two registers. 02218 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 02219 DAG.getConstant(0, MVT::i32)); 02220 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 02221 DAG.getVTList(MVT::i32, MVT::i32), Half); 02222 02223 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 02224 HalfGPRs.getValue(isLittleEndian ? 0 : 1), 02225 Flag); 02226 Flag = Chain.getValue(1); 02227 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 02228 VA = RVLocs[++i]; // skip ahead to next loc 02229 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 02230 HalfGPRs.getValue(isLittleEndian ? 1 : 0), 02231 Flag); 02232 Flag = Chain.getValue(1); 02233 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 02234 VA = RVLocs[++i]; // skip ahead to next loc 02235 02236 // Extract the 2nd half and fall through to handle it as an f64 value. 02237 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 02238 DAG.getConstant(1, MVT::i32)); 02239 } 02240 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 02241 // available. 
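// Editorial sketch (added commentary): an f64 return value is split with
// ARMISD::VMOVRRD into two i32 halves that are copied into consecutive GPR
// return locations (low half first on little-endian, swapped on BE); a v2f64
// value takes this path twice and occupies four GPR locations.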
02242 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 02243 DAG.getVTList(MVT::i32, MVT::i32), Arg); 02244 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 02245 fmrrd.getValue(isLittleEndian ? 0 : 1), 02246 Flag); 02247 Flag = Chain.getValue(1); 02248 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 02249 VA = RVLocs[++i]; // skip ahead to next loc 02250 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 02251 fmrrd.getValue(isLittleEndian ? 1 : 0), 02252 Flag); 02253 } else 02254 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 02255 02256 // Guarantee that all emitted copies are 02257 // stuck together, avoiding something bad. 02258 Flag = Chain.getValue(1); 02259 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 02260 } 02261 02262 // Update chain and glue. 02263 RetOps[0] = Chain; 02264 if (Flag.getNode()) 02265 RetOps.push_back(Flag); 02266 02267 // CPUs which aren't M-class use a special sequence to return from 02268 // exceptions (roughly, any instruction setting pc and cpsr simultaneously, 02269 // though we use "subs pc, lr, #N"). 02270 // 02271 // M-class CPUs actually use a normal return sequence with a special 02272 // (hardware-provided) value in LR, so the normal code path works. 02273 if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") && 02274 !Subtarget->isMClass()) { 02275 if (Subtarget->isThumb1Only()) 02276 report_fatal_error("interrupt attribute is not supported in Thumb1"); 02277 return LowerInterruptReturn(RetOps, dl, DAG); 02278 } 02279 02280 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps); 02281 } 02282 02283 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 02284 if (N->getNumValues() != 1) 02285 return false; 02286 if (!N->hasNUsesOfValue(1, 0)) 02287 return false; 02288 02289 SDValue TCChain = Chain; 02290 SDNode *Copy = *N->use_begin(); 02291 if (Copy->getOpcode() == ISD::CopyToReg) { 02292 // If the copy has a glue operand, we conservatively assume it isn't safe to 02293 // perform a tail call. 02294 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) 02295 return false; 02296 TCChain = Copy->getOperand(0); 02297 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { 02298 SDNode *VMov = Copy; 02299 // f64 returned in a pair of GPRs. 02300 SmallPtrSet<SDNode*, 2> Copies; 02301 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 02302 UI != UE; ++UI) { 02303 if (UI->getOpcode() != ISD::CopyToReg) 02304 return false; 02305 Copies.insert(*UI); 02306 } 02307 if (Copies.size() > 2) 02308 return false; 02309 02310 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 02311 UI != UE; ++UI) { 02312 SDValue UseChain = UI->getOperand(0); 02313 if (Copies.count(UseChain.getNode())) 02314 // Second CopyToReg 02315 Copy = *UI; 02316 else 02317 // First CopyToReg 02318 TCChain = UseChain; 02319 } 02320 } else if (Copy->getOpcode() == ISD::BITCAST) { 02321 // f32 returned in a single GPR. 
02322 if (!Copy->hasOneUse()) 02323 return false; 02324 Copy = *Copy->use_begin(); 02325 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) 02326 return false; 02327 TCChain = Copy->getOperand(0); 02328 } else { 02329 return false; 02330 } 02331 02332 bool HasRet = false; 02333 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 02334 UI != UE; ++UI) { 02335 if (UI->getOpcode() != ARMISD::RET_FLAG && 02336 UI->getOpcode() != ARMISD::INTRET_FLAG) 02337 return false; 02338 HasRet = true; 02339 } 02340 02341 if (!HasRet) 02342 return false; 02343 02344 Chain = TCChain; 02345 return true; 02346 } 02347 02348 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 02349 if (!Subtarget->supportsTailCall()) 02350 return false; 02351 02352 if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls) 02353 return false; 02354 02355 return !Subtarget->isThumb1Only(); 02356 } 02357 02358 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 02359 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 02360 // one of the above mentioned nodes. It has to be wrapped because otherwise 02361 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 02362 // be used to form addressing mode. These wrapped nodes will be selected 02363 // into MOVi. 02364 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 02365 EVT PtrVT = Op.getValueType(); 02366 // FIXME there is no actual debug info here 02367 SDLoc dl(Op); 02368 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 02369 SDValue Res; 02370 if (CP->isMachineConstantPoolEntry()) 02371 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 02372 CP->getAlignment()); 02373 else 02374 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 02375 CP->getAlignment()); 02376 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 02377 } 02378 02379 unsigned ARMTargetLowering::getJumpTableEncoding() const { 02380 return MachineJumpTableInfo::EK_Inline; 02381 } 02382 02383 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 02384 SelectionDAG &DAG) const { 02385 MachineFunction &MF = DAG.getMachineFunction(); 02386 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02387 unsigned ARMPCLabelIndex = 0; 02388 SDLoc DL(Op); 02389 EVT PtrVT = getPointerTy(); 02390 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 02391 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 02392 SDValue CPAddr; 02393 if (RelocM == Reloc::Static) { 02394 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 02395 } else { 02396 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 02397 ARMPCLabelIndex = AFI->createPICLabelUId(); 02398 ARMConstantPoolValue *CPV = 02399 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, 02400 ARMCP::CPBlockAddress, PCAdj); 02401 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02402 } 02403 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 02404 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 02405 MachinePointerInfo::getConstantPool(), 02406 false, false, false, 0); 02407 if (RelocM == Reloc::Static) 02408 return Result; 02409 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 02410 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 02411 } 02412 02413 // Lower ISD::GlobalTLSAddress using the "general dynamic" model 02414 SDValue 02415 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 02416 SelectionDAG &DAG) const { 02417 SDLoc dl(GA); 02418 EVT PtrVT = getPointerTy(); 02419 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 02420 MachineFunction &MF = DAG.getMachineFunction(); 02421 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02422 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 02423 ARMConstantPoolValue *CPV = 02424 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 02425 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 02426 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02427 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 02428 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 02429 MachinePointerInfo::getConstantPool(), 02430 false, false, false, 0); 02431 SDValue Chain = Argument.getValue(1); 02432 02433 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 02434 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 02435 02436 // call __tls_get_addr. 02437 ArgListTy Args; 02438 ArgListEntry Entry; 02439 Entry.Node = Argument; 02440 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); 02441 Args.push_back(Entry); 02442 02443 // FIXME: is there useful debug info available here? 02444 TargetLowering::CallLoweringInfo CLI(DAG); 02445 CLI.setDebugLoc(dl).setChain(Chain) 02446 .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), 02447 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args), 02448 0); 02449 02450 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 02451 return CallResult.first; 02452 } 02453 02454 // Lower ISD::GlobalTLSAddress using the "initial exec" or 02455 // "local exec" model. 02456 SDValue 02457 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 02458 SelectionDAG &DAG, 02459 TLSModel::Model model) const { 02460 const GlobalValue *GV = GA->getGlobal(); 02461 SDLoc dl(GA); 02462 SDValue Offset; 02463 SDValue Chain = DAG.getEntryNode(); 02464 EVT PtrVT = getPointerTy(); 02465 // Get the Thread Pointer 02466 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 02467 02468 if (model == TLSModel::InitialExec) { 02469 MachineFunction &MF = DAG.getMachineFunction(); 02470 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02471 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 02472 // Initial exec model. 02473 unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; 02474 ARMConstantPoolValue *CPV = 02475 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 02476 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, 02477 true); 02478 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02479 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 02480 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 02481 MachinePointerInfo::getConstantPool(), 02482 false, false, false, 0); 02483 Chain = Offset.getValue(1); 02484 02485 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 02486 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 02487 02488 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 02489 MachinePointerInfo::getConstantPool(), 02490 false, false, false, 0); 02491 } else { 02492 // local exec model 02493 assert(model == TLSModel::LocalExec); 02494 ARMConstantPoolValue *CPV = 02495 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); 02496 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02497 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 02498 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 02499 MachinePointerInfo::getConstantPool(), 02500 false, false, false, 0); 02501 } 02502 02503 // The address of the thread local variable is the add of the thread 02504 // pointer with the offset of the variable. 02505 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 02506 } 02507 02508 SDValue 02509 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 02510 // TODO: implement the "local dynamic" model 02511 assert(Subtarget->isTargetELF() && 02512 "TLS not implemented for non-ELF targets"); 02513 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 02514 02515 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); 02516 02517 switch (model) { 02518 case TLSModel::GeneralDynamic: 02519 case TLSModel::LocalDynamic: 02520 return LowerToTLSGeneralDynamicModel(GA, DAG); 02521 case TLSModel::InitialExec: 02522 case TLSModel::LocalExec: 02523 return LowerToTLSExecModels(GA, DAG, model); 02524 } 02525 llvm_unreachable("bogus TLS model"); 02526 } 02527 02528 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 02529 SelectionDAG &DAG) const { 02530 EVT PtrVT = getPointerTy(); 02531 SDLoc dl(Op); 02532 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 02533 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 02534 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 02535 ARMConstantPoolValue *CPV = 02536 ARMConstantPoolConstant::Create(GV, 02537 UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 02538 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02539 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 02540 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 02541 CPAddr, 02542 MachinePointerInfo::getConstantPool(), 02543 false, false, false, 0); 02544 SDValue Chain = Result.getValue(1); 02545 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 02546 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 02547 if (!UseGOTOFF) 02548 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 02549 MachinePointerInfo::getGOT(), 02550 false, false, false, 0); 02551 return Result; 02552 } 02553 02554 // If we have T2 ops, we can materialize the address directly via movt/movw 02555 // pair. This is always cheaper. 
02556 if (Subtarget->useMovt(DAG.getMachineFunction())) { 02557 ++NumMovwMovt; 02558 // FIXME: Once remat is capable of dealing with instructions with register 02559 // operands, expand this into two nodes. 02560 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 02561 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 02562 } else { 02563 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 02564 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 02565 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 02566 MachinePointerInfo::getConstantPool(), 02567 false, false, false, 0); 02568 } 02569 } 02570 02571 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 02572 SelectionDAG &DAG) const { 02573 EVT PtrVT = getPointerTy(); 02574 SDLoc dl(Op); 02575 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 02576 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 02577 02578 if (Subtarget->useMovt(DAG.getMachineFunction())) 02579 ++NumMovwMovt; 02580 02581 // FIXME: Once remat is capable of dealing with instructions with register 02582 // operands, expand this into multiple nodes 02583 unsigned Wrapper = 02584 RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper; 02585 02586 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY); 02587 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G); 02588 02589 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 02590 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 02591 MachinePointerInfo::getGOT(), false, false, false, 0); 02592 return Result; 02593 } 02594 02595 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, 02596 SelectionDAG &DAG) const { 02597 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); 02598 assert(Subtarget->useMovt(DAG.getMachineFunction()) && 02599 "Windows on ARM expects to use movw/movt"); 02600 02601 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 02602 const ARMII::TOF TargetFlags = 02603 (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG); 02604 EVT PtrVT = getPointerTy(); 02605 SDValue Result; 02606 SDLoc DL(Op); 02607 02608 ++NumMovwMovt; 02609 02610 // FIXME: Once remat is capable of dealing with instructions with register 02611 // operands, expand this into two nodes. 02612 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, 02613 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0, 02614 TargetFlags)); 02615 if (GV->hasDLLImportStorageClass()) 02616 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, 02617 MachinePointerInfo::getGOT(), false, false, false, 0); 02618 return Result; 02619 } 02620 02621 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 02622 SelectionDAG &DAG) const { 02623 assert(Subtarget->isTargetELF() && 02624 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 02625 MachineFunction &MF = DAG.getMachineFunction(); 02626 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02627 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 02628 EVT PtrVT = getPointerTy(); 02629 SDLoc dl(Op); 02630 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 02631 ARMConstantPoolValue *CPV = 02632 ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", 02633 ARMPCLabelIndex, PCAdj); 02634 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02635 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 02636 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 02637 MachinePointerInfo::getConstantPool(), 02638 false, false, false, 0); 02639 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 02640 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 02641 } 02642 02643 SDValue 02644 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 02645 SDLoc dl(Op); 02646 SDValue Val = DAG.getConstant(0, MVT::i32); 02647 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, 02648 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), 02649 Op.getOperand(1), Val); 02650 } 02651 02652 SDValue 02653 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 02654 SDLoc dl(Op); 02655 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 02656 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 02657 } 02658 02659 SDValue 02660 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 02661 const ARMSubtarget *Subtarget) const { 02662 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 02663 SDLoc dl(Op); 02664 switch (IntNo) { 02665 default: return SDValue(); // Don't custom lower most intrinsics. 02666 case Intrinsic::arm_rbit: { 02667 assert(Op.getOperand(1).getValueType() == MVT::i32 && 02668 "RBIT intrinsic must have i32 type!"); 02669 return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1)); 02670 } 02671 case Intrinsic::arm_thread_pointer: { 02672 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 02673 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 02674 } 02675 case Intrinsic::eh_sjlj_lsda: { 02676 MachineFunction &MF = DAG.getMachineFunction(); 02677 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02678 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 02679 EVT PtrVT = getPointerTy(); 02680 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 02681 SDValue CPAddr; 02682 unsigned PCAdj = (RelocM != Reloc::PIC_) 02683 ? 0 : (Subtarget->isThumb() ? 4 : 8); 02684 ARMConstantPoolValue *CPV = 02685 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, 02686 ARMCP::CPLSDA, PCAdj); 02687 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02688 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 02689 SDValue Result = 02690 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 02691 MachinePointerInfo::getConstantPool(), 02692 false, false, false, 0); 02693 02694 if (RelocM == Reloc::PIC_) { 02695 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 02696 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 02697 } 02698 return Result; 02699 } 02700 case Intrinsic::arm_neon_vmulls: 02701 case Intrinsic::arm_neon_vmullu: { 02702 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 02703 ? ARMISD::VMULLs : ARMISD::VMULLu; 02704 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), 02705 Op.getOperand(1), Op.getOperand(2)); 02706 } 02707 } 02708 } 02709 02710 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 02711 const ARMSubtarget *Subtarget) { 02712 // FIXME: handle "fence singlethread" more efficiently. 
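// Added summary of the barrier selection below (editorial):
//   - no native data barrier (ARM-mode v6):  ARMISD::MEMBARRIER_MCR, i.e. the
//     CP15 barrier "mcr p15, 0, rX, c7, c10, 5";
//   - M-class:                               "dmb sy" (only full-system);
//   - Swift with release ordering:           "dmb ishst";
//   - otherwise:                             "dmb ish".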
02713 SDLoc dl(Op); 02714 if (!Subtarget->hasDataBarrier()) { 02715 // Some ARMv6 cpus can support data barriers with an mcr instruction. 02716 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 02717 // here. 02718 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 02719 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!"); 02720 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 02721 DAG.getConstant(0, MVT::i32)); 02722 } 02723 02724 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); 02725 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); 02726 ARM_MB::MemBOpt Domain = ARM_MB::ISH; 02727 if (Subtarget->isMClass()) { 02728 // Only a full system barrier exists in the M-class architectures. 02729 Domain = ARM_MB::SY; 02730 } else if (Subtarget->isSwift() && Ord == Release) { 02731 // Swift happens to implement ISHST barriers in a way that's compatible with 02732 // Release semantics but weaker than ISH so we'd be fools not to use 02733 // it. Beware: other processors probably don't! 02734 Domain = ARM_MB::ISHST; 02735 } 02736 02737 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0), 02738 DAG.getConstant(Intrinsic::arm_dmb, MVT::i32), 02739 DAG.getConstant(Domain, MVT::i32)); 02740 } 02741 02742 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, 02743 const ARMSubtarget *Subtarget) { 02744 // ARM pre v5TE and Thumb1 does not have preload instructions. 02745 if (!(Subtarget->isThumb2() || 02746 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) 02747 // Just preserve the chain. 02748 return Op.getOperand(0); 02749 02750 SDLoc dl(Op); 02751 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; 02752 if (!isRead && 02753 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) 02754 // ARMv7 with MP extension has PLDW. 02755 return Op.getOperand(0); 02756 02757 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); 02758 if (Subtarget->isThumb()) { 02759 // Invert the bits. 02760 isRead = ~isRead & 1; 02761 isData = ~isData & 1; 02762 } 02763 02764 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 02765 Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), 02766 DAG.getConstant(isData, MVT::i32)); 02767 } 02768 02769 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 02770 MachineFunction &MF = DAG.getMachineFunction(); 02771 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 02772 02773 // vastart just stores the address of the VarArgsFrameIndex slot into the 02774 // memory location argument. 02775 SDLoc dl(Op); 02776 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 02777 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 02778 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 02779 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 02780 MachinePointerInfo(SV), false, false, 0); 02781 } 02782 02783 SDValue 02784 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 02785 SDValue &Root, SelectionDAG &DAG, 02786 SDLoc dl) const { 02787 MachineFunction &MF = DAG.getMachineFunction(); 02788 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02789 02790 const TargetRegisterClass *RC; 02791 if (AFI->isThumb1OnlyFunction()) 02792 RC = &ARM::tGPRRegClass; 02793 else 02794 RC = &ARM::GPRRegClass; 02795 02796 // Transform the arguments stored in physical registers into virtual ones. 
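// Note added for exposition (a sketch of the code that follows, not original
// commentary): under the base (soft-float) AAPCS an f64 formal argument
// arrives as two 32-bit pieces. The first piece is in a core register; the
// second is either in the next core register or in a 4-byte stack slot. The
// halves are glued back together with an ARMISD::VMOVDRR node, and on
// big-endian subtargets they are swapped first so that the low word is always
// the first VMOVDRR operand.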
02797 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 02798 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 02799 02800 SDValue ArgValue2; 02801 if (NextVA.isMemLoc()) { 02802 MachineFrameInfo *MFI = MF.getFrameInfo(); 02803 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 02804 02805 // Create load node to retrieve arguments from the stack. 02806 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 02807 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 02808 MachinePointerInfo::getFixedStack(FI), 02809 false, false, false, 0); 02810 } else { 02811 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 02812 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 02813 } 02814 if (!Subtarget->isLittle()) 02815 std::swap (ArgValue, ArgValue2); 02816 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 02817 } 02818 02819 void 02820 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, 02821 unsigned InRegsParamRecordIdx, 02822 unsigned ArgSize, 02823 unsigned &ArgRegsSize, 02824 unsigned &ArgRegsSaveSize) 02825 const { 02826 unsigned NumGPRs; 02827 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { 02828 unsigned RBegin, REnd; 02829 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); 02830 NumGPRs = REnd - RBegin; 02831 } else { 02832 unsigned int firstUnalloced; 02833 firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, 02834 sizeof(GPRArgRegs) / 02835 sizeof(GPRArgRegs[0])); 02836 NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; 02837 } 02838 02839 unsigned Align = MF.getTarget() 02840 .getSubtargetImpl() 02841 ->getFrameLowering() 02842 ->getStackAlignment(); 02843 ArgRegsSize = NumGPRs * 4; 02844 02845 // If parameter is split between stack and GPRs... 02846 if (NumGPRs && Align > 4 && 02847 (ArgRegsSize < ArgSize || 02848 InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { 02849 // Add padding for part of param recovered from GPRs. For example, 02850 // if Align == 8, its last byte must be at address K*8 - 1. 02851 // We need to do it, since remained (stack) part of parameter has 02852 // stack alignment, and we need to "attach" "GPRs head" without gaps 02853 // to it: 02854 // Stack: 02855 // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes... 02856 // [ [padding] [GPRs head] ] [ Tail passed via stack .... 02857 // 02858 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02859 unsigned Padding = 02860 OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align); 02861 ArgRegsSaveSize = ArgRegsSize + Padding; 02862 } else 02863 // We don't need to extend regs save size for byval parameters if they 02864 // are passed via GPRs only. 02865 ArgRegsSaveSize = ArgRegsSize; 02866 } 02867 02868 // The remaining GPRs hold either the beginning of variable-argument 02869 // data, or the beginning of an aggregate passed by value (usually 02870 // byval). Either way, we allocate stack slots adjacent to the data 02871 // provided by our caller, and store the unallocated registers there. 02872 // If this is a variadic function, the va_list pointer will begin with 02873 // these values; otherwise, this reassembles a (byval) structure that 02874 // was split between registers and memory. 02875 // Return: The frame index registers were stored into. 
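// Worked example added for illustration (register numbers assume the AAPCS
// GPR argument order r0-r3 used throughout this file): for a variadic
// function such as
//   int f(int n, ...);
// r0 carries 'n', so the first unallocated register is r1 and three registers
// remain. StoreByValRegs spills r1-r3 into a 12-byte fixed object placed just
// below the caller-provided stack area, so va_arg can advance from the saved
// registers straight onto the caller's stack without a gap.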
02876 int
02877 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
02878                                   SDLoc dl, SDValue &Chain,
02879                                   const Value *OrigArg,
02880                                   unsigned InRegsParamRecordIdx,
02881                                   unsigned OffsetFromOrigArg,
02882                                   unsigned ArgOffset,
02883                                   unsigned ArgSize,
02884                                   bool ForceMutable,
02885                                   unsigned ByValStoreOffset,
02886                                   unsigned TotalArgRegsSaveSize) const {
02887 
02888   // Currently, two use cases are possible:
02889   // Case #1. Non-var-args function, and we meet the first byval parameter.
02890   //          Set up the first unallocated register as the first byval register;
02891   //          eat all remaining registers
02892   //          (these two actions are performed by the HandleByVal method).
02893   //          Then, here, we initialize the stack frame with
02894   //          "store-reg" instructions.
02895   // Case #2. Var-args function that doesn't contain byval parameters.
02896   //          The same: eat all remaining unallocated registers,
02897   //          initialize the stack frame.
02898 
02899   MachineFunction &MF = DAG.getMachineFunction();
02900   MachineFrameInfo *MFI = MF.getFrameInfo();
02901   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02902   unsigned firstRegToSaveIndex, lastRegToSaveIndex;
02903   unsigned RBegin, REnd;
02904   if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
02905     CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
02906     firstRegToSaveIndex = RBegin - ARM::R0;
02907     lastRegToSaveIndex = REnd - ARM::R0;
02908   } else {
02909     firstRegToSaveIndex = CCInfo.getFirstUnallocated
02910       (GPRArgRegs, array_lengthof(GPRArgRegs));
02911     lastRegToSaveIndex = 4;
02912   }
02913 
02914   unsigned ArgRegsSize, ArgRegsSaveSize;
02915   computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
02916                  ArgRegsSize, ArgRegsSaveSize);
02917 
02918   // Store any by-val regs to their spots on the stack so that they may be
02919   // loaded by dereferencing the result of the formal parameter pointer or va_next.
02920   // Note: once the stack area for byval/varargs registers
02921   // has been initialized, it can't be initialized again.
02922   if (ArgRegsSaveSize) {
02923     unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
02924 
02925     if (Padding) {
02926       assert(AFI->getStoredByValParamsPadding() == 0 &&
02927              "The only parameter may be padded.");
02928       AFI->setStoredByValParamsPadding(Padding);
02929     }
02930 
02931     int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
02932                                             Padding +
02933                                               ByValStoreOffset -
02934                                               (int64_t)TotalArgRegsSaveSize,
02935                                             false);
02936     SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
02937     if (Padding) {
02938       MFI->CreateFixedObject(Padding,
02939                              ArgOffset + ByValStoreOffset -
02940                                (int64_t)ArgRegsSaveSize,
02941                              false);
02942     }
02943 
02944     SmallVector<SDValue, 4> MemOps;
02945     for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
02946          ++firstRegToSaveIndex, ++i) {
02947       const TargetRegisterClass *RC;
02948       if (AFI->isThumb1OnlyFunction())
02949         RC = &ARM::tGPRRegClass;
02950       else
02951         RC = &ARM::GPRRegClass;
02952 
02953       unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
02954       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
02955       SDValue Store =
02956         DAG.getStore(Val.getValue(1), dl, Val, FIN,
02957                      MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
02958                      false, false, 0);
02959       MemOps.push_back(Store);
02960       FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
02961                         DAG.getConstant(4, getPointerTy()));
02962     }
02963 
02964     AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
02965 
02966     if (!MemOps.empty())
02967       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
02968     return FrameIndex;
02969   } else {
02970     if (ArgSize == 0) {
02971       // We cannot allocate a zero-byte object for the first variadic argument,
02972       // so just make up a size.
02973       ArgSize = 4;
02974     }
02975     // This will point to the next argument passed via stack.
02976     return MFI->CreateFixedObject(
02977       ArgSize, ArgOffset, !ForceMutable);
02978   }
02979 }
02980 
02981 // Set up the stack frame that the va_list pointer will start from.
02982 void
02983 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
02984                                         SDLoc dl, SDValue &Chain,
02985                                         unsigned ArgOffset,
02986                                         unsigned TotalArgRegsSaveSize,
02987                                         bool ForceMutable) const {
02988   MachineFunction &MF = DAG.getMachineFunction();
02989   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02990 
02991   // Try to store any remaining integer argument regs
02992   // to their spots on the stack so that they may be loaded by dereferencing
02993   // the result of va_next.
02994   // If there are no regs to be stored, just point at the address after the
02995   // last argument passed via the stack.
02996   int FrameIndex =
02997     StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
02998                    CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
02999                    0, TotalArgRegsSaveSize);
03000 
03001   AFI->setVarArgsFrameIndex(FrameIndex);
03002 }
03003 
03004 SDValue
03005 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
03006                                         CallingConv::ID CallConv, bool isVarArg,
03007                                         const SmallVectorImpl<ISD::InputArg>
03008                                           &Ins,
03009                                         SDLoc dl, SelectionDAG &DAG,
03010                                         SmallVectorImpl<SDValue> &InVals)
03011                                           const {
03012   MachineFunction &MF = DAG.getMachineFunction();
03013   MachineFrameInfo *MFI = MF.getFrameInfo();
03014 
03015   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
03016 
03017   // Assign locations to all of the incoming arguments.
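// Editorial overview of the code below (derived from the existing comments,
// not an authoritative addition): the function makes two passes over the
// argument locations. The first pass only measures TotalArgRegsSaveSize, the
// space needed below the CFA for byval and variadic registers, because that
// must be known before the first such object is allocated. The second pass
// then materialises each argument, either as a CopyFromReg from a live-in
// register or as a load from a fixed stack object.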
03018 SmallVector<CCValAssign, 16> ArgLocs; 03019 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 03020 *DAG.getContext(), Prologue); 03021 CCInfo.AnalyzeFormalArguments(Ins, 03022 CCAssignFnForNode(CallConv, /* Return*/ false, 03023 isVarArg)); 03024 03025 SmallVector<SDValue, 16> ArgValues; 03026 int lastInsIndex = -1; 03027 SDValue ArgValue; 03028 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); 03029 unsigned CurArgIdx = 0; 03030 03031 // Initially ArgRegsSaveSize is zero. 03032 // Then we increase this value each time we meet byval parameter. 03033 // We also increase this value in case of varargs function. 03034 AFI->setArgRegsSaveSize(0); 03035 03036 unsigned ByValStoreOffset = 0; 03037 unsigned TotalArgRegsSaveSize = 0; 03038 unsigned ArgRegsSaveSizeMaxAlign = 4; 03039 03040 // Calculate the amount of stack space that we need to allocate to store 03041 // byval and variadic arguments that are passed in registers. 03042 // We need to know this before we allocate the first byval or variadic 03043 // argument, as they will be allocated a stack slot below the CFA (Canonical 03044 // Frame Address, the stack pointer at entry to the function). 03045 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 03046 CCValAssign &VA = ArgLocs[i]; 03047 if (VA.isMemLoc()) { 03048 int index = VA.getValNo(); 03049 if (index != lastInsIndex) { 03050 ISD::ArgFlagsTy Flags = Ins[index].Flags; 03051 if (Flags.isByVal()) { 03052 unsigned ExtraArgRegsSize; 03053 unsigned ExtraArgRegsSaveSize; 03054 computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(), 03055 Flags.getByValSize(), 03056 ExtraArgRegsSize, ExtraArgRegsSaveSize); 03057 03058 TotalArgRegsSaveSize += ExtraArgRegsSaveSize; 03059 if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign) 03060 ArgRegsSaveSizeMaxAlign = Flags.getByValAlign(); 03061 CCInfo.nextInRegsParam(); 03062 } 03063 lastInsIndex = index; 03064 } 03065 } 03066 } 03067 CCInfo.rewindByValRegsInfo(); 03068 lastInsIndex = -1; 03069 if (isVarArg && MFI->hasVAStart()) { 03070 unsigned ExtraArgRegsSize; 03071 unsigned ExtraArgRegsSaveSize; 03072 computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0, 03073 ExtraArgRegsSize, ExtraArgRegsSaveSize); 03074 TotalArgRegsSaveSize += ExtraArgRegsSaveSize; 03075 } 03076 // If the arg regs save area contains N-byte aligned values, the 03077 // bottom of it must be at least N-byte aligned. 03078 TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign); 03079 TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U); 03080 03081 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 03082 CCValAssign &VA = ArgLocs[i]; 03083 std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); 03084 CurArgIdx = Ins[VA.getValNo()].OrigArgIndex; 03085 // Arguments stored in registers. 03086 if (VA.isRegLoc()) { 03087 EVT RegVT = VA.getLocVT(); 03088 03089 if (VA.needsCustom()) { 03090 // f64 and vector types are split up into multiple registers or 03091 // combinations of registers and stack slots. 
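// For instance (an illustrative note mirroring the code below, not original
// commentary): a v2f64 argument is handled as two f64 pieces; each piece is
// rebuilt from a pair of i32 locations by GetF64FormalArgument and the two
// doubles are then inserted into an undef v2f64 with INSERT_VECTOR_ELT at
// lanes 0 and 1.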
03092 if (VA.getLocVT() == MVT::v2f64) { 03093 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 03094 Chain, DAG, dl); 03095 VA = ArgLocs[++i]; // skip ahead to next loc 03096 SDValue ArgValue2; 03097 if (VA.isMemLoc()) { 03098 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 03099 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 03100 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 03101 MachinePointerInfo::getFixedStack(FI), 03102 false, false, false, 0); 03103 } else { 03104 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 03105 Chain, DAG, dl); 03106 } 03107 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 03108 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 03109 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 03110 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 03111 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 03112 } else 03113 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 03114 03115 } else { 03116 const TargetRegisterClass *RC; 03117 03118 if (RegVT == MVT::f32) 03119 RC = &ARM::SPRRegClass; 03120 else if (RegVT == MVT::f64) 03121 RC = &ARM::DPRRegClass; 03122 else if (RegVT == MVT::v2f64) 03123 RC = &ARM::QPRRegClass; 03124 else if (RegVT == MVT::i32) 03125 RC = AFI->isThumb1OnlyFunction() ? 03126 (const TargetRegisterClass*)&ARM::tGPRRegClass : 03127 (const TargetRegisterClass*)&ARM::GPRRegClass; 03128 else 03129 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 03130 03131 // Transform the arguments in physical registers into virtual ones. 03132 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 03133 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 03134 } 03135 03136 // If this is an 8 or 16-bit value, it is really passed promoted 03137 // to 32 bits. Insert an assert[sz]ext to capture this, then 03138 // truncate to the right size. 03139 switch (VA.getLocInfo()) { 03140 default: llvm_unreachable("Unknown loc info!"); 03141 case CCValAssign::Full: break; 03142 case CCValAssign::BCvt: 03143 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 03144 break; 03145 case CCValAssign::SExt: 03146 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 03147 DAG.getValueType(VA.getValVT())); 03148 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 03149 break; 03150 case CCValAssign::ZExt: 03151 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 03152 DAG.getValueType(VA.getValVT())); 03153 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 03154 break; 03155 } 03156 03157 InVals.push_back(ArgValue); 03158 03159 } else { // VA.isRegLoc() 03160 03161 // sanity check 03162 assert(VA.isMemLoc()); 03163 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 03164 03165 int index = ArgLocs[i].getValNo(); 03166 03167 // Some Ins[] entries become multiple ArgLoc[] entries. 03168 // Process them only once. 03169 if (index != lastInsIndex) 03170 { 03171 ISD::ArgFlagsTy Flags = Ins[index].Flags; 03172 // FIXME: For now, all byval parameter objects are marked mutable. 03173 // This can be changed with more analysis. 03174 // In case of tail call optimization mark all arguments mutable. 03175 // Since they could be overwritten by lowering of arguments in case of 03176 // a tail call. 
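// Illustrative case added by the editor (the sizes are made up for the
// example): a 20-byte byval aggregate whose first register is r0 arrives with
// 16 bytes in r0-r3 and 4 bytes on the stack. StoreByValRegs below spills the
// register head right next to the stack tail, and the single frame index it
// returns is pushed into InVals, so later loads see one contiguous object.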
03177 if (Flags.isByVal()) { 03178 unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); 03179 03180 ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); 03181 int FrameIndex = StoreByValRegs( 03182 CCInfo, DAG, dl, Chain, CurOrigArg, 03183 CurByValIndex, 03184 Ins[VA.getValNo()].PartOffset, 03185 VA.getLocMemOffset(), 03186 Flags.getByValSize(), 03187 true /*force mutable frames*/, 03188 ByValStoreOffset, 03189 TotalArgRegsSaveSize); 03190 ByValStoreOffset += Flags.getByValSize(); 03191 ByValStoreOffset = std::min(ByValStoreOffset, 16U); 03192 InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); 03193 CCInfo.nextInRegsParam(); 03194 } else { 03195 unsigned FIOffset = VA.getLocMemOffset(); 03196 int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, 03197 FIOffset, true); 03198 03199 // Create load nodes to retrieve arguments from the stack. 03200 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 03201 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 03202 MachinePointerInfo::getFixedStack(FI), 03203 false, false, false, 0)); 03204 } 03205 lastInsIndex = index; 03206 } 03207 } 03208 } 03209 03210 // varargs 03211 if (isVarArg && MFI->hasVAStart()) 03212 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 03213 CCInfo.getNextStackOffset(), 03214 TotalArgRegsSaveSize); 03215 03216 AFI->setArgumentStackSize(CCInfo.getNextStackOffset()); 03217 03218 return Chain; 03219 } 03220 03221 /// isFloatingPointZero - Return true if this is +0.0. 03222 static bool isFloatingPointZero(SDValue Op) { 03223 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 03224 return CFP->getValueAPF().isPosZero(); 03225 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 03226 // Maybe this has already been legalized into the constant pool? 03227 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 03228 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 03229 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 03230 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 03231 return CFP->getValueAPF().isPosZero(); 03232 } 03233 } 03234 return false; 03235 } 03236 03237 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for 03238 /// the given operands. 03239 SDValue 03240 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 03241 SDValue &ARMcc, SelectionDAG &DAG, 03242 SDLoc dl) const { 03243 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 03244 unsigned C = RHSC->getZExtValue(); 03245 if (!isLegalICmpImmediate(C)) { 03246 // Constant does not fit, try adjusting it by one? 03247 switch (CC) { 03248 default: break; 03249 case ISD::SETLT: 03250 case ISD::SETGE: 03251 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { 03252 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 03253 RHS = DAG.getConstant(C-1, MVT::i32); 03254 } 03255 break; 03256 case ISD::SETULT: 03257 case ISD::SETUGE: 03258 if (C != 0 && isLegalICmpImmediate(C-1)) { 03259 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 03260 RHS = DAG.getConstant(C-1, MVT::i32); 03261 } 03262 break; 03263 case ISD::SETLE: 03264 case ISD::SETGT: 03265 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { 03266 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 03267 RHS = DAG.getConstant(C+1, MVT::i32); 03268 } 03269 break; 03270 case ISD::SETULE: 03271 case ISD::SETUGT: 03272 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { 03273 CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; 03274 RHS = DAG.getConstant(C+1, MVT::i32); 03275 } 03276 break; 03277 } 03278 } 03279 } 03280 03281 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 03282 ARMISD::NodeType CompareType; 03283 switch (CondCode) { 03284 default: 03285 CompareType = ARMISD::CMP; 03286 break; 03287 case ARMCC::EQ: 03288 case ARMCC::NE: 03289 // Uses only Z Flag 03290 CompareType = ARMISD::CMPZ; 03291 break; 03292 } 03293 ARMcc = DAG.getConstant(CondCode, MVT::i32); 03294 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); 03295 } 03296 03297 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 03298 SDValue 03299 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 03300 SDLoc dl) const { 03301 assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); 03302 SDValue Cmp; 03303 if (!isFloatingPointZero(RHS)) 03304 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); 03305 else 03306 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); 03307 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); 03308 } 03309 03310 /// duplicateCmp - Glue values can have only one use, so this function 03311 /// duplicates a comparison node. 03312 SDValue 03313 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { 03314 unsigned Opc = Cmp.getOpcode(); 03315 SDLoc DL(Cmp); 03316 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) 03317 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 03318 03319 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); 03320 Cmp = Cmp.getOperand(0); 03321 Opc = Cmp.getOpcode(); 03322 if (Opc == ARMISD::CMPFP) 03323 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 03324 else { 03325 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); 03326 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); 03327 } 03328 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); 03329 } 03330 03331 std::pair<SDValue, SDValue> 03332 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, 03333 SDValue &ARMcc) const { 03334 assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); 03335 03336 SDValue Value, OverflowCmp; 03337 SDValue LHS = Op.getOperand(0); 03338 SDValue RHS = Op.getOperand(1); 03339 03340 03341 // FIXME: We are currently always generating CMPs because we don't support 03342 // generating CMN through the backend. This is not as good as the natural 03343 // CMP case because it causes a register dependency and cannot be folded 03344 // later. 
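// Explanatory note added by the editor (a paraphrase of the switch below, not
// original commentary): each [SU]ADDO / [SU]SUBO is lowered to the plain ADD
// or SUB plus a CMP whose flags expose the overflow. For ISD::SADDO, for
// example, Value = ADD(LHS, RHS) and OverflowCmp = CMP(Value, LHS); that
// compare sets the V flag exactly when the addition overflowed, so ARMcc is
// VC ("no overflow") and LowerXALUO materialises the overflow bit by
// selecting 0 when VC holds and 1 otherwise.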
03345 03346 switch (Op.getOpcode()) { 03347 default: 03348 llvm_unreachable("Unknown overflow instruction!"); 03349 case ISD::SADDO: 03350 ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); 03351 Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); 03352 OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); 03353 break; 03354 case ISD::UADDO: 03355 ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); 03356 Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); 03357 OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); 03358 break; 03359 case ISD::SSUBO: 03360 ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); 03361 Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); 03362 OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); 03363 break; 03364 case ISD::USUBO: 03365 ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); 03366 Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); 03367 OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); 03368 break; 03369 } // switch (...) 03370 03371 return std::make_pair(Value, OverflowCmp); 03372 } 03373 03374 03375 SDValue 03376 ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { 03377 // Let legalize expand this if it isn't a legal type yet. 03378 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) 03379 return SDValue(); 03380 03381 SDValue Value, OverflowCmp; 03382 SDValue ARMcc; 03383 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); 03384 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03385 // We use 0 and 1 as false and true values. 03386 SDValue TVal = DAG.getConstant(1, MVT::i32); 03387 SDValue FVal = DAG.getConstant(0, MVT::i32); 03388 EVT VT = Op.getValueType(); 03389 03390 SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal, 03391 ARMcc, CCR, OverflowCmp); 03392 03393 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); 03394 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); 03395 } 03396 03397 03398 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 03399 SDValue Cond = Op.getOperand(0); 03400 SDValue SelectTrue = Op.getOperand(1); 03401 SDValue SelectFalse = Op.getOperand(2); 03402 SDLoc dl(Op); 03403 unsigned Opc = Cond.getOpcode(); 03404 03405 if (Cond.getResNo() == 1 && 03406 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || 03407 Opc == ISD::USUBO)) { 03408 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) 03409 return SDValue(); 03410 03411 SDValue Value, OverflowCmp; 03412 SDValue ARMcc; 03413 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); 03414 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03415 EVT VT = Op.getValueType(); 03416 03417 return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR, 03418 OverflowCmp, DAG); 03419 } 03420 03421 // Convert: 03422 // 03423 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 03424 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 03425 // 03426 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 03427 const ConstantSDNode *CMOVTrue = 03428 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 03429 const ConstantSDNode *CMOVFalse = 03430 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 03431 03432 if (CMOVTrue && CMOVFalse) { 03433 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 03434 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 03435 03436 SDValue True; 03437 SDValue False; 
03438 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 03439 True = SelectTrue; 03440 False = SelectFalse; 03441 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 03442 True = SelectFalse; 03443 False = SelectTrue; 03444 } 03445 03446 if (True.getNode() && False.getNode()) { 03447 EVT VT = Op.getValueType(); 03448 SDValue ARMcc = Cond.getOperand(2); 03449 SDValue CCR = Cond.getOperand(3); 03450 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); 03451 assert(True.getValueType() == VT); 03452 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); 03453 } 03454 } 03455 } 03456 03457 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the 03458 // undefined bits before doing a full-word comparison with zero. 03459 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, 03460 DAG.getConstant(1, Cond.getValueType())); 03461 03462 return DAG.getSelectCC(dl, Cond, 03463 DAG.getConstant(0, Cond.getValueType()), 03464 SelectTrue, SelectFalse, ISD::SETNE); 03465 } 03466 03467 static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) { 03468 if (CC == ISD::SETNE) 03469 return ISD::SETEQ; 03470 return ISD::getSetCCInverse(CC, true); 03471 } 03472 03473 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, 03474 bool &swpCmpOps, bool &swpVselOps) { 03475 // Start by selecting the GE condition code for opcodes that return true for 03476 // 'equality' 03477 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || 03478 CC == ISD::SETULE) 03479 CondCode = ARMCC::GE; 03480 03481 // and GT for opcodes that return false for 'equality'. 03482 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || 03483 CC == ISD::SETULT) 03484 CondCode = ARMCC::GT; 03485 03486 // Since we are constrained to GE/GT, if the opcode contains 'less', we need 03487 // to swap the compare operands. 03488 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || 03489 CC == ISD::SETULT) 03490 swpCmpOps = true; 03491 03492 // Both GT and GE are ordered comparisons, and return false for 'unordered'. 03493 // If we have an unordered opcode, we need to swap the operands to the VSEL 03494 // instruction (effectively negating the condition). 03495 // 03496 // This also has the effect of swapping which one of 'less' or 'greater' 03497 // returns true, so we also swap the compare operands. It also switches 03498 // whether we return true for 'equality', so we compensate by picking the 03499 // opposite condition code to our original choice. 03500 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || 03501 CC == ISD::SETUGT) { 03502 swpCmpOps = !swpCmpOps; 03503 swpVselOps = !swpVselOps; 03504 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; 03505 } 03506 03507 // 'ordered' is 'anything but unordered', so use the VS condition code and 03508 // swap the VSEL operands. 03509 if (CC == ISD::SETO) { 03510 CondCode = ARMCC::VS; 03511 swpVselOps = true; 03512 } 03513 03514 // 'unordered or not equal' is 'anything but equal', so use the EQ condition 03515 // code and swap the VSEL operands. 
03516 if (CC == ISD::SETUNE) { 03517 CondCode = ARMCC::EQ; 03518 swpVselOps = true; 03519 } 03520 } 03521 03522 SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, 03523 SDValue TrueVal, SDValue ARMcc, SDValue CCR, 03524 SDValue Cmp, SelectionDAG &DAG) const { 03525 if (Subtarget->isFPOnlySP() && VT == MVT::f64) { 03526 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, 03527 DAG.getVTList(MVT::i32, MVT::i32), FalseVal); 03528 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, 03529 DAG.getVTList(MVT::i32, MVT::i32), TrueVal); 03530 03531 SDValue TrueLow = TrueVal.getValue(0); 03532 SDValue TrueHigh = TrueVal.getValue(1); 03533 SDValue FalseLow = FalseVal.getValue(0); 03534 SDValue FalseHigh = FalseVal.getValue(1); 03535 03536 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, 03537 ARMcc, CCR, Cmp); 03538 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, 03539 ARMcc, CCR, duplicateCmp(Cmp, DAG)); 03540 03541 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); 03542 } else { 03543 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, 03544 Cmp); 03545 } 03546 } 03547 03548 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 03549 EVT VT = Op.getValueType(); 03550 SDValue LHS = Op.getOperand(0); 03551 SDValue RHS = Op.getOperand(1); 03552 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 03553 SDValue TrueVal = Op.getOperand(2); 03554 SDValue FalseVal = Op.getOperand(3); 03555 SDLoc dl(Op); 03556 03557 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { 03558 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, 03559 dl); 03560 03561 // If softenSetCCOperands only returned one value, we should compare it to 03562 // zero. 03563 if (!RHS.getNode()) { 03564 RHS = DAG.getConstant(0, LHS.getValueType()); 03565 CC = ISD::SETNE; 03566 } 03567 } 03568 03569 if (LHS.getValueType() == MVT::i32) { 03570 // Try to generate VSEL on ARMv8. 03571 // The VSEL instruction can't use all the usual ARM condition 03572 // codes: it only has two bits to select the condition code, so it's 03573 // constrained to use only GE, GT, VS and EQ. 03574 // 03575 // To implement all the various ISD::SETXXX opcodes, we sometimes need to 03576 // swap the operands of the previous compare instruction (effectively 03577 // inverting the compare condition, swapping 'less' and 'greater') and 03578 // sometimes need to swap the operands to the VSEL (which inverts the 03579 // condition in the sense of firing whenever the previous condition didn't) 03580 if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || 03581 TrueVal.getValueType() == MVT::f64)) { 03582 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 03583 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || 03584 CondCode == ARMCC::VC || CondCode == ARMCC::NE) { 03585 CC = getInverseCCForVSEL(CC); 03586 std::swap(TrueVal, FalseVal); 03587 } 03588 } 03589 03590 SDValue ARMcc; 03591 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03592 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 03593 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); 03594 } 03595 03596 ARMCC::CondCodes CondCode, CondCode2; 03597 FPCCToARMCC(CC, CondCode, CondCode2); 03598 03599 // Try to generate VSEL on ARMv8. 
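// Worked example (editor's illustration of checkVSELConstraints, used by the
// code below): for CC == ISD::SETULT the routine starts from GT, notes that a
// "less" comparison would normally swap the compare operands, and then, since
// ULT is an unordered predicate, toggles both swap flags and relaxes GT to GE.
// The net result is: keep the compare operands in their original order, use
// the GE condition, and swap the two VSEL operands, which yields the correct
// value for both the ordered and the unordered (NaN) cases.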
03600 if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || 03601 TrueVal.getValueType() == MVT::f64)) { 03602 // We can select VMAXNM/VMINNM from a compare followed by a select with the 03603 // same operands, as follows: 03604 // c = fcmp [ogt, olt, ugt, ult] a, b 03605 // select c, a, b 03606 // We only do this in unsafe-fp-math, because signed zeros and NaNs are 03607 // handled differently than the original code sequence. 03608 if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal && 03609 RHS == FalseVal) { 03610 if (CC == ISD::SETOGT || CC == ISD::SETUGT) 03611 return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); 03612 if (CC == ISD::SETOLT || CC == ISD::SETULT) 03613 return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); 03614 } 03615 03616 bool swpCmpOps = false; 03617 bool swpVselOps = false; 03618 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); 03619 03620 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || 03621 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { 03622 if (swpCmpOps) 03623 std::swap(LHS, RHS); 03624 if (swpVselOps) 03625 std::swap(TrueVal, FalseVal); 03626 } 03627 } 03628 03629 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 03630 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 03631 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03632 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); 03633 if (CondCode2 != ARMCC::AL) { 03634 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 03635 // FIXME: Needs another CMP because flag can have but one use. 03636 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 03637 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); 03638 } 03639 return Result; 03640 } 03641 03642 /// canChangeToInt - Given the fp compare operand, return true if it is suitable 03643 /// to morph to an integer compare sequence. 03644 static bool canChangeToInt(SDValue Op, bool &SeenZero, 03645 const ARMSubtarget *Subtarget) { 03646 SDNode *N = Op.getNode(); 03647 if (!N->hasOneUse()) 03648 // Otherwise it requires moving the value from fp to integer registers. 03649 return false; 03650 if (!N->getNumValues()) 03651 return false; 03652 EVT VT = Op.getValueType(); 03653 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 03654 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 03655 // vmrs are very slow, e.g. cortex-a8. 
03656 return false; 03657 03658 if (isFloatingPointZero(Op)) { 03659 SeenZero = true; 03660 return true; 03661 } 03662 return ISD::isNormalLoad(N); 03663 } 03664 03665 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 03666 if (isFloatingPointZero(Op)) 03667 return DAG.getConstant(0, MVT::i32); 03668 03669 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 03670 return DAG.getLoad(MVT::i32, SDLoc(Op), 03671 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 03672 Ld->isVolatile(), Ld->isNonTemporal(), 03673 Ld->isInvariant(), Ld->getAlignment()); 03674 03675 llvm_unreachable("Unknown VFP cmp argument!"); 03676 } 03677 03678 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 03679 SDValue &RetVal1, SDValue &RetVal2) { 03680 if (isFloatingPointZero(Op)) { 03681 RetVal1 = DAG.getConstant(0, MVT::i32); 03682 RetVal2 = DAG.getConstant(0, MVT::i32); 03683 return; 03684 } 03685 03686 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 03687 SDValue Ptr = Ld->getBasePtr(); 03688 RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op), 03689 Ld->getChain(), Ptr, 03690 Ld->getPointerInfo(), 03691 Ld->isVolatile(), Ld->isNonTemporal(), 03692 Ld->isInvariant(), Ld->getAlignment()); 03693 03694 EVT PtrType = Ptr.getValueType(); 03695 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 03696 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op), 03697 PtrType, Ptr, DAG.getConstant(4, PtrType)); 03698 RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op), 03699 Ld->getChain(), NewPtr, 03700 Ld->getPointerInfo().getWithOffset(4), 03701 Ld->isVolatile(), Ld->isNonTemporal(), 03702 Ld->isInvariant(), NewAlign); 03703 return; 03704 } 03705 03706 llvm_unreachable("Unknown VFP cmp argument!"); 03707 } 03708 03709 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 03710 /// f32 and even f64 comparisons to integer ones. 03711 SDValue 03712 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 03713 SDValue Chain = Op.getOperand(0); 03714 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 03715 SDValue LHS = Op.getOperand(2); 03716 SDValue RHS = Op.getOperand(3); 03717 SDValue Dest = Op.getOperand(4); 03718 SDLoc dl(Op); 03719 03720 bool LHSSeenZero = false; 03721 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); 03722 bool RHSSeenZero = false; 03723 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); 03724 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { 03725 // If unsafe fp math optimization is enabled and there are no other uses of 03726 // the CMP operands, and the condition code is EQ or NE, we can optimize it 03727 // to an integer comparison. 
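// A concrete case (added for illustration, and only valid because this
// transformation is gated on unsafe-fp-math): a branch on (float)x == 0.0f
// can be rewritten as a branch on ((bitcast x to i32) & 0x7fffffff) == 0,
// i.e. an integer test that the magnitude bits are all zero. Masking the sign
// bit makes -0.0 compare equal to +0.0, which matches the FP semantics, and a
// NaN never passes the test, which is also what oeq requires.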
03728 if (CC == ISD::SETOEQ) 03729 CC = ISD::SETEQ; 03730 else if (CC == ISD::SETUNE) 03731 CC = ISD::SETNE; 03732 03733 SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); 03734 SDValue ARMcc; 03735 if (LHS.getValueType() == MVT::f32) { 03736 LHS = DAG.getNode(ISD::AND, dl, MVT::i32, 03737 bitcastf32Toi32(LHS, DAG), Mask); 03738 RHS = DAG.getNode(ISD::AND, dl, MVT::i32, 03739 bitcastf32Toi32(RHS, DAG), Mask); 03740 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 03741 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03742 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 03743 Chain, Dest, ARMcc, CCR, Cmp); 03744 } 03745 03746 SDValue LHS1, LHS2; 03747 SDValue RHS1, RHS2; 03748 expandf64Toi32(LHS, DAG, LHS1, LHS2); 03749 expandf64Toi32(RHS, DAG, RHS1, RHS2); 03750 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); 03751 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); 03752 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 03753 ARMcc = DAG.getConstant(CondCode, MVT::i32); 03754 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 03755 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 03756 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); 03757 } 03758 03759 return SDValue(); 03760 } 03761 03762 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 03763 SDValue Chain = Op.getOperand(0); 03764 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 03765 SDValue LHS = Op.getOperand(2); 03766 SDValue RHS = Op.getOperand(3); 03767 SDValue Dest = Op.getOperand(4); 03768 SDLoc dl(Op); 03769 03770 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { 03771 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, 03772 dl); 03773 03774 // If softenSetCCOperands only returned one value, we should compare it to 03775 // zero. 
03776 if (!RHS.getNode()) { 03777 RHS = DAG.getConstant(0, LHS.getValueType()); 03778 CC = ISD::SETNE; 03779 } 03780 } 03781 03782 if (LHS.getValueType() == MVT::i32) { 03783 SDValue ARMcc; 03784 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 03785 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03786 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 03787 Chain, Dest, ARMcc, CCR, Cmp); 03788 } 03789 03790 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 03791 03792 if (getTargetMachine().Options.UnsafeFPMath && 03793 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 03794 CC == ISD::SETNE || CC == ISD::SETUNE)) { 03795 SDValue Result = OptimizeVFPBrcond(Op, DAG); 03796 if (Result.getNode()) 03797 return Result; 03798 } 03799 03800 ARMCC::CondCodes CondCode, CondCode2; 03801 FPCCToARMCC(CC, CondCode, CondCode2); 03802 03803 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 03804 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 03805 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03806 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 03807 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 03808 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); 03809 if (CondCode2 != ARMCC::AL) { 03810 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 03811 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 03812 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); 03813 } 03814 return Res; 03815 } 03816 03817 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 03818 SDValue Chain = Op.getOperand(0); 03819 SDValue Table = Op.getOperand(1); 03820 SDValue Index = Op.getOperand(2); 03821 SDLoc dl(Op); 03822 03823 EVT PTy = getPointerTy(); 03824 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 03825 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 03826 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 03827 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 03828 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 03829 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 03830 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 03831 if (Subtarget->isThumb2()) { 03832 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 03833 // which does another jump to the destination. This also makes it easier 03834 // to translate it to TBB / TBH later. 03835 // FIXME: This might not work if the function is extremely large. 
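// Explanatory note added by the editor (paraphrasing the surrounding code):
// jump-table entries are 4 bytes, so the target address is Table + 4 * Index.
// The Thumb2 BR2_JT node keeps this extra level of indirection so the table
// can later be turned into a compact TBB / TBH sequence, while in PIC mode
// the loaded entry is added back to the table base before the BR_JT.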
03836 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 03837 Addr, Op.getOperand(2), JTI, UId); 03838 } 03839 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 03840 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 03841 MachinePointerInfo::getJumpTable(), 03842 false, false, false, 0); 03843 Chain = Addr.getValue(1); 03844 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 03845 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 03846 } else { 03847 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 03848 MachinePointerInfo::getJumpTable(), 03849 false, false, false, 0); 03850 Chain = Addr.getValue(1); 03851 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 03852 } 03853 } 03854 03855 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 03856 EVT VT = Op.getValueType(); 03857 SDLoc dl(Op); 03858 03859 if (Op.getValueType().getVectorElementType() == MVT::i32) { 03860 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) 03861 return Op; 03862 return DAG.UnrollVectorOp(Op.getNode()); 03863 } 03864 03865 assert(Op.getOperand(0).getValueType() == MVT::v4f32 && 03866 "Invalid type for custom lowering!"); 03867 if (VT != MVT::v4i16) 03868 return DAG.UnrollVectorOp(Op.getNode()); 03869 03870 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); 03871 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); 03872 } 03873 03874 SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { 03875 EVT VT = Op.getValueType(); 03876 if (VT.isVector()) 03877 return LowerVectorFP_TO_INT(Op, DAG); 03878 03879 if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { 03880 RTLIB::Libcall LC; 03881 if (Op.getOpcode() == ISD::FP_TO_SINT) 03882 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), 03883 Op.getValueType()); 03884 else 03885 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), 03886 Op.getValueType()); 03887 return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, 03888 /*isSigned*/ false, SDLoc(Op)).first; 03889 } 03890 03891 SDLoc dl(Op); 03892 unsigned Opc; 03893 03894 switch (Op.getOpcode()) { 03895 default: llvm_unreachable("Invalid opcode!"); 03896 case ISD::FP_TO_SINT: 03897 Opc = ARMISD::FTOSI; 03898 break; 03899 case ISD::FP_TO_UINT: 03900 Opc = ARMISD::FTOUI; 03901 break; 03902 } 03903 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 03904 return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 03905 } 03906 03907 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 03908 EVT VT = Op.getValueType(); 03909 SDLoc dl(Op); 03910 03911 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { 03912 if (VT.getVectorElementType() == MVT::f32) 03913 return Op; 03914 return DAG.UnrollVectorOp(Op.getNode()); 03915 } 03916 03917 assert(Op.getOperand(0).getValueType() == MVT::v4i16 && 03918 "Invalid type for custom lowering!"); 03919 if (VT != MVT::v4f32) 03920 return DAG.UnrollVectorOp(Op.getNode()); 03921 03922 unsigned CastOpc; 03923 unsigned Opc; 03924 switch (Op.getOpcode()) { 03925 default: llvm_unreachable("Invalid opcode!"); 03926 case ISD::SINT_TO_FP: 03927 CastOpc = ISD::SIGN_EXTEND; 03928 Opc = ISD::SINT_TO_FP; 03929 break; 03930 case ISD::UINT_TO_FP: 03931 CastOpc = ISD::ZERO_EXTEND; 03932 Opc = ISD::UINT_TO_FP; 03933 break; 03934 } 03935 03936 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 03937 return DAG.getNode(Opc, dl, VT, Op); 03938 } 03939 03940 SDValue 
ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { 03941 EVT VT = Op.getValueType(); 03942 if (VT.isVector()) 03943 return LowerVectorINT_TO_FP(Op, DAG); 03944 03945 if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { 03946 RTLIB::Libcall LC; 03947 if (Op.getOpcode() == ISD::SINT_TO_FP) 03948 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), 03949 Op.getValueType()); 03950 else 03951 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), 03952 Op.getValueType()); 03953 return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, 03954 /*isSigned*/ false, SDLoc(Op)).first; 03955 } 03956 03957 SDLoc dl(Op); 03958 unsigned Opc; 03959 03960 switch (Op.getOpcode()) { 03961 default: llvm_unreachable("Invalid opcode!"); 03962 case ISD::SINT_TO_FP: 03963 Opc = ARMISD::SITOF; 03964 break; 03965 case ISD::UINT_TO_FP: 03966 Opc = ARMISD::UITOF; 03967 break; 03968 } 03969 03970 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 03971 return DAG.getNode(Opc, dl, VT, Op); 03972 } 03973 03974 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 03975 // Implement fcopysign with a fabs and a conditional fneg. 03976 SDValue Tmp0 = Op.getOperand(0); 03977 SDValue Tmp1 = Op.getOperand(1); 03978 SDLoc dl(Op); 03979 EVT VT = Op.getValueType(); 03980 EVT SrcVT = Tmp1.getValueType(); 03981 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 03982 Tmp0.getOpcode() == ARMISD::VMOVDRR; 03983 bool UseNEON = !InGPR && Subtarget->hasNEON(); 03984 03985 if (UseNEON) { 03986 // Use VBSL to copy the sign bit. 03987 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 03988 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 03989 DAG.getTargetConstant(EncodedVal, MVT::i32)); 03990 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; 03991 if (VT == MVT::f64) 03992 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 03993 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 03994 DAG.getConstant(32, MVT::i32)); 03995 else /*if (VT == MVT::f32)*/ 03996 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 03997 if (SrcVT == MVT::f32) { 03998 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 03999 if (VT == MVT::f64) 04000 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 04001 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 04002 DAG.getConstant(32, MVT::i32)); 04003 } else if (VT == MVT::f32) 04004 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 04005 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 04006 DAG.getConstant(32, MVT::i32)); 04007 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 04008 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 04009 04010 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 04011 MVT::i32); 04012 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 04013 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 04014 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 04015 04016 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 04017 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 04018 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 04019 if (VT == MVT::f32) { 04020 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 04021 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 04022 DAG.getConstant(0, MVT::i32)); 04023 } else { 04024 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 04025 } 04026 04027 return Res; 04028 } 04029 04030 // Bitcast operand 1 to i32. 
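// Numeric illustration added by the editor for the integer (non-NEON) path
// that follows; the values are just an example:
//   copysign(1.0f, -2.0f):
//     sign = bitcast(-2.0f to i32) & 0x80000000   // 0x80000000
//     mag  = bitcast( 1.0f to i32) & 0x7fffffff   // 0x3f800000
//     bitcast((mag | sign) to f32)                // 0xbf800000 == -1.0f
// For f64 only the word holding the sign bit is modified and the result is
// reassembled with ARMISD::VMOVDRR.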
04031 if (SrcVT == MVT::f64) 04032 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 04033 Tmp1).getValue(1); 04034 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 04035 04036 // Or in the signbit with integer operations. 04037 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 04038 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 04039 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 04040 if (VT == MVT::f32) { 04041 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 04042 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 04043 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 04044 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 04045 } 04046 04047 // f64: Or the high part with signbit and then combine two parts. 04048 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 04049 Tmp0); 04050 SDValue Lo = Tmp0.getValue(0); 04051 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 04052 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 04053 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 04054 } 04055 04056 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 04057 MachineFunction &MF = DAG.getMachineFunction(); 04058 MachineFrameInfo *MFI = MF.getFrameInfo(); 04059 MFI->setReturnAddressIsTaken(true); 04060 04061 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 04062 return SDValue(); 04063 04064 EVT VT = Op.getValueType(); 04065 SDLoc dl(Op); 04066 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 04067 if (Depth) { 04068 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 04069 SDValue Offset = DAG.getConstant(4, MVT::i32); 04070 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 04071 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 04072 MachinePointerInfo(), false, false, false, 0); 04073 } 04074 04075 // Return LR, which contains the return address. Mark it an implicit live-in. 04076 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 04077 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 04078 } 04079 04080 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 04081 const ARMBaseRegisterInfo &ARI = 04082 *static_cast<const ARMBaseRegisterInfo*>(RegInfo); 04083 MachineFunction &MF = DAG.getMachineFunction(); 04084 MachineFrameInfo *MFI = MF.getFrameInfo(); 04085 MFI->setFrameAddressIsTaken(true); 04086 04087 EVT VT = Op.getValueType(); 04088 SDLoc dl(Op); // FIXME probably not meaningful 04089 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 04090 unsigned FrameReg = ARI.getFrameRegister(MF); 04091 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 04092 while (Depth--) 04093 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 04094 MachinePointerInfo(), 04095 false, false, false, 0); 04096 return FrameAddr; 04097 } 04098 04099 // FIXME? Maybe this could be a TableGen attribute on some registers and 04100 // this table could be generated automatically from RegInfo. 
04101 unsigned ARMTargetLowering::getRegisterByName(const char* RegName, 04102 EVT VT) const { 04103 unsigned Reg = StringSwitch<unsigned>(RegName) 04104 .Case("sp", ARM::SP) 04105 .Default(0); 04106 if (Reg) 04107 return Reg; 04108 report_fatal_error("Invalid register name global variable"); 04109 } 04110 04111 /// ExpandBITCAST - If the target supports VFP, this function is called to 04112 /// expand a bit convert where either the source or destination type is i64 to 04113 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 04114 /// operand type is illegal (e.g., v2f32 for a target that doesn't support 04115 /// vectors), since the legalizer won't know what to do with that. 04116 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 04117 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 04118 SDLoc dl(N); 04119 SDValue Op = N->getOperand(0); 04120 04121 // This function is only supposed to be called for i64 types, either as the 04122 // source or destination of the bit convert. 04123 EVT SrcVT = Op.getValueType(); 04124 EVT DstVT = N->getValueType(0); 04125 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 04126 "ExpandBITCAST called for non-i64 type"); 04127 04128 // Turn i64->f64 into VMOVDRR. 04129 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 04130 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 04131 DAG.getConstant(0, MVT::i32)); 04132 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 04133 DAG.getConstant(1, MVT::i32)); 04134 return DAG.getNode(ISD::BITCAST, dl, DstVT, 04135 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 04136 } 04137 04138 // Turn f64->i64 into VMOVRRD. 04139 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 04140 SDValue Cvt; 04141 if (TLI.isBigEndian() && SrcVT.isVector() && 04142 SrcVT.getVectorNumElements() > 1) 04143 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 04144 DAG.getVTList(MVT::i32, MVT::i32), 04145 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op)); 04146 else 04147 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 04148 DAG.getVTList(MVT::i32, MVT::i32), Op); 04149 // Merge the pieces into a single i64 value. 04150 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 04151 } 04152 04153 return SDValue(); 04154 } 04155 04156 /// getZeroVector - Returns a vector of specified type with all zero elements. 04157 /// Zero vectors are used to represent vector negation and in those cases 04158 /// will be implemented with the NEON VNEG instruction. However, VNEG does 04159 /// not support i64 elements, so sometimes the zero vectors will need to be 04160 /// explicitly constructed. Regardless, use a canonical VMOV to create the 04161 /// zero vector. 04162 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) { 04163 assert(VT.isVector() && "Expected a vector type"); 04164 // The canonical modified immediate encoding of a zero vector is....0! 04165 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 04166 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 04167 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 04168 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 04169 } 04170 04171 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two 04172 /// i32 values and take a 2 x i32 value to shift plus a shift amount. 
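// Editorial sketch (not part of the original comment) of what the code below
// builds; lo/hi are the two incoming i32 halves and n is the shift amount:
//   lo_ge32 = hi >> (n - 32)                      // SRA or SRL as requested
//   lo_lt32 = (lo >> n) | (hi << (32 - n))
//   lo'     = CMOV(lo_lt32, lo_ge32, n - 32 >= 0)
//   hi'     = hi >> n                             // SRA or SRL as requested
// i.e. only the low word needs a compare-and-select between the two cases.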
04173 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 04174 SelectionDAG &DAG) const { 04175 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 04176 EVT VT = Op.getValueType(); 04177 unsigned VTBits = VT.getSizeInBits(); 04178 SDLoc dl(Op); 04179 SDValue ShOpLo = Op.getOperand(0); 04180 SDValue ShOpHi = Op.getOperand(1); 04181 SDValue ShAmt = Op.getOperand(2); 04182 SDValue ARMcc; 04183 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 04184 04185 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 04186 04187 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 04188 DAG.getConstant(VTBits, MVT::i32), ShAmt); 04189 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 04190 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 04191 DAG.getConstant(VTBits, MVT::i32)); 04192 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 04193 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 04194 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 04195 04196 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 04197 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 04198 ARMcc, DAG, dl); 04199 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 04200 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 04201 CCR, Cmp); 04202 04203 SDValue Ops[2] = { Lo, Hi }; 04204 return DAG.getMergeValues(Ops, dl); 04205 } 04206 04207 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 04208 /// i32 values and take a 2 x i32 value to shift plus a shift amount. 04209 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 04210 SelectionDAG &DAG) const { 04211 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 04212 EVT VT = Op.getValueType(); 04213 unsigned VTBits = VT.getSizeInBits(); 04214 SDLoc dl(Op); 04215 SDValue ShOpLo = Op.getOperand(0); 04216 SDValue ShOpHi = Op.getOperand(1); 04217 SDValue ShAmt = Op.getOperand(2); 04218 SDValue ARMcc; 04219 04220 assert(Op.getOpcode() == ISD::SHL_PARTS); 04221 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 04222 DAG.getConstant(VTBits, MVT::i32), ShAmt); 04223 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 04224 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 04225 DAG.getConstant(VTBits, MVT::i32)); 04226 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 04227 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 04228 04229 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 04230 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 04231 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 04232 ARMcc, DAG, dl); 04233 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 04234 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 04235 CCR, Cmp); 04236 04237 SDValue Ops[2] = { Lo, Hi }; 04238 return DAG.getMergeValues(Ops, dl); 04239 } 04240 04241 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 04242 SelectionDAG &DAG) const { 04243 // The rounding mode is in bits 23:22 of the FPSCR. 04244 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 04245 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) 04246 // so that the shift + and get folded into a bitfield extract. 
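  // For illustration, with RM = FPSCR[23:22] the code below evaluates
  // ((FPSCR + (1 << 22)) >> 22) & 3 == (RM + 1) & 3, i.e.
  //   RM = 0 (round to nearest)       -> FLT_ROUNDS = 1
  //   RM = 1 (round toward +infinity) -> FLT_ROUNDS = 2
  //   RM = 2 (round toward -infinity) -> FLT_ROUNDS = 3
  //   RM = 3 (round toward zero)      -> FLT_ROUNDS = 0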
04247 SDLoc dl(Op); 04248 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, 04249 DAG.getConstant(Intrinsic::arm_get_fpscr, 04250 MVT::i32)); 04251 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 04252 DAG.getConstant(1U << 22, MVT::i32)); 04253 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, 04254 DAG.getConstant(22, MVT::i32)); 04255 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 04256 DAG.getConstant(3, MVT::i32)); 04257 } 04258 04259 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 04260 const ARMSubtarget *ST) { 04261 EVT VT = N->getValueType(0); 04262 SDLoc dl(N); 04263 04264 if (!ST->hasV6T2Ops()) 04265 return SDValue(); 04266 04267 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 04268 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 04269 } 04270 04271 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count 04272 /// for each 16-bit element from operand, repeated. The basic idea is to 04273 /// leverage vcnt to get the 8-bit counts, gather and add the results. 04274 /// 04275 /// Trace for v4i16: 04276 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 04277 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element) 04278 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi) 04279 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] 04280 /// [b0 b1 b2 b3 b4 b5 b6 b7] 04281 /// +[b1 b0 b3 b2 b5 b4 b7 b6] 04282 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0, 04283 /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) 04284 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { 04285 EVT VT = N->getValueType(0); 04286 SDLoc DL(N); 04287 04288 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; 04289 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); 04290 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0); 04291 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1); 04292 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2); 04293 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3); 04294 } 04295 04296 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the 04297 /// bit-count for each 16-bit element from the operand. We need slightly 04298 /// different sequencing for v4i16 and v8i16 to stay within NEON's available 04299 /// 64/128-bit registers. 04300 /// 04301 /// Trace for v4i16: 04302 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 04303 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) 04304 /// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ] 04305 /// v4i16:Extracted = [k0 k1 k2 k3 ] 04306 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { 04307 EVT VT = N->getValueType(0); 04308 SDLoc DL(N); 04309 04310 SDValue BitCounts = getCTPOP16BitCounts(N, DAG); 04311 if (VT.is64BitVector()) { 04312 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts); 04313 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended, 04314 DAG.getIntPtrConstant(0)); 04315 } else { 04316 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, 04317 BitCounts, DAG.getIntPtrConstant(0)); 04318 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted); 04319 } 04320 } 04321 04322 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the 04323 /// bit-count for each 32-bit element from the operand. 
The idea here is 04324 /// to split the vector into 16-bit elements, leverage the 16-bit count 04325 /// routine, and then combine the results. 04326 /// 04327 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged): 04328 /// input = [v0 v1 ] (vi: 32-bit elements) 04329 /// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1]) 04330 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi) 04331 /// vrev: N0 = [k1 k0 k3 k2 ] 04332 /// [k0 k1 k2 k3 ] 04333 /// N1 =+[k1 k0 k3 k2 ] 04334 /// [k0 k2 k1 k3 ] 04335 /// N2 =+[k1 k3 k0 k2 ] 04336 /// [k0 k2 k1 k3 ] 04337 /// Extended =+[k1 k3 k0 k2 ] 04338 /// [k0 k2 ] 04339 /// Extracted=+[k1 k3 ] 04340 /// 04341 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { 04342 EVT VT = N->getValueType(0); 04343 SDLoc DL(N); 04344 04345 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; 04346 04347 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0)); 04348 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG); 04349 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16); 04350 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0); 04351 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1); 04352 04353 if (VT.is64BitVector()) { 04354 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2); 04355 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended, 04356 DAG.getIntPtrConstant(0)); 04357 } else { 04358 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2, 04359 DAG.getIntPtrConstant(0)); 04360 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted); 04361 } 04362 } 04363 04364 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, 04365 const ARMSubtarget *ST) { 04366 EVT VT = N->getValueType(0); 04367 04368 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON."); 04369 assert((VT == MVT::v2i32 || VT == MVT::v4i32 || 04370 VT == MVT::v4i16 || VT == MVT::v8i16) && 04371 "Unexpected type for custom ctpop lowering"); 04372 04373 if (VT.getVectorElementType() == MVT::i32) 04374 return lowerCTPOP32BitElements(N, DAG); 04375 else 04376 return lowerCTPOP16BitElements(N, DAG); 04377 } 04378 04379 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 04380 const ARMSubtarget *ST) { 04381 EVT VT = N->getValueType(0); 04382 SDLoc dl(N); 04383 04384 if (!VT.isVector()) 04385 return SDValue(); 04386 04387 // Lower vector shifts on NEON to use VSHL. 04388 assert(ST->hasNEON() && "unexpected vector shift"); 04389 04390 // Left shifts translate directly to the vshiftu intrinsic. 04391 if (N->getOpcode() == ISD::SHL) 04392 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 04393 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 04394 N->getOperand(0), N->getOperand(1)); 04395 04396 assert((N->getOpcode() == ISD::SRA || 04397 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 04398 04399 // NEON uses the same intrinsics for both left and right shifts. For 04400 // right shifts, the shift amounts are negative, so negate the vector of 04401 // shift amounts. 04402 EVT ShiftVT = N->getOperand(1).getValueType(); 04403 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 04404 getZeroVector(ShiftVT, DAG, dl), 04405 N->getOperand(1)); 04406 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 
04407 Intrinsic::arm_neon_vshifts : 04408 Intrinsic::arm_neon_vshiftu); 04409 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 04410 DAG.getConstant(vshiftInt, MVT::i32), 04411 N->getOperand(0), NegatedCount); 04412 } 04413 04414 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, 04415 const ARMSubtarget *ST) { 04416 EVT VT = N->getValueType(0); 04417 SDLoc dl(N); 04418 04419 // We can get here for a node like i32 = ISD::SHL i32, i64 04420 if (VT != MVT::i64) 04421 return SDValue(); 04422 04423 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 04424 "Unknown shift to lower!"); 04425 04426 // We only lower SRA, SRL of 1 here, all others use generic lowering. 04427 if (!isa<ConstantSDNode>(N->getOperand(1)) || 04428 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 04429 return SDValue(); 04430 04431 // If we are in thumb mode, we don't have RRX. 04432 if (ST->isThumb1Only()) return SDValue(); 04433 04434 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 04435 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 04436 DAG.getConstant(0, MVT::i32)); 04437 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 04438 DAG.getConstant(1, MVT::i32)); 04439 04440 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 04441 // captures the result into a carry flag. 04442 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 04443 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); 04444 04445 // The low part is an ARMISD::RRX operand, which shifts the carry in. 04446 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 04447 04448 // Merge the pieces into a single i64 value. 04449 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 04450 } 04451 04452 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 04453 SDValue TmpOp0, TmpOp1; 04454 bool Invert = false; 04455 bool Swap = false; 04456 unsigned Opc = 0; 04457 04458 SDValue Op0 = Op.getOperand(0); 04459 SDValue Op1 = Op.getOperand(1); 04460 SDValue CC = Op.getOperand(2); 04461 EVT VT = Op.getValueType(); 04462 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 04463 SDLoc dl(Op); 04464 04465 if (Op1.getValueType().isFloatingPoint()) { 04466 switch (SetCCOpcode) { 04467 default: llvm_unreachable("Illegal FP comparison"); 04468 case ISD::SETUNE: 04469 case ISD::SETNE: Invert = true; // Fallthrough 04470 case ISD::SETOEQ: 04471 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 04472 case ISD::SETOLT: 04473 case ISD::SETLT: Swap = true; // Fallthrough 04474 case ISD::SETOGT: 04475 case ISD::SETGT: Opc = ARMISD::VCGT; break; 04476 case ISD::SETOLE: 04477 case ISD::SETLE: Swap = true; // Fallthrough 04478 case ISD::SETOGE: 04479 case ISD::SETGE: Opc = ARMISD::VCGE; break; 04480 case ISD::SETUGE: Swap = true; // Fallthrough 04481 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 04482 case ISD::SETUGT: Swap = true; // Fallthrough 04483 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 04484 case ISD::SETUEQ: Invert = true; // Fallthrough 04485 case ISD::SETONE: 04486 // Expand this to (OLT | OGT). 04487 TmpOp0 = Op0; 04488 TmpOp1 = Op1; 04489 Opc = ISD::OR; 04490 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 04491 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 04492 break; 04493 case ISD::SETUO: Invert = true; // Fallthrough 04494 case ISD::SETO: 04495 // Expand this to (OLT | OGE). 
04496 TmpOp0 = Op0; 04497 TmpOp1 = Op1; 04498 Opc = ISD::OR; 04499 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 04500 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 04501 break; 04502 } 04503 } else { 04504 // Integer comparisons. 04505 switch (SetCCOpcode) { 04506 default: llvm_unreachable("Illegal integer comparison"); 04507 case ISD::SETNE: Invert = true; 04508 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 04509 case ISD::SETLT: Swap = true; 04510 case ISD::SETGT: Opc = ARMISD::VCGT; break; 04511 case ISD::SETLE: Swap = true; 04512 case ISD::SETGE: Opc = ARMISD::VCGE; break; 04513 case ISD::SETULT: Swap = true; 04514 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 04515 case ISD::SETULE: Swap = true; 04516 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 04517 } 04518 04519 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 04520 if (Opc == ARMISD::VCEQ) { 04521 04522 SDValue AndOp; 04523 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 04524 AndOp = Op0; 04525 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 04526 AndOp = Op1; 04527 04528 // Ignore bitconvert. 04529 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) 04530 AndOp = AndOp.getOperand(0); 04531 04532 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 04533 Opc = ARMISD::VTST; 04534 Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); 04535 Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); 04536 Invert = !Invert; 04537 } 04538 } 04539 } 04540 04541 if (Swap) 04542 std::swap(Op0, Op1); 04543 04544 // If one of the operands is a constant vector zero, attempt to fold the 04545 // comparison to a specialized compare-against-zero form. 04546 SDValue SingleOp; 04547 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 04548 SingleOp = Op0; 04549 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 04550 if (Opc == ARMISD::VCGE) 04551 Opc = ARMISD::VCLEZ; 04552 else if (Opc == ARMISD::VCGT) 04553 Opc = ARMISD::VCLTZ; 04554 SingleOp = Op1; 04555 } 04556 04557 SDValue Result; 04558 if (SingleOp.getNode()) { 04559 switch (Opc) { 04560 case ARMISD::VCEQ: 04561 Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; 04562 case ARMISD::VCGE: 04563 Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; 04564 case ARMISD::VCLEZ: 04565 Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; 04566 case ARMISD::VCGT: 04567 Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; 04568 case ARMISD::VCLTZ: 04569 Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; 04570 default: 04571 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 04572 } 04573 } else { 04574 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 04575 } 04576 04577 if (Invert) 04578 Result = DAG.getNOT(dl, Result, VT); 04579 04580 return Result; 04581 } 04582 04583 /// isNEONModifiedImm - Check if the specified splat value corresponds to a 04584 /// valid vector constant for a NEON instruction with a "modified immediate" 04585 /// operand (e.g., VMOV). If so, return the encoded value. 04586 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 04587 unsigned SplatBitSize, SelectionDAG &DAG, 04588 EVT &VT, bool is128Bits, NEONModImmType type) { 04589 unsigned OpCmode, Imm; 04590 04591 // SplatBitSize is set to the smallest size that splats the vector, so a 04592 // zero vector will always have SplatBitSize == 8. 
However, NEON modified 04593 // immediate instructions others than VMOV do not support the 8-bit encoding 04594 // of a zero vector, and the default encoding of zero is supposed to be the 04595 // 32-bit version. 04596 if (SplatBits == 0) 04597 SplatBitSize = 32; 04598 04599 switch (SplatBitSize) { 04600 case 8: 04601 if (type != VMOVModImm) 04602 return SDValue(); 04603 // Any 1-byte value is OK. Op=0, Cmode=1110. 04604 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 04605 OpCmode = 0xe; 04606 Imm = SplatBits; 04607 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 04608 break; 04609 04610 case 16: 04611 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 04612 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 04613 if ((SplatBits & ~0xff) == 0) { 04614 // Value = 0x00nn: Op=x, Cmode=100x. 04615 OpCmode = 0x8; 04616 Imm = SplatBits; 04617 break; 04618 } 04619 if ((SplatBits & ~0xff00) == 0) { 04620 // Value = 0xnn00: Op=x, Cmode=101x. 04621 OpCmode = 0xa; 04622 Imm = SplatBits >> 8; 04623 break; 04624 } 04625 return SDValue(); 04626 04627 case 32: 04628 // NEON's 32-bit VMOV supports splat values where: 04629 // * only one byte is nonzero, or 04630 // * the least significant byte is 0xff and the second byte is nonzero, or 04631 // * the least significant 2 bytes are 0xff and the third is nonzero. 04632 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 04633 if ((SplatBits & ~0xff) == 0) { 04634 // Value = 0x000000nn: Op=x, Cmode=000x. 04635 OpCmode = 0; 04636 Imm = SplatBits; 04637 break; 04638 } 04639 if ((SplatBits & ~0xff00) == 0) { 04640 // Value = 0x0000nn00: Op=x, Cmode=001x. 04641 OpCmode = 0x2; 04642 Imm = SplatBits >> 8; 04643 break; 04644 } 04645 if ((SplatBits & ~0xff0000) == 0) { 04646 // Value = 0x00nn0000: Op=x, Cmode=010x. 04647 OpCmode = 0x4; 04648 Imm = SplatBits >> 16; 04649 break; 04650 } 04651 if ((SplatBits & ~0xff000000) == 0) { 04652 // Value = 0xnn000000: Op=x, Cmode=011x. 04653 OpCmode = 0x6; 04654 Imm = SplatBits >> 24; 04655 break; 04656 } 04657 04658 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC 04659 if (type == OtherModImm) return SDValue(); 04660 04661 if ((SplatBits & ~0xffff) == 0 && 04662 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 04663 // Value = 0x0000nnff: Op=x, Cmode=1100. 04664 OpCmode = 0xc; 04665 Imm = SplatBits >> 8; 04666 break; 04667 } 04668 04669 if ((SplatBits & ~0xffffff) == 0 && 04670 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 04671 // Value = 0x00nnffff: Op=x, Cmode=1101. 04672 OpCmode = 0xd; 04673 Imm = SplatBits >> 16; 04674 break; 04675 } 04676 04677 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 04678 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 04679 // VMOV.I32. A (very) minor optimization would be to replicate the value 04680 // and fall through here to test for a valid 64-bit splat. But, then the 04681 // caller would also need to check and handle the change in size. 04682 return SDValue(); 04683 04684 case 64: { 04685 if (type != VMOVModImm) 04686 return SDValue(); 04687 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 
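    // For example, the splat value 0x00ff00ff00ff00ff sets one Imm bit per
    // all-ones byte, scanning from the least significant byte, which yields
    // Imm = 0b01010101 = 0x55; a splat containing any byte other than 0x00 or
    // 0xff makes the loop below give up and return SDValue().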
04688 uint64_t BitMask = 0xff; 04689 uint64_t Val = 0; 04690 unsigned ImmMask = 1; 04691 Imm = 0; 04692 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 04693 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 04694 Val |= BitMask; 04695 Imm |= ImmMask; 04696 } else if ((SplatBits & BitMask) != 0) { 04697 return SDValue(); 04698 } 04699 BitMask <<= 8; 04700 ImmMask <<= 1; 04701 } 04702 04703 if (DAG.getTargetLoweringInfo().isBigEndian()) 04704 // swap higher and lower 32 bit word 04705 Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); 04706 04707 // Op=1, Cmode=1110. 04708 OpCmode = 0x1e; 04709 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 04710 break; 04711 } 04712 04713 default: 04714 llvm_unreachable("unexpected size for isNEONModifiedImm"); 04715 } 04716 04717 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 04718 return DAG.getTargetConstant(EncodedVal, MVT::i32); 04719 } 04720 04721 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, 04722 const ARMSubtarget *ST) const { 04723 if (!ST->hasVFP3()) 04724 return SDValue(); 04725 04726 bool IsDouble = Op.getValueType() == MVT::f64; 04727 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); 04728 04729 // Use the default (constant pool) lowering for double constants when we have 04730 // an SP-only FPU 04731 if (IsDouble && Subtarget->isFPOnlySP()) 04732 return SDValue(); 04733 04734 // Try splatting with a VMOV.f32... 04735 APFloat FPVal = CFP->getValueAPF(); 04736 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); 04737 04738 if (ImmVal != -1) { 04739 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) { 04740 // We have code in place to select a valid ConstantFP already, no need to 04741 // do any mangling. 04742 return Op; 04743 } 04744 04745 // It's a float and we are trying to use NEON operations where 04746 // possible. Lower it to a splat followed by an extract. 04747 SDLoc DL(Op); 04748 SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); 04749 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, 04750 NewVal); 04751 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, 04752 DAG.getConstant(0, MVT::i32)); 04753 } 04754 04755 // The rest of our options are NEON only, make sure that's allowed before 04756 // proceeding.. 04757 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP())) 04758 return SDValue(); 04759 04760 EVT VMovVT; 04761 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue(); 04762 04763 // It wouldn't really be worth bothering for doubles except for one very 04764 // important value, which does happen to match: 0.0. So make sure we don't do 04765 // anything stupid. 04766 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32)) 04767 return SDValue(); 04768 04769 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). 04770 SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT, 04771 false, VMOVModImm); 04772 if (NewVal != SDValue()) { 04773 SDLoc DL(Op); 04774 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, 04775 NewVal); 04776 if (IsDouble) 04777 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); 04778 04779 // It's a float: cast and extract a vector element. 
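  // For example, if an f32 +0.0 reaches this point (it has no VFP immediate
  // encoding) and NEON is used for single-precision FP, its bit pattern
  // 0x00000000 is a valid VMOV.i32 immediate: the value is built as an
  // all-zero v2i32 VMOVIMM, bitcast to v2f32, and lane 0 is extracted.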
04780 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 04781 VecConstant); 04782 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 04783 DAG.getConstant(0, MVT::i32)); 04784 } 04785 04786 // Finally, try a VMVN.i32 04787 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT, 04788 false, VMVNModImm); 04789 if (NewVal != SDValue()) { 04790 SDLoc DL(Op); 04791 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); 04792 04793 if (IsDouble) 04794 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); 04795 04796 // It's a float: cast and extract a vector element. 04797 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 04798 VecConstant); 04799 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 04800 DAG.getConstant(0, MVT::i32)); 04801 } 04802 04803 return SDValue(); 04804 } 04805 04806 // check if an VEXT instruction can handle the shuffle mask when the 04807 // vector sources of the shuffle are the same. 04808 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { 04809 unsigned NumElts = VT.getVectorNumElements(); 04810 04811 // Assume that the first shuffle index is not UNDEF. Fail if it is. 04812 if (M[0] < 0) 04813 return false; 04814 04815 Imm = M[0]; 04816 04817 // If this is a VEXT shuffle, the immediate value is the index of the first 04818 // element. The other shuffle indices must be the successive elements after 04819 // the first one. 04820 unsigned ExpectedElt = Imm; 04821 for (unsigned i = 1; i < NumElts; ++i) { 04822 // Increment the expected index. If it wraps around, just follow it 04823 // back to index zero and keep going. 04824 ++ExpectedElt; 04825 if (ExpectedElt == NumElts) 04826 ExpectedElt = 0; 04827 04828 if (M[i] < 0) continue; // ignore UNDEF indices 04829 if (ExpectedElt != static_cast<unsigned>(M[i])) 04830 return false; 04831 } 04832 04833 return true; 04834 } 04835 04836 04837 static bool isVEXTMask(ArrayRef<int> M, EVT VT, 04838 bool &ReverseVEXT, unsigned &Imm) { 04839 unsigned NumElts = VT.getVectorNumElements(); 04840 ReverseVEXT = false; 04841 04842 // Assume that the first shuffle index is not UNDEF. Fail if it is. 04843 if (M[0] < 0) 04844 return false; 04845 04846 Imm = M[0]; 04847 04848 // If this is a VEXT shuffle, the immediate value is the index of the first 04849 // element. The other shuffle indices must be the successive elements after 04850 // the first one. 04851 unsigned ExpectedElt = Imm; 04852 for (unsigned i = 1; i < NumElts; ++i) { 04853 // Increment the expected index. If it wraps around, it may still be 04854 // a VEXT but the source vectors must be swapped. 04855 ExpectedElt += 1; 04856 if (ExpectedElt == NumElts * 2) { 04857 ExpectedElt = 0; 04858 ReverseVEXT = true; 04859 } 04860 04861 if (M[i] < 0) continue; // ignore UNDEF indices 04862 if (ExpectedElt != static_cast<unsigned>(M[i])) 04863 return false; 04864 } 04865 04866 // Adjust the index value if the source operands will be swapped. 04867 if (ReverseVEXT) 04868 Imm -= NumElts; 04869 04870 return true; 04871 } 04872 04873 /// isVREVMask - Check if a vector shuffle corresponds to a VREV 04874 /// instruction with the specified blocksize. (The order of the elements 04875 /// within each block of the vector is reversed.) 
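/// For example, a VREV64 mask for v8i8 is <7,6,5,4,3,2,1,0> and a VREV32 mask
/// for v4i16 is <1,0,3,2>; undef (negative) mask entries are accepted
/// anywhere, including the leading element.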
04876 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { 04877 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 04878 "Only possible block sizes for VREV are: 16, 32, 64"); 04879 04880 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04881 if (EltSz == 64) 04882 return false; 04883 04884 unsigned NumElts = VT.getVectorNumElements(); 04885 unsigned BlockElts = M[0] + 1; 04886 // If the first shuffle index is UNDEF, be optimistic. 04887 if (M[0] < 0) 04888 BlockElts = BlockSize / EltSz; 04889 04890 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 04891 return false; 04892 04893 for (unsigned i = 0; i < NumElts; ++i) { 04894 if (M[i] < 0) continue; // ignore UNDEF indices 04895 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 04896 return false; 04897 } 04898 04899 return true; 04900 } 04901 04902 static bool isVTBLMask(ArrayRef<int> M, EVT VT) { 04903 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 04904 // range, then 0 is placed into the resulting vector. So pretty much any mask 04905 // of 8 elements can work here. 04906 return VT == MVT::v8i8 && M.size() == 8; 04907 } 04908 04909 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 04910 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04911 if (EltSz == 64) 04912 return false; 04913 04914 unsigned NumElts = VT.getVectorNumElements(); 04915 WhichResult = (M[0] == 0 ? 0 : 1); 04916 for (unsigned i = 0; i < NumElts; i += 2) { 04917 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 04918 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 04919 return false; 04920 } 04921 return true; 04922 } 04923 04924 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 04925 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 04926 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 04927 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 04928 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04929 if (EltSz == 64) 04930 return false; 04931 04932 unsigned NumElts = VT.getVectorNumElements(); 04933 WhichResult = (M[0] == 0 ? 0 : 1); 04934 for (unsigned i = 0; i < NumElts; i += 2) { 04935 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 04936 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 04937 return false; 04938 } 04939 return true; 04940 } 04941 04942 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 04943 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04944 if (EltSz == 64) 04945 return false; 04946 04947 unsigned NumElts = VT.getVectorNumElements(); 04948 WhichResult = (M[0] == 0 ? 0 : 1); 04949 for (unsigned i = 0; i != NumElts; ++i) { 04950 if (M[i] < 0) continue; // ignore UNDEF indices 04951 if ((unsigned) M[i] != 2 * i + WhichResult) 04952 return false; 04953 } 04954 04955 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 04956 if (VT.is64BitVector() && EltSz == 32) 04957 return false; 04958 04959 return true; 04960 } 04961 04962 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 04963 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 
04964 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 04965 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 04966 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04967 if (EltSz == 64) 04968 return false; 04969 04970 unsigned Half = VT.getVectorNumElements() / 2; 04971 WhichResult = (M[0] == 0 ? 0 : 1); 04972 for (unsigned j = 0; j != 2; ++j) { 04973 unsigned Idx = WhichResult; 04974 for (unsigned i = 0; i != Half; ++i) { 04975 int MIdx = M[i + j * Half]; 04976 if (MIdx >= 0 && (unsigned) MIdx != Idx) 04977 return false; 04978 Idx += 2; 04979 } 04980 } 04981 04982 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 04983 if (VT.is64BitVector() && EltSz == 32) 04984 return false; 04985 04986 return true; 04987 } 04988 04989 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 04990 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04991 if (EltSz == 64) 04992 return false; 04993 04994 unsigned NumElts = VT.getVectorNumElements(); 04995 WhichResult = (M[0] == 0 ? 0 : 1); 04996 unsigned Idx = WhichResult * NumElts / 2; 04997 for (unsigned i = 0; i != NumElts; i += 2) { 04998 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 04999 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 05000 return false; 05001 Idx += 1; 05002 } 05003 05004 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 05005 if (VT.is64BitVector() && EltSz == 32) 05006 return false; 05007 05008 return true; 05009 } 05010 05011 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 05012 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 05013 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 05014 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 05015 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 05016 if (EltSz == 64) 05017 return false; 05018 05019 unsigned NumElts = VT.getVectorNumElements(); 05020 WhichResult = (M[0] == 0 ? 0 : 1); 05021 unsigned Idx = WhichResult * NumElts / 2; 05022 for (unsigned i = 0; i != NumElts; i += 2) { 05023 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 05024 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 05025 return false; 05026 Idx += 1; 05027 } 05028 05029 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 05030 if (VT.is64BitVector() && EltSz == 32) 05031 return false; 05032 05033 return true; 05034 } 05035 05036 /// \return true if this is a reverse operation on an vector. 05037 static bool isReverseMask(ArrayRef<int> M, EVT VT) { 05038 unsigned NumElts = VT.getVectorNumElements(); 05039 // Make sure the mask has the right size. 05040 if (NumElts != M.size()) 05041 return false; 05042 05043 // Look for <15, ..., 3, -1, 1, 0>. 05044 for (unsigned i = 0; i != NumElts; ++i) 05045 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) 05046 return false; 05047 05048 return true; 05049 } 05050 05051 // If N is an integer constant that can be moved into a register in one 05052 // instruction, return an SDValue of such a constant (will become a MOV 05053 // instruction). Otherwise return null. 
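// For example, in ARM mode 0xff000000 qualifies (an 8-bit immediate rotated
// right by 8), and so does any value whose complement is such an immediate,
// while 0x12345678 does not; Thumb1 essentially restricts this to values that
// fit in 8 bits.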
05054 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 05055 const ARMSubtarget *ST, SDLoc dl) { 05056 uint64_t Val; 05057 if (!isa<ConstantSDNode>(N)) 05058 return SDValue(); 05059 Val = cast<ConstantSDNode>(N)->getZExtValue(); 05060 05061 if (ST->isThumb1Only()) { 05062 if (Val <= 255 || ~Val <= 255) 05063 return DAG.getConstant(Val, MVT::i32); 05064 } else { 05065 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 05066 return DAG.getConstant(Val, MVT::i32); 05067 } 05068 return SDValue(); 05069 } 05070 05071 // If this is a case we can't handle, return null and let the default 05072 // expansion code take care of it. 05073 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 05074 const ARMSubtarget *ST) const { 05075 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 05076 SDLoc dl(Op); 05077 EVT VT = Op.getValueType(); 05078 05079 APInt SplatBits, SplatUndef; 05080 unsigned SplatBitSize; 05081 bool HasAnyUndefs; 05082 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 05083 if (SplatBitSize <= 64) { 05084 // Check if an immediate VMOV works. 05085 EVT VmovVT; 05086 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 05087 SplatUndef.getZExtValue(), SplatBitSize, 05088 DAG, VmovVT, VT.is128BitVector(), 05089 VMOVModImm); 05090 if (Val.getNode()) { 05091 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 05092 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 05093 } 05094 05095 // Try an immediate VMVN. 05096 uint64_t NegatedImm = (~SplatBits).getZExtValue(); 05097 Val = isNEONModifiedImm(NegatedImm, 05098 SplatUndef.getZExtValue(), SplatBitSize, 05099 DAG, VmovVT, VT.is128BitVector(), 05100 VMVNModImm); 05101 if (Val.getNode()) { 05102 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 05103 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 05104 } 05105 05106 // Use vmov.f32 to materialize other v2f32 and v4f32 splats. 05107 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { 05108 int ImmVal = ARM_AM::getFP32Imm(SplatBits); 05109 if (ImmVal != -1) { 05110 SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); 05111 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); 05112 } 05113 } 05114 } 05115 } 05116 05117 // Scan through the operands to see if only one value is used. 05118 // 05119 // As an optimisation, even if more than one value is used it may be more 05120 // profitable to splat with one value then change some lanes. 05121 // 05122 // Heuristically we decide to do this if the vector has a "dominant" value, 05123 // defined as splatted to more than half of the lanes. 05124 unsigned NumElts = VT.getVectorNumElements(); 05125 bool isOnlyLowElement = true; 05126 bool usesOnlyOneValue = true; 05127 bool hasDominantValue = false; 05128 bool isConstant = true; 05129 05130 // Map of the number of times a particular SDValue appears in the 05131 // element list. 05132 DenseMap<SDValue, unsigned> ValueCounts; 05133 SDValue Value; 05134 for (unsigned i = 0; i < NumElts; ++i) { 05135 SDValue V = Op.getOperand(i); 05136 if (V.getOpcode() == ISD::UNDEF) 05137 continue; 05138 if (i > 0) 05139 isOnlyLowElement = false; 05140 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 05141 isConstant = false; 05142 05143 ValueCounts.insert(std::make_pair(V, 0)); 05144 unsigned &Count = ValueCounts[V]; 05145 05146 // Is this value dominant? 
(takes up more than half of the lanes) 05147 if (++Count > (NumElts / 2)) { 05148 hasDominantValue = true; 05149 Value = V; 05150 } 05151 } 05152 if (ValueCounts.size() != 1) 05153 usesOnlyOneValue = false; 05154 if (!Value.getNode() && ValueCounts.size() > 0) 05155 Value = ValueCounts.begin()->first; 05156 05157 if (ValueCounts.size() == 0) 05158 return DAG.getUNDEF(VT); 05159 05160 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR. 05161 // Keep going if we are hitting this case. 05162 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) 05163 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 05164 05165 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 05166 05167 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 05168 // i32 and try again. 05169 if (hasDominantValue && EltSize <= 32) { 05170 if (!isConstant) { 05171 SDValue N; 05172 05173 // If we are VDUPing a value that comes directly from a vector, that will 05174 // cause an unnecessary move to and from a GPR, where instead we could 05175 // just use VDUPLANE. We can only do this if the lane being extracted 05176 // is at a constant index, as the VDUP from lane instructions only have 05177 // constant-index forms. 05178 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 05179 isa<ConstantSDNode>(Value->getOperand(1))) { 05180 // We need to create a new undef vector to use for the VDUPLANE if the 05181 // size of the vector from which we get the value is different than the 05182 // size of the vector that we need to create. We will insert the element 05183 // such that the register coalescer will remove unnecessary copies. 05184 if (VT != Value->getOperand(0).getValueType()) { 05185 ConstantSDNode *constIndex; 05186 constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)); 05187 assert(constIndex && "The index is not a constant!"); 05188 unsigned index = constIndex->getAPIntValue().getLimitedValue() % 05189 VT.getVectorNumElements(); 05190 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, 05191 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), 05192 Value, DAG.getConstant(index, MVT::i32)), 05193 DAG.getConstant(index, MVT::i32)); 05194 } else 05195 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, 05196 Value->getOperand(0), Value->getOperand(1)); 05197 } else 05198 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); 05199 05200 if (!usesOnlyOneValue) { 05201 // The dominant value was splatted as 'N', but we now have to insert 05202 // all differing elements. 
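        // For illustration, a v4i32 BUILD_VECTOR <x, x, y, x> with a
        // non-constant x first becomes N = VDUP x, and the loop below then
        // patches the odd lane out:
        //   N = INSERT_VECTOR_ELT N, y, lane 2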
05203 for (unsigned I = 0; I < NumElts; ++I) { 05204 if (Op.getOperand(I) == Value) 05205 continue; 05206 SmallVector<SDValue, 3> Ops; 05207 Ops.push_back(N); 05208 Ops.push_back(Op.getOperand(I)); 05209 Ops.push_back(DAG.getConstant(I, MVT::i32)); 05210 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops); 05211 } 05212 } 05213 return N; 05214 } 05215 if (VT.getVectorElementType().isFloatingPoint()) { 05216 SmallVector<SDValue, 8> Ops; 05217 for (unsigned i = 0; i < NumElts; ++i) 05218 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, 05219 Op.getOperand(i))); 05220 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 05221 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); 05222 Val = LowerBUILD_VECTOR(Val, DAG, ST); 05223 if (Val.getNode()) 05224 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 05225 } 05226 if (usesOnlyOneValue) { 05227 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 05228 if (isConstant && Val.getNode()) 05229 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 05230 } 05231 } 05232 05233 // If all elements are constants and the case above didn't get hit, fall back 05234 // to the default expansion, which will generate a load from the constant 05235 // pool. 05236 if (isConstant) 05237 return SDValue(); 05238 05239 // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 05240 if (NumElts >= 4) { 05241 SDValue shuffle = ReconstructShuffle(Op, DAG); 05242 if (shuffle != SDValue()) 05243 return shuffle; 05244 } 05245 05246 // Vectors with 32- or 64-bit elements can be built by directly assigning 05247 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 05248 // will be legalized. 05249 if (EltSize >= 32) { 05250 // Do the expansion with floating-point types, since that is what the VFP 05251 // registers are defined to use, and since i64 is not legal. 05252 EVT EltVT = EVT::getFloatingPointVT(EltSize); 05253 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 05254 SmallVector<SDValue, 8> Ops; 05255 for (unsigned i = 0; i < NumElts; ++i) 05256 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); 05257 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); 05258 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 05259 } 05260 05261 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we 05262 // know the default expansion would otherwise fall back on something even 05263 // worse. For a vector with one or two non-undef values, that's 05264 // scalar_to_vector for the elements followed by a shuffle (provided the 05265 // shuffle is valid for the target) and materialization element by element 05266 // on the stack followed by a load for everything else. 05267 if (!isConstant && !usesOnlyOneValue) { 05268 SDValue Vec = DAG.getUNDEF(VT); 05269 for (unsigned i = 0 ; i < NumElts; ++i) { 05270 SDValue V = Op.getOperand(i); 05271 if (V.getOpcode() == ISD::UNDEF) 05272 continue; 05273 SDValue LaneIdx = DAG.getConstant(i, MVT::i32); 05274 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); 05275 } 05276 return Vec; 05277 } 05278 05279 return SDValue(); 05280 } 05281 05282 // Gather data to see if the operation can be modelled as a 05283 // shuffle in combination with VEXTs. 
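// For example, a v4i16 built from elements 2 and 3 of one v4i16 source and
// elements 0 and 1 of another is rebuilt as a vector_shuffle of the two
// sources with mask <2,3,4,5>, which is a VEXT mask (Imm = 2) and therefore
// passes the isShuffleMaskLegal check at the end.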
05284 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, 05285 SelectionDAG &DAG) const { 05286 SDLoc dl(Op); 05287 EVT VT = Op.getValueType(); 05288 unsigned NumElts = VT.getVectorNumElements(); 05289 05290 SmallVector<SDValue, 2> SourceVecs; 05291 SmallVector<unsigned, 2> MinElts; 05292 SmallVector<unsigned, 2> MaxElts; 05293 05294 for (unsigned i = 0; i < NumElts; ++i) { 05295 SDValue V = Op.getOperand(i); 05296 if (V.getOpcode() == ISD::UNDEF) 05297 continue; 05298 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { 05299 // A shuffle can only come from building a vector from various 05300 // elements of other vectors. 05301 return SDValue(); 05302 } else if (V.getOperand(0).getValueType().getVectorElementType() != 05303 VT.getVectorElementType()) { 05304 // This code doesn't know how to handle shuffles where the vector 05305 // element types do not match (this happens because type legalization 05306 // promotes the return type of EXTRACT_VECTOR_ELT). 05307 // FIXME: It might be appropriate to extend this code to handle 05308 // mismatched types. 05309 return SDValue(); 05310 } 05311 05312 // Record this extraction against the appropriate vector if possible... 05313 SDValue SourceVec = V.getOperand(0); 05314 // If the element number isn't a constant, we can't effectively 05315 // analyze what's going on. 05316 if (!isa<ConstantSDNode>(V.getOperand(1))) 05317 return SDValue(); 05318 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); 05319 bool FoundSource = false; 05320 for (unsigned j = 0; j < SourceVecs.size(); ++j) { 05321 if (SourceVecs[j] == SourceVec) { 05322 if (MinElts[j] > EltNo) 05323 MinElts[j] = EltNo; 05324 if (MaxElts[j] < EltNo) 05325 MaxElts[j] = EltNo; 05326 FoundSource = true; 05327 break; 05328 } 05329 } 05330 05331 // Or record a new source if not... 05332 if (!FoundSource) { 05333 SourceVecs.push_back(SourceVec); 05334 MinElts.push_back(EltNo); 05335 MaxElts.push_back(EltNo); 05336 } 05337 } 05338 05339 // Currently only do something sane when at most two source vectors 05340 // involved. 05341 if (SourceVecs.size() > 2) 05342 return SDValue(); 05343 05344 SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; 05345 int VEXTOffsets[2] = {0, 0}; 05346 05347 // This loop extracts the usage patterns of the source vectors 05348 // and prepares appropriate SDValues for a shuffle if possible. 05349 for (unsigned i = 0; i < SourceVecs.size(); ++i) { 05350 if (SourceVecs[i].getValueType() == VT) { 05351 // No VEXT necessary 05352 ShuffleSrcs[i] = SourceVecs[i]; 05353 VEXTOffsets[i] = 0; 05354 continue; 05355 } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { 05356 // It probably isn't worth padding out a smaller vector just to 05357 // break it down again in a shuffle. 05358 return SDValue(); 05359 } 05360 05361 // Since only 64-bit and 128-bit vectors are legal on ARM and 05362 // we've eliminated the other cases... 
05363 assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && 05364 "unexpected vector sizes in ReconstructShuffle"); 05365 05366 if (MaxElts[i] - MinElts[i] >= NumElts) { 05367 // Span too large for a VEXT to cope 05368 return SDValue(); 05369 } 05370 05371 if (MinElts[i] >= NumElts) { 05372 // The extraction can just take the second half 05373 VEXTOffsets[i] = NumElts; 05374 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 05375 SourceVecs[i], 05376 DAG.getIntPtrConstant(NumElts)); 05377 } else if (MaxElts[i] < NumElts) { 05378 // The extraction can just take the first half 05379 VEXTOffsets[i] = 0; 05380 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 05381 SourceVecs[i], 05382 DAG.getIntPtrConstant(0)); 05383 } else { 05384 // An actual VEXT is needed 05385 VEXTOffsets[i] = MinElts[i]; 05386 SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 05387 SourceVecs[i], 05388 DAG.getIntPtrConstant(0)); 05389 SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 05390 SourceVecs[i], 05391 DAG.getIntPtrConstant(NumElts)); 05392 ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, 05393 DAG.getConstant(VEXTOffsets[i], MVT::i32)); 05394 } 05395 } 05396 05397 SmallVector<int, 8> Mask; 05398 05399 for (unsigned i = 0; i < NumElts; ++i) { 05400 SDValue Entry = Op.getOperand(i); 05401 if (Entry.getOpcode() == ISD::UNDEF) { 05402 Mask.push_back(-1); 05403 continue; 05404 } 05405 05406 SDValue ExtractVec = Entry.getOperand(0); 05407 int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i) 05408 .getOperand(1))->getSExtValue(); 05409 if (ExtractVec == SourceVecs[0]) { 05410 Mask.push_back(ExtractElt - VEXTOffsets[0]); 05411 } else { 05412 Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); 05413 } 05414 } 05415 05416 // Final check before we try to produce nonsense... 05417 if (isShuffleMaskLegal(Mask, VT)) 05418 return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], 05419 &Mask[0]); 05420 05421 return SDValue(); 05422 } 05423 05424 /// isShuffleMaskLegal - Targets can use this to indicate that they only 05425 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 05426 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 05427 /// are assumed to be legal. 05428 bool 05429 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 05430 EVT VT) const { 05431 if (VT.getVectorNumElements() == 4 && 05432 (VT.is128BitVector() || VT.is64BitVector())) { 05433 unsigned PFIndexes[4]; 05434 for (unsigned i = 0; i != 4; ++i) { 05435 if (M[i] < 0) 05436 PFIndexes[i] = 8; 05437 else 05438 PFIndexes[i] = M[i]; 05439 } 05440 05441 // Compute the index in the perfect shuffle table. 
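    // For example, mask indices are encoded base-9 with 8 standing for undef,
    // so <1, u, 2, 0> maps to 1*729 + 8*81 + 2*9 + 0 = 1395; the top two bits
    // of the table entry hold the cost compared against 4 below.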
05442 unsigned PFTableIndex = 05443 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 05444 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 05445 unsigned Cost = (PFEntry >> 30); 05446 05447 if (Cost <= 4) 05448 return true; 05449 } 05450 05451 bool ReverseVEXT; 05452 unsigned Imm, WhichResult; 05453 05454 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 05455 return (EltSize >= 32 || 05456 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 05457 isVREVMask(M, VT, 64) || 05458 isVREVMask(M, VT, 32) || 05459 isVREVMask(M, VT, 16) || 05460 isVEXTMask(M, VT, ReverseVEXT, Imm) || 05461 isVTBLMask(M, VT) || 05462 isVTRNMask(M, VT, WhichResult) || 05463 isVUZPMask(M, VT, WhichResult) || 05464 isVZIPMask(M, VT, WhichResult) || 05465 isVTRN_v_undef_Mask(M, VT, WhichResult) || 05466 isVUZP_v_undef_Mask(M, VT, WhichResult) || 05467 isVZIP_v_undef_Mask(M, VT, WhichResult) || 05468 ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); 05469 } 05470 05471 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 05472 /// the specified operations to build the shuffle. 05473 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 05474 SDValue RHS, SelectionDAG &DAG, 05475 SDLoc dl) { 05476 unsigned OpNum = (PFEntry >> 26) & 0x0F; 05477 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 05478 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 05479 05480 enum { 05481 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 05482 OP_VREV, 05483 OP_VDUP0, 05484 OP_VDUP1, 05485 OP_VDUP2, 05486 OP_VDUP3, 05487 OP_VEXT1, 05488 OP_VEXT2, 05489 OP_VEXT3, 05490 OP_VUZPL, // VUZP, left result 05491 OP_VUZPR, // VUZP, right result 05492 OP_VZIPL, // VZIP, left result 05493 OP_VZIPR, // VZIP, right result 05494 OP_VTRNL, // VTRN, left result 05495 OP_VTRNR // VTRN, right result 05496 }; 05497 05498 if (OpNum == OP_COPY) { 05499 if (LHSID == (1*9+2)*9+3) return LHS; 05500 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 05501 return RHS; 05502 } 05503 05504 SDValue OpLHS, OpRHS; 05505 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 05506 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 05507 EVT VT = OpLHS.getValueType(); 05508 05509 switch (OpNum) { 05510 default: llvm_unreachable("Unknown shuffle opcode!"); 05511 case OP_VREV: 05512 // VREV divides the vector in half and swaps within the half. 
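    // For example, on v4i32 the VREV64.32 emitted here performs the
    // <1,0,3,2> shuffle (adjacent pairs swapped); the i16 and i8 cases below
    // do the same within 32-bit and 16-bit blocks respectively.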
05513 if (VT.getVectorElementType() == MVT::i32 || 05514 VT.getVectorElementType() == MVT::f32) 05515 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 05516 // vrev <4 x i16> -> VREV32 05517 if (VT.getVectorElementType() == MVT::i16) 05518 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS); 05519 // vrev <4 x i8> -> VREV16 05520 assert(VT.getVectorElementType() == MVT::i8); 05521 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS); 05522 case OP_VDUP0: 05523 case OP_VDUP1: 05524 case OP_VDUP2: 05525 case OP_VDUP3: 05526 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 05527 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 05528 case OP_VEXT1: 05529 case OP_VEXT2: 05530 case OP_VEXT3: 05531 return DAG.getNode(ARMISD::VEXT, dl, VT, 05532 OpLHS, OpRHS, 05533 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 05534 case OP_VUZPL: 05535 case OP_VUZPR: 05536 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 05537 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 05538 case OP_VZIPL: 05539 case OP_VZIPR: 05540 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 05541 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 05542 case OP_VTRNL: 05543 case OP_VTRNR: 05544 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 05545 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 05546 } 05547 } 05548 05549 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, 05550 ArrayRef<int> ShuffleMask, 05551 SelectionDAG &DAG) { 05552 // Check to see if we can use the VTBL instruction. 05553 SDValue V1 = Op.getOperand(0); 05554 SDValue V2 = Op.getOperand(1); 05555 SDLoc DL(Op); 05556 05557 SmallVector<SDValue, 8> VTBLMask; 05558 for (ArrayRef<int>::iterator 05559 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) 05560 VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); 05561 05562 if (V2.getNode()->getOpcode() == ISD::UNDEF) 05563 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, 05564 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); 05565 05566 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, 05567 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); 05568 } 05569 05570 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, 05571 SelectionDAG &DAG) { 05572 SDLoc DL(Op); 05573 SDValue OpLHS = Op.getOperand(0); 05574 EVT VT = OpLHS.getValueType(); 05575 05576 assert((VT == MVT::v8i16 || VT == MVT::v16i8) && 05577 "Expect an v8i16/v16i8 type"); 05578 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); 05579 // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now, 05580 // extract the first 8 bytes into the top double word and the last 8 bytes 05581 // into the bottom double word. The v8i16 case is similar. 05582 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4; 05583 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, 05584 DAG.getConstant(ExtractNum, MVT::i32)); 05585 } 05586 05587 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 05588 SDValue V1 = Op.getOperand(0); 05589 SDValue V2 = Op.getOperand(1); 05590 SDLoc dl(Op); 05591 EVT VT = Op.getValueType(); 05592 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 05593 05594 // Convert shuffles that are directly supported on NEON to target-specific 05595 // DAG nodes, instead of keeping them as shuffles and matching them again 05596 // during code selection. This is more efficient and avoids the possibility 05597 // of inconsistencies between legalization and selection. 
05598 // FIXME: floating-point vectors should be canonicalized to integer vectors 05599 // of the same time so that they get CSEd properly. 05600 ArrayRef<int> ShuffleMask = SVN->getMask(); 05601 05602 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 05603 if (EltSize <= 32) { 05604 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 05605 int Lane = SVN->getSplatIndex(); 05606 // If this is undef splat, generate it via "just" vdup, if possible. 05607 if (Lane == -1) Lane = 0; 05608 05609 // Test if V1 is a SCALAR_TO_VECTOR. 05610 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 05611 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 05612 } 05613 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR 05614 // (and probably will turn into a SCALAR_TO_VECTOR once legalization 05615 // reaches it). 05616 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && 05617 !isa<ConstantSDNode>(V1.getOperand(0))) { 05618 bool IsScalarToVector = true; 05619 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) 05620 if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { 05621 IsScalarToVector = false; 05622 break; 05623 } 05624 if (IsScalarToVector) 05625 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 05626 } 05627 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 05628 DAG.getConstant(Lane, MVT::i32)); 05629 } 05630 05631 bool ReverseVEXT; 05632 unsigned Imm; 05633 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 05634 if (ReverseVEXT) 05635 std::swap(V1, V2); 05636 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 05637 DAG.getConstant(Imm, MVT::i32)); 05638 } 05639 05640 if (isVREVMask(ShuffleMask, VT, 64)) 05641 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 05642 if (isVREVMask(ShuffleMask, VT, 32)) 05643 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 05644 if (isVREVMask(ShuffleMask, VT, 16)) 05645 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 05646 05647 if (V2->getOpcode() == ISD::UNDEF && 05648 isSingletonVEXTMask(ShuffleMask, VT, Imm)) { 05649 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1, 05650 DAG.getConstant(Imm, MVT::i32)); 05651 } 05652 05653 // Check for Neon shuffles that modify both input vectors in place. 05654 // If both results are used, i.e., if there are two shuffles with the same 05655 // source operands and with masks corresponding to both results of one of 05656 // these operations, DAG memoization will ensure that a single node is 05657 // used for both shuffles. 
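    // For example, for v4i16 the two-result masks recognized below are
    //   VTRN: <0,4,2,6> / <1,5,3,7>
    //   VUZP: <0,2,4,6> / <1,3,5,7>
    //   VZIP: <0,4,1,5> / <2,6,3,7>
    // where the first mask of each pair selects result 0 and the second
    // selects result 1.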
05658 unsigned WhichResult; 05659 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 05660 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 05661 V1, V2).getValue(WhichResult); 05662 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 05663 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 05664 V1, V2).getValue(WhichResult); 05665 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 05666 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 05667 V1, V2).getValue(WhichResult); 05668 05669 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 05670 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 05671 V1, V1).getValue(WhichResult); 05672 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 05673 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 05674 V1, V1).getValue(WhichResult); 05675 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 05676 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 05677 V1, V1).getValue(WhichResult); 05678 } 05679 05680 // If the shuffle is not directly supported and it has 4 elements, use 05681 // the PerfectShuffle-generated table to synthesize it from other shuffles. 05682 unsigned NumElts = VT.getVectorNumElements(); 05683 if (NumElts == 4) { 05684 unsigned PFIndexes[4]; 05685 for (unsigned i = 0; i != 4; ++i) { 05686 if (ShuffleMask[i] < 0) 05687 PFIndexes[i] = 8; 05688 else 05689 PFIndexes[i] = ShuffleMask[i]; 05690 } 05691 05692 // Compute the index in the perfect shuffle table. 05693 unsigned PFTableIndex = 05694 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 05695 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 05696 unsigned Cost = (PFEntry >> 30); 05697 05698 if (Cost <= 4) 05699 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 05700 } 05701 05702 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 05703 if (EltSize >= 32) { 05704 // Do the expansion with floating-point types, since that is what the VFP 05705 // registers are defined to use, and since i64 is not legal. 05706 EVT EltVT = EVT::getFloatingPointVT(EltSize); 05707 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 05708 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 05709 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 05710 SmallVector<SDValue, 8> Ops; 05711 for (unsigned i = 0; i < NumElts; ++i) { 05712 if (ShuffleMask[i] < 0) 05713 Ops.push_back(DAG.getUNDEF(EltVT)); 05714 else 05715 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 05716 ShuffleMask[i] < (int)NumElts ? V1 : V2, 05717 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 05718 MVT::i32))); 05719 } 05720 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); 05721 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 05722 } 05723 05724 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) 05725 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); 05726 05727 if (VT == MVT::v8i8) { 05728 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 05729 if (NewOp.getNode()) 05730 return NewOp; 05731 } 05732 05733 return SDValue(); 05734 } 05735 05736 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 05737 // INSERT_VECTOR_ELT is legal only for immediate indexes. 05738 SDValue Lane = Op.getOperand(2); 05739 if (!isa<ConstantSDNode>(Lane)) 05740 return SDValue(); 05741 05742 return Op; 05743 } 05744 05745 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 05746 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 
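  // For example, extracting lane 3 of a v8i8 as an i32 is turned into
  // ARMISD::VGETLANEu below, while an extract with a variable lane index
  // returns SDValue() and is left to the default expansion.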
05747 SDValue Lane = Op.getOperand(1); 05748 if (!isa<ConstantSDNode>(Lane)) 05749 return SDValue(); 05750 05751 SDValue Vec = Op.getOperand(0); 05752 if (Op.getValueType() == MVT::i32 && 05753 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 05754 SDLoc dl(Op); 05755 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 05756 } 05757 05758 return Op; 05759 } 05760 05761 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 05762 // The only time a CONCAT_VECTORS operation can have legal types is when 05763 // two 64-bit vectors are concatenated to a 128-bit vector. 05764 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 05765 "unexpected CONCAT_VECTORS"); 05766 SDLoc dl(Op); 05767 SDValue Val = DAG.getUNDEF(MVT::v2f64); 05768 SDValue Op0 = Op.getOperand(0); 05769 SDValue Op1 = Op.getOperand(1); 05770 if (Op0.getOpcode() != ISD::UNDEF) 05771 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 05772 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 05773 DAG.getIntPtrConstant(0)); 05774 if (Op1.getOpcode() != ISD::UNDEF) 05775 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 05776 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 05777 DAG.getIntPtrConstant(1)); 05778 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 05779 } 05780 05781 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 05782 /// element has been zero/sign-extended, depending on the isSigned parameter, 05783 /// from an integer type half its size. 05784 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 05785 bool isSigned) { 05786 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 05787 EVT VT = N->getValueType(0); 05788 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 05789 SDNode *BVN = N->getOperand(0).getNode(); 05790 if (BVN->getValueType(0) != MVT::v4i32 || 05791 BVN->getOpcode() != ISD::BUILD_VECTOR) 05792 return false; 05793 unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 05794 unsigned HiElt = 1 - LoElt; 05795 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 05796 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 05797 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 05798 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 05799 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 05800 return false; 05801 if (isSigned) { 05802 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 05803 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 05804 return true; 05805 } else { 05806 if (Hi0->isNullValue() && Hi1->isNullValue()) 05807 return true; 05808 } 05809 return false; 05810 } 05811 05812 if (N->getOpcode() != ISD::BUILD_VECTOR) 05813 return false; 05814 05815 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 05816 SDNode *Elt = N->getOperand(i).getNode(); 05817 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 05818 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 05819 unsigned HalfSize = EltSize / 2; 05820 if (isSigned) { 05821 if (!isIntN(HalfSize, C->getSExtValue())) 05822 return false; 05823 } else { 05824 if (!isUIntN(HalfSize, C->getZExtValue())) 05825 return false; 05826 } 05827 continue; 05828 } 05829 return false; 05830 } 05831 05832 return true; 05833 } 05834 05835 /// isSignExtended - Check if a node is a vector value that is sign-extended 05836 /// or a constant BUILD_VECTOR with sign-extended elements. 
05837 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 05838 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 05839 return true; 05840 if (isExtendedBUILD_VECTOR(N, DAG, true)) 05841 return true; 05842 return false; 05843 } 05844 05845 /// isZeroExtended - Check if a node is a vector value that is zero-extended 05846 /// or a constant BUILD_VECTOR with zero-extended elements. 05847 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 05848 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 05849 return true; 05850 if (isExtendedBUILD_VECTOR(N, DAG, false)) 05851 return true; 05852 return false; 05853 } 05854 05855 static EVT getExtensionTo64Bits(const EVT &OrigVT) { 05856 if (OrigVT.getSizeInBits() >= 64) 05857 return OrigVT; 05858 05859 assert(OrigVT.isSimple() && "Expecting a simple value type"); 05860 05861 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; 05862 switch (OrigSimpleTy) { 05863 default: llvm_unreachable("Unexpected Vector Type"); 05864 case MVT::v2i8: 05865 case MVT::v2i16: 05866 return MVT::v2i32; 05867 case MVT::v4i8: 05868 return MVT::v4i16; 05869 } 05870 } 05871 05872 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total 05873 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. 05874 /// We insert the required extension here to get the vector to fill a D register. 05875 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, 05876 const EVT &OrigTy, 05877 const EVT &ExtTy, 05878 unsigned ExtOpcode) { 05879 // The vector originally had a size of OrigTy. It was then extended to ExtTy. 05880 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than 05881 // 64-bits we need to insert a new extension so that it will be 64-bits. 05882 assert(ExtTy.is128BitVector() && "Unexpected extension size"); 05883 if (OrigTy.getSizeInBits() >= 64) 05884 return N; 05885 05886 // Must extend size to at least 64 bits to be used as an operand for VMULL. 05887 EVT NewVT = getExtensionTo64Bits(OrigTy); 05888 05889 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N); 05890 } 05891 05892 /// SkipLoadExtensionForVMULL - return a load of the original vector size that 05893 /// does not do any sign/zero extension. If the original vector is less 05894 /// than 64 bits, an appropriate extension will be added after the load to 05895 /// reach a total size of 64 bits. We have to add the extension separately 05896 /// because ARM does not have a sign/zero extending load for vectors. 05897 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { 05898 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); 05899 05900 // The load already has the right type. 05901 if (ExtendedTy == LD->getMemoryVT()) 05902 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), 05903 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 05904 LD->isNonTemporal(), LD->isInvariant(), 05905 LD->getAlignment()); 05906 05907 // We need to create a zextload/sextload. We cannot just create a load 05908 // followed by a zext/zext node because LowerMUL is also run during normal 05909 // operation legalization where we can't create illegal types. 
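// For example, a <4 x i8> extending load feeding a VMULL would otherwise
// leave an illegal v4i8 value between the load and the extend; folding the
// extension into the load (an extload from v4i8 memory to v4i16) keeps every
// intermediate type legal.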
05910 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, 05911 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), 05912 LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(), 05913 LD->isNonTemporal(), LD->getAlignment()); 05914 } 05915 05916 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, 05917 /// extending load, or BUILD_VECTOR with extended elements, return the 05918 /// unextended value. The unextended vector should be 64 bits so that it can 05919 /// be used as an operand to a VMULL instruction. If the original vector size 05920 /// before extension is less than 64 bits we add an extension to resize 05921 /// the vector to 64 bits. 05922 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { 05923 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 05924 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG, 05925 N->getOperand(0)->getValueType(0), 05926 N->getValueType(0), 05927 N->getOpcode()); 05928 05929 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 05930 return SkipLoadExtensionForVMULL(LD, DAG); 05931 05932 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 05933 // have been legalized as a BITCAST from v4i32. 05934 if (N->getOpcode() == ISD::BITCAST) { 05935 SDNode *BVN = N->getOperand(0).getNode(); 05936 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 05937 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 05938 unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 05939 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32, 05940 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 05941 } 05942 // Construct a new BUILD_VECTOR with elements truncated to half the size. 05943 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 05944 EVT VT = N->getValueType(0); 05945 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 05946 unsigned NumElts = VT.getVectorNumElements(); 05947 MVT TruncVT = MVT::getIntegerVT(EltSize); 05948 SmallVector<SDValue, 8> Ops; 05949 for (unsigned i = 0; i != NumElts; ++i) { 05950 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 05951 const APInt &CInt = C->getAPIntValue(); 05952 // Element types smaller than 32 bits are not legal, so use i32 elements. 05953 // The values are implicitly truncated so sext vs. zext doesn't matter.
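// E.g. a v8i16 BUILD_VECTOR of constants becomes a BUILD_VECTOR of type
// v8i8 whose operands are still i32 constants carrying the low 8 bits of
// each element; only those low bits matter to the VMULL pattern.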
05954 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); 05955 } 05956 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), 05957 MVT::getVectorVT(TruncVT, NumElts), Ops); 05958 } 05959 05960 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 05961 unsigned Opcode = N->getOpcode(); 05962 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 05963 SDNode *N0 = N->getOperand(0).getNode(); 05964 SDNode *N1 = N->getOperand(1).getNode(); 05965 return N0->hasOneUse() && N1->hasOneUse() && 05966 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 05967 } 05968 return false; 05969 } 05970 05971 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 05972 unsigned Opcode = N->getOpcode(); 05973 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 05974 SDNode *N0 = N->getOperand(0).getNode(); 05975 SDNode *N1 = N->getOperand(1).getNode(); 05976 return N0->hasOneUse() && N1->hasOneUse() && 05977 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 05978 } 05979 return false; 05980 } 05981 05982 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 05983 // Multiplications are only custom-lowered for 128-bit vectors so that 05984 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 05985 EVT VT = Op.getValueType(); 05986 assert(VT.is128BitVector() && VT.isInteger() && 05987 "unexpected type for custom-lowering ISD::MUL"); 05988 SDNode *N0 = Op.getOperand(0).getNode(); 05989 SDNode *N1 = Op.getOperand(1).getNode(); 05990 unsigned NewOpc = 0; 05991 bool isMLA = false; 05992 bool isN0SExt = isSignExtended(N0, DAG); 05993 bool isN1SExt = isSignExtended(N1, DAG); 05994 if (isN0SExt && isN1SExt) 05995 NewOpc = ARMISD::VMULLs; 05996 else { 05997 bool isN0ZExt = isZeroExtended(N0, DAG); 05998 bool isN1ZExt = isZeroExtended(N1, DAG); 05999 if (isN0ZExt && isN1ZExt) 06000 NewOpc = ARMISD::VMULLu; 06001 else if (isN1SExt || isN1ZExt) { 06002 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these 06003 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 06004 if (isN1SExt && isAddSubSExt(N0, DAG)) { 06005 NewOpc = ARMISD::VMULLs; 06006 isMLA = true; 06007 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 06008 NewOpc = ARMISD::VMULLu; 06009 isMLA = true; 06010 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 06011 std::swap(N0, N1); 06012 NewOpc = ARMISD::VMULLu; 06013 isMLA = true; 06014 } 06015 } 06016 06017 if (!NewOpc) { 06018 if (VT == MVT::v2i64) 06019 // Fall through to expand this. It is not legal. 06020 return SDValue(); 06021 else 06022 // Other vector multiplications are legal. 06023 return Op; 06024 } 06025 } 06026 06027 // Legalize to a VMULL instruction. 06028 SDLoc DL(Op); 06029 SDValue Op0; 06030 SDValue Op1 = SkipExtensionForVMULL(N1, DAG); 06031 if (!isMLA) { 06032 Op0 = SkipExtensionForVMULL(N0, DAG); 06033 assert(Op0.getValueType().is64BitVector() && 06034 Op1.getValueType().is64BitVector() && 06035 "unexpected types for extended operands to VMULL"); 06036 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 06037 } 06038 06039 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during 06040 // isel lowering to take advantage of no-stall back to back vmul + vmla. 
06041 // vmull q0, d4, d6 06042 // vmlal q0, d5, d6 06043 // is faster than 06044 // vaddl q0, d4, d5 06045 // vmovl q1, d6 06046 // vmul q0, q0, q1 06047 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG); 06048 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG); 06049 EVT Op1VT = Op1.getValueType(); 06050 return DAG.getNode(N0->getOpcode(), DL, VT, 06051 DAG.getNode(NewOpc, DL, VT, 06052 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), 06053 DAG.getNode(NewOpc, DL, VT, 06054 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); 06055 } 06056 06057 static SDValue 06058 LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { 06059 // Convert to float 06060 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); 06061 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); 06062 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); 06063 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); 06064 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); 06065 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); 06066 // Get reciprocal estimate. 06067 // float4 recip = vrecpeq_f32(yf); 06068 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 06069 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); 06070 // Because char has a smaller range than uchar, we can actually get away 06071 // without any newton steps. This requires that we use a weird bias 06072 // of 0xb000, however (again, this has been exhaustively tested). 06073 // float4 result = as_float4(as_int4(xf*recip) + 0xb000); 06074 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); 06075 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); 06076 Y = DAG.getConstant(0xb000, MVT::i32); 06077 Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); 06078 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); 06079 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); 06080 // Convert back to short. 06081 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X); 06082 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X); 06083 return X; 06084 } 06085 06086 static SDValue 06087 LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) { 06088 SDValue N2; 06089 // Convert to float. 06090 // float4 yf = vcvt_f32_s32(vmovl_s16(y)); 06091 // float4 xf = vcvt_f32_s32(vmovl_s16(x)); 06092 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); 06093 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); 06094 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 06095 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 06096 06097 // Use reciprocal estimate and one refinement step. 06098 // float4 recip = vrecpeq_f32(yf); 06099 // recip *= vrecpsq_f32(yf, recip); 06100 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 06101 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); 06102 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 06103 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 06104 N1, N2); 06105 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 06106 // Because short has a smaller range than ushort, we can actually get away 06107 // with only a single newton step. This requires that we use a weird bias 06108 // of 89, however (again, this has been exhaustively tested). 
06109 // float4 result = as_float4(as_int4(xf*recip) + 0x89); 06110 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 06111 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 06112 N1 = DAG.getConstant(0x89, MVT::i32); 06113 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 06114 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 06115 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 06116 // Convert back to integer and return. 06117 // return vmovn_s32(vcvt_s32_f32(result)); 06118 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 06119 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 06120 return N0; 06121 } 06122 06123 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { 06124 EVT VT = Op.getValueType(); 06125 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 06126 "unexpected type for custom-lowering ISD::SDIV"); 06127 06128 SDLoc dl(Op); 06129 SDValue N0 = Op.getOperand(0); 06130 SDValue N1 = Op.getOperand(1); 06131 SDValue N2, N3; 06132 06133 if (VT == MVT::v8i8) { 06134 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); 06135 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); 06136 06137 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 06138 DAG.getIntPtrConstant(4)); 06139 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 06140 DAG.getIntPtrConstant(4)); 06141 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 06142 DAG.getIntPtrConstant(0)); 06143 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 06144 DAG.getIntPtrConstant(0)); 06145 06146 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 06147 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 06148 06149 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 06150 N0 = LowerCONCAT_VECTORS(N0, DAG); 06151 06152 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); 06153 return N0; 06154 } 06155 return LowerSDIV_v4i16(N0, N1, dl, DAG); 06156 } 06157 06158 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { 06159 EVT VT = Op.getValueType(); 06160 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 06161 "unexpected type for custom-lowering ISD::UDIV"); 06162 06163 SDLoc dl(Op); 06164 SDValue N0 = Op.getOperand(0); 06165 SDValue N1 = Op.getOperand(1); 06166 SDValue N2, N3; 06167 06168 if (VT == MVT::v8i8) { 06169 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); 06170 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); 06171 06172 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 06173 DAG.getIntPtrConstant(4)); 06174 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 06175 DAG.getIntPtrConstant(4)); 06176 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 06177 DAG.getIntPtrConstant(0)); 06178 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 06179 DAG.getIntPtrConstant(0)); 06180 06181 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 06182 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 06183 06184 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 06185 N0 = LowerCONCAT_VECTORS(N0, DAG); 06186 06187 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, 06188 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32), 06189 N0); 06190 return N0; 06191 } 06192 06193 // v4i16 sdiv ... Convert to float. 
06194 // float4 yf = vcvt_f32_s32(vmovl_u16(y)); 06195 // float4 xf = vcvt_f32_s32(vmovl_u16(x)); 06196 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0); 06197 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1); 06198 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 06199 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 06200 06201 // Use reciprocal estimate and two refinement steps. 06202 // float4 recip = vrecpeq_f32(yf); 06203 // recip *= vrecpsq_f32(yf, recip); 06204 // recip *= vrecpsq_f32(yf, recip); 06205 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 06206 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1); 06207 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 06208 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 06209 BN1, N2); 06210 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 06211 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 06212 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 06213 BN1, N2); 06214 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 06215 // Simply multiplying by the reciprocal estimate can leave us a few ulps 06216 // too low, so we add 2 ulps (exhaustive testing shows that this is enough, 06217 // and that it will never cause us to return an answer too large). 06218 // float4 result = as_float4(as_int4(xf*recip) + 2); 06219 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 06220 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 06221 N1 = DAG.getConstant(2, MVT::i32); 06222 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 06223 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 06224 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 06225 // Convert back to integer and return. 06226 // return vmovn_u32(vcvt_s32_f32(result)); 06227 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 06228 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 06229 return N0; 06230 } 06231 06232 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { 06233 EVT VT = Op.getNode()->getValueType(0); 06234 SDVTList VTs = DAG.getVTList(VT, MVT::i32); 06235 06236 unsigned Opc; 06237 bool ExtraOp = false; 06238 switch (Op.getOpcode()) { 06239 default: llvm_unreachable("Invalid code"); 06240 case ISD::ADDC: Opc = ARMISD::ADDC; break; 06241 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; 06242 case ISD::SUBC: Opc = ARMISD::SUBC; break; 06243 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break; 06244 } 06245 06246 if (!ExtraOp) 06247 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), 06248 Op.getOperand(1)); 06249 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), 06250 Op.getOperand(1), Op.getOperand(2)); 06251 } 06252 06253 SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { 06254 assert(Subtarget->isTargetDarwin()); 06255 06256 // For iOS, we want to call an alternative entry point: __sincos_stret, 06257 // return values are passed via sret. 06258 SDLoc dl(Op); 06259 SDValue Arg = Op.getOperand(0); 06260 EVT ArgVT = Arg.getValueType(); 06261 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); 06262 06263 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 06264 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 06265 06266 // Pair of floats / doubles used to pass the result. 06267 StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); 06268 06269 // Create stack object for sret. 
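// __sincos_stret / __sincosf_stret write {sin, cos} of the argument into a
// caller-provided sret buffer; the stack object created below is that
// buffer, and the two loads issued after the call read the results back.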
06270 const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy); 06271 const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy); 06272 int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); 06273 SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy()); 06274 06275 ArgListTy Args; 06276 ArgListEntry Entry; 06277 06278 Entry.Node = SRet; 06279 Entry.Ty = RetTy->getPointerTo(); 06280 Entry.isSExt = false; 06281 Entry.isZExt = false; 06282 Entry.isSRet = true; 06283 Args.push_back(Entry); 06284 06285 Entry.Node = Arg; 06286 Entry.Ty = ArgTy; 06287 Entry.isSExt = false; 06288 Entry.isZExt = false; 06289 Args.push_back(Entry); 06290 06291 const char *LibcallName = (ArgVT == MVT::f64) 06292 ? "__sincos_stret" : "__sincosf_stret"; 06293 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); 06294 06295 TargetLowering::CallLoweringInfo CLI(DAG); 06296 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) 06297 .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, 06298 std::move(Args), 0) 06299 .setDiscardResult(); 06300 06301 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 06302 06303 SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, 06304 MachinePointerInfo(), false, false, false, 0); 06305 06306 // Address of cos field. 06307 SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet, 06308 DAG.getIntPtrConstant(ArgVT.getStoreSize())); 06309 SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, 06310 MachinePointerInfo(), false, false, false, 0); 06311 06312 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); 06313 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, 06314 LoadSin.getValue(0), LoadCos.getValue(0)); 06315 } 06316 06317 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { 06318 // Monotonic load/store is legal for all targets 06319 if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic) 06320 return Op; 06321 06322 // Acquire/Release load/store is not legal for targets without a 06323 // dmb or equivalent available. 06324 return SDValue(); 06325 } 06326 06327 static void ReplaceREADCYCLECOUNTER(SDNode *N, 06328 SmallVectorImpl<SDValue> &Results, 06329 SelectionDAG &DAG, 06330 const ARMSubtarget *Subtarget) { 06331 SDLoc DL(N); 06332 SDValue Cycles32, OutChain; 06333 06334 if (Subtarget->hasPerfMon()) { 06335 // Under Power Management extensions, the cycle-count is: 06336 // mrc p15, #0, <Rt>, c9, c13, #0 06337 SDValue Ops[] = { N->getOperand(0), // Chain 06338 DAG.getConstant(Intrinsic::arm_mrc, MVT::i32), 06339 DAG.getConstant(15, MVT::i32), 06340 DAG.getConstant(0, MVT::i32), 06341 DAG.getConstant(9, MVT::i32), 06342 DAG.getConstant(13, MVT::i32), 06343 DAG.getConstant(0, MVT::i32) 06344 }; 06345 06346 Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, 06347 DAG.getVTList(MVT::i32, MVT::Other), Ops); 06348 OutChain = Cycles32.getValue(1); 06349 } else { 06350 // Intrinsic is defined to return 0 on unsupported platforms. Technically 06351 // there are older ARM CPUs that have implementation-specific ways of 06352 // obtaining this information (FIXME!). 
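// Returning a constant 0 matches the comment above: with no cycle counter
// available the intrinsic is defined to yield zero. The BUILD_PAIR below
// still widens whichever 32-bit value was produced to the i64 result type.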
06353 Cycles32 = DAG.getConstant(0, MVT::i32); 06354 OutChain = DAG.getEntryNode(); 06355 } 06356 06357 06358 SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, 06359 Cycles32, DAG.getConstant(0, MVT::i32)); 06360 Results.push_back(Cycles64); 06361 Results.push_back(OutChain); 06362 } 06363 06364 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 06365 switch (Op.getOpcode()) { 06366 default: llvm_unreachable("Don't know how to custom lower this!"); 06367 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 06368 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 06369 case ISD::GlobalAddress: 06370 switch (Subtarget->getTargetTriple().getObjectFormat()) { 06371 default: llvm_unreachable("unknown object format"); 06372 case Triple::COFF: 06373 return LowerGlobalAddressWindows(Op, DAG); 06374 case Triple::ELF: 06375 return LowerGlobalAddressELF(Op, DAG); 06376 case Triple::MachO: 06377 return LowerGlobalAddressDarwin(Op, DAG); 06378 } 06379 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 06380 case ISD::SELECT: return LowerSELECT(Op, DAG); 06381 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 06382 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 06383 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 06384 case ISD::VASTART: return LowerVASTART(Op, DAG); 06385 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); 06386 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); 06387 case ISD::SINT_TO_FP: 06388 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 06389 case ISD::FP_TO_SINT: 06390 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 06391 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 06392 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 06393 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 06394 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 06395 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 06396 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 06397 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 06398 Subtarget); 06399 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); 06400 case ISD::SHL: 06401 case ISD::SRL: 06402 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 06403 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 06404 case ISD::SRL_PARTS: 06405 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 06406 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 06407 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); 06408 case ISD::SETCC: return LowerVSETCC(Op, DAG); 06409 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); 06410 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 06411 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 06412 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 06413 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 06414 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 06415 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 06416 case ISD::MUL: return LowerMUL(Op, DAG); 06417 case ISD::SDIV: return LowerSDIV(Op, DAG); 06418 case ISD::UDIV: return LowerUDIV(Op, DAG); 06419 case ISD::ADDC: 06420 case ISD::ADDE: 06421 case ISD::SUBC: 06422 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); 06423 case ISD::SADDO: 06424 case ISD::UADDO: 06425 case ISD::SSUBO: 06426 case ISD::USUBO: 
06427 return LowerXALUO(Op, DAG); 06428 case ISD::ATOMIC_LOAD: 06429 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); 06430 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); 06431 case ISD::SDIVREM: 06432 case ISD::UDIVREM: return LowerDivRem(Op, DAG); 06433 case ISD::DYNAMIC_STACKALLOC: 06434 if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) 06435 return LowerDYNAMIC_STACKALLOC(Op, DAG); 06436 llvm_unreachable("Don't know how to custom lower this!"); 06437 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); 06438 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); 06439 } 06440 } 06441 06442 /// ReplaceNodeResults - Replace the results of node with an illegal result 06443 /// type with new values built out of custom code. 06444 void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 06445 SmallVectorImpl<SDValue>&Results, 06446 SelectionDAG &DAG) const { 06447 SDValue Res; 06448 switch (N->getOpcode()) { 06449 default: 06450 llvm_unreachable("Don't know how to custom expand this!"); 06451 case ISD::BITCAST: 06452 Res = ExpandBITCAST(N, DAG); 06453 break; 06454 case ISD::SRL: 06455 case ISD::SRA: 06456 Res = Expand64BitShift(N, DAG, Subtarget); 06457 break; 06458 case ISD::READCYCLECOUNTER: 06459 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); 06460 return; 06461 } 06462 if (Res.getNode()) 06463 Results.push_back(Res); 06464 } 06465 06466 //===----------------------------------------------------------------------===// 06467 // ARM Scheduler Hooks 06468 //===----------------------------------------------------------------------===// 06469 06470 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and 06471 /// registers the function context. 06472 void ARMTargetLowering:: 06473 SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, 06474 MachineBasicBlock *DispatchBB, int FI) const { 06475 const TargetInstrInfo *TII = 06476 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 06477 DebugLoc dl = MI->getDebugLoc(); 06478 MachineFunction *MF = MBB->getParent(); 06479 MachineRegisterInfo *MRI = &MF->getRegInfo(); 06480 MachineConstantPool *MCP = MF->getConstantPool(); 06481 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 06482 const Function *F = MF->getFunction(); 06483 06484 bool isThumb = Subtarget->isThumb(); 06485 bool isThumb2 = Subtarget->isThumb2(); 06486 06487 unsigned PCLabelId = AFI->createPICLabelUId(); 06488 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; 06489 ARMConstantPoolValue *CPV = 06490 ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); 06491 unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); 06492 06493 const TargetRegisterClass *TRC = isThumb ? 06494 (const TargetRegisterClass*)&ARM::tGPRRegClass : 06495 (const TargetRegisterClass*)&ARM::GPRRegClass; 06496 06497 // Grab constant pool and fixed stack memory operands. 06498 MachineMemOperand *CPMMO = 06499 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(), 06500 MachineMemOperand::MOLoad, 4, 4); 06501 06502 MachineMemOperand *FIMMOSt = 06503 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 06504 MachineMemOperand::MOStore, 4, 4); 06505 06506 // Load the address of the dispatch MBB into the jump buffer. 
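// The jump buffer's PC slot lives 36 bytes into the function context (the
// addImm(36) operands below, i.e. &jbuf[1]); each variant materializes the
// dispatch block's address (with the low bit set in Thumb mode) and stores
// it into that slot so an SjLj resume lands in the dispatch code.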
06507 if (isThumb2) { 06508 // Incoming value: jbuf 06509 // ldr.n r5, LCPI1_1 06510 // orr r5, r5, #1 06511 // add r5, pc 06512 // str r5, [$jbuf, #+4] ; &jbuf[1] 06513 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 06514 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) 06515 .addConstantPoolIndex(CPI) 06516 .addMemOperand(CPMMO)); 06517 // Set the low bit because of thumb mode. 06518 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 06519 AddDefaultCC( 06520 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) 06521 .addReg(NewVReg1, RegState::Kill) 06522 .addImm(0x01))); 06523 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 06524 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) 06525 .addReg(NewVReg2, RegState::Kill) 06526 .addImm(PCLabelId); 06527 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) 06528 .addReg(NewVReg3, RegState::Kill) 06529 .addFrameIndex(FI) 06530 .addImm(36) // &jbuf[1] :: pc 06531 .addMemOperand(FIMMOSt)); 06532 } else if (isThumb) { 06533 // Incoming value: jbuf 06534 // ldr.n r1, LCPI1_4 06535 // add r1, pc 06536 // mov r2, #1 06537 // orrs r1, r2 06538 // add r2, $jbuf, #+4 ; &jbuf[1] 06539 // str r1, [r2] 06540 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 06541 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) 06542 .addConstantPoolIndex(CPI) 06543 .addMemOperand(CPMMO)); 06544 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 06545 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) 06546 .addReg(NewVReg1, RegState::Kill) 06547 .addImm(PCLabelId); 06548 // Set the low bit because of thumb mode. 06549 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 06550 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) 06551 .addReg(ARM::CPSR, RegState::Define) 06552 .addImm(1)); 06553 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 06554 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) 06555 .addReg(ARM::CPSR, RegState::Define) 06556 .addReg(NewVReg2, RegState::Kill) 06557 .addReg(NewVReg3, RegState::Kill)); 06558 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 06559 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) 06560 .addFrameIndex(FI) 06561 .addImm(36)); // &jbuf[1] :: pc 06562 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) 06563 .addReg(NewVReg4, RegState::Kill) 06564 .addReg(NewVReg5, RegState::Kill) 06565 .addImm(0) 06566 .addMemOperand(FIMMOSt)); 06567 } else { 06568 // Incoming value: jbuf 06569 // ldr r1, LCPI1_1 06570 // add r1, pc, r1 06571 // str r1, [$jbuf, #+4] ; &jbuf[1] 06572 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 06573 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) 06574 .addConstantPoolIndex(CPI) 06575 .addImm(0) 06576 .addMemOperand(CPMMO)); 06577 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 06578 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) 06579 .addReg(NewVReg1, RegState::Kill) 06580 .addImm(PCLabelId)); 06581 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) 06582 .addReg(NewVReg2, RegState::Kill) 06583 .addFrameIndex(FI) 06584 .addImm(36) // &jbuf[1] :: pc 06585 .addMemOperand(FIMMOSt)); 06586 } 06587 } 06588 06589 MachineBasicBlock *ARMTargetLowering:: 06590 EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { 06591 const TargetInstrInfo *TII = 06592 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 06593 DebugLoc dl = MI->getDebugLoc(); 06594 
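// The dispatch block built here reloads the call-site index that the runtime
// stored into the function context (the load from FI+4 below), bounds-checks
// it against the number of landing pads (branching to TrapBB when it is out
// of range), and then indexes an inline jump table to reach the right
// handler.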
MachineFunction *MF = MBB->getParent(); 06595 MachineRegisterInfo *MRI = &MF->getRegInfo(); 06596 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 06597 MachineFrameInfo *MFI = MF->getFrameInfo(); 06598 int FI = MFI->getFunctionContextIndex(); 06599 06600 const TargetRegisterClass *TRC = Subtarget->isThumb() ? 06601 (const TargetRegisterClass*)&ARM::tGPRRegClass : 06602 (const TargetRegisterClass*)&ARM::GPRnopcRegClass; 06603 06604 // Get a mapping of the call site numbers to all of the landing pads they're 06605 // associated with. 06606 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; 06607 unsigned MaxCSNum = 0; 06608 MachineModuleInfo &MMI = MF->getMMI(); 06609 for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; 06610 ++BB) { 06611 if (!BB->isLandingPad()) continue; 06612 06613 // FIXME: We should assert that the EH_LABEL is the first MI in the landing 06614 // pad. 06615 for (MachineBasicBlock::iterator 06616 II = BB->begin(), IE = BB->end(); II != IE; ++II) { 06617 if (!II->isEHLabel()) continue; 06618 06619 MCSymbol *Sym = II->getOperand(0).getMCSymbol(); 06620 if (!MMI.hasCallSiteLandingPad(Sym)) continue; 06621 06622 SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym); 06623 for (SmallVectorImpl<unsigned>::iterator 06624 CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end(); 06625 CSI != CSE; ++CSI) { 06626 CallSiteNumToLPad[*CSI].push_back(BB); 06627 MaxCSNum = std::max(MaxCSNum, *CSI); 06628 } 06629 break; 06630 } 06631 } 06632 06633 // Get an ordered list of the machine basic blocks for the jump table. 06634 std::vector<MachineBasicBlock*> LPadList; 06635 SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs; 06636 LPadList.reserve(CallSiteNumToLPad.size()); 06637 for (unsigned I = 1; I <= MaxCSNum; ++I) { 06638 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I]; 06639 for (SmallVectorImpl<MachineBasicBlock*>::iterator 06640 II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) { 06641 LPadList.push_back(*II); 06642 InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end()); 06643 } 06644 } 06645 06646 assert(!LPadList.empty() && 06647 "No landing pad destinations for the dispatch jump table!"); 06648 06649 // Create the jump table and associated information. 06650 MachineJumpTableInfo *JTI = 06651 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline); 06652 unsigned MJTI = JTI->createJumpTableIndex(LPadList); 06653 unsigned UId = AFI->createJumpTableUId(); 06654 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 06655 06656 // Create the MBBs for the dispatch code. 06657 06658 // Shove the dispatch's address into the return slot in the function context. 06659 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); 06660 DispatchBB->setIsLandingPad(); 06661 06662 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); 06663 unsigned trap_opcode; 06664 if (Subtarget->isThumb()) 06665 trap_opcode = ARM::tTRAP; 06666 else 06667 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP; 06668 06669 BuildMI(TrapBB, dl, TII->get(trap_opcode)); 06670 DispatchBB->addSuccessor(TrapBB); 06671 06672 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); 06673 DispatchBB->addSuccessor(DispContBB); 06674 06675 // Insert and MBBs. 06676 MF->insert(MF->end(), DispatchBB); 06677 MF->insert(MF->end(), DispContBB); 06678 MF->insert(MF->end(), TrapBB); 06679 06680 // Insert code into the entry block that creates and registers the function 06681 // context. 
06682 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); 06683 06684 MachineMemOperand *FIMMOLd = 06685 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 06686 MachineMemOperand::MOLoad | 06687 MachineMemOperand::MOVolatile, 4, 4); 06688 06689 MachineInstrBuilder MIB; 06690 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); 06691 06692 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); 06693 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); 06694 06695 // Add a register mask with no preserved registers. This results in all 06696 // registers being marked as clobbered. 06697 MIB.addRegMask(RI.getNoPreservedMask()); 06698 06699 unsigned NumLPads = LPadList.size(); 06700 if (Subtarget->isThumb2()) { 06701 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 06702 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) 06703 .addFrameIndex(FI) 06704 .addImm(4) 06705 .addMemOperand(FIMMOLd)); 06706 06707 if (NumLPads < 256) { 06708 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) 06709 .addReg(NewVReg1) 06710 .addImm(LPadList.size())); 06711 } else { 06712 unsigned VReg1 = MRI->createVirtualRegister(TRC); 06713 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) 06714 .addImm(NumLPads & 0xFFFF)); 06715 06716 unsigned VReg2 = VReg1; 06717 if ((NumLPads & 0xFFFF0000) != 0) { 06718 VReg2 = MRI->createVirtualRegister(TRC); 06719 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) 06720 .addReg(VReg1) 06721 .addImm(NumLPads >> 16)); 06722 } 06723 06724 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) 06725 .addReg(NewVReg1) 06726 .addReg(VReg2)); 06727 } 06728 06729 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) 06730 .addMBB(TrapBB) 06731 .addImm(ARMCC::HI) 06732 .addReg(ARM::CPSR); 06733 06734 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 06735 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3) 06736 .addJumpTableIndex(MJTI) 06737 .addImm(UId)); 06738 06739 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 06740 AddDefaultCC( 06741 AddDefaultPred( 06742 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) 06743 .addReg(NewVReg3, RegState::Kill) 06744 .addReg(NewVReg1) 06745 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 06746 06747 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) 06748 .addReg(NewVReg4, RegState::Kill) 06749 .addReg(NewVReg1) 06750 .addJumpTableIndex(MJTI) 06751 .addImm(UId); 06752 } else if (Subtarget->isThumb()) { 06753 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 06754 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) 06755 .addFrameIndex(FI) 06756 .addImm(1) 06757 .addMemOperand(FIMMOLd)); 06758 06759 if (NumLPads < 256) { 06760 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) 06761 .addReg(NewVReg1) 06762 .addImm(NumLPads)); 06763 } else { 06764 MachineConstantPool *ConstantPool = MF->getConstantPool(); 06765 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 06766 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 06767 06768 // MachineConstantPool wants an explicit alignment. 
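// getPrefTypeAlignment can return 0 when the data layout does not specify a
// preferred alignment for the type, hence the fallback to the type's
// allocation size below.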
06769 unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); 06770 if (Align == 0) 06771 Align = getDataLayout()->getTypeAllocSize(C->getType()); 06772 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 06773 06774 unsigned VReg1 = MRI->createVirtualRegister(TRC); 06775 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) 06776 .addReg(VReg1, RegState::Define) 06777 .addConstantPoolIndex(Idx)); 06778 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) 06779 .addReg(NewVReg1) 06780 .addReg(VReg1)); 06781 } 06782 06783 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) 06784 .addMBB(TrapBB) 06785 .addImm(ARMCC::HI) 06786 .addReg(ARM::CPSR); 06787 06788 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 06789 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) 06790 .addReg(ARM::CPSR, RegState::Define) 06791 .addReg(NewVReg1) 06792 .addImm(2)); 06793 06794 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 06795 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) 06796 .addJumpTableIndex(MJTI) 06797 .addImm(UId)); 06798 06799 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 06800 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) 06801 .addReg(ARM::CPSR, RegState::Define) 06802 .addReg(NewVReg2, RegState::Kill) 06803 .addReg(NewVReg3)); 06804 06805 MachineMemOperand *JTMMOLd = 06806 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 06807 MachineMemOperand::MOLoad, 4, 4); 06808 06809 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 06810 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) 06811 .addReg(NewVReg4, RegState::Kill) 06812 .addImm(0) 06813 .addMemOperand(JTMMOLd)); 06814 06815 unsigned NewVReg6 = NewVReg5; 06816 if (RelocM == Reloc::PIC_) { 06817 NewVReg6 = MRI->createVirtualRegister(TRC); 06818 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) 06819 .addReg(ARM::CPSR, RegState::Define) 06820 .addReg(NewVReg5, RegState::Kill) 06821 .addReg(NewVReg3)); 06822 } 06823 06824 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) 06825 .addReg(NewVReg6, RegState::Kill) 06826 .addJumpTableIndex(MJTI) 06827 .addImm(UId); 06828 } else { 06829 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 06830 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) 06831 .addFrameIndex(FI) 06832 .addImm(4) 06833 .addMemOperand(FIMMOLd)); 06834 06835 if (NumLPads < 256) { 06836 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) 06837 .addReg(NewVReg1) 06838 .addImm(NumLPads)); 06839 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { 06840 unsigned VReg1 = MRI->createVirtualRegister(TRC); 06841 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) 06842 .addImm(NumLPads & 0xFFFF)); 06843 06844 unsigned VReg2 = VReg1; 06845 if ((NumLPads & 0xFFFF0000) != 0) { 06846 VReg2 = MRI->createVirtualRegister(TRC); 06847 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) 06848 .addReg(VReg1) 06849 .addImm(NumLPads >> 16)); 06850 } 06851 06852 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 06853 .addReg(NewVReg1) 06854 .addReg(VReg2)); 06855 } else { 06856 MachineConstantPool *ConstantPool = MF->getConstantPool(); 06857 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 06858 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 06859 06860 // MachineConstantPool wants an explicit alignment. 
06861 unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); 06862 if (Align == 0) 06863 Align = getDataLayout()->getTypeAllocSize(C->getType()); 06864 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 06865 06866 unsigned VReg1 = MRI->createVirtualRegister(TRC); 06867 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) 06868 .addReg(VReg1, RegState::Define) 06869 .addConstantPoolIndex(Idx) 06870 .addImm(0)); 06871 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 06872 .addReg(NewVReg1) 06873 .addReg(VReg1, RegState::Kill)); 06874 } 06875 06876 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) 06877 .addMBB(TrapBB) 06878 .addImm(ARMCC::HI) 06879 .addReg(ARM::CPSR); 06880 06881 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 06882 AddDefaultCC( 06883 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) 06884 .addReg(NewVReg1) 06885 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 06886 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 06887 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) 06888 .addJumpTableIndex(MJTI) 06889 .addImm(UId)); 06890 06891 MachineMemOperand *JTMMOLd = 06892 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 06893 MachineMemOperand::MOLoad, 4, 4); 06894 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 06895 AddDefaultPred( 06896 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) 06897 .addReg(NewVReg3, RegState::Kill) 06898 .addReg(NewVReg4) 06899 .addImm(0) 06900 .addMemOperand(JTMMOLd)); 06901 06902 if (RelocM == Reloc::PIC_) { 06903 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) 06904 .addReg(NewVReg5, RegState::Kill) 06905 .addReg(NewVReg4) 06906 .addJumpTableIndex(MJTI) 06907 .addImm(UId); 06908 } else { 06909 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr)) 06910 .addReg(NewVReg5, RegState::Kill) 06911 .addJumpTableIndex(MJTI) 06912 .addImm(UId); 06913 } 06914 } 06915 06916 // Add the jump table entries as successors to the MBB. 06917 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs; 06918 for (std::vector<MachineBasicBlock*>::iterator 06919 I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { 06920 MachineBasicBlock *CurMBB = *I; 06921 if (SeenMBBs.insert(CurMBB)) 06922 DispContBB->addSuccessor(CurMBB); 06923 } 06924 06925 // N.B. the order the invoke BBs are processed in doesn't matter here. 06926 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF); 06927 SmallVector<MachineBasicBlock*, 64> MBBLPads; 06928 for (MachineBasicBlock *BB : InvokeBBs) { 06929 06930 // Remove the landing pad successor from the invoke block and replace it 06931 // with the new dispatch block. 06932 SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(), 06933 BB->succ_end()); 06934 while (!Successors.empty()) { 06935 MachineBasicBlock *SMBB = Successors.pop_back_val(); 06936 if (SMBB->isLandingPad()) { 06937 BB->removeSuccessor(SMBB); 06938 MBBLPads.push_back(SMBB); 06939 } 06940 } 06941 06942 BB->addSuccessor(DispatchBB); 06943 06944 // Find the invoke call and mark all of the callee-saved registers as 06945 // 'implicit defined' so that they're spilled. This prevents code from 06946 // moving instructions to before the EH block, where they will never be 06947 // executed. 
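// Walking the block in reverse finds the invoke's call instruction first;
// every callee-saved register the call does not already define gets an
// implicit dead def added to it (roughly "BL ... implicit-def dead R4"),
// which makes those registers count as clobbered across the invoke.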
06948 for (MachineBasicBlock::reverse_iterator 06949 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { 06950 if (!II->isCall()) continue; 06951 06952 DenseMap<unsigned, bool> DefRegs; 06953 for (MachineInstr::mop_iterator 06954 OI = II->operands_begin(), OE = II->operands_end(); 06955 OI != OE; ++OI) { 06956 if (!OI->isReg()) continue; 06957 DefRegs[OI->getReg()] = true; 06958 } 06959 06960 MachineInstrBuilder MIB(*MF, &*II); 06961 06962 for (unsigned i = 0; SavedRegs[i] != 0; ++i) { 06963 unsigned Reg = SavedRegs[i]; 06964 if (Subtarget->isThumb2() && 06965 !ARM::tGPRRegClass.contains(Reg) && 06966 !ARM::hGPRRegClass.contains(Reg)) 06967 continue; 06968 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg)) 06969 continue; 06970 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg)) 06971 continue; 06972 if (!DefRegs[Reg]) 06973 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); 06974 } 06975 06976 break; 06977 } 06978 } 06979 06980 // Mark all former landing pads as non-landing pads. The dispatch is the only 06981 // landing pad now. 06982 for (SmallVectorImpl<MachineBasicBlock*>::iterator 06983 I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I) 06984 (*I)->setIsLandingPad(false); 06985 06986 // The instruction is gone now. 06987 MI->eraseFromParent(); 06988 06989 return MBB; 06990 } 06991 06992 static 06993 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 06994 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 06995 E = MBB->succ_end(); I != E; ++I) 06996 if (*I != Succ) 06997 return *I; 06998 llvm_unreachable("Expecting a BB with two successors!"); 06999 } 07000 07001 /// Return the load opcode for a given load size. If load size >= 8, 07002 /// neon opcode will be returned. 07003 static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) { 07004 if (LdSize >= 8) 07005 return LdSize == 16 ? ARM::VLD1q32wb_fixed 07006 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0; 07007 if (IsThumb1) 07008 return LdSize == 4 ? ARM::tLDRi 07009 : LdSize == 2 ? ARM::tLDRHi 07010 : LdSize == 1 ? ARM::tLDRBi : 0; 07011 if (IsThumb2) 07012 return LdSize == 4 ? ARM::t2LDR_POST 07013 : LdSize == 2 ? ARM::t2LDRH_POST 07014 : LdSize == 1 ? ARM::t2LDRB_POST : 0; 07015 return LdSize == 4 ? ARM::LDR_POST_IMM 07016 : LdSize == 2 ? ARM::LDRH_POST 07017 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0; 07018 } 07019 07020 /// Return the store opcode for a given store size. If store size >= 8, 07021 /// neon opcode will be returned. 07022 static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) { 07023 if (StSize >= 8) 07024 return StSize == 16 ? ARM::VST1q32wb_fixed 07025 : StSize == 8 ? ARM::VST1d32wb_fixed : 0; 07026 if (IsThumb1) 07027 return StSize == 4 ? ARM::tSTRi 07028 : StSize == 2 ? ARM::tSTRHi 07029 : StSize == 1 ? ARM::tSTRBi : 0; 07030 if (IsThumb2) 07031 return StSize == 4 ? ARM::t2STR_POST 07032 : StSize == 2 ? ARM::t2STRH_POST 07033 : StSize == 1 ? ARM::t2STRB_POST : 0; 07034 return StSize == 4 ? ARM::STR_POST_IMM 07035 : StSize == 2 ? ARM::STRH_POST 07036 : StSize == 1 ? ARM::STRB_POST_IMM : 0; 07037 } 07038 07039 /// Emit a post-increment load operation with given size. The instructions 07040 /// will be added to BB at Pos. 
07041 static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos, 07042 const TargetInstrInfo *TII, DebugLoc dl, 07043 unsigned LdSize, unsigned Data, unsigned AddrIn, 07044 unsigned AddrOut, bool IsThumb1, bool IsThumb2) { 07045 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2); 07046 assert(LdOpc != 0 && "Should have a load opcode"); 07047 if (LdSize >= 8) { 07048 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) 07049 .addReg(AddrOut, RegState::Define).addReg(AddrIn) 07050 .addImm(0)); 07051 } else if (IsThumb1) { 07052 // load + update AddrIn 07053 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) 07054 .addReg(AddrIn).addImm(0)); 07055 MachineInstrBuilder MIB = 07056 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); 07057 MIB = AddDefaultT1CC(MIB); 07058 MIB.addReg(AddrIn).addImm(LdSize); 07059 AddDefaultPred(MIB); 07060 } else if (IsThumb2) { 07061 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) 07062 .addReg(AddrOut, RegState::Define).addReg(AddrIn) 07063 .addImm(LdSize)); 07064 } else { // arm 07065 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) 07066 .addReg(AddrOut, RegState::Define).addReg(AddrIn) 07067 .addReg(0).addImm(LdSize)); 07068 } 07069 } 07070 07071 /// Emit a post-increment store operation with given size. The instructions 07072 /// will be added to BB at Pos. 07073 static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos, 07074 const TargetInstrInfo *TII, DebugLoc dl, 07075 unsigned StSize, unsigned Data, unsigned AddrIn, 07076 unsigned AddrOut, bool IsThumb1, bool IsThumb2) { 07077 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2); 07078 assert(StOpc != 0 && "Should have a store opcode"); 07079 if (StSize >= 8) { 07080 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) 07081 .addReg(AddrIn).addImm(0).addReg(Data)); 07082 } else if (IsThumb1) { 07083 // store + update AddrIn 07084 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data) 07085 .addReg(AddrIn).addImm(0)); 07086 MachineInstrBuilder MIB = 07087 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); 07088 MIB = AddDefaultT1CC(MIB); 07089 MIB.addReg(AddrIn).addImm(StSize); 07090 AddDefaultPred(MIB); 07091 } else if (IsThumb2) { 07092 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) 07093 .addReg(Data).addReg(AddrIn).addImm(StSize)); 07094 } else { // arm 07095 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) 07096 .addReg(Data).addReg(AddrIn).addReg(0) 07097 .addImm(StSize)); 07098 } 07099 } 07100 07101 MachineBasicBlock * 07102 ARMTargetLowering::EmitStructByval(MachineInstr *MI, 07103 MachineBasicBlock *BB) const { 07104 // This pseudo instruction has 3 operands: dst, src, size 07105 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). 07106 // Otherwise, we will generate unrolled scalar copies. 
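// Operand 3 of the pseudo carries the alignment, which picks the copy unit
// below: 16- or 8-byte NEON VLD1/VST1 chunks when the buffer is suitably
// aligned and NoImplicitFloat is not set, otherwise 4/2/1-byte GPR copies.
// The small-size path simply unrolls emitPostLd/emitPostSt pairs per unit.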
07107 const TargetInstrInfo *TII = 07108 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 07109 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 07110 MachineFunction::iterator It = BB; 07111 ++It; 07112 07113 unsigned dest = MI->getOperand(0).getReg(); 07114 unsigned src = MI->getOperand(1).getReg(); 07115 unsigned SizeVal = MI->getOperand(2).getImm(); 07116 unsigned Align = MI->getOperand(3).getImm(); 07117 DebugLoc dl = MI->getDebugLoc(); 07118 07119 MachineFunction *MF = BB->getParent(); 07120 MachineRegisterInfo &MRI = MF->getRegInfo(); 07121 unsigned UnitSize = 0; 07122 const TargetRegisterClass *TRC = nullptr; 07123 const TargetRegisterClass *VecTRC = nullptr; 07124 07125 bool IsThumb1 = Subtarget->isThumb1Only(); 07126 bool IsThumb2 = Subtarget->isThumb2(); 07127 07128 if (Align & 1) { 07129 UnitSize = 1; 07130 } else if (Align & 2) { 07131 UnitSize = 2; 07132 } else { 07133 // Check whether we can use NEON instructions. 07134 if (!MF->getFunction()->getAttributes(). 07135 hasAttribute(AttributeSet::FunctionIndex, 07136 Attribute::NoImplicitFloat) && 07137 Subtarget->hasNEON()) { 07138 if ((Align % 16 == 0) && SizeVal >= 16) 07139 UnitSize = 16; 07140 else if ((Align % 8 == 0) && SizeVal >= 8) 07141 UnitSize = 8; 07142 } 07143 // Can't use NEON instructions. 07144 if (UnitSize == 0) 07145 UnitSize = 4; 07146 } 07147 07148 // Select the correct opcode and register class for unit size load/store 07149 bool IsNeon = UnitSize >= 8; 07150 TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass 07151 : (const TargetRegisterClass *)&ARM::GPRRegClass; 07152 if (IsNeon) 07153 VecTRC = UnitSize == 16 07154 ? (const TargetRegisterClass *)&ARM::DPairRegClass 07155 : UnitSize == 8 07156 ? (const TargetRegisterClass *)&ARM::DPRRegClass 07157 : nullptr; 07158 07159 unsigned BytesLeft = SizeVal % UnitSize; 07160 unsigned LoopSize = SizeVal - BytesLeft; 07161 07162 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) { 07163 // Use LDR and STR to copy. 07164 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize) 07165 // [destOut] = STR_POST(scratch, destIn, UnitSize) 07166 unsigned srcIn = src; 07167 unsigned destIn = dest; 07168 for (unsigned i = 0; i < LoopSize; i+=UnitSize) { 07169 unsigned srcOut = MRI.createVirtualRegister(TRC); 07170 unsigned destOut = MRI.createVirtualRegister(TRC); 07171 unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); 07172 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut, 07173 IsThumb1, IsThumb2); 07174 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut, 07175 IsThumb1, IsThumb2); 07176 srcIn = srcOut; 07177 destIn = destOut; 07178 } 07179 07180 // Handle the leftover bytes with LDRB and STRB. 07181 // [scratch, srcOut] = LDRB_POST(srcIn, 1) 07182 // [destOut] = STRB_POST(scratch, destIn, 1) 07183 for (unsigned i = 0; i < BytesLeft; i++) { 07184 unsigned srcOut = MRI.createVirtualRegister(TRC); 07185 unsigned destOut = MRI.createVirtualRegister(TRC); 07186 unsigned scratch = MRI.createVirtualRegister(TRC); 07187 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut, 07188 IsThumb1, IsThumb2); 07189 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut, 07190 IsThumb1, IsThumb2); 07191 srcIn = srcOut; 07192 destIn = destOut; 07193 } 07194 MI->eraseFromParent(); // The instruction is gone now. 07195 return BB; 07196 } 07197 07198 // Expand the pseudo op to a loop. 07199 // thisMBB: 07200 // ... 
07201 // movw varEnd, # --> with thumb2 07202 // movt varEnd, # 07203 // ldrcp varEnd, idx --> without thumb2 07204 // fallthrough --> loopMBB 07205 // loopMBB: 07206 // PHI varPhi, varEnd, varLoop 07207 // PHI srcPhi, src, srcLoop 07208 // PHI destPhi, dst, destLoop 07209 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) 07210 // [destLoop] = STR_POST(scratch, destPhi, UnitSize) 07211 // subs varLoop, varPhi, #UnitSize 07212 // bne loopMBB 07213 // fallthrough --> exitMBB 07214 // exitMBB: 07215 // epilogue to handle left-over bytes 07216 // [scratch, srcOut] = LDRB_POST(srcLoop, 1) 07217 // [destOut] = STRB_POST(scratch, destLoop, 1) 07218 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 07219 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 07220 MF->insert(It, loopMBB); 07221 MF->insert(It, exitMBB); 07222 07223 // Transfer the remainder of BB and its successor edges to exitMBB. 07224 exitMBB->splice(exitMBB->begin(), BB, 07225 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 07226 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 07227 07228 // Load an immediate to varEnd. 07229 unsigned varEnd = MRI.createVirtualRegister(TRC); 07230 if (IsThumb2) { 07231 unsigned Vtmp = varEnd; 07232 if ((LoopSize & 0xFFFF0000) != 0) 07233 Vtmp = MRI.createVirtualRegister(TRC); 07234 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp) 07235 .addImm(LoopSize & 0xFFFF)); 07236 07237 if ((LoopSize & 0xFFFF0000) != 0) 07238 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) 07239 .addReg(Vtmp).addImm(LoopSize >> 16)); 07240 } else { 07241 MachineConstantPool *ConstantPool = MF->getConstantPool(); 07242 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 07243 const Constant *C = ConstantInt::get(Int32Ty, LoopSize); 07244 07245 // MachineConstantPool wants an explicit alignment. 
07246 unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); 07247 if (Align == 0) 07248 Align = getDataLayout()->getTypeAllocSize(C->getType()); 07249 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 07250 07251 if (IsThumb1) 07252 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg( 07253 varEnd, RegState::Define).addConstantPoolIndex(Idx)); 07254 else 07255 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg( 07256 varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0)); 07257 } 07258 BB->addSuccessor(loopMBB); 07259 07260 // Generate the loop body: 07261 // varPhi = PHI(varLoop, varEnd) 07262 // srcPhi = PHI(srcLoop, src) 07263 // destPhi = PHI(destLoop, dst) 07264 MachineBasicBlock *entryBB = BB; 07265 BB = loopMBB; 07266 unsigned varLoop = MRI.createVirtualRegister(TRC); 07267 unsigned varPhi = MRI.createVirtualRegister(TRC); 07268 unsigned srcLoop = MRI.createVirtualRegister(TRC); 07269 unsigned srcPhi = MRI.createVirtualRegister(TRC); 07270 unsigned destLoop = MRI.createVirtualRegister(TRC); 07271 unsigned destPhi = MRI.createVirtualRegister(TRC); 07272 07273 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi) 07274 .addReg(varLoop).addMBB(loopMBB) 07275 .addReg(varEnd).addMBB(entryBB); 07276 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi) 07277 .addReg(srcLoop).addMBB(loopMBB) 07278 .addReg(src).addMBB(entryBB); 07279 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi) 07280 .addReg(destLoop).addMBB(loopMBB) 07281 .addReg(dest).addMBB(entryBB); 07282 07283 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) 07284 // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) 07285 unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); 07286 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop, 07287 IsThumb1, IsThumb2); 07288 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop, 07289 IsThumb1, IsThumb2); 07290 07291 // Decrement loop variable by UnitSize. 07292 if (IsThumb1) { 07293 MachineInstrBuilder MIB = 07294 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop); 07295 MIB = AddDefaultT1CC(MIB); 07296 MIB.addReg(varPhi).addImm(UnitSize); 07297 AddDefaultPred(MIB); 07298 } else { 07299 MachineInstrBuilder MIB = 07300 BuildMI(*BB, BB->end(), dl, 07301 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); 07302 AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); 07303 MIB->getOperand(5).setReg(ARM::CPSR); 07304 MIB->getOperand(5).setIsDef(true); 07305 } 07306 BuildMI(*BB, BB->end(), dl, 07307 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc)) 07308 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 07309 07310 // loopMBB can loop back to loopMBB or fall through to exitMBB. 07311 BB->addSuccessor(loopMBB); 07312 BB->addSuccessor(exitMBB); 07313 07314 // Add epilogue to handle BytesLeft. 
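  // For example (editor's illustration, not from the original source): with
  // SizeVal = 259 and UnitSize = 16, LoopSize = 256 and BytesLeft = 3, so the
  // loop above copies sixteen 16-byte chunks and the epilogue below copies the
  // remaining 3 bytes with byte-sized post-increment loads and stores.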
07315   BB = exitMBB;
07316   MachineInstr *StartOfExit = exitMBB->begin();
07317 
07318   // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
07319   // [destOut] = STRB_POST(scratch, destLoop, 1)
07320   unsigned srcIn = srcLoop;
07321   unsigned destIn = destLoop;
07322   for (unsigned i = 0; i < BytesLeft; i++) {
07323     unsigned srcOut = MRI.createVirtualRegister(TRC);
07324     unsigned destOut = MRI.createVirtualRegister(TRC);
07325     unsigned scratch = MRI.createVirtualRegister(TRC);
07326     emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
07327                IsThumb1, IsThumb2);
07328     emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
07329                IsThumb1, IsThumb2);
07330     srcIn = srcOut;
07331     destIn = destOut;
07332   }
07333 
07334   MI->eraseFromParent();   // The instruction is gone now.
07335   return BB;
07336 }
07337 
07338 MachineBasicBlock *
07339 ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
07340                                        MachineBasicBlock *MBB) const {
07341   const TargetMachine &TM = getTargetMachine();
07342   const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
07343   DebugLoc DL = MI->getDebugLoc();
07344 
07345   assert(Subtarget->isTargetWindows() &&
07346          "__chkstk is only supported on Windows");
07347   assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
07348 
07349   // __chkstk takes the number of words to allocate on the stack in R4, and
07350   // returns the stack adjustment in number of bytes in R4. This will not
07351   // clobber any other registers (other than the obvious lr).
07352   //
07353   // Although, technically, IP should be considered a register which may be
07354   // clobbered, the call itself will not touch it. Windows on ARM is a pure
07355   // thumb-2 environment, so there is no interworking required. As a result, we
07356   // do not expect a veneer to be emitted by the linker, clobbering IP.
07357   //
07358   // Each module receives its own copy of __chkstk, so no import thunk is
07359   // required, again, ensuring that IP is not clobbered.
07360   //
07361   // Finally, although some linkers may theoretically provide a trampoline for
07362   // out-of-range calls (which is quite common due to a 32M range limitation of
07363   // branches for Thumb), we can generate the long-call version via
07364   // -mcmodel=large, alleviating the need for the trampoline which may clobber
07365   // IP.
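  // Editor's summary (illustrative, not part of the original source): for the
  // default/small code models this emits a direct "bl __chkstk"; for the
  // large/JIT code models the callee address is materialized first, roughly
  //   movw rN, :lower16:__chkstk
  //   movt rN, :upper16:__chkstk
  //   blx  rN
  // and in both cases the allocation is then applied with "sub.w sp, sp, r4".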
07366 07367 switch (TM.getCodeModel()) { 07368 case CodeModel::Small: 07369 case CodeModel::Medium: 07370 case CodeModel::Default: 07371 case CodeModel::Kernel: 07372 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) 07373 .addImm((unsigned)ARMCC::AL).addReg(0) 07374 .addExternalSymbol("__chkstk") 07375 .addReg(ARM::R4, RegState::Implicit | RegState::Kill) 07376 .addReg(ARM::R4, RegState::Implicit | RegState::Define) 07377 .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); 07378 break; 07379 case CodeModel::Large: 07380 case CodeModel::JITDefault: { 07381 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 07382 unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); 07383 07384 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) 07385 .addExternalSymbol("__chkstk"); 07386 BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) 07387 .addImm((unsigned)ARMCC::AL).addReg(0) 07388 .addReg(Reg, RegState::Kill) 07389 .addReg(ARM::R4, RegState::Implicit | RegState::Kill) 07390 .addReg(ARM::R4, RegState::Implicit | RegState::Define) 07391 .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); 07392 break; 07393 } 07394 } 07395 07396 AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), 07397 ARM::SP) 07398 .addReg(ARM::SP).addReg(ARM::R4))); 07399 07400 MI->eraseFromParent(); 07401 return MBB; 07402 } 07403 07404 MachineBasicBlock * 07405 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 07406 MachineBasicBlock *BB) const { 07407 const TargetInstrInfo *TII = 07408 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 07409 DebugLoc dl = MI->getDebugLoc(); 07410 bool isThumb2 = Subtarget->isThumb2(); 07411 switch (MI->getOpcode()) { 07412 default: { 07413 MI->dump(); 07414 llvm_unreachable("Unexpected instr type to insert"); 07415 } 07416 // The Thumb2 pre-indexed stores have the same MI operands, they just 07417 // define them differently in the .td files from the isel patterns, so 07418 // they need pseudos. 07419 case ARM::t2STR_preidx: 07420 MI->setDesc(TII->get(ARM::t2STR_PRE)); 07421 return BB; 07422 case ARM::t2STRB_preidx: 07423 MI->setDesc(TII->get(ARM::t2STRB_PRE)); 07424 return BB; 07425 case ARM::t2STRH_preidx: 07426 MI->setDesc(TII->get(ARM::t2STRH_PRE)); 07427 return BB; 07428 07429 case ARM::STRi_preidx: 07430 case ARM::STRBi_preidx: { 07431 unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? 07432 ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; 07433 // Decode the offset. 
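    // Editor's note (descriptive, not from the original source): operand 4
    // holds an addrmode2-encoded immediate that packs the add/sub flag
    // together with the offset value; ARM_AM::getAM2Op and
    // ARM_AM::getAM2Offset unpack it below, and the offset is negated for the
    // "sub" form so the *_PRE_IMM instruction can take a plain signed
    // immediate.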
07434 unsigned Offset = MI->getOperand(4).getImm(); 07435 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; 07436 Offset = ARM_AM::getAM2Offset(Offset); 07437 if (isSub) 07438 Offset = -Offset; 07439 07440 MachineMemOperand *MMO = *MI->memoperands_begin(); 07441 BuildMI(*BB, MI, dl, TII->get(NewOpc)) 07442 .addOperand(MI->getOperand(0)) // Rn_wb 07443 .addOperand(MI->getOperand(1)) // Rt 07444 .addOperand(MI->getOperand(2)) // Rn 07445 .addImm(Offset) // offset (skip GPR==zero_reg) 07446 .addOperand(MI->getOperand(5)) // pred 07447 .addOperand(MI->getOperand(6)) 07448 .addMemOperand(MMO); 07449 MI->eraseFromParent(); 07450 return BB; 07451 } 07452 case ARM::STRr_preidx: 07453 case ARM::STRBr_preidx: 07454 case ARM::STRH_preidx: { 07455 unsigned NewOpc; 07456 switch (MI->getOpcode()) { 07457 default: llvm_unreachable("unexpected opcode!"); 07458 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; 07459 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; 07460 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; 07461 } 07462 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); 07463 for (unsigned i = 0; i < MI->getNumOperands(); ++i) 07464 MIB.addOperand(MI->getOperand(i)); 07465 MI->eraseFromParent(); 07466 return BB; 07467 } 07468 07469 case ARM::tMOVCCr_pseudo: { 07470 // To "insert" a SELECT_CC instruction, we actually have to insert the 07471 // diamond control-flow pattern. The incoming instruction knows the 07472 // destination vreg to set, the condition code register to branch on, the 07473 // true/false values to select between, and a branch opcode to use. 07474 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 07475 MachineFunction::iterator It = BB; 07476 ++It; 07477 07478 // thisMBB: 07479 // ... 07480 // TrueVal = ... 07481 // cmpTY ccX, r1, r2 07482 // bCC copy1MBB 07483 // fallthrough --> copy0MBB 07484 MachineBasicBlock *thisMBB = BB; 07485 MachineFunction *F = BB->getParent(); 07486 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 07487 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 07488 F->insert(It, copy0MBB); 07489 F->insert(It, sinkMBB); 07490 07491 // Transfer the remainder of BB and its successor edges to sinkMBB. 07492 sinkMBB->splice(sinkMBB->begin(), BB, 07493 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 07494 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 07495 07496 BB->addSuccessor(copy0MBB); 07497 BB->addSuccessor(sinkMBB); 07498 07499 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 07500 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 07501 07502 // copy0MBB: 07503 // %FalseValue = ... 07504 // # fallthrough to sinkMBB 07505 BB = copy0MBB; 07506 07507 // Update machine-CFG edges 07508 BB->addSuccessor(sinkMBB); 07509 07510 // sinkMBB: 07511 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 07512 // ... 07513 BB = sinkMBB; 07514 BuildMI(*BB, BB->begin(), dl, 07515 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 07516 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 07517 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 07518 07519 MI->eraseFromParent(); // The pseudo instruction is gone now. 07520 return BB; 07521 } 07522 07523 case ARM::BCCi64: 07524 case ARM::BCCZi64: { 07525 // If there is an unconditional branch to the other successor, remove it. 07526 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end()); 07527 07528 // Compare both parts that make up the double comparison separately for 07529 // equality. 
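    // Editor's illustration (not from the original source): for BCCZi64 a
    // 64-bit "x == 0" test is lowered roughly as
    //   cmp   xLo, #0
    //   cmpeq xHi, #0      ; second compare predicated on EQ
    //   beq   destMBB
    // so the flags seen by the branch are EQ only when both halves compare
    // equal.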
07530 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 07531 07532 unsigned LHS1 = MI->getOperand(1).getReg(); 07533 unsigned LHS2 = MI->getOperand(2).getReg(); 07534 if (RHSisZero) { 07535 AddDefaultPred(BuildMI(BB, dl, 07536 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 07537 .addReg(LHS1).addImm(0)); 07538 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 07539 .addReg(LHS2).addImm(0) 07540 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 07541 } else { 07542 unsigned RHS1 = MI->getOperand(3).getReg(); 07543 unsigned RHS2 = MI->getOperand(4).getReg(); 07544 AddDefaultPred(BuildMI(BB, dl, 07545 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 07546 .addReg(LHS1).addReg(RHS1)); 07547 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 07548 .addReg(LHS2).addReg(RHS2) 07549 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 07550 } 07551 07552 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 07553 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 07554 if (MI->getOperand(0).getImm() == ARMCC::NE) 07555 std::swap(destMBB, exitMBB); 07556 07557 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 07558 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 07559 if (isThumb2) 07560 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); 07561 else 07562 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); 07563 07564 MI->eraseFromParent(); // The pseudo instruction is gone now. 07565 return BB; 07566 } 07567 07568 case ARM::Int_eh_sjlj_setjmp: 07569 case ARM::Int_eh_sjlj_setjmp_nofp: 07570 case ARM::tInt_eh_sjlj_setjmp: 07571 case ARM::t2Int_eh_sjlj_setjmp: 07572 case ARM::t2Int_eh_sjlj_setjmp_nofp: 07573 EmitSjLjDispatchBlock(MI, BB); 07574 return BB; 07575 07576 case ARM::ABS: 07577 case ARM::t2ABS: { 07578 // To insert an ABS instruction, we have to insert the 07579 // diamond control-flow pattern. The incoming instruction knows the 07580 // source vreg to test against 0, the destination vreg to set, 07581 // the condition code register to branch on, the 07582 // true/false values to select between, and a branch opcode to use. 07583 // It transforms 07584 // V1 = ABS V0 07585 // into 07586 // V2 = MOVS V0 07587 // BCC (branch to SinkBB if V0 >= 0) 07588 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) 07589 // SinkBB: V1 = PHI(V2, V3) 07590 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 07591 MachineFunction::iterator BBI = BB; 07592 ++BBI; 07593 MachineFunction *Fn = BB->getParent(); 07594 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); 07595 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); 07596 Fn->insert(BBI, RSBBB); 07597 Fn->insert(BBI, SinkBB); 07598 07599 unsigned int ABSSrcReg = MI->getOperand(1).getReg(); 07600 unsigned int ABSDstReg = MI->getOperand(0).getReg(); 07601 bool isThumb2 = Subtarget->isThumb2(); 07602 MachineRegisterInfo &MRI = Fn->getRegInfo(); 07603 // In Thumb mode S must not be specified if source register is the SP or 07604 // PC and if destination register is the SP, so restrict register class 07605 unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? 07606 (const TargetRegisterClass*)&ARM::rGPRRegClass : 07607 (const TargetRegisterClass*)&ARM::GPRRegClass); 07608 07609 // Transfer the remainder of BB and its successor edges to sinkMBB. 
07610 SinkBB->splice(SinkBB->begin(), BB, 07611 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 07612 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 07613 07614 BB->addSuccessor(RSBBB); 07615 BB->addSuccessor(SinkBB); 07616 07617 // fall through to SinkMBB 07618 RSBBB->addSuccessor(SinkBB); 07619 07620 // insert a cmp at the end of BB 07621 AddDefaultPred(BuildMI(BB, dl, 07622 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 07623 .addReg(ABSSrcReg).addImm(0)); 07624 07625 // insert a bcc with opposite CC to ARMCC::MI at the end of BB 07626 BuildMI(BB, dl, 07627 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) 07628 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); 07629 07630 // insert rsbri in RSBBB 07631 // Note: BCC and rsbri will be converted into predicated rsbmi 07632 // by if-conversion pass 07633 BuildMI(*RSBBB, RSBBB->begin(), dl, 07634 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) 07635 .addReg(ABSSrcReg, RegState::Kill) 07636 .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 07637 07638 // insert PHI in SinkBB, 07639 // reuse ABSDstReg to not change uses of ABS instruction 07640 BuildMI(*SinkBB, SinkBB->begin(), dl, 07641 TII->get(ARM::PHI), ABSDstReg) 07642 .addReg(NewRsbDstReg).addMBB(RSBBB) 07643 .addReg(ABSSrcReg).addMBB(BB); 07644 07645 // remove ABS instruction 07646 MI->eraseFromParent(); 07647 07648 // return last added BB 07649 return SinkBB; 07650 } 07651 case ARM::COPY_STRUCT_BYVAL_I32: 07652 ++NumLoopByVals; 07653 return EmitStructByval(MI, BB); 07654 case ARM::WIN__CHKSTK: 07655 return EmitLowered__chkstk(MI, BB); 07656 } 07657 } 07658 07659 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, 07660 SDNode *Node) const { 07661 if (!MI->hasPostISelHook()) { 07662 assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && 07663 "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); 07664 return; 07665 } 07666 07667 const MCInstrDesc *MCID = &MI->getDesc(); 07668 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, 07669 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional 07670 // operand is still set to noreg. If needed, set the optional operand's 07671 // register to CPSR, and remove the redundant implicit def. 07672 // 07673 // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>). 07674 07675 // Rename pseudo opcodes. 07676 unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); 07677 if (NewOpc) { 07678 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 07679 getTargetMachine().getSubtargetImpl()->getInstrInfo()); 07680 MCID = &TII->get(NewOpc); 07681 07682 assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && 07683 "converted opcode should be the same except for cc_out"); 07684 07685 MI->setDesc(*MCID); 07686 07687 // Add the optional cc_out operand 07688 MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); 07689 } 07690 unsigned ccOutIdx = MCID->getNumOperands() - 1; 07691 07692 // Any ARM instruction that sets the 's' bit should specify an optional 07693 // "cc_out" operand in the last operand position. 07694 if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { 07695 assert(!NewOpc && "Optional cc_out operand required"); 07696 return; 07697 } 07698 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it 07699 // since we already have an optional CPSR def. 
07700 bool definesCPSR = false; 07701 bool deadCPSR = false; 07702 for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands(); 07703 i != e; ++i) { 07704 const MachineOperand &MO = MI->getOperand(i); 07705 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { 07706 definesCPSR = true; 07707 if (MO.isDead()) 07708 deadCPSR = true; 07709 MI->RemoveOperand(i); 07710 break; 07711 } 07712 } 07713 if (!definesCPSR) { 07714 assert(!NewOpc && "Optional cc_out operand required"); 07715 return; 07716 } 07717 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); 07718 if (deadCPSR) { 07719 assert(!MI->getOperand(ccOutIdx).getReg() && 07720 "expect uninitialized optional cc_out operand"); 07721 return; 07722 } 07723 07724 // If this instruction was defined with an optional CPSR def and its dag node 07725 // had a live implicit CPSR def, then activate the optional CPSR def. 07726 MachineOperand &MO = MI->getOperand(ccOutIdx); 07727 MO.setReg(ARM::CPSR); 07728 MO.setIsDef(true); 07729 } 07730 07731 //===----------------------------------------------------------------------===// 07732 // ARM Optimization Hooks 07733 //===----------------------------------------------------------------------===// 07734 07735 // Helper function that checks if N is a null or all ones constant. 07736 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { 07737 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); 07738 if (!C) 07739 return false; 07740 return AllOnes ? C->isAllOnesValue() : C->isNullValue(); 07741 } 07742 07743 // Return true if N is conditionally 0 or all ones. 07744 // Detects these expressions where cc is an i1 value: 07745 // 07746 // (select cc 0, y) [AllOnes=0] 07747 // (select cc y, 0) [AllOnes=0] 07748 // (zext cc) [AllOnes=0] 07749 // (sext cc) [AllOnes=0/1] 07750 // (select cc -1, y) [AllOnes=1] 07751 // (select cc y, -1) [AllOnes=1] 07752 // 07753 // Invert is set when N is the null/all ones constant when CC is false. 07754 // OtherOp is set to the alternative value of N. 07755 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, 07756 SDValue &CC, bool &Invert, 07757 SDValue &OtherOp, 07758 SelectionDAG &DAG) { 07759 switch (N->getOpcode()) { 07760 default: return false; 07761 case ISD::SELECT: { 07762 CC = N->getOperand(0); 07763 SDValue N1 = N->getOperand(1); 07764 SDValue N2 = N->getOperand(2); 07765 if (isZeroOrAllOnes(N1, AllOnes)) { 07766 Invert = false; 07767 OtherOp = N2; 07768 return true; 07769 } 07770 if (isZeroOrAllOnes(N2, AllOnes)) { 07771 Invert = true; 07772 OtherOp = N1; 07773 return true; 07774 } 07775 return false; 07776 } 07777 case ISD::ZERO_EXTEND: 07778 // (zext cc) can never be the all ones value. 07779 if (AllOnes) 07780 return false; 07781 // Fall through. 07782 case ISD::SIGN_EXTEND: { 07783 EVT VT = N->getValueType(0); 07784 CC = N->getOperand(0); 07785 if (CC.getValueType() != MVT::i1) 07786 return false; 07787 Invert = !AllOnes; 07788 if (AllOnes) 07789 // When looking for an AllOnes constant, N is an sext, and the 'other' 07790 // value is 0. 07791 OtherOp = DAG.getConstant(0, VT); 07792 else if (N->getOpcode() == ISD::ZERO_EXTEND) 07793 // When looking for a 0 constant, N can be zext or sext. 
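    // Editor's note (illustrative): (zext i1 cc) evaluates to 0 or 1, so when
    // the constant of interest is 0 the "other" value is 1; for (sext i1 cc)
    // the values are 0 and -1, so the other value is all ones (the case
    // handled just below).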
07794 OtherOp = DAG.getConstant(1, VT); 07795 else 07796 OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); 07797 return true; 07798 } 07799 } 07800 } 07801 07802 // Combine a constant select operand into its use: 07803 // 07804 // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 07805 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 07806 // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] 07807 // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 07808 // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 07809 // 07810 // The transform is rejected if the select doesn't have a constant operand that 07811 // is null, or all ones when AllOnes is set. 07812 // 07813 // Also recognize sext/zext from i1: 07814 // 07815 // (add (zext cc), x) -> (select cc (add x, 1), x) 07816 // (add (sext cc), x) -> (select cc (add x, -1), x) 07817 // 07818 // These transformations eventually create predicated instructions. 07819 // 07820 // @param N The node to transform. 07821 // @param Slct The N operand that is a select. 07822 // @param OtherOp The other N operand (x above). 07823 // @param DCI Context. 07824 // @param AllOnes Require the select constant to be all ones instead of null. 07825 // @returns The new node, or SDValue() on failure. 07826 static 07827 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 07828 TargetLowering::DAGCombinerInfo &DCI, 07829 bool AllOnes = false) { 07830 SelectionDAG &DAG = DCI.DAG; 07831 EVT VT = N->getValueType(0); 07832 SDValue NonConstantVal; 07833 SDValue CCOp; 07834 bool SwapSelectOps; 07835 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps, 07836 NonConstantVal, DAG)) 07837 return SDValue(); 07838 07839 // Slct is now know to be the desired identity constant when CC is true. 07840 SDValue TrueVal = OtherOp; 07841 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 07842 OtherOp, NonConstantVal); 07843 // Unless SwapSelectOps says CC should be false. 07844 if (SwapSelectOps) 07845 std::swap(TrueVal, FalseVal); 07846 07847 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, 07848 CCOp, TrueVal, FalseVal); 07849 } 07850 07851 // Attempt combineSelectAndUse on each operand of a commutative operator N. 07852 static 07853 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, 07854 TargetLowering::DAGCombinerInfo &DCI) { 07855 SDValue N0 = N->getOperand(0); 07856 SDValue N1 = N->getOperand(1); 07857 if (N0.getNode()->hasOneUse()) { 07858 SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes); 07859 if (Result.getNode()) 07860 return Result; 07861 } 07862 if (N1.getNode()->hasOneUse()) { 07863 SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes); 07864 if (Result.getNode()) 07865 return Result; 07866 } 07867 return SDValue(); 07868 } 07869 07870 // AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction 07871 // (only after legalization). 07872 static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, 07873 TargetLowering::DAGCombinerInfo &DCI, 07874 const ARMSubtarget *Subtarget) { 07875 07876 // Only perform optimization if after legalize, and if NEON is available. We 07877 // also expected both operands to be BUILD_VECTORs. 07878 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON() 07879 || N0.getOpcode() != ISD::BUILD_VECTOR 07880 || N1.getOpcode() != ISD::BUILD_VECTOR) 07881 return SDValue(); 07882 07883 // Check output type since VPADDL operand elements can only be 8, 16, or 32. 
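  // Editor's illustration of the shape being matched (not from the original
  // source):
  //   add (build_vector (extract_elt v, 0), (extract_elt v, 2), ...),
  //       (build_vector (extract_elt v, 1), (extract_elt v, 3), ...)
  // i.e. a pairwise add of the even and odd lanes of a single vector v, which
  // is what vpaddl computes (with each result lane widened).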
07884   EVT VT = N->getValueType(0);
07885   if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
07886     return SDValue();
07887 
07888   // Check that the vector operands are of the right form.
07889   // N0 and N1 are BUILD_VECTOR nodes with N EXTRACT_VECTOR
07890   // operands, where N is the number of elements in the formed vector.
07891   // Each EXTRACT_VECTOR should have the same input vector and an odd or even
07892   // index such that we have a pairwise add pattern.
07893 
07894   // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
07895   if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
07896     return SDValue();
07897   SDValue Vec = N0->getOperand(0)->getOperand(0);
07898   SDNode *V = Vec.getNode();
07899   unsigned nextIndex = 0;
07900 
07901   // For each operand of the ADD that is a BUILD_VECTOR,
07902   // check to see if each of its operands is an EXTRACT_VECTOR with
07903   // the same vector and the appropriate index.
07904   for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
07905     if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
07906         && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
07907 
07908       SDValue ExtVec0 = N0->getOperand(i);
07909       SDValue ExtVec1 = N1->getOperand(i);
07910 
07911       // First operand is the vector, verify it's the same.
07912       if (V != ExtVec0->getOperand(0).getNode() ||
07913           V != ExtVec1->getOperand(0).getNode())
07914         return SDValue();
07915 
07916       // Second is the constant, verify it's correct.
07917       ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
07918       ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
07919 
07920       // For the constants, we want to see all the even or all the odd indices.
07921       if (!C0 || !C1 || C0->getZExtValue() != nextIndex
07922           || C1->getZExtValue() != nextIndex+1)
07923         return SDValue();
07924 
07925       // Increment index.
07926       nextIndex+=2;
07927     } else
07928       return SDValue();
07929   }
07930 
07931   // Create VPADDL node.
07932   SelectionDAG &DAG = DCI.DAG;
07933   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
07934 
07935   // Build operand list.
07936   SmallVector<SDValue, 8> Ops;
07937   Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
07938                                 TLI.getPointerTy()));
07939 
07940   // Input is the vector.
07941   Ops.push_back(Vec);
07942 
07943   // Get widened type and narrowed type.
07944   MVT widenType;
07945   unsigned numElem = VT.getVectorNumElements();
07946 
07947   EVT inputLaneType = Vec.getValueType().getVectorElementType();
07948   switch (inputLaneType.getSimpleVT().SimpleTy) {
07949     case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
07950     case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
07951     case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
07952     default:
07953       llvm_unreachable("Invalid vector element type for padd optimization.");
07954   }
07955 
07956   SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops);
07957   unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ?
ISD::ANY_EXTEND : ISD::TRUNCATE; 07958 return DAG.getNode(ExtOp, SDLoc(N), VT, tmp); 07959 } 07960 07961 static SDValue findMUL_LOHI(SDValue V) { 07962 if (V->getOpcode() == ISD::UMUL_LOHI || 07963 V->getOpcode() == ISD::SMUL_LOHI) 07964 return V; 07965 return SDValue(); 07966 } 07967 07968 static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, 07969 TargetLowering::DAGCombinerInfo &DCI, 07970 const ARMSubtarget *Subtarget) { 07971 07972 if (Subtarget->isThumb1Only()) return SDValue(); 07973 07974 // Only perform the checks after legalize when the pattern is available. 07975 if (DCI.isBeforeLegalize()) return SDValue(); 07976 07977 // Look for multiply add opportunities. 07978 // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where 07979 // each add nodes consumes a value from ISD::UMUL_LOHI and there is 07980 // a glue link from the first add to the second add. 07981 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by 07982 // a S/UMLAL instruction. 07983 // loAdd UMUL_LOHI 07984 // \ / :lo \ :hi 07985 // \ / \ [no multiline comment] 07986 // ADDC | hiAdd 07987 // \ :glue / / 07988 // \ / / 07989 // ADDE 07990 // 07991 assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC"); 07992 SDValue AddcOp0 = AddcNode->getOperand(0); 07993 SDValue AddcOp1 = AddcNode->getOperand(1); 07994 07995 // Check if the two operands are from the same mul_lohi node. 07996 if (AddcOp0.getNode() == AddcOp1.getNode()) 07997 return SDValue(); 07998 07999 assert(AddcNode->getNumValues() == 2 && 08000 AddcNode->getValueType(0) == MVT::i32 && 08001 "Expect ADDC with two result values. First: i32"); 08002 08003 // Check that we have a glued ADDC node. 08004 if (AddcNode->getValueType(1) != MVT::Glue) 08005 return SDValue(); 08006 08007 // Check that the ADDC adds the low result of the S/UMUL_LOHI. 08008 if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && 08009 AddcOp0->getOpcode() != ISD::SMUL_LOHI && 08010 AddcOp1->getOpcode() != ISD::UMUL_LOHI && 08011 AddcOp1->getOpcode() != ISD::SMUL_LOHI) 08012 return SDValue(); 08013 08014 // Look for the glued ADDE. 08015 SDNode* AddeNode = AddcNode->getGluedUser(); 08016 if (!AddeNode) 08017 return SDValue(); 08018 08019 // Make sure it is really an ADDE. 08020 if (AddeNode->getOpcode() != ISD::ADDE) 08021 return SDValue(); 08022 08023 assert(AddeNode->getNumOperands() == 3 && 08024 AddeNode->getOperand(2).getValueType() == MVT::Glue && 08025 "ADDE node has the wrong inputs"); 08026 08027 // Check for the triangle shape. 08028 SDValue AddeOp0 = AddeNode->getOperand(0); 08029 SDValue AddeOp1 = AddeNode->getOperand(1); 08030 08031 // Make sure that the ADDE operands are not coming from the same node. 08032 if (AddeOp0.getNode() == AddeOp1.getNode()) 08033 return SDValue(); 08034 08035 // Find the MUL_LOHI node walking up ADDE's operands. 08036 bool IsLeftOperandMUL = false; 08037 SDValue MULOp = findMUL_LOHI(AddeOp0); 08038 if (MULOp == SDValue()) 08039 MULOp = findMUL_LOHI(AddeOp1); 08040 else 08041 IsLeftOperandMUL = true; 08042 if (MULOp == SDValue()) 08043 return SDValue(); 08044 08045 // Figure out the right opcode. 08046 unsigned Opc = MULOp->getOpcode(); 08047 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL; 08048 08049 // Figure out the high and low input values to the MLAL node. 
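  // Editor's illustration (not from the original source): in the canonical
  // form
  //   (lo, hi)       = umul_lohi(a, b)
  //   (sumLo, carry) = addc(lo, addendLo)
  //   sumHi          = adde(hi, addendHi, carry)
  // LoMul/LowAdd are recovered from the ADDC operands and HiMul/HiAdd from
  // the ADDE operands, and the whole pattern is replaced by a single
  // UMLAL/SMLAL node below.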
08050 SDValue* HiMul = &MULOp; 08051 SDValue* HiAdd = nullptr; 08052 SDValue* LoMul = nullptr; 08053 SDValue* LowAdd = nullptr; 08054 08055 if (IsLeftOperandMUL) 08056 HiAdd = &AddeOp1; 08057 else 08058 HiAdd = &AddeOp0; 08059 08060 08061 if (AddcOp0->getOpcode() == Opc) { 08062 LoMul = &AddcOp0; 08063 LowAdd = &AddcOp1; 08064 } 08065 if (AddcOp1->getOpcode() == Opc) { 08066 LoMul = &AddcOp1; 08067 LowAdd = &AddcOp0; 08068 } 08069 08070 if (!LoMul) 08071 return SDValue(); 08072 08073 if (LoMul->getNode() != HiMul->getNode()) 08074 return SDValue(); 08075 08076 // Create the merged node. 08077 SelectionDAG &DAG = DCI.DAG; 08078 08079 // Build operand list. 08080 SmallVector<SDValue, 8> Ops; 08081 Ops.push_back(LoMul->getOperand(0)); 08082 Ops.push_back(LoMul->getOperand(1)); 08083 Ops.push_back(*LowAdd); 08084 Ops.push_back(*HiAdd); 08085 08086 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), 08087 DAG.getVTList(MVT::i32, MVT::i32), Ops); 08088 08089 // Replace the ADDs' nodes uses by the MLA node's values. 08090 SDValue HiMLALResult(MLALNode.getNode(), 1); 08091 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); 08092 08093 SDValue LoMLALResult(MLALNode.getNode(), 0); 08094 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); 08095 08096 // Return original node to notify the driver to stop replacing. 08097 SDValue resNode(AddcNode, 0); 08098 return resNode; 08099 } 08100 08101 /// PerformADDCCombine - Target-specific dag combine transform from 08102 /// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL. 08103 static SDValue PerformADDCCombine(SDNode *N, 08104 TargetLowering::DAGCombinerInfo &DCI, 08105 const ARMSubtarget *Subtarget) { 08106 08107 return AddCombineTo64bitMLAL(N, DCI, Subtarget); 08108 08109 } 08110 08111 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 08112 /// operands N0 and N1. This is a helper for PerformADDCombine that is 08113 /// called with the default operands, and if that fails, with commuted 08114 /// operands. 08115 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 08116 TargetLowering::DAGCombinerInfo &DCI, 08117 const ARMSubtarget *Subtarget){ 08118 08119 // Attempt to create vpaddl for this add. 08120 SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget); 08121 if (Result.getNode()) 08122 return Result; 08123 08124 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 08125 if (N0.getNode()->hasOneUse()) { 08126 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 08127 if (Result.getNode()) return Result; 08128 } 08129 return SDValue(); 08130 } 08131 08132 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 08133 /// 08134 static SDValue PerformADDCombine(SDNode *N, 08135 TargetLowering::DAGCombinerInfo &DCI, 08136 const ARMSubtarget *Subtarget) { 08137 SDValue N0 = N->getOperand(0); 08138 SDValue N1 = N->getOperand(1); 08139 08140 // First try with the default operand order. 08141 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget); 08142 if (Result.getNode()) 08143 return Result; 08144 08145 // If that didn't work, try again with the operands commuted. 08146 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); 08147 } 08148 08149 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 
08150 /// 08151 static SDValue PerformSUBCombine(SDNode *N, 08152 TargetLowering::DAGCombinerInfo &DCI) { 08153 SDValue N0 = N->getOperand(0); 08154 SDValue N1 = N->getOperand(1); 08155 08156 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 08157 if (N1.getNode()->hasOneUse()) { 08158 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 08159 if (Result.getNode()) return Result; 08160 } 08161 08162 return SDValue(); 08163 } 08164 08165 /// PerformVMULCombine 08166 /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the 08167 /// special multiplier accumulator forwarding. 08168 /// vmul d3, d0, d2 08169 /// vmla d3, d1, d2 08170 /// is faster than 08171 /// vadd d3, d0, d1 08172 /// vmul d3, d3, d2 08173 // However, for (A + B) * (A + B), 08174 // vadd d2, d0, d1 08175 // vmul d3, d0, d2 08176 // vmla d3, d1, d2 08177 // is slower than 08178 // vadd d2, d0, d1 08179 // vmul d3, d2, d2 08180 static SDValue PerformVMULCombine(SDNode *N, 08181 TargetLowering::DAGCombinerInfo &DCI, 08182 const ARMSubtarget *Subtarget) { 08183 if (!Subtarget->hasVMLxForwarding()) 08184 return SDValue(); 08185 08186 SelectionDAG &DAG = DCI.DAG; 08187 SDValue N0 = N->getOperand(0); 08188 SDValue N1 = N->getOperand(1); 08189 unsigned Opcode = N0.getOpcode(); 08190 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 08191 Opcode != ISD::FADD && Opcode != ISD::FSUB) { 08192 Opcode = N1.getOpcode(); 08193 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 08194 Opcode != ISD::FADD && Opcode != ISD::FSUB) 08195 return SDValue(); 08196 std::swap(N0, N1); 08197 } 08198 08199 if (N0 == N1) 08200 return SDValue(); 08201 08202 EVT VT = N->getValueType(0); 08203 SDLoc DL(N); 08204 SDValue N00 = N0->getOperand(0); 08205 SDValue N01 = N0->getOperand(1); 08206 return DAG.getNode(Opcode, DL, VT, 08207 DAG.getNode(ISD::MUL, DL, VT, N00, N1), 08208 DAG.getNode(ISD::MUL, DL, VT, N01, N1)); 08209 } 08210 08211 static SDValue PerformMULCombine(SDNode *N, 08212 TargetLowering::DAGCombinerInfo &DCI, 08213 const ARMSubtarget *Subtarget) { 08214 SelectionDAG &DAG = DCI.DAG; 08215 08216 if (Subtarget->isThumb1Only()) 08217 return SDValue(); 08218 08219 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 08220 return SDValue(); 08221 08222 EVT VT = N->getValueType(0); 08223 if (VT.is64BitVector() || VT.is128BitVector()) 08224 return PerformVMULCombine(N, DCI, Subtarget); 08225 if (VT != MVT::i32) 08226 return SDValue(); 08227 08228 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 08229 if (!C) 08230 return SDValue(); 08231 08232 int64_t MulAmt = C->getSExtValue(); 08233 unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt); 08234 08235 ShiftAmt = ShiftAmt & (32 - 1); 08236 SDValue V = N->getOperand(0); 08237 SDLoc DL(N); 08238 08239 SDValue Res; 08240 MulAmt >>= ShiftAmt; 08241 08242 if (MulAmt >= 0) { 08243 if (isPowerOf2_32(MulAmt - 1)) { 08244 // (mul x, 2^N + 1) => (add (shl x, N), x) 08245 Res = DAG.getNode(ISD::ADD, DL, VT, 08246 V, 08247 DAG.getNode(ISD::SHL, DL, VT, 08248 V, 08249 DAG.getConstant(Log2_32(MulAmt - 1), 08250 MVT::i32))); 08251 } else if (isPowerOf2_32(MulAmt + 1)) { 08252 // (mul x, 2^N - 1) => (sub (shl x, N), x) 08253 Res = DAG.getNode(ISD::SUB, DL, VT, 08254 DAG.getNode(ISD::SHL, DL, VT, 08255 V, 08256 DAG.getConstant(Log2_32(MulAmt + 1), 08257 MVT::i32)), 08258 V); 08259 } else 08260 return SDValue(); 08261 } else { 08262 uint64_t MulAmtAbs = -MulAmt; 08263 if (isPowerOf2_32(MulAmtAbs + 1)) { 08264 // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) 08265 Res = 
DAG.getNode(ISD::SUB, DL, VT, 08266 V, 08267 DAG.getNode(ISD::SHL, DL, VT, 08268 V, 08269 DAG.getConstant(Log2_32(MulAmtAbs + 1), 08270 MVT::i32))); 08271 } else if (isPowerOf2_32(MulAmtAbs - 1)) { 08272 // (mul x, -(2^N + 1)) => - (add (shl x, N), x) 08273 Res = DAG.getNode(ISD::ADD, DL, VT, 08274 V, 08275 DAG.getNode(ISD::SHL, DL, VT, 08276 V, 08277 DAG.getConstant(Log2_32(MulAmtAbs-1), 08278 MVT::i32))); 08279 Res = DAG.getNode(ISD::SUB, DL, VT, 08280 DAG.getConstant(0, MVT::i32),Res); 08281 08282 } else 08283 return SDValue(); 08284 } 08285 08286 if (ShiftAmt != 0) 08287 Res = DAG.getNode(ISD::SHL, DL, VT, 08288 Res, DAG.getConstant(ShiftAmt, MVT::i32)); 08289 08290 // Do not add new nodes to DAG combiner worklist. 08291 DCI.CombineTo(N, Res, false); 08292 return SDValue(); 08293 } 08294 08295 static SDValue PerformANDCombine(SDNode *N, 08296 TargetLowering::DAGCombinerInfo &DCI, 08297 const ARMSubtarget *Subtarget) { 08298 08299 // Attempt to use immediate-form VBIC 08300 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 08301 SDLoc dl(N); 08302 EVT VT = N->getValueType(0); 08303 SelectionDAG &DAG = DCI.DAG; 08304 08305 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 08306 return SDValue(); 08307 08308 APInt SplatBits, SplatUndef; 08309 unsigned SplatBitSize; 08310 bool HasAnyUndefs; 08311 if (BVN && 08312 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 08313 if (SplatBitSize <= 64) { 08314 EVT VbicVT; 08315 SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), 08316 SplatUndef.getZExtValue(), SplatBitSize, 08317 DAG, VbicVT, VT.is128BitVector(), 08318 OtherModImm); 08319 if (Val.getNode()) { 08320 SDValue Input = 08321 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); 08322 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); 08323 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); 08324 } 08325 } 08326 } 08327 08328 if (!Subtarget->isThumb1Only()) { 08329 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) 08330 SDValue Result = combineSelectAndUseCommutative(N, true, DCI); 08331 if (Result.getNode()) 08332 return Result; 08333 } 08334 08335 return SDValue(); 08336 } 08337 08338 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR 08339 static SDValue PerformORCombine(SDNode *N, 08340 TargetLowering::DAGCombinerInfo &DCI, 08341 const ARMSubtarget *Subtarget) { 08342 // Attempt to use immediate-form VORR 08343 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 08344 SDLoc dl(N); 08345 EVT VT = N->getValueType(0); 08346 SelectionDAG &DAG = DCI.DAG; 08347 08348 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 08349 return SDValue(); 08350 08351 APInt SplatBits, SplatUndef; 08352 unsigned SplatBitSize; 08353 bool HasAnyUndefs; 08354 if (BVN && Subtarget->hasNEON() && 08355 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 08356 if (SplatBitSize <= 64) { 08357 EVT VorrVT; 08358 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 08359 SplatUndef.getZExtValue(), SplatBitSize, 08360 DAG, VorrVT, VT.is128BitVector(), 08361 OtherModImm); 08362 if (Val.getNode()) { 08363 SDValue Input = 08364 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); 08365 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); 08366 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); 08367 } 08368 } 08369 } 08370 08371 if (!Subtarget->isThumb1Only()) { 08372 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 08373 SDValue Result = 
combineSelectAndUseCommutative(N, false, DCI); 08374 if (Result.getNode()) 08375 return Result; 08376 } 08377 08378 // The code below optimizes (or (and X, Y), Z). 08379 // The AND operand needs to have a single user to make these optimizations 08380 // profitable. 08381 SDValue N0 = N->getOperand(0); 08382 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) 08383 return SDValue(); 08384 SDValue N1 = N->getOperand(1); 08385 08386 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 08387 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && 08388 DAG.getTargetLoweringInfo().isTypeLegal(VT)) { 08389 APInt SplatUndef; 08390 unsigned SplatBitSize; 08391 bool HasAnyUndefs; 08392 08393 APInt SplatBits0, SplatBits1; 08394 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); 08395 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); 08396 // Ensure that the second operand of both ands are constants 08397 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, 08398 HasAnyUndefs) && !HasAnyUndefs) { 08399 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, 08400 HasAnyUndefs) && !HasAnyUndefs) { 08401 // Ensure that the bit width of the constants are the same and that 08402 // the splat arguments are logical inverses as per the pattern we 08403 // are trying to simplify. 08404 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && 08405 SplatBits0 == ~SplatBits1) { 08406 // Canonicalize the vector type to make instruction selection 08407 // simpler. 08408 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 08409 SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, 08410 N0->getOperand(1), 08411 N0->getOperand(0), 08412 N1->getOperand(0)); 08413 return DAG.getNode(ISD::BITCAST, dl, VT, Result); 08414 } 08415 } 08416 } 08417 } 08418 08419 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 08420 // reasonable. 08421 08422 // BFI is only available on V6T2+ 08423 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 08424 return SDValue(); 08425 08426 SDLoc DL(N); 08427 // 1) or (and A, mask), val => ARMbfi A, val, mask 08428 // iff (val & mask) == val 08429 // 08430 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 08431 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 08432 // && mask == ~mask2 08433 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 08434 // && ~mask == mask2 08435 // (i.e., copy a bitfield value into another bitfield of the same width) 08436 08437 if (VT != MVT::i32) 08438 return SDValue(); 08439 08440 SDValue N00 = N0.getOperand(0); 08441 08442 // The value and the mask need to be constants so we can verify this is 08443 // actually a bitfield set. If the mask is 0xffff, we can do better 08444 // via a movt instruction, so don't use BFI in that case. 
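  // Worked example for case (1) (editor's illustration, not from the original
  // source): in
  //   or (and A, 0xffff00ff), 0x00004200
  // the mask clears bits [15:8] and the value 0x4200 lies entirely within
  // that field, so the OR becomes an ARMbfi node inserting 0x42 (the value
  // shifted right by the field's start bit) into bits [15:8] of A.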
08445 SDValue MaskOp = N0.getOperand(1); 08446 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp); 08447 if (!MaskC) 08448 return SDValue(); 08449 unsigned Mask = MaskC->getZExtValue(); 08450 if (Mask == 0xffff) 08451 return SDValue(); 08452 SDValue Res; 08453 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 08454 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 08455 if (N1C) { 08456 unsigned Val = N1C->getZExtValue(); 08457 if ((Val & ~Mask) != Val) 08458 return SDValue(); 08459 08460 if (ARM::isBitFieldInvertedMask(Mask)) { 08461 Val >>= countTrailingZeros(~Mask); 08462 08463 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, 08464 DAG.getConstant(Val, MVT::i32), 08465 DAG.getConstant(Mask, MVT::i32)); 08466 08467 // Do not add new nodes to DAG combiner worklist. 08468 DCI.CombineTo(N, Res, false); 08469 return SDValue(); 08470 } 08471 } else if (N1.getOpcode() == ISD::AND) { 08472 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 08473 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 08474 if (!N11C) 08475 return SDValue(); 08476 unsigned Mask2 = N11C->getZExtValue(); 08477 08478 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern 08479 // as is to match. 08480 if (ARM::isBitFieldInvertedMask(Mask) && 08481 (Mask == ~Mask2)) { 08482 // The pack halfword instruction works better for masks that fit it, 08483 // so use that when it's available. 08484 if (Subtarget->hasT2ExtractPack() && 08485 (Mask == 0xffff || Mask == 0xffff0000)) 08486 return SDValue(); 08487 // 2a 08488 unsigned amt = countTrailingZeros(Mask2); 08489 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 08490 DAG.getConstant(amt, MVT::i32)); 08491 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, 08492 DAG.getConstant(Mask, MVT::i32)); 08493 // Do not add new nodes to DAG combiner worklist. 08494 DCI.CombineTo(N, Res, false); 08495 return SDValue(); 08496 } else if (ARM::isBitFieldInvertedMask(~Mask) && 08497 (~Mask == Mask2)) { 08498 // The pack halfword instruction works better for masks that fit it, 08499 // so use that when it's available. 08500 if (Subtarget->hasT2ExtractPack() && 08501 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 08502 return SDValue(); 08503 // 2b 08504 unsigned lsb = countTrailingZeros(Mask); 08505 Res = DAG.getNode(ISD::SRL, DL, VT, N00, 08506 DAG.getConstant(lsb, MVT::i32)); 08507 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 08508 DAG.getConstant(Mask2, MVT::i32)); 08509 // Do not add new nodes to DAG combiner worklist. 08510 DCI.CombineTo(N, Res, false); 08511 return SDValue(); 08512 } 08513 } 08514 08515 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && 08516 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && 08517 ARM::isBitFieldInvertedMask(~Mask)) { 08518 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask 08519 // where lsb(mask) == #shamt and masked bits of B are known zero. 08520 SDValue ShAmt = N00.getOperand(1); 08521 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 08522 unsigned LSB = countTrailingZeros(Mask); 08523 if (ShAmtC != LSB) 08524 return SDValue(); 08525 08526 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), 08527 DAG.getConstant(~Mask, MVT::i32)); 08528 08529 // Do not add new nodes to DAG combiner worklist. 
08530 DCI.CombineTo(N, Res, false); 08531 } 08532 08533 return SDValue(); 08534 } 08535 08536 static SDValue PerformXORCombine(SDNode *N, 08537 TargetLowering::DAGCombinerInfo &DCI, 08538 const ARMSubtarget *Subtarget) { 08539 EVT VT = N->getValueType(0); 08540 SelectionDAG &DAG = DCI.DAG; 08541 08542 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 08543 return SDValue(); 08544 08545 if (!Subtarget->isThumb1Only()) { 08546 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 08547 SDValue Result = combineSelectAndUseCommutative(N, false, DCI); 08548 if (Result.getNode()) 08549 return Result; 08550 } 08551 08552 return SDValue(); 08553 } 08554 08555 /// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff 08556 /// the bits being cleared by the AND are not demanded by the BFI. 08557 static SDValue PerformBFICombine(SDNode *N, 08558 TargetLowering::DAGCombinerInfo &DCI) { 08559 SDValue N1 = N->getOperand(1); 08560 if (N1.getOpcode() == ISD::AND) { 08561 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 08562 if (!N11C) 08563 return SDValue(); 08564 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 08565 unsigned LSB = countTrailingZeros(~InvMask); 08566 unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB; 08567 unsigned Mask = (1 << Width)-1; 08568 unsigned Mask2 = N11C->getZExtValue(); 08569 if ((Mask & (~Mask2)) == 0) 08570 return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), 08571 N->getOperand(0), N1.getOperand(0), 08572 N->getOperand(2)); 08573 } 08574 return SDValue(); 08575 } 08576 08577 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for 08578 /// ARMISD::VMOVRRD. 08579 static SDValue PerformVMOVRRDCombine(SDNode *N, 08580 TargetLowering::DAGCombinerInfo &DCI, 08581 const ARMSubtarget *Subtarget) { 08582 // vmovrrd(vmovdrr x, y) -> x,y 08583 SDValue InDouble = N->getOperand(0); 08584 if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP()) 08585 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 08586 08587 // vmovrrd(load f64) -> (load i32), (load i32) 08588 SDNode *InNode = InDouble.getNode(); 08589 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && 08590 InNode->getValueType(0) == MVT::f64 && 08591 InNode->getOperand(1).getOpcode() == ISD::FrameIndex && 08592 !cast<LoadSDNode>(InNode)->isVolatile()) { 08593 // TODO: Should this be done for non-FrameIndex operands? 08594 LoadSDNode *LD = cast<LoadSDNode>(InNode); 08595 08596 SelectionDAG &DAG = DCI.DAG; 08597 SDLoc DL(LD); 08598 SDValue BasePtr = LD->getBasePtr(); 08599 SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, 08600 LD->getPointerInfo(), LD->isVolatile(), 08601 LD->isNonTemporal(), LD->isInvariant(), 08602 LD->getAlignment()); 08603 08604 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 08605 DAG.getConstant(4, MVT::i32)); 08606 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, 08607 LD->getPointerInfo(), LD->isVolatile(), 08608 LD->isNonTemporal(), LD->isInvariant(), 08609 std::min(4U, LD->getAlignment() / 2)); 08610 08611 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); 08612 if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) 08613 std::swap (NewLD1, NewLD2); 08614 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); 08615 return Result; 08616 } 08617 08618 return SDValue(); 08619 } 08620 08621 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for 08622 /// ARMISD::VMOVDRR. 
This is also used for BUILD_VECTORs with 2 operands. 08623 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 08624 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 08625 SDValue Op0 = N->getOperand(0); 08626 SDValue Op1 = N->getOperand(1); 08627 if (Op0.getOpcode() == ISD::BITCAST) 08628 Op0 = Op0.getOperand(0); 08629 if (Op1.getOpcode() == ISD::BITCAST) 08630 Op1 = Op1.getOperand(0); 08631 if (Op0.getOpcode() == ARMISD::VMOVRRD && 08632 Op0.getNode() == Op1.getNode() && 08633 Op0.getResNo() == 0 && Op1.getResNo() == 1) 08634 return DAG.getNode(ISD::BITCAST, SDLoc(N), 08635 N->getValueType(0), Op0.getOperand(0)); 08636 return SDValue(); 08637 } 08638 08639 /// PerformSTORECombine - Target-specific dag combine xforms for 08640 /// ISD::STORE. 08641 static SDValue PerformSTORECombine(SDNode *N, 08642 TargetLowering::DAGCombinerInfo &DCI) { 08643 StoreSDNode *St = cast<StoreSDNode>(N); 08644 if (St->isVolatile()) 08645 return SDValue(); 08646 08647 // Optimize trunc store (of multiple scalars) to shuffle and store. First, 08648 // pack all of the elements in one place. Next, store to memory in fewer 08649 // chunks. 08650 SDValue StVal = St->getValue(); 08651 EVT VT = StVal.getValueType(); 08652 if (St->isTruncatingStore() && VT.isVector()) { 08653 SelectionDAG &DAG = DCI.DAG; 08654 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 08655 EVT StVT = St->getMemoryVT(); 08656 unsigned NumElems = VT.getVectorNumElements(); 08657 assert(StVT != VT && "Cannot truncate to the same type"); 08658 unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); 08659 unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); 08660 08661 // From, To sizes and ElemCount must be pow of two 08662 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); 08663 08664 // We are going to use the original vector elt for storing. 08665 // Accumulated smaller vector elements must be a multiple of the store size. 08666 if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); 08667 08668 unsigned SizeRatio = FromEltSz / ToEltSz; 08669 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); 08670 08671 // Create a type on which we perform the shuffle. 08672 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), 08673 NumElems*SizeRatio); 08674 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); 08675 08676 SDLoc DL(St); 08677 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); 08678 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); 08679 for (unsigned i = 0; i < NumElems; ++i) 08680 ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio; 08681 08682 // Can't shuffle using an illegal type. 08683 if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); 08684 08685 SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, 08686 DAG.getUNDEF(WideVec.getValueType()), 08687 ShuffleVec.data()); 08688 // At this point all of the data is stored at the bottom of the 08689 // register. We now need to save it to mem. 08690 08691 // Find the largest store unit 08692 MVT StoreType = MVT::i8; 08693 for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; 08694 tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { 08695 MVT Tp = (MVT::SimpleValueType)tp; 08696 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) 08697 StoreType = Tp; 08698 } 08699 // Didn't find a legal store type. 
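    // Editor's note (illustrative, not from the original source): for a v8i16
    // value truncate-stored as v8i8, SizeRatio is 2, the shuffle above packs
    // the eight low bytes into the bottom of the wide register, and (with i32
    // as the widest legal integer type here) the loop below emits the result
    // as two i32 stores instead of eight byte stores.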
08700 if (!TLI.isTypeLegal(StoreType)) 08701 return SDValue(); 08702 08703 // Bitcast the original vector into a vector of store-size units 08704 EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), 08705 StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); 08706 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); 08707 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); 08708 SmallVector<SDValue, 8> Chains; 08709 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, 08710 TLI.getPointerTy()); 08711 SDValue BasePtr = St->getBasePtr(); 08712 08713 // Perform one or more big stores into memory. 08714 unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); 08715 for (unsigned I = 0; I < E; I++) { 08716 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 08717 StoreType, ShuffWide, 08718 DAG.getIntPtrConstant(I)); 08719 SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, 08720 St->getPointerInfo(), St->isVolatile(), 08721 St->isNonTemporal(), St->getAlignment()); 08722 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, 08723 Increment); 08724 Chains.push_back(Ch); 08725 } 08726 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); 08727 } 08728 08729 if (!ISD::isNormalStore(St)) 08730 return SDValue(); 08731 08732 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and 08733 // ARM stores of arguments in the same cache line. 08734 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && 08735 StVal.getNode()->hasOneUse()) { 08736 SelectionDAG &DAG = DCI.DAG; 08737 bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); 08738 SDLoc DL(St); 08739 SDValue BasePtr = St->getBasePtr(); 08740 SDValue NewST1 = DAG.getStore(St->getChain(), DL, 08741 StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), 08742 BasePtr, St->getPointerInfo(), St->isVolatile(), 08743 St->isNonTemporal(), St->getAlignment()); 08744 08745 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 08746 DAG.getConstant(4, MVT::i32)); 08747 return DAG.getStore(NewST1.getValue(0), DL, 08748 StVal.getNode()->getOperand(isBigEndian ? 0 : 1), 08749 OffsetPtr, St->getPointerInfo(), St->isVolatile(), 08750 St->isNonTemporal(), 08751 std::min(4U, St->getAlignment() / 2)); 08752 } 08753 08754 if (StVal.getValueType() != MVT::i64 || 08755 StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 08756 return SDValue(); 08757 08758 // Bitcast an i64 store extracted from a vector to f64. 08759 // Otherwise, the i64 value will be legalized to a pair of i32 values. 08760 SelectionDAG &DAG = DCI.DAG; 08761 SDLoc dl(StVal); 08762 SDValue IntVec = StVal.getOperand(0); 08763 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 08764 IntVec.getValueType().getVectorNumElements()); 08765 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); 08766 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, 08767 Vec, StVal.getOperand(1)); 08768 dl = SDLoc(N); 08769 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); 08770 // Make the DAGCombiner fold the bitcasts. 
08771 DCI.AddToWorklist(Vec.getNode());
08772 DCI.AddToWorklist(ExtElt.getNode());
08773 DCI.AddToWorklist(V.getNode());
08774 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
08775 St->getPointerInfo(), St->isVolatile(),
08776 St->isNonTemporal(), St->getAlignment(),
08777 St->getAAInfo());
08778 }
08779 
08780 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
08781 /// are normal, non-volatile loads. If so, it is profitable to bitcast an
08782 /// i64 vector to have f64 elements, since the value can then be loaded
08783 /// directly into a VFP register.
08784 static bool hasNormalLoadOperand(SDNode *N) {
08785 unsigned NumElts = N->getValueType(0).getVectorNumElements();
08786 for (unsigned i = 0; i < NumElts; ++i) {
08787 SDNode *Elt = N->getOperand(i).getNode();
08788 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
08789 return true;
08790 }
08791 return false;
08792 }
08793 
08794 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
08795 /// ISD::BUILD_VECTOR.
08796 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
08797 TargetLowering::DAGCombinerInfo &DCI,
08798 const ARMSubtarget *Subtarget) {
08799 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
08800 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
08801 // into a pair of GPRs, which is fine when the value is used as a scalar,
08802 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
08803 SelectionDAG &DAG = DCI.DAG;
08804 if (N->getNumOperands() == 2) {
08805 SDValue RV = PerformVMOVDRRCombine(N, DAG);
08806 if (RV.getNode())
08807 return RV;
08808 }
08809 
08810 // Load i64 elements as f64 values so that type legalization does not split
08811 // them up into i32 values.
08812 EVT VT = N->getValueType(0);
08813 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
08814 return SDValue();
08815 SDLoc dl(N);
08816 SmallVector<SDValue, 8> Ops;
08817 unsigned NumElts = VT.getVectorNumElements();
08818 for (unsigned i = 0; i < NumElts; ++i) {
08819 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
08820 Ops.push_back(V);
08821 // Make the DAGCombiner fold the bitcast.
08822 DCI.AddToWorklist(V.getNode());
08823 }
08824 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
08825 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops);
08826 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
08827 }
08828 
08829 /// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
08830 static SDValue
08831 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
08832 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
08833 // At that time, we may have inserted bitcasts from integer to float.
08834 // If these bitcasts have survived DAGCombine, change the lowering of this
08835 // BUILD_VECTOR into something more vector friendly, i.e., a form that does
08836 // not force the use of floating point types.
08837 
08838 // Make sure we can change the type of the vector.
08839 // This is possible iff:
08840 // 1. The vector is only used in a bitcast to an integer type. I.e.,
08841 // 1.1. Vector is used only once.
08842 // 1.2. Use is a bit convert to an integer type.
08843 // 2. The size of its operands is 32 bits (64 bits are not legal).
08844 EVT VT = N->getValueType(0);
08845 EVT EltVT = VT.getVectorElementType();
08846 
08847 // Check 1.1. and 2.
08848 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse()) 08849 return SDValue(); 08850 08851 // By construction, the input type must be float. 08852 assert(EltVT == MVT::f32 && "Unexpected type!"); 08853 08854 // Check 1.2. 08855 SDNode *Use = *N->use_begin(); 08856 if (Use->getOpcode() != ISD::BITCAST || 08857 Use->getValueType(0).isFloatingPoint()) 08858 return SDValue(); 08859 08860 // Check profitability. 08861 // Model is, if more than half of the relevant operands are bitcast from 08862 // i32, turn the build_vector into a sequence of insert_vector_elt. 08863 // Relevant operands are everything that is not statically 08864 // (i.e., at compile time) bitcasted. 08865 unsigned NumOfBitCastedElts = 0; 08866 unsigned NumElts = VT.getVectorNumElements(); 08867 unsigned NumOfRelevantElts = NumElts; 08868 for (unsigned Idx = 0; Idx < NumElts; ++Idx) { 08869 SDValue Elt = N->getOperand(Idx); 08870 if (Elt->getOpcode() == ISD::BITCAST) { 08871 // Assume only bit cast to i32 will go away. 08872 if (Elt->getOperand(0).getValueType() == MVT::i32) 08873 ++NumOfBitCastedElts; 08874 } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt)) 08875 // Constants are statically casted, thus do not count them as 08876 // relevant operands. 08877 --NumOfRelevantElts; 08878 } 08879 08880 // Check if more than half of the elements require a non-free bitcast. 08881 if (NumOfBitCastedElts <= NumOfRelevantElts / 2) 08882 return SDValue(); 08883 08884 SelectionDAG &DAG = DCI.DAG; 08885 // Create the new vector type. 08886 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 08887 // Check if the type is legal. 08888 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 08889 if (!TLI.isTypeLegal(VecVT)) 08890 return SDValue(); 08891 08892 // Combine: 08893 // ARMISD::BUILD_VECTOR E1, E2, ..., EN. 08894 // => BITCAST INSERT_VECTOR_ELT 08895 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1), 08896 // (BITCAST EN), N. 08897 SDValue Vec = DAG.getUNDEF(VecVT); 08898 SDLoc dl(N); 08899 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) { 08900 SDValue V = N->getOperand(Idx); 08901 if (V.getOpcode() == ISD::UNDEF) 08902 continue; 08903 if (V.getOpcode() == ISD::BITCAST && 08904 V->getOperand(0).getValueType() == MVT::i32) 08905 // Fold obvious case. 08906 V = V.getOperand(0); 08907 else { 08908 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); 08909 // Make the DAGCombiner fold the bitcasts. 08910 DCI.AddToWorklist(V.getNode()); 08911 } 08912 SDValue LaneIdx = DAG.getConstant(Idx, MVT::i32); 08913 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx); 08914 } 08915 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec); 08916 // Make the DAGCombiner fold the bitcasts. 08917 DCI.AddToWorklist(Vec.getNode()); 08918 return Vec; 08919 } 08920 08921 /// PerformInsertEltCombine - Target-specific dag combine xforms for 08922 /// ISD::INSERT_VECTOR_ELT. 08923 static SDValue PerformInsertEltCombine(SDNode *N, 08924 TargetLowering::DAGCombinerInfo &DCI) { 08925 // Bitcast an i64 load inserted into a vector to f64. 08926 // Otherwise, the i64 value will be legalized to a pair of i32 values. 
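// For example, inserting an i64 load into a v2i64 vector is rewritten below
// as a bitcast of the vector to v2f64, an insert of the loaded value bitcast
// to f64, and a bitcast of the result back to v2i64, so the element can live
// in a VFP/NEON register instead of a GPR pair.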
08927 EVT VT = N->getValueType(0); 08928 SDNode *Elt = N->getOperand(1).getNode(); 08929 if (VT.getVectorElementType() != MVT::i64 || 08930 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile()) 08931 return SDValue(); 08932 08933 SelectionDAG &DAG = DCI.DAG; 08934 SDLoc dl(N); 08935 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 08936 VT.getVectorNumElements()); 08937 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); 08938 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); 08939 // Make the DAGCombiner fold the bitcasts. 08940 DCI.AddToWorklist(Vec.getNode()); 08941 DCI.AddToWorklist(V.getNode()); 08942 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, 08943 Vec, V, N->getOperand(2)); 08944 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); 08945 } 08946 08947 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for 08948 /// ISD::VECTOR_SHUFFLE. 08949 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { 08950 // The LLVM shufflevector instruction does not require the shuffle mask 08951 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does 08952 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the 08953 // operands do not match the mask length, they are extended by concatenating 08954 // them with undef vectors. That is probably the right thing for other 08955 // targets, but for NEON it is better to concatenate two double-register 08956 // size vector operands into a single quad-register size vector. Do that 08957 // transformation here: 08958 // shuffle(concat(v1, undef), concat(v2, undef)) -> 08959 // shuffle(concat(v1, v2), undef) 08960 SDValue Op0 = N->getOperand(0); 08961 SDValue Op1 = N->getOperand(1); 08962 if (Op0.getOpcode() != ISD::CONCAT_VECTORS || 08963 Op1.getOpcode() != ISD::CONCAT_VECTORS || 08964 Op0.getNumOperands() != 2 || 08965 Op1.getNumOperands() != 2) 08966 return SDValue(); 08967 SDValue Concat0Op1 = Op0.getOperand(1); 08968 SDValue Concat1Op1 = Op1.getOperand(1); 08969 if (Concat0Op1.getOpcode() != ISD::UNDEF || 08970 Concat1Op1.getOpcode() != ISD::UNDEF) 08971 return SDValue(); 08972 // Skip the transformation if any of the types are illegal. 08973 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 08974 EVT VT = N->getValueType(0); 08975 if (!TLI.isTypeLegal(VT) || 08976 !TLI.isTypeLegal(Concat0Op1.getValueType()) || 08977 !TLI.isTypeLegal(Concat1Op1.getValueType())) 08978 return SDValue(); 08979 08980 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, 08981 Op0.getOperand(0), Op1.getOperand(0)); 08982 // Translate the shuffle mask. 08983 SmallVector<int, 16> NewMask; 08984 unsigned NumElts = VT.getVectorNumElements(); 08985 unsigned HalfElts = NumElts/2; 08986 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 08987 for (unsigned n = 0; n < NumElts; ++n) { 08988 int MaskElt = SVN->getMaskElt(n); 08989 int NewElt = -1; 08990 if (MaskElt < (int)HalfElts) 08991 NewElt = MaskElt; 08992 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) 08993 NewElt = HalfElts + MaskElt - NumElts; 08994 NewMask.push_back(NewElt); 08995 } 08996 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat, 08997 DAG.getUNDEF(VT), NewMask.data()); 08998 } 08999 09000 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and 09001 /// NEON load/store intrinsics to merge base address updates. 
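/// For example, a vld1 intrinsic whose address is also incremented by the
/// access size (an ISD::ADD of the same pointer and a constant 16 for a
/// 128-bit vld1) can be rewritten as a single VLD1_UPD node, which selects to
/// a NEON load with base-register writeback (e.g. "vld1.32 {d16, d17}, [r0]!").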
09002 static SDValue CombineBaseUpdate(SDNode *N, 09003 TargetLowering::DAGCombinerInfo &DCI) { 09004 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 09005 return SDValue(); 09006 09007 SelectionDAG &DAG = DCI.DAG; 09008 bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || 09009 N->getOpcode() == ISD::INTRINSIC_W_CHAIN); 09010 unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); 09011 SDValue Addr = N->getOperand(AddrOpIdx); 09012 09013 // Search for a use of the address operand that is an increment. 09014 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), 09015 UE = Addr.getNode()->use_end(); UI != UE; ++UI) { 09016 SDNode *User = *UI; 09017 if (User->getOpcode() != ISD::ADD || 09018 UI.getUse().getResNo() != Addr.getResNo()) 09019 continue; 09020 09021 // Check that the add is independent of the load/store. Otherwise, folding 09022 // it would create a cycle. 09023 if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) 09024 continue; 09025 09026 // Find the new opcode for the updating load/store. 09027 bool isLoad = true; 09028 bool isLaneOp = false; 09029 unsigned NewOpc = 0; 09030 unsigned NumVecs = 0; 09031 if (isIntrinsic) { 09032 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 09033 switch (IntNo) { 09034 default: llvm_unreachable("unexpected intrinsic for Neon base update"); 09035 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; 09036 NumVecs = 1; break; 09037 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; 09038 NumVecs = 2; break; 09039 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; 09040 NumVecs = 3; break; 09041 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; 09042 NumVecs = 4; break; 09043 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; 09044 NumVecs = 2; isLaneOp = true; break; 09045 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; 09046 NumVecs = 3; isLaneOp = true; break; 09047 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; 09048 NumVecs = 4; isLaneOp = true; break; 09049 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; 09050 NumVecs = 1; isLoad = false; break; 09051 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; 09052 NumVecs = 2; isLoad = false; break; 09053 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; 09054 NumVecs = 3; isLoad = false; break; 09055 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; 09056 NumVecs = 4; isLoad = false; break; 09057 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; 09058 NumVecs = 2; isLoad = false; isLaneOp = true; break; 09059 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; 09060 NumVecs = 3; isLoad = false; isLaneOp = true; break; 09061 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; 09062 NumVecs = 4; isLoad = false; isLaneOp = true; break; 09063 } 09064 } else { 09065 isLaneOp = true; 09066 switch (N->getOpcode()) { 09067 default: llvm_unreachable("unexpected opcode for Neon base update"); 09068 case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; 09069 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; 09070 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; 09071 } 09072 } 09073 09074 // Find the size of memory referenced by the load/store. 
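// For loads the in-memory type is the result vector type; for stores it is
// the type of the vector operand that follows the address. Lane operations
// only touch a single element per vector, hence the division below.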
09075 EVT VecTy; 09076 if (isLoad) 09077 VecTy = N->getValueType(0); 09078 else 09079 VecTy = N->getOperand(AddrOpIdx+1).getValueType(); 09080 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; 09081 if (isLaneOp) 09082 NumBytes /= VecTy.getVectorNumElements(); 09083 09084 // If the increment is a constant, it must match the memory ref size. 09085 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); 09086 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { 09087 uint64_t IncVal = CInc->getZExtValue(); 09088 if (IncVal != NumBytes) 09089 continue; 09090 } else if (NumBytes >= 3 * 16) { 09091 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two 09092 // separate instructions that make it harder to use a non-constant update. 09093 continue; 09094 } 09095 09096 // Create the new updating load/store node. 09097 EVT Tys[6]; 09098 unsigned NumResultVecs = (isLoad ? NumVecs : 0); 09099 unsigned n; 09100 for (n = 0; n < NumResultVecs; ++n) 09101 Tys[n] = VecTy; 09102 Tys[n++] = MVT::i32; 09103 Tys[n] = MVT::Other; 09104 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2)); 09105 SmallVector<SDValue, 8> Ops; 09106 Ops.push_back(N->getOperand(0)); // incoming chain 09107 Ops.push_back(N->getOperand(AddrOpIdx)); 09108 Ops.push_back(Inc); 09109 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { 09110 Ops.push_back(N->getOperand(i)); 09111 } 09112 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); 09113 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, 09114 Ops, MemInt->getMemoryVT(), 09115 MemInt->getMemOperand()); 09116 09117 // Update the uses. 09118 std::vector<SDValue> NewResults; 09119 for (unsigned i = 0; i < NumResultVecs; ++i) { 09120 NewResults.push_back(SDValue(UpdN.getNode(), i)); 09121 } 09122 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain 09123 DCI.CombineTo(N, NewResults); 09124 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); 09125 09126 break; 09127 } 09128 return SDValue(); 09129 } 09130 09131 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a 09132 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic 09133 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and 09134 /// return true. 09135 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 09136 SelectionDAG &DAG = DCI.DAG; 09137 EVT VT = N->getValueType(0); 09138 // vldN-dup instructions only support 64-bit vectors for N > 1. 09139 if (!VT.is64BitVector()) 09140 return false; 09141 09142 // Check if the VDUPLANE operand is a vldN-dup intrinsic. 09143 SDNode *VLD = N->getOperand(0).getNode(); 09144 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) 09145 return false; 09146 unsigned NumVecs = 0; 09147 unsigned NewOpc = 0; 09148 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); 09149 if (IntNo == Intrinsic::arm_neon_vld2lane) { 09150 NumVecs = 2; 09151 NewOpc = ARMISD::VLD2DUP; 09152 } else if (IntNo == Intrinsic::arm_neon_vld3lane) { 09153 NumVecs = 3; 09154 NewOpc = ARMISD::VLD3DUP; 09155 } else if (IntNo == Intrinsic::arm_neon_vld4lane) { 09156 NumVecs = 4; 09157 NewOpc = ARMISD::VLD4DUP; 09158 } else { 09159 return false; 09160 } 09161 09162 // First check that all the vldN-lane uses are VDUPLANEs and that the lane 09163 // numbers match the load. 
09164 unsigned VLDLaneNo = 09165 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); 09166 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 09167 UI != UE; ++UI) { 09168 // Ignore uses of the chain result. 09169 if (UI.getUse().getResNo() == NumVecs) 09170 continue; 09171 SDNode *User = *UI; 09172 if (User->getOpcode() != ARMISD::VDUPLANE || 09173 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) 09174 return false; 09175 } 09176 09177 // Create the vldN-dup node. 09178 EVT Tys[5]; 09179 unsigned n; 09180 for (n = 0; n < NumVecs; ++n) 09181 Tys[n] = VT; 09182 Tys[n] = MVT::Other; 09183 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1)); 09184 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; 09185 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); 09186 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, 09187 Ops, VLDMemInt->getMemoryVT(), 09188 VLDMemInt->getMemOperand()); 09189 09190 // Update the uses. 09191 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 09192 UI != UE; ++UI) { 09193 unsigned ResNo = UI.getUse().getResNo(); 09194 // Ignore uses of the chain result. 09195 if (ResNo == NumVecs) 09196 continue; 09197 SDNode *User = *UI; 09198 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); 09199 } 09200 09201 // Now the vldN-lane intrinsic is dead except for its chain result. 09202 // Update uses of the chain. 09203 std::vector<SDValue> VLDDupResults; 09204 for (unsigned n = 0; n < NumVecs; ++n) 09205 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); 09206 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); 09207 DCI.CombineTo(VLD, VLDDupResults); 09208 09209 return true; 09210 } 09211 09212 /// PerformVDUPLANECombine - Target-specific dag combine xforms for 09213 /// ARMISD::VDUPLANE. 09214 static SDValue PerformVDUPLANECombine(SDNode *N, 09215 TargetLowering::DAGCombinerInfo &DCI) { 09216 SDValue Op = N->getOperand(0); 09217 09218 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses 09219 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. 09220 if (CombineVLDDUP(N, DCI)) 09221 return SDValue(N, 0); 09222 09223 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 09224 // redundant. Ignore bit_converts for now; element sizes are checked below. 09225 while (Op.getOpcode() == ISD::BITCAST) 09226 Op = Op.getOperand(0); 09227 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 09228 return SDValue(); 09229 09230 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 09231 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 09232 // The canonical VMOV for a zero vector uses a 32-bit element size. 09233 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 09234 unsigned EltBits; 09235 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 09236 EltSize = 8; 09237 EVT VT = N->getValueType(0); 09238 if (EltSize > VT.getVectorElementType().getSizeInBits()) 09239 return SDValue(); 09240 09241 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); 09242 } 09243 09244 // isConstVecPow2 - Return true if each vector element is a power of 2, all 09245 // elements are the same constant, C, and Log2(C) ranges from 1 to 32. 
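// For example, the splat <float 8.0, float 8.0, float 8.0, float 8.0>
// satisfies this: every element converts exactly to the integer 8, a power
// of two, so C is set to 8 and the callers below use Log2_64(C) == 3 as the
// fixed-point scale (the "#3" in the vcvt examples).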
09246 static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) 09247 { 09248 integerPart cN; 09249 integerPart c0 = 0; 09250 for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); 09251 I != E; I++) { 09252 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I)); 09253 if (!C) 09254 return false; 09255 09256 bool isExact; 09257 APFloat APF = C->getValueAPF(); 09258 if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) 09259 != APFloat::opOK || !isExact) 09260 return false; 09261 09262 c0 = (I == 0) ? cN : c0; 09263 if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) 09264 return false; 09265 } 09266 C = c0; 09267 return true; 09268 } 09269 09270 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) 09271 /// can replace combinations of VMUL and VCVT (floating-point to integer) 09272 /// when the VMUL has a constant operand that is a power of 2. 09273 /// 09274 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 09275 /// vmul.f32 d16, d17, d16 09276 /// vcvt.s32.f32 d16, d16 09277 /// becomes: 09278 /// vcvt.s32.f32 d16, d16, #3 09279 static SDValue PerformVCVTCombine(SDNode *N, 09280 TargetLowering::DAGCombinerInfo &DCI, 09281 const ARMSubtarget *Subtarget) { 09282 SelectionDAG &DAG = DCI.DAG; 09283 SDValue Op = N->getOperand(0); 09284 09285 if (!Subtarget->hasNEON() || !Op.getValueType().isVector() || 09286 Op.getOpcode() != ISD::FMUL) 09287 return SDValue(); 09288 09289 uint64_t C; 09290 SDValue N0 = Op->getOperand(0); 09291 SDValue ConstVec = Op->getOperand(1); 09292 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; 09293 09294 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 09295 !isConstVecPow2(ConstVec, isSigned, C)) 09296 return SDValue(); 09297 09298 MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); 09299 MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); 09300 if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { 09301 // These instructions only exist converting from f32 to i32. We can handle 09302 // smaller integers by generating an extra truncate, but larger ones would 09303 // be lossy. 09304 return SDValue(); 09305 } 09306 09307 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : 09308 Intrinsic::arm_neon_vcvtfp2fxu; 09309 unsigned NumLanes = Op.getValueType().getVectorNumElements(); 09310 SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), 09311 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, 09312 DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, 09313 DAG.getConstant(Log2_64(C), MVT::i32)); 09314 09315 if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) 09316 FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv); 09317 09318 return FixConv; 09319 } 09320 09321 /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) 09322 /// can replace combinations of VCVT (integer to floating-point) and VDIV 09323 /// when the VDIV has a constant operand that is a power of 2. 
09324 ///
09325 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
09326 /// vcvt.f32.s32 d16, d16
09327 /// vdiv.f32 d16, d17, d16
09328 /// becomes:
09329 /// vcvt.f32.s32 d16, d16, #3
09330 static SDValue PerformVDIVCombine(SDNode *N,
09331 TargetLowering::DAGCombinerInfo &DCI,
09332 const ARMSubtarget *Subtarget) {
09333 SelectionDAG &DAG = DCI.DAG;
09334 SDValue Op = N->getOperand(0);
09335 unsigned OpOpcode = Op.getNode()->getOpcode();
09336 
09337 if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
09338 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
09339 return SDValue();
09340 
09341 uint64_t C;
09342 SDValue ConstVec = N->getOperand(1);
09343 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
09344 
09345 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
09346 !isConstVecPow2(ConstVec, isSigned, C))
09347 return SDValue();
09348 
09349 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
09350 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
09351 if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
09352 // These instructions only exist converting from i32 to f32. We can handle
09353 // smaller integers by generating an extra extend, but larger ones would
09354 // be lossy.
09355 return SDValue();
09356 }
09357 
09358 SDValue ConvInput = Op.getOperand(0);
09359 unsigned NumLanes = Op.getValueType().getVectorNumElements();
09360 if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
09361 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
09362 SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
09363 ConvInput);
09364 
09365 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
09366 Intrinsic::arm_neon_vcvtfxu2fp;
09367 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
09368 Op.getValueType(),
09369 DAG.getConstant(IntrinsicOpcode, MVT::i32),
09370 ConvInput, DAG.getConstant(Log2_64(C), MVT::i32));
09371 }
09372 
09373 /// getVShiftImm - Check if this is a valid build_vector for the immediate
09374 /// operand of a vector shift operation, where all the elements of the
09375 /// build_vector must have the same constant integer value.
09376 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
09377 // Ignore bit_converts.
09378 while (Op.getOpcode() == ISD::BITCAST)
09379 Op = Op.getOperand(0);
09380 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
09381 APInt SplatBits, SplatUndef;
09382 unsigned SplatBitSize;
09383 bool HasAnyUndefs;
09384 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
09385 HasAnyUndefs, ElementBits) ||
09386 SplatBitSize > ElementBits)
09387 return false;
09388 Cnt = SplatBits.getSExtValue();
09389 return true;
09390 }
09391 
09392 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
09393 /// operand of a vector shift left operation. That value must be in the range:
09394 /// 0 <= Value < ElementBits for a left shift; or
09395 /// 0 <= Value <= ElementBits for a long left shift.
09396 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
09397 assert(VT.isVector() && "vector shift count is not a vector type");
09398 unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
09399 if (! getVShiftImm(Op, ElementBits, Cnt))
09400 return false;
09401 return (Cnt >= 0 && (isLong ?
Cnt-1 : Cnt) < ElementBits); 09402 } 09403 09404 /// isVShiftRImm - Check if this is a valid build_vector for the immediate 09405 /// operand of a vector shift right operation. For a shift opcode, the value 09406 /// is positive, but for an intrinsic the value count must be negative. The 09407 /// absolute value must be in the range: 09408 /// 1 <= |Value| <= ElementBits for a right shift; or 09409 /// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 09410 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 09411 int64_t &Cnt) { 09412 assert(VT.isVector() && "vector shift count is not a vector type"); 09413 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 09414 if (! getVShiftImm(Op, ElementBits, Cnt)) 09415 return false; 09416 if (isIntrinsic) 09417 Cnt = -Cnt; 09418 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 09419 } 09420 09421 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 09422 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 09423 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 09424 switch (IntNo) { 09425 default: 09426 // Don't do anything for most intrinsics. 09427 break; 09428 09429 // Vector shifts: check for immediate versions and lower them. 09430 // Note: This is done during DAG combining instead of DAG legalizing because 09431 // the build_vectors for 64-bit vector element shift counts are generally 09432 // not legal, and it is hard to see their values after they get legalized to 09433 // loads from a constant pool. 09434 case Intrinsic::arm_neon_vshifts: 09435 case Intrinsic::arm_neon_vshiftu: 09436 case Intrinsic::arm_neon_vrshifts: 09437 case Intrinsic::arm_neon_vrshiftu: 09438 case Intrinsic::arm_neon_vrshiftn: 09439 case Intrinsic::arm_neon_vqshifts: 09440 case Intrinsic::arm_neon_vqshiftu: 09441 case Intrinsic::arm_neon_vqshiftsu: 09442 case Intrinsic::arm_neon_vqshiftns: 09443 case Intrinsic::arm_neon_vqshiftnu: 09444 case Intrinsic::arm_neon_vqshiftnsu: 09445 case Intrinsic::arm_neon_vqrshiftns: 09446 case Intrinsic::arm_neon_vqrshiftnu: 09447 case Intrinsic::arm_neon_vqrshiftnsu: { 09448 EVT VT = N->getOperand(1).getValueType(); 09449 int64_t Cnt; 09450 unsigned VShiftOpc = 0; 09451 09452 switch (IntNo) { 09453 case Intrinsic::arm_neon_vshifts: 09454 case Intrinsic::arm_neon_vshiftu: 09455 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 09456 VShiftOpc = ARMISD::VSHL; 09457 break; 09458 } 09459 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 09460 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 
09461 ARMISD::VSHRs : ARMISD::VSHRu); 09462 break; 09463 } 09464 return SDValue(); 09465 09466 case Intrinsic::arm_neon_vrshifts: 09467 case Intrinsic::arm_neon_vrshiftu: 09468 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 09469 break; 09470 return SDValue(); 09471 09472 case Intrinsic::arm_neon_vqshifts: 09473 case Intrinsic::arm_neon_vqshiftu: 09474 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 09475 break; 09476 return SDValue(); 09477 09478 case Intrinsic::arm_neon_vqshiftsu: 09479 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 09480 break; 09481 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 09482 09483 case Intrinsic::arm_neon_vrshiftn: 09484 case Intrinsic::arm_neon_vqshiftns: 09485 case Intrinsic::arm_neon_vqshiftnu: 09486 case Intrinsic::arm_neon_vqshiftnsu: 09487 case Intrinsic::arm_neon_vqrshiftns: 09488 case Intrinsic::arm_neon_vqrshiftnu: 09489 case Intrinsic::arm_neon_vqrshiftnsu: 09490 // Narrowing shifts require an immediate right shift. 09491 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 09492 break; 09493 llvm_unreachable("invalid shift count for narrowing vector shift " 09494 "intrinsic"); 09495 09496 default: 09497 llvm_unreachable("unhandled vector shift"); 09498 } 09499 09500 switch (IntNo) { 09501 case Intrinsic::arm_neon_vshifts: 09502 case Intrinsic::arm_neon_vshiftu: 09503 // Opcode already set above. 09504 break; 09505 case Intrinsic::arm_neon_vrshifts: 09506 VShiftOpc = ARMISD::VRSHRs; break; 09507 case Intrinsic::arm_neon_vrshiftu: 09508 VShiftOpc = ARMISD::VRSHRu; break; 09509 case Intrinsic::arm_neon_vrshiftn: 09510 VShiftOpc = ARMISD::VRSHRN; break; 09511 case Intrinsic::arm_neon_vqshifts: 09512 VShiftOpc = ARMISD::VQSHLs; break; 09513 case Intrinsic::arm_neon_vqshiftu: 09514 VShiftOpc = ARMISD::VQSHLu; break; 09515 case Intrinsic::arm_neon_vqshiftsu: 09516 VShiftOpc = ARMISD::VQSHLsu; break; 09517 case Intrinsic::arm_neon_vqshiftns: 09518 VShiftOpc = ARMISD::VQSHRNs; break; 09519 case Intrinsic::arm_neon_vqshiftnu: 09520 VShiftOpc = ARMISD::VQSHRNu; break; 09521 case Intrinsic::arm_neon_vqshiftnsu: 09522 VShiftOpc = ARMISD::VQSHRNsu; break; 09523 case Intrinsic::arm_neon_vqrshiftns: 09524 VShiftOpc = ARMISD::VQRSHRNs; break; 09525 case Intrinsic::arm_neon_vqrshiftnu: 09526 VShiftOpc = ARMISD::VQRSHRNu; break; 09527 case Intrinsic::arm_neon_vqrshiftnsu: 09528 VShiftOpc = ARMISD::VQRSHRNsu; break; 09529 } 09530 09531 return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), 09532 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 09533 } 09534 09535 case Intrinsic::arm_neon_vshiftins: { 09536 EVT VT = N->getOperand(1).getValueType(); 09537 int64_t Cnt; 09538 unsigned VShiftOpc = 0; 09539 09540 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 09541 VShiftOpc = ARMISD::VSLI; 09542 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 09543 VShiftOpc = ARMISD::VSRI; 09544 else { 09545 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 09546 } 09547 09548 return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), 09549 N->getOperand(1), N->getOperand(2), 09550 DAG.getConstant(Cnt, MVT::i32)); 09551 } 09552 09553 case Intrinsic::arm_neon_vqrshifts: 09554 case Intrinsic::arm_neon_vqrshiftu: 09555 // No immediate versions of these to check for. 09556 break; 09557 } 09558 09559 return SDValue(); 09560 } 09561 09562 /// PerformShiftCombine - Checks for immediate versions of vector shifts and 09563 /// lowers them. 
As with the vector shift intrinsics, this is done during DAG 09564 /// combining instead of DAG legalizing because the build_vectors for 64-bit 09565 /// vector element shift counts are generally not legal, and it is hard to see 09566 /// their values after they get legalized to loads from a constant pool. 09567 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 09568 const ARMSubtarget *ST) { 09569 EVT VT = N->getValueType(0); 09570 if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { 09571 // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 09572 // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. 09573 SDValue N1 = N->getOperand(1); 09574 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 09575 SDValue N0 = N->getOperand(0); 09576 if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && 09577 DAG.MaskedValueIsZero(N0.getOperand(0), 09578 APInt::getHighBitsSet(32, 16))) 09579 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1); 09580 } 09581 } 09582 09583 // Nothing to be done for scalar shifts. 09584 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 09585 if (!VT.isVector() || !TLI.isTypeLegal(VT)) 09586 return SDValue(); 09587 09588 assert(ST->hasNEON() && "unexpected vector shift"); 09589 int64_t Cnt; 09590 09591 switch (N->getOpcode()) { 09592 default: llvm_unreachable("unexpected shift opcode"); 09593 09594 case ISD::SHL: 09595 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 09596 return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0), 09597 DAG.getConstant(Cnt, MVT::i32)); 09598 break; 09599 09600 case ISD::SRA: 09601 case ISD::SRL: 09602 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 09603 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 09604 ARMISD::VSHRs : ARMISD::VSHRu); 09605 return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0), 09606 DAG.getConstant(Cnt, MVT::i32)); 09607 } 09608 } 09609 return SDValue(); 09610 } 09611 09612 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 09613 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 09614 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 09615 const ARMSubtarget *ST) { 09616 SDValue N0 = N->getOperand(0); 09617 09618 // Check for sign- and zero-extensions of vector extract operations of 8- 09619 // and 16-bit vector elements. NEON supports these directly. They are 09620 // handled during DAG combining because type legalization will promote them 09621 // to 32-bit types and it is messy to recognize the operations after that. 
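// For example, (sext (extract_vector_elt (v4i16 vector), lane)) to i32
// becomes a single VGETLANEs node, which selects to something like
// "vmov.s16 r0, d0[lane]" rather than an extract followed by a separate
// sign extension.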
09622 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 09623 SDValue Vec = N0.getOperand(0); 09624 SDValue Lane = N0.getOperand(1); 09625 EVT VT = N->getValueType(0); 09626 EVT EltVT = N0.getValueType(); 09627 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 09628 09629 if (VT == MVT::i32 && 09630 (EltVT == MVT::i8 || EltVT == MVT::i16) && 09631 TLI.isTypeLegal(Vec.getValueType()) && 09632 isa<ConstantSDNode>(Lane)) { 09633 09634 unsigned Opc = 0; 09635 switch (N->getOpcode()) { 09636 default: llvm_unreachable("unexpected opcode"); 09637 case ISD::SIGN_EXTEND: 09638 Opc = ARMISD::VGETLANEs; 09639 break; 09640 case ISD::ZERO_EXTEND: 09641 case ISD::ANY_EXTEND: 09642 Opc = ARMISD::VGETLANEu; 09643 break; 09644 } 09645 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane); 09646 } 09647 } 09648 09649 return SDValue(); 09650 } 09651 09652 /// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 09653 /// to match f32 max/min patterns to use NEON vmax/vmin instructions. 09654 static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 09655 const ARMSubtarget *ST) { 09656 // If the target supports NEON, try to use vmax/vmin instructions for f32 09657 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 09658 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 09659 // a NaN; only do the transformation when it matches that behavior. 09660 09661 // For now only do this when using NEON for FP operations; if using VFP, it 09662 // is not obvious that the benefit outweighs the cost of switching to the 09663 // NEON pipeline. 09664 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 09665 N->getValueType(0) != MVT::f32) 09666 return SDValue(); 09667 09668 SDValue CondLHS = N->getOperand(0); 09669 SDValue CondRHS = N->getOperand(1); 09670 SDValue LHS = N->getOperand(2); 09671 SDValue RHS = N->getOperand(3); 09672 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 09673 09674 unsigned Opcode = 0; 09675 bool IsReversed; 09676 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 09677 IsReversed = false; // x CC y ? x : y 09678 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 09679 IsReversed = true ; // x CC y ? y : x 09680 } else { 09681 return SDValue(); 09682 } 09683 09684 bool IsUnordered; 09685 switch (CC) { 09686 default: break; 09687 case ISD::SETOLT: 09688 case ISD::SETOLE: 09689 case ISD::SETLT: 09690 case ISD::SETLE: 09691 case ISD::SETULT: 09692 case ISD::SETULE: 09693 // If LHS is NaN, an ordered comparison will be false and the result will 09694 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 09695 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 09696 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 09697 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 09698 break; 09699 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 09700 // will return -0, so vmin can only be used for unsafe math or if one of 09701 // the operands is known to be nonzero. 09702 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 09703 !DAG.getTarget().Options.UnsafeFPMath && 09704 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 09705 break; 09706 Opcode = IsReversed ? 
ARMISD::FMAX : ARMISD::FMIN; 09707 break; 09708 09709 case ISD::SETOGT: 09710 case ISD::SETOGE: 09711 case ISD::SETGT: 09712 case ISD::SETGE: 09713 case ISD::SETUGT: 09714 case ISD::SETUGE: 09715 // If LHS is NaN, an ordered comparison will be false and the result will 09716 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 09717 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 09718 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 09719 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 09720 break; 09721 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 09722 // will return +0, so vmax can only be used for unsafe math or if one of 09723 // the operands is known to be nonzero. 09724 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 09725 !DAG.getTarget().Options.UnsafeFPMath && 09726 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 09727 break; 09728 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 09729 break; 09730 } 09731 09732 if (!Opcode) 09733 return SDValue(); 09734 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); 09735 } 09736 09737 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. 09738 SDValue 09739 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { 09740 SDValue Cmp = N->getOperand(4); 09741 if (Cmp.getOpcode() != ARMISD::CMPZ) 09742 // Only looking at EQ and NE cases. 09743 return SDValue(); 09744 09745 EVT VT = N->getValueType(0); 09746 SDLoc dl(N); 09747 SDValue LHS = Cmp.getOperand(0); 09748 SDValue RHS = Cmp.getOperand(1); 09749 SDValue FalseVal = N->getOperand(0); 09750 SDValue TrueVal = N->getOperand(1); 09751 SDValue ARMcc = N->getOperand(2); 09752 ARMCC::CondCodes CC = 09753 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 09754 09755 // Simplify 09756 // mov r1, r0 09757 // cmp r1, x 09758 // mov r0, y 09759 // moveq r0, x 09760 // to 09761 // cmp r0, x 09762 // movne r0, y 09763 // 09764 // mov r1, r0 09765 // cmp r1, x 09766 // mov r0, x 09767 // movne r0, y 09768 // to 09769 // cmp r0, x 09770 // movne r0, y 09771 /// FIXME: Turn this into a target neutral optimization? 09772 SDValue Res; 09773 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { 09774 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, 09775 N->getOperand(3), Cmp); 09776 } else if (CC == ARMCC::EQ && TrueVal == RHS) { 09777 SDValue ARMcc; 09778 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); 09779 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, 09780 N->getOperand(3), NewCmp); 09781 } 09782 09783 if (Res.getNode()) { 09784 APInt KnownZero, KnownOne; 09785 DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); 09786 // Capture demanded bits information that would be otherwise lost. 
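// If every bit except the low one (or the low 8/16 bits) is known to be
// zero, the CMOV result is really a zero-extended i1/i8/i16; an AssertZext
// node records that fact so later combines can still rely on it after this
// rewrite.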
09787 if (KnownZero == 0xfffffffe) 09788 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 09789 DAG.getValueType(MVT::i1)); 09790 else if (KnownZero == 0xffffff00) 09791 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 09792 DAG.getValueType(MVT::i8)); 09793 else if (KnownZero == 0xffff0000) 09794 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 09795 DAG.getValueType(MVT::i16)); 09796 } 09797 09798 return Res; 09799 } 09800 09801 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 09802 DAGCombinerInfo &DCI) const { 09803 switch (N->getOpcode()) { 09804 default: break; 09805 case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); 09806 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); 09807 case ISD::SUB: return PerformSUBCombine(N, DCI); 09808 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 09809 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 09810 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); 09811 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); 09812 case ARMISD::BFI: return PerformBFICombine(N, DCI); 09813 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); 09814 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); 09815 case ISD::STORE: return PerformSTORECombine(N, DCI); 09816 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget); 09817 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); 09818 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); 09819 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 09820 case ISD::FP_TO_SINT: 09821 case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); 09822 case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); 09823 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 09824 case ISD::SHL: 09825 case ISD::SRA: 09826 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 09827 case ISD::SIGN_EXTEND: 09828 case ISD::ZERO_EXTEND: 09829 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 09830 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 09831 case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); 09832 case ARMISD::VLD2DUP: 09833 case ARMISD::VLD3DUP: 09834 case ARMISD::VLD4DUP: 09835 return CombineBaseUpdate(N, DCI); 09836 case ARMISD::BUILD_VECTOR: 09837 return PerformARMBUILD_VECTORCombine(N, DCI); 09838 case ISD::INTRINSIC_VOID: 09839 case ISD::INTRINSIC_W_CHAIN: 09840 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 09841 case Intrinsic::arm_neon_vld1: 09842 case Intrinsic::arm_neon_vld2: 09843 case Intrinsic::arm_neon_vld3: 09844 case Intrinsic::arm_neon_vld4: 09845 case Intrinsic::arm_neon_vld2lane: 09846 case Intrinsic::arm_neon_vld3lane: 09847 case Intrinsic::arm_neon_vld4lane: 09848 case Intrinsic::arm_neon_vst1: 09849 case Intrinsic::arm_neon_vst2: 09850 case Intrinsic::arm_neon_vst3: 09851 case Intrinsic::arm_neon_vst4: 09852 case Intrinsic::arm_neon_vst2lane: 09853 case Intrinsic::arm_neon_vst3lane: 09854 case Intrinsic::arm_neon_vst4lane: 09855 return CombineBaseUpdate(N, DCI); 09856 default: break; 09857 } 09858 break; 09859 } 09860 return SDValue(); 09861 } 09862 09863 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, 09864 EVT VT) const { 09865 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); 09866 } 09867 09868 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 09869 
unsigned,
09870 unsigned,
09871 bool *Fast) const {
09872 // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs
09873 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
09874 
09875 switch (VT.getSimpleVT().SimpleTy) {
09876 default:
09877 return false;
09878 case MVT::i8:
09879 case MVT::i16:
09880 case MVT::i32: {
09881 // Unaligned access can use (for example) LDRB, LDRH, LDR
09882 if (AllowsUnaligned) {
09883 if (Fast)
09884 *Fast = Subtarget->hasV7Ops();
09885 return true;
09886 }
09887 return false;
09888 }
09889 case MVT::f64:
09890 case MVT::v2f64: {
09891 // For any little-endian targets with NEON, we can support unaligned ld/st
09892 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
09893 // A big-endian target may also explicitly support unaligned accesses.
09894 if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
09895 if (Fast)
09896 *Fast = true;
09897 return true;
09898 }
09899 return false;
09900 }
09901 }
09902 }
09903 
09904 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
09905 unsigned AlignCheck) {
09906 return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
09907 (DstAlign == 0 || DstAlign % AlignCheck == 0));
09908 }
09909 
09910 EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
09911 unsigned DstAlign, unsigned SrcAlign,
09912 bool IsMemset, bool ZeroMemset,
09913 bool MemcpyStrSrc,
09914 MachineFunction &MF) const {
09915 const Function *F = MF.getFunction();
09916 
09917 // See if we can use NEON instructions for this...
09918 if ((!IsMemset || ZeroMemset) &&
09919 Subtarget->hasNEON() &&
09920 !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
09921 Attribute::NoImplicitFloat)) {
09922 bool Fast;
09923 if (Size >= 16 &&
09924 (memOpAlign(SrcAlign, DstAlign, 16) ||
09925 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
09926 return MVT::v2f64;
09927 } else if (Size >= 8 &&
09928 (memOpAlign(SrcAlign, DstAlign, 8) ||
09929 (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
09930 Fast))) {
09931 return MVT::f64;
09932 }
09933 }
09934 
09935 // Lowering to i32/i16 if the size permits.
09936 if (Size >= 4)
09937 return MVT::i32;
09938 else if (Size >= 2)
09939 return MVT::i16;
09940 
09941 // Let the target-independent logic figure it out.
09942 return MVT::Other;
09943 }
09944 
09945 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
09946 if (Val.getOpcode() != ISD::LOAD)
09947 return false;
09948 
09949 EVT VT1 = Val.getValueType();
09950 if (!VT1.isSimple() || !VT1.isInteger() ||
09951 !VT2.isSimple() || !VT2.isInteger())
09952 return false;
09953 
09954 switch (VT1.getSimpleVT().SimpleTy) {
09955 default: break;
09956 case MVT::i1:
09957 case MVT::i8:
09958 case MVT::i16:
09959 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
09960 return true;
09961 }
09962 
09963 return false;
09964 }
09965 
09966 bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
09967 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
09968 return false;
09969 
09970 if (!isTypeLegal(EVT::getEVT(Ty1)))
09971 return false;
09972 
09973 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
09974 
09975 // Assuming the caller doesn't have a zeroext or signext return parameter,
09976 // truncation all the way down to i1 is valid.
09977 return true; 09978 } 09979 09980 09981 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 09982 if (V < 0) 09983 return false; 09984 09985 unsigned Scale = 1; 09986 switch (VT.getSimpleVT().SimpleTy) { 09987 default: return false; 09988 case MVT::i1: 09989 case MVT::i8: 09990 // Scale == 1; 09991 break; 09992 case MVT::i16: 09993 // Scale == 2; 09994 Scale = 2; 09995 break; 09996 case MVT::i32: 09997 // Scale == 4; 09998 Scale = 4; 09999 break; 10000 } 10001 10002 if ((V & (Scale - 1)) != 0) 10003 return false; 10004 V /= Scale; 10005 return V == (V & ((1LL << 5) - 1)); 10006 } 10007 10008 static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 10009 const ARMSubtarget *Subtarget) { 10010 bool isNeg = false; 10011 if (V < 0) { 10012 isNeg = true; 10013 V = - V; 10014 } 10015 10016 switch (VT.getSimpleVT().SimpleTy) { 10017 default: return false; 10018 case MVT::i1: 10019 case MVT::i8: 10020 case MVT::i16: 10021 case MVT::i32: 10022 // + imm12 or - imm8 10023 if (isNeg) 10024 return V == (V & ((1LL << 8) - 1)); 10025 return V == (V & ((1LL << 12) - 1)); 10026 case MVT::f32: 10027 case MVT::f64: 10028 // Same as ARM mode. FIXME: NEON? 10029 if (!Subtarget->hasVFP2()) 10030 return false; 10031 if ((V & 3) != 0) 10032 return false; 10033 V >>= 2; 10034 return V == (V & ((1LL << 8) - 1)); 10035 } 10036 } 10037 10038 /// isLegalAddressImmediate - Return true if the integer value can be used 10039 /// as the offset of the target addressing mode for load / store of the 10040 /// given type. 10041 static bool isLegalAddressImmediate(int64_t V, EVT VT, 10042 const ARMSubtarget *Subtarget) { 10043 if (V == 0) 10044 return true; 10045 10046 if (!VT.isSimple()) 10047 return false; 10048 10049 if (Subtarget->isThumb1Only()) 10050 return isLegalT1AddressImmediate(V, VT); 10051 else if (Subtarget->isThumb2()) 10052 return isLegalT2AddressImmediate(V, VT, Subtarget); 10053 10054 // ARM mode. 10055 if (V < 0) 10056 V = - V; 10057 switch (VT.getSimpleVT().SimpleTy) { 10058 default: return false; 10059 case MVT::i1: 10060 case MVT::i8: 10061 case MVT::i32: 10062 // +- imm12 10063 return V == (V & ((1LL << 12) - 1)); 10064 case MVT::i16: 10065 // +- imm8 10066 return V == (V & ((1LL << 8) - 1)); 10067 case MVT::f32: 10068 case MVT::f64: 10069 if (!Subtarget->hasVFP2()) // FIXME: NEON? 10070 return false; 10071 if ((V & 3) != 0) 10072 return false; 10073 V >>= 2; 10074 return V == (V & ((1LL << 8) - 1)); 10075 } 10076 } 10077 10078 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 10079 EVT VT) const { 10080 int Scale = AM.Scale; 10081 if (Scale < 0) 10082 return false; 10083 10084 switch (VT.getSimpleVT().SimpleTy) { 10085 default: return false; 10086 case MVT::i1: 10087 case MVT::i8: 10088 case MVT::i16: 10089 case MVT::i32: 10090 if (Scale == 1) 10091 return true; 10092 // r + r << imm 10093 Scale = Scale & ~1; 10094 return Scale == 2 || Scale == 4 || Scale == 8; 10095 case MVT::i64: 10096 // r + r 10097 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 10098 return true; 10099 return false; 10100 case MVT::isVoid: 10101 // Note, we allow "void" uses (basically, uses that aren't loads or 10102 // stores), because arm allows folding a scale into many arithmetic 10103 // operations. This should be made more precise and revisited later. 10104 10105 // Allow r << imm, but the imm has to be a multiple of two. 
10106 if (Scale & 1) return false; 10107 return isPowerOf2_32(Scale); 10108 } 10109 } 10110 10111 /// isLegalAddressingMode - Return true if the addressing mode represented 10112 /// by AM is legal for this target, for a load/store of the specified type. 10113 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 10114 Type *Ty) const { 10115 EVT VT = getValueType(Ty, true); 10116 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 10117 return false; 10118 10119 // Can never fold addr of global into load/store. 10120 if (AM.BaseGV) 10121 return false; 10122 10123 switch (AM.Scale) { 10124 case 0: // no scale reg, must be "r+i" or "r", or "i". 10125 break; 10126 case 1: 10127 if (Subtarget->isThumb1Only()) 10128 return false; 10129 // FALL THROUGH. 10130 default: 10131 // ARM doesn't support any R+R*scale+imm addr modes. 10132 if (AM.BaseOffs) 10133 return false; 10134 10135 if (!VT.isSimple()) 10136 return false; 10137 10138 if (Subtarget->isThumb2()) 10139 return isLegalT2ScaledAddressingMode(AM, VT); 10140 10141 int Scale = AM.Scale; 10142 switch (VT.getSimpleVT().SimpleTy) { 10143 default: return false; 10144 case MVT::i1: 10145 case MVT::i8: 10146 case MVT::i32: 10147 if (Scale < 0) Scale = -Scale; 10148 if (Scale == 1) 10149 return true; 10150 // r + r << imm 10151 return isPowerOf2_32(Scale & ~1); 10152 case MVT::i16: 10153 case MVT::i64: 10154 // r + r 10155 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 10156 return true; 10157 return false; 10158 10159 case MVT::isVoid: 10160 // Note, we allow "void" uses (basically, uses that aren't loads or 10161 // stores), because arm allows folding a scale into many arithmetic 10162 // operations. This should be made more precise and revisited later. 10163 10164 // Allow r << imm, but the imm has to be a multiple of two. 10165 if (Scale & 1) return false; 10166 return isPowerOf2_32(Scale); 10167 } 10168 } 10169 return true; 10170 } 10171 10172 /// isLegalICmpImmediate - Return true if the specified immediate is legal 10173 /// icmp immediate, that is the target has icmp instructions which can compare 10174 /// a register against the immediate without having to materialize the 10175 /// immediate into a register. 10176 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 10177 // Thumb2 and ARM modes can use cmn for negative immediates. 10178 if (!Subtarget->isThumb()) 10179 return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1; 10180 if (Subtarget->isThumb2()) 10181 return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1; 10182 // Thumb1 doesn't have cmn, and only 8-bit immediates. 10183 return Imm >= 0 && Imm <= 255; 10184 } 10185 10186 /// isLegalAddImmediate - Return true if the specified immediate is a legal add 10187 /// *or sub* immediate, that is the target has add or sub instructions which can 10188 /// add a register with the immediate without having to materialize the 10189 /// immediate into a register. 10190 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { 10191 // Same encoding for add/sub, just flip the sign. 10192 int64_t AbsImm = llvm::abs64(Imm); 10193 if (!Subtarget->isThumb()) 10194 return ARM_AM::getSOImmVal(AbsImm) != -1; 10195 if (Subtarget->isThumb2()) 10196 return ARM_AM::getT2SOImmVal(AbsImm) != -1; 10197 // Thumb1 only has 8-bit unsigned immediate. 
10198 return AbsImm >= 0 && AbsImm <= 255; 10199 } 10200 10201 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 10202 bool isSEXTLoad, SDValue &Base, 10203 SDValue &Offset, bool &isInc, 10204 SelectionDAG &DAG) { 10205 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 10206 return false; 10207 10208 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 10209 // AddressingMode 3 10210 Base = Ptr->getOperand(0); 10211 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 10212 int RHSC = (int)RHS->getZExtValue(); 10213 if (RHSC < 0 && RHSC > -256) { 10214 assert(Ptr->getOpcode() == ISD::ADD); 10215 isInc = false; 10216 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 10217 return true; 10218 } 10219 } 10220 isInc = (Ptr->getOpcode() == ISD::ADD); 10221 Offset = Ptr->getOperand(1); 10222 return true; 10223 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 10224 // AddressingMode 2 10225 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 10226 int RHSC = (int)RHS->getZExtValue(); 10227 if (RHSC < 0 && RHSC > -0x1000) { 10228 assert(Ptr->getOpcode() == ISD::ADD); 10229 isInc = false; 10230 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 10231 Base = Ptr->getOperand(0); 10232 return true; 10233 } 10234 } 10235 10236 if (Ptr->getOpcode() == ISD::ADD) { 10237 isInc = true; 10238 ARM_AM::ShiftOpc ShOpcVal= 10239 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); 10240 if (ShOpcVal != ARM_AM::no_shift) { 10241 Base = Ptr->getOperand(1); 10242 Offset = Ptr->getOperand(0); 10243 } else { 10244 Base = Ptr->getOperand(0); 10245 Offset = Ptr->getOperand(1); 10246 } 10247 return true; 10248 } 10249 10250 isInc = (Ptr->getOpcode() == ISD::ADD); 10251 Base = Ptr->getOperand(0); 10252 Offset = Ptr->getOperand(1); 10253 return true; 10254 } 10255 10256 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 10257 return false; 10258 } 10259 10260 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 10261 bool isSEXTLoad, SDValue &Base, 10262 SDValue &Offset, bool &isInc, 10263 SelectionDAG &DAG) { 10264 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 10265 return false; 10266 10267 Base = Ptr->getOperand(0); 10268 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 10269 int RHSC = (int)RHS->getZExtValue(); 10270 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 10271 assert(Ptr->getOpcode() == ISD::ADD); 10272 isInc = false; 10273 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 10274 return true; 10275 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 10276 isInc = Ptr->getOpcode() == ISD::ADD; 10277 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 10278 return true; 10279 } 10280 } 10281 10282 return false; 10283 } 10284 10285 /// getPreIndexedAddressParts - returns true by value, base pointer and 10286 /// offset pointer and addressing mode by reference if the node's address 10287 /// can be legally represented as pre-indexed load / store address. 
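/// For example, a store whose address is (add p, #4) can be selected as a
/// pre-indexed store with writeback (roughly "str r1, [r0, #4]!"), so the
/// incremented pointer is produced by the same instruction; the helpers above
/// check that the offset fits the ARM or Thumb2 addressing mode.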
10288 bool 10289 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 10290 SDValue &Offset, 10291 ISD::MemIndexedMode &AM, 10292 SelectionDAG &DAG) const { 10293 if (Subtarget->isThumb1Only()) 10294 return false; 10295 10296 EVT VT; 10297 SDValue Ptr; 10298 bool isSEXTLoad = false; 10299 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 10300 Ptr = LD->getBasePtr(); 10301 VT = LD->getMemoryVT(); 10302 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 10303 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 10304 Ptr = ST->getBasePtr(); 10305 VT = ST->getMemoryVT(); 10306 } else 10307 return false; 10308 10309 bool isInc; 10310 bool isLegal = false; 10311 if (Subtarget->isThumb2()) 10312 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 10313 Offset, isInc, DAG); 10314 else 10315 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 10316 Offset, isInc, DAG); 10317 if (!isLegal) 10318 return false; 10319 10320 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 10321 return true; 10322 } 10323 10324 /// getPostIndexedAddressParts - returns true by value, base pointer and 10325 /// offset pointer and addressing mode by reference if this node can be 10326 /// combined with a load / store to form a post-indexed load / store. 10327 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 10328 SDValue &Base, 10329 SDValue &Offset, 10330 ISD::MemIndexedMode &AM, 10331 SelectionDAG &DAG) const { 10332 if (Subtarget->isThumb1Only()) 10333 return false; 10334 10335 EVT VT; 10336 SDValue Ptr; 10337 bool isSEXTLoad = false; 10338 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 10339 VT = LD->getMemoryVT(); 10340 Ptr = LD->getBasePtr(); 10341 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 10342 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 10343 VT = ST->getMemoryVT(); 10344 Ptr = ST->getBasePtr(); 10345 } else 10346 return false; 10347 10348 bool isInc; 10349 bool isLegal = false; 10350 if (Subtarget->isThumb2()) 10351 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 10352 isInc, DAG); 10353 else 10354 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 10355 isInc, DAG); 10356 if (!isLegal) 10357 return false; 10358 10359 if (Ptr != Base) { 10360 // Swap base ptr and offset to catch more post-index load / store when 10361 // it's legal. In Thumb2 mode, offset must be an immediate. 10362 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 10363 !Subtarget->isThumb2()) 10364 std::swap(Base, Offset); 10365 10366 // Post-indexed load / store update the base pointer. 10367 if (Ptr != Base) 10368 return false; 10369 } 10370 10371 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 10372 return true; 10373 } 10374 10375 void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 10376 APInt &KnownZero, 10377 APInt &KnownOne, 10378 const SelectionDAG &DAG, 10379 unsigned Depth) const { 10380 unsigned BitWidth = KnownOne.getBitWidth(); 10381 KnownZero = KnownOne = APInt(BitWidth, 0); 10382 switch (Op.getOpcode()) { 10383 default: break; 10384 case ARMISD::ADDC: 10385 case ARMISD::ADDE: 10386 case ARMISD::SUBC: 10387 case ARMISD::SUBE: 10388 // These nodes' second result is a boolean 10389 if (Op.getResNo() == 0) 10390 break; 10391 KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); 10392 break; 10393 case ARMISD::CMOV: { 10394 // Bits are known zero/one if known on the LHS and RHS. 
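// Editorial example (not part of the upstream source): if one CMOV input is
// known to be 0x000000F0 and the other 0x000000F3, then after intersecting
// the two results below only the bits on which both operands agree remain
// known: bits 31..8 and 3..2 are known zero, bits 7..4 are known one, and
// bits 1..0 (which differ between the inputs) stay unknown.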
10395 DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); 10396 if (KnownZero == 0 && KnownOne == 0) return; 10397 10398 APInt KnownZeroRHS, KnownOneRHS; 10399 DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); 10400 KnownZero &= KnownZeroRHS; 10401 KnownOne &= KnownOneRHS; 10402 return; 10403 } 10404 case ISD::INTRINSIC_W_CHAIN: { 10405 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1)); 10406 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue()); 10407 switch (IntID) { 10408 default: return; 10409 case Intrinsic::arm_ldaex: 10410 case Intrinsic::arm_ldrex: { 10411 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); 10412 unsigned MemBits = VT.getScalarType().getSizeInBits(); 10413 KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); 10414 return; 10415 } 10416 } 10417 } 10418 } 10419 } 10420 10421 //===----------------------------------------------------------------------===// 10422 // ARM Inline Assembly Support 10423 //===----------------------------------------------------------------------===// 10424 10425 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { 10426 // Looking for "rev" which is V6+. 10427 if (!Subtarget->hasV6Ops()) 10428 return false; 10429 10430 InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); 10431 std::string AsmStr = IA->getAsmString(); 10432 SmallVector<StringRef, 4> AsmPieces; 10433 SplitString(AsmStr, AsmPieces, ";\n"); 10434 10435 switch (AsmPieces.size()) { 10436 default: return false; 10437 case 1: 10438 AsmStr = AsmPieces[0]; 10439 AsmPieces.clear(); 10440 SplitString(AsmStr, AsmPieces, " \t,"); 10441 10442 // rev $0, $1 10443 if (AsmPieces.size() == 3 && 10444 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && 10445 IA->getConstraintString().compare(0, 4, "=l,l") == 0) { 10446 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); 10447 if (Ty && Ty->getBitWidth() == 32) 10448 return IntrinsicLowering::LowerToByteSwap(CI); 10449 } 10450 break; 10451 } 10452 10453 return false; 10454 } 10455 10456 /// getConstraintType - Given a constraint letter, return the type of 10457 /// constraint it is for this target. 10458 ARMTargetLowering::ConstraintType 10459 ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 10460 if (Constraint.size() == 1) { 10461 switch (Constraint[0]) { 10462 default: break; 10463 case 'l': return C_RegisterClass; 10464 case 'w': return C_RegisterClass; 10465 case 'h': return C_RegisterClass; 10466 case 'x': return C_RegisterClass; 10467 case 't': return C_RegisterClass; 10468 case 'j': return C_Other; // Constant for movw. 10469 // An address with a single base register. Due to the way we 10470 // currently handle addresses it is the same as an 'r' memory constraint. 10471 case 'Q': return C_Memory; 10472 } 10473 } else if (Constraint.size() == 2) { 10474 switch (Constraint[0]) { 10475 default: break; 10476 // All 'U+' constraints are addresses. 10477 case 'U': return C_Memory; 10478 } 10479 } 10480 return TargetLowering::getConstraintType(Constraint); 10481 } 10482 10483 /// Examine constraint type and operand type and determine a weight value. 10484 /// This object must already have been set up with the operand type 10485 /// and the current alternative constraint selected. 
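// Editorial example (sketch, not part of the upstream source): with the
// classification above, IR such as
//
//   %r = call i32 asm "rev $0, $1", "=l,l"(i32 %x)
//
// is matched by ExpandInlineAsm() and rewritten to @llvm.bswap.i32 on v6+
// targets, while operands constrained with 'w', 'x' or 't' are allocated
// from the VFP/NEON register classes chosen further below in
// getRegForInlineAsmConstraint().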
10486 TargetLowering::ConstraintWeight 10487 ARMTargetLowering::getSingleConstraintMatchWeight( 10488 AsmOperandInfo &info, const char *constraint) const { 10489 ConstraintWeight weight = CW_Invalid; 10490 Value *CallOperandVal = info.CallOperandVal; 10491 // If we don't have a value, we can't do a match, 10492 // but allow it at the lowest weight. 10493 if (!CallOperandVal) 10494 return CW_Default; 10495 Type *type = CallOperandVal->getType(); 10496 // Look at the constraint type. 10497 switch (*constraint) { 10498 default: 10499 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 10500 break; 10501 case 'l': 10502 if (type->isIntegerTy()) { 10503 if (Subtarget->isThumb()) 10504 weight = CW_SpecificReg; 10505 else 10506 weight = CW_Register; 10507 } 10508 break; 10509 case 'w': 10510 if (type->isFloatingPointTy()) 10511 weight = CW_Register; 10512 break; 10513 } 10514 return weight; 10515 } 10516 10517 typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; 10518 RCPair 10519 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 10520 MVT VT) const { 10521 if (Constraint.size() == 1) { 10522 // GCC ARM Constraint Letters 10523 switch (Constraint[0]) { 10524 case 'l': // Low regs or general regs. 10525 if (Subtarget->isThumb()) 10526 return RCPair(0U, &ARM::tGPRRegClass); 10527 return RCPair(0U, &ARM::GPRRegClass); 10528 case 'h': // High regs or no regs. 10529 if (Subtarget->isThumb()) 10530 return RCPair(0U, &ARM::hGPRRegClass); 10531 break; 10532 case 'r': 10533 return RCPair(0U, &ARM::GPRRegClass); 10534 case 'w': 10535 if (VT == MVT::Other) 10536 break; 10537 if (VT == MVT::f32) 10538 return RCPair(0U, &ARM::SPRRegClass); 10539 if (VT.getSizeInBits() == 64) 10540 return RCPair(0U, &ARM::DPRRegClass); 10541 if (VT.getSizeInBits() == 128) 10542 return RCPair(0U, &ARM::QPRRegClass); 10543 break; 10544 case 'x': 10545 if (VT == MVT::Other) 10546 break; 10547 if (VT == MVT::f32) 10548 return RCPair(0U, &ARM::SPR_8RegClass); 10549 if (VT.getSizeInBits() == 64) 10550 return RCPair(0U, &ARM::DPR_8RegClass); 10551 if (VT.getSizeInBits() == 128) 10552 return RCPair(0U, &ARM::QPR_8RegClass); 10553 break; 10554 case 't': 10555 if (VT == MVT::f32) 10556 return RCPair(0U, &ARM::SPRRegClass); 10557 break; 10558 } 10559 } 10560 if (StringRef("{cc}").equals_lower(Constraint)) 10561 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass); 10562 10563 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 10564 } 10565 10566 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 10567 /// vector. If it is invalid, don't add anything to Ops. 10568 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 10569 std::string &Constraint, 10570 std::vector<SDValue>&Ops, 10571 SelectionDAG &DAG) const { 10572 SDValue Result; 10573 10574 // Currently only support length 1 constraints. 10575 if (Constraint.length() != 1) return; 10576 10577 char ConstraintLetter = Constraint[0]; 10578 switch (ConstraintLetter) { 10579 default: break; 10580 case 'j': 10581 case 'I': case 'J': case 'K': case 'L': 10582 case 'M': case 'N': case 'O': 10583 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 10584 if (!C) 10585 return; 10586 10587 int64_t CVal64 = C->getSExtValue(); 10588 int CVal = (int) CVal64; 10589 // None of these constraints allow values larger than 32 bits. Check 10590 // that the value fits in an int. 
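// Editorial example (not part of the upstream source): for inline assembly
// such as
//
//   asm volatile("add %0, %1, %2" : "=r"(res) : "r"(a), "I"(255));
//
// the 'I' operand arrives here as a ConstantSDNode. 255 is accepted on every
// subtarget (it is a valid Thumb1 ADD immediate, a valid T2 modified
// immediate and a valid ARM SOImm), whereas a value such as 0x12345678 fails
// all of the per-letter checks below and the operand is rejected.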
10591 if (CVal != CVal64) 10592 return; 10593 10594 switch (ConstraintLetter) { 10595 case 'j': 10596 // Constant suitable for movw, must be between 0 and 10597 // 65535. 10598 if (Subtarget->hasV6T2Ops()) 10599 if (CVal >= 0 && CVal <= 65535) 10600 break; 10601 return; 10602 case 'I': 10603 if (Subtarget->isThumb1Only()) { 10604 // This must be a constant between 0 and 255, for ADD 10605 // immediates. 10606 if (CVal >= 0 && CVal <= 255) 10607 break; 10608 } else if (Subtarget->isThumb2()) { 10609 // A constant that can be used as an immediate value in a 10610 // data-processing instruction. 10611 if (ARM_AM::getT2SOImmVal(CVal) != -1) 10612 break; 10613 } else { 10614 // A constant that can be used as an immediate value in a 10615 // data-processing instruction. 10616 if (ARM_AM::getSOImmVal(CVal) != -1) 10617 break; 10618 } 10619 return; 10620 10621 case 'J': 10622 if (Subtarget->isThumb()) { // FIXME thumb2 10623 // This must be a constant between -255 and -1, for negated ADD 10624 // immediates. This can be used in GCC with an "n" modifier that 10625 // prints the negated value, for use with SUB instructions. It is 10626 // not useful otherwise but is implemented for compatibility. 10627 if (CVal >= -255 && CVal <= -1) 10628 break; 10629 } else { 10630 // This must be a constant between -4095 and 4095. It is not clear 10631 // what this constraint is intended for. Implemented for 10632 // compatibility with GCC. 10633 if (CVal >= -4095 && CVal <= 4095) 10634 break; 10635 } 10636 return; 10637 10638 case 'K': 10639 if (Subtarget->isThumb1Only()) { 10640 // A 32-bit value where only one byte has a nonzero value. Exclude 10641 // zero to match GCC. This constraint is used by GCC internally for 10642 // constants that can be loaded with a move/shift combination. 10643 // It is not useful otherwise but is implemented for compatibility. 10644 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 10645 break; 10646 } else if (Subtarget->isThumb2()) { 10647 // A constant whose bitwise inverse can be used as an immediate 10648 // value in a data-processing instruction. This can be used in GCC 10649 // with a "B" modifier that prints the inverted value, for use with 10650 // BIC and MVN instructions. It is not useful otherwise but is 10651 // implemented for compatibility. 10652 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 10653 break; 10654 } else { 10655 // A constant whose bitwise inverse can be used as an immediate 10656 // value in a data-processing instruction. This can be used in GCC 10657 // with a "B" modifier that prints the inverted value, for use with 10658 // BIC and MVN instructions. It is not useful otherwise but is 10659 // implemented for compatibility. 10660 if (ARM_AM::getSOImmVal(~CVal) != -1) 10661 break; 10662 } 10663 return; 10664 10665 case 'L': 10666 if (Subtarget->isThumb1Only()) { 10667 // This must be a constant between -7 and 7, 10668 // for 3-operand ADD/SUB immediate instructions. 10669 if (CVal >= -7 && CVal < 7) 10670 break; 10671 } else if (Subtarget->isThumb2()) { 10672 // A constant whose negation can be used as an immediate value in a 10673 // data-processing instruction. This can be used in GCC with an "n" 10674 // modifier that prints the negated value, for use with SUB 10675 // instructions. It is not useful otherwise but is implemented for 10676 // compatibility. 10677 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 10678 break; 10679 } else { 10680 // A constant whose negation can be used as an immediate value in a 10681 // data-processing instruction. 
This can be used in GCC with an "n" 10682 // modifier that prints the negated value, for use with SUB 10683 // instructions. It is not useful otherwise but is implemented for 10684 // compatibility. 10685 if (ARM_AM::getSOImmVal(-CVal) != -1) 10686 break; 10687 } 10688 return; 10689 10690 case 'M': 10691 if (Subtarget->isThumb()) { // FIXME thumb2 10692 // This must be a multiple of 4 between 0 and 1020, for 10693 // ADD sp + immediate. 10694 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 10695 break; 10696 } else { 10697 // A power of two or a constant between 0 and 32. This is used in 10698 // GCC for the shift amount on shifted register operands, but it is 10699 // useful in general for any shift amounts. 10700 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) 10701 break; 10702 } 10703 return; 10704 10705 case 'N': 10706 if (Subtarget->isThumb()) { // FIXME thumb2 10707 // This must be a constant between 0 and 31, for shift amounts. 10708 if (CVal >= 0 && CVal <= 31) 10709 break; 10710 } 10711 return; 10712 10713 case 'O': 10714 if (Subtarget->isThumb()) { // FIXME thumb2 10715 // This must be a multiple of 4 between -508 and 508, for 10716 // ADD/SUB sp = sp + immediate. 10717 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) 10718 break; 10719 } 10720 return; 10721 } 10722 Result = DAG.getTargetConstant(CVal, Op.getValueType()); 10723 break; 10724 } 10725 10726 if (Result.getNode()) { 10727 Ops.push_back(Result); 10728 return; 10729 } 10730 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 10731 } 10732 10733 SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { 10734 assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only"); 10735 unsigned Opcode = Op->getOpcode(); 10736 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && 10737 "Invalid opcode for Div/Rem lowering"); 10738 bool isSigned = (Opcode == ISD::SDIVREM); 10739 EVT VT = Op->getValueType(0); 10740 Type *Ty = VT.getTypeForEVT(*DAG.getContext()); 10741 10742 RTLIB::Libcall LC; 10743 switch (VT.getSimpleVT().SimpleTy) { 10744 default: llvm_unreachable("Unexpected request for libcall!"); 10745 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; 10746 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; 10747 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; 10748 case MVT::i64: LC = isSigned ? 
RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; 10749 } 10750 10751 SDValue InChain = DAG.getEntryNode(); 10752 10753 TargetLowering::ArgListTy Args; 10754 TargetLowering::ArgListEntry Entry; 10755 for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { 10756 EVT ArgVT = Op->getOperand(i).getValueType(); 10757 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); 10758 Entry.Node = Op->getOperand(i); 10759 Entry.Ty = ArgTy; 10760 Entry.isSExt = isSigned; 10761 Entry.isZExt = !isSigned; 10762 Args.push_back(Entry); 10763 } 10764 10765 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), 10766 getPointerTy()); 10767 10768 Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); 10769 10770 SDLoc dl(Op); 10771 TargetLowering::CallLoweringInfo CLI(DAG); 10772 CLI.setDebugLoc(dl).setChain(InChain) 10773 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) 10774 .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); 10775 10776 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); 10777 return CallInfo.first; 10778 } 10779 10780 SDValue 10781 ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { 10782 assert(Subtarget->isTargetWindows() && "unsupported target platform"); 10783 SDLoc DL(Op); 10784 10785 // Get the inputs. 10786 SDValue Chain = Op.getOperand(0); 10787 SDValue Size = Op.getOperand(1); 10788 10789 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size, 10790 DAG.getConstant(2, MVT::i32)); 10791 10792 SDValue Flag; 10793 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); 10794 Flag = Chain.getValue(1); 10795 10796 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 10797 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); 10798 10799 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); 10800 Chain = NewSP.getValue(1); 10801 10802 SDValue Ops[2] = { NewSP, Chain }; 10803 return DAG.getMergeValues(Ops, DL); 10804 } 10805 10806 SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { 10807 assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && 10808 "Unexpected type for custom-lowering FP_EXTEND"); 10809 10810 RTLIB::Libcall LC; 10811 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); 10812 10813 SDValue SrcVal = Op.getOperand(0); 10814 return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, 10815 /*isSigned*/ false, SDLoc(Op)).first; 10816 } 10817 10818 SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { 10819 assert(Op.getOperand(0).getValueType() == MVT::f64 && 10820 Subtarget->isFPOnlySP() && 10821 "Unexpected type for custom-lowering FP_ROUND"); 10822 10823 RTLIB::Libcall LC; 10824 LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); 10825 10826 SDValue SrcVal = Op.getOperand(0); 10827 return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, 10828 /*isSigned*/ false, SDLoc(Op)).first; 10829 } 10830 10831 bool 10832 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 10833 // The ARM target isn't yet aware of offsets. 
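// Editorial note (not part of the upstream source): returning false here
// keeps target-independent DAG combines from folding a constant addend into
// the GlobalAddress node itself, e.g. turning
//
//   (add (GlobalAddress @g), 8)  -->  (GlobalAddress @g + 8)
//
// Matching the comment above, the ARM materialization paths (constant-pool
// entries, MOVW/MOVT pairs) do not yet handle a folded offset, so the
// addition stays as an explicit ADD.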
10834 return false; 10835 } 10836 10837 bool ARM::isBitFieldInvertedMask(unsigned v) { 10838 if (v == 0xffffffff) 10839 return false; 10840 10841 // there can be 1's on either or both "outsides", all the "inside" 10842 // bits must be 0's 10843 unsigned TO = CountTrailingOnes_32(v); 10844 unsigned LO = CountLeadingOnes_32(v); 10845 v = (v >> TO) << TO; 10846 v = (v << LO) >> LO; 10847 return v == 0; 10848 } 10849 10850 /// isFPImmLegal - Returns true if the target can instruction select the 10851 /// specified FP immediate natively. If false, the legalizer will 10852 /// materialize the FP immediate as a load from a constant pool. 10853 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 10854 if (!Subtarget->hasVFP3()) 10855 return false; 10856 if (VT == MVT::f32) 10857 return ARM_AM::getFP32Imm(Imm) != -1; 10858 if (VT == MVT::f64 && !Subtarget->isFPOnlySP()) 10859 return ARM_AM::getFP64Imm(Imm) != -1; 10860 return false; 10861 } 10862 10863 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as 10864 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment 10865 /// specified in the intrinsic calls. 10866 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 10867 const CallInst &I, 10868 unsigned Intrinsic) const { 10869 switch (Intrinsic) { 10870 case Intrinsic::arm_neon_vld1: 10871 case Intrinsic::arm_neon_vld2: 10872 case Intrinsic::arm_neon_vld3: 10873 case Intrinsic::arm_neon_vld4: 10874 case Intrinsic::arm_neon_vld2lane: 10875 case Intrinsic::arm_neon_vld3lane: 10876 case Intrinsic::arm_neon_vld4lane: { 10877 Info.opc = ISD::INTRINSIC_W_CHAIN; 10878 // Conservatively set memVT to the entire set of vectors loaded. 10879 uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; 10880 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 10881 Info.ptrVal = I.getArgOperand(0); 10882 Info.offset = 0; 10883 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 10884 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 10885 Info.vol = false; // volatile loads with NEON intrinsics not supported 10886 Info.readMem = true; 10887 Info.writeMem = false; 10888 return true; 10889 } 10890 case Intrinsic::arm_neon_vst1: 10891 case Intrinsic::arm_neon_vst2: 10892 case Intrinsic::arm_neon_vst3: 10893 case Intrinsic::arm_neon_vst4: 10894 case Intrinsic::arm_neon_vst2lane: 10895 case Intrinsic::arm_neon_vst3lane: 10896 case Intrinsic::arm_neon_vst4lane: { 10897 Info.opc = ISD::INTRINSIC_VOID; 10898 // Conservatively set memVT to the entire set of vectors stored. 
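// Editorial example (sketch, not part of the upstream source): for a call
// such as
//
//   call void @llvm.arm.neon.vst2.v4i32(i8* %p, <4 x i32> %a, <4 x i32> %b, i32 8)
//
// the two <4 x i32> operands contribute 16 bytes each, so the loop below
// computes NumElts = 4 and memVT = v4i64 (the full 32-byte footprint of the
// store), and Info.align is taken from the trailing alignment argument.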
10899 unsigned NumElts = 0; 10900 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { 10901 Type *ArgTy = I.getArgOperand(ArgI)->getType(); 10902 if (!ArgTy->isVectorTy()) 10903 break; 10904 NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; 10905 } 10906 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 10907 Info.ptrVal = I.getArgOperand(0); 10908 Info.offset = 0; 10909 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 10910 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 10911 Info.vol = false; // volatile stores with NEON intrinsics not supported 10912 Info.readMem = false; 10913 Info.writeMem = true; 10914 return true; 10915 } 10916 case Intrinsic::arm_ldaex: 10917 case Intrinsic::arm_ldrex: { 10918 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 10919 Info.opc = ISD::INTRINSIC_W_CHAIN; 10920 Info.memVT = MVT::getVT(PtrTy->getElementType()); 10921 Info.ptrVal = I.getArgOperand(0); 10922 Info.offset = 0; 10923 Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); 10924 Info.vol = true; 10925 Info.readMem = true; 10926 Info.writeMem = false; 10927 return true; 10928 } 10929 case Intrinsic::arm_stlex: 10930 case Intrinsic::arm_strex: { 10931 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); 10932 Info.opc = ISD::INTRINSIC_W_CHAIN; 10933 Info.memVT = MVT::getVT(PtrTy->getElementType()); 10934 Info.ptrVal = I.getArgOperand(1); 10935 Info.offset = 0; 10936 Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); 10937 Info.vol = true; 10938 Info.readMem = false; 10939 Info.writeMem = true; 10940 return true; 10941 } 10942 case Intrinsic::arm_stlexd: 10943 case Intrinsic::arm_strexd: { 10944 Info.opc = ISD::INTRINSIC_W_CHAIN; 10945 Info.memVT = MVT::i64; 10946 Info.ptrVal = I.getArgOperand(2); 10947 Info.offset = 0; 10948 Info.align = 8; 10949 Info.vol = true; 10950 Info.readMem = false; 10951 Info.writeMem = true; 10952 return true; 10953 } 10954 case Intrinsic::arm_ldaexd: 10955 case Intrinsic::arm_ldrexd: { 10956 Info.opc = ISD::INTRINSIC_W_CHAIN; 10957 Info.memVT = MVT::i64; 10958 Info.ptrVal = I.getArgOperand(0); 10959 Info.offset = 0; 10960 Info.align = 8; 10961 Info.vol = true; 10962 Info.readMem = true; 10963 Info.writeMem = false; 10964 return true; 10965 } 10966 default: 10967 break; 10968 } 10969 10970 return false; 10971 } 10972 10973 /// \brief Returns true if it is beneficial to convert a load of a constant 10974 /// to just the constant itself. 
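// Editorial note (sketch, not part of the upstream source): the hook below
// answers questions such as "given that the loaded value is known to be
// 0x12345678, is it worth replacing
//
//   %v = load i32* @some_constant_global
//
// with the immediate itself?" For integer types of 32 bits or fewer the
// answer is yes: the value can be rebuilt with at most a MOVW/MOVT pair (or
// a short Thumb1 move/shift sequence), which is usually no more expensive
// than a literal-pool load.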
10975 bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 10976 Type *Ty) const { 10977 assert(Ty->isIntegerTy()); 10978 10979 unsigned Bits = Ty->getPrimitiveSizeInBits(); 10980 if (Bits == 0 || Bits > 32) 10981 return false; 10982 return true; 10983 } 10984 10985 bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; } 10986 10987 static void makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) { 10988 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 10989 Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); 10990 Constant *CDomain = Builder.getInt32(Domain); 10991 Builder.CreateCall(DMB, CDomain); 10992 } 10993 10994 // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html 10995 void ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 10996 AtomicOrdering Ord, bool IsStore, 10997 bool IsLoad) const { 10998 if (!getInsertFencesForAtomic()) 10999 return; 11000 11001 switch (Ord) { 11002 case NotAtomic: 11003 case Unordered: 11004 llvm_unreachable("Invalid fence: unordered/non-atomic"); 11005 case Monotonic: 11006 case Acquire: 11007 return; // Nothing to do 11008 case SequentiallyConsistent: 11009 if (!IsStore) 11010 return; // Nothing to do 11011 /*FALLTHROUGH*/ 11012 case Release: 11013 case AcquireRelease: 11014 if (Subtarget->isSwift()) 11015 makeDMB(Builder, ARM_MB::ISHST); 11016 // FIXME: add a comment with a link to documentation justifying this. 11017 else 11018 makeDMB(Builder, ARM_MB::ISH); 11019 return; 11020 } 11021 } 11022 11023 void ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 11024 AtomicOrdering Ord, bool IsStore, 11025 bool IsLoad) const { 11026 if (!getInsertFencesForAtomic()) 11027 return; 11028 11029 switch (Ord) { 11030 case NotAtomic: 11031 case Unordered: 11032 llvm_unreachable("Invalid fence: unordered/not-atomic"); 11033 case Monotonic: 11034 case Release: 11035 return; // Nothing to do 11036 case Acquire: 11037 case AcquireRelease: 11038 case SequentiallyConsistent: 11039 makeDMB(Builder, ARM_MB::ISH); 11040 return; 11041 } 11042 } 11043 11044 // Loads and stores less than 64-bits are already atomic; ones above that 11045 // are doomed anyway, so defer to the default libcall and blame the OS when 11046 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit 11047 // anything for those. 11048 bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { 11049 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); 11050 return (Size == 64) && !Subtarget->isMClass(); 11051 } 11052 11053 // Loads and stores less than 64-bits are already atomic; ones above that 11054 // are doomed anyway, so defer to the default libcall and blame the OS when 11055 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit 11056 // anything for those. 11057 bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { 11058 unsigned Size = LI->getType()->getPrimitiveSizeInBits(); 11059 return (Size == 64) && !Subtarget->isMClass(); 11060 } 11061 11062 // For the real atomic operations, we have ldrex/strex up to 32 bits, 11063 // and up to 64 bits on the non-M profiles 11064 bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 11065 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 11066 return Size <= (Subtarget->isMClass() ? 32U : 64U); 11067 } 11068 11069 // This has so far only been implemented for MachO. 
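// Editorial example (not part of the upstream source): with
// insert-fences-for-atomic enabled, the two fence hooks above place barriers
// per the C++11 mapping linked earlier, e.g. for
//
//   store atomic i32 %v, i32* %p seq_cst, align 4
//
// emitLeadingFence() emits "dmb ish" (or "dmb ishst" on Swift) before the
// plain STR and emitTrailingFence() emits "dmb ish" after it, while an
// acquire load only receives the trailing barrier.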
11070 bool ARMTargetLowering::useLoadStackGuardNode() const { 11071 return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO; 11072 } 11073 11074 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, 11075 AtomicOrdering Ord) const { 11076 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 11077 Type *ValTy = cast<PointerType>(Addr->getType())->getElementType(); 11078 bool IsAcquire = isAtLeastAcquire(Ord); 11079 11080 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd 11081 // intrinsic must return {i32, i32} and we have to recombine them into a 11082 // single i64 here. 11083 if (ValTy->getPrimitiveSizeInBits() == 64) { 11084 Intrinsic::ID Int = 11085 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; 11086 Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); 11087 11088 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); 11089 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); 11090 11091 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); 11092 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); 11093 if (!Subtarget->isLittle()) 11094 std::swap (Lo, Hi); 11095 Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); 11096 Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); 11097 return Builder.CreateOr( 11098 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); 11099 } 11100 11101 Type *Tys[] = { Addr->getType() }; 11102 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; 11103 Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); 11104 11105 return Builder.CreateTruncOrBitCast( 11106 Builder.CreateCall(Ldrex, Addr), 11107 cast<PointerType>(Addr->getType())->getElementType()); 11108 } 11109 11110 Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, 11111 Value *Addr, 11112 AtomicOrdering Ord) const { 11113 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 11114 bool IsRelease = isAtLeastRelease(Ord); 11115 11116 // Since the intrinsics must have legal type, the i64 intrinsics take two 11117 // parameters: "i32, i32". We must marshal Val into the appropriate form 11118 // before the call. 11119 if (Val->getType()->getPrimitiveSizeInBits() == 64) { 11120 Intrinsic::ID Int = 11121 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd; 11122 Function *Strex = Intrinsic::getDeclaration(M, Int); 11123 Type *Int32Ty = Type::getInt32Ty(M->getContext()); 11124 11125 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); 11126 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); 11127 if (!Subtarget->isLittle()) 11128 std::swap (Lo, Hi); 11129 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); 11130 return Builder.CreateCall3(Strex, Lo, Hi, Addr); 11131 } 11132 11133 Intrinsic::ID Int = IsRelease ? 
Intrinsic::arm_stlex : Intrinsic::arm_strex; 11134 Type *Tys[] = { Addr->getType() }; 11135 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); 11136 11137 return Builder.CreateCall2( 11138 Strex, Builder.CreateZExtOrBitCast( 11139 Val, Strex->getFunctionType()->getParamType(0)), 11140 Addr); 11141 } 11142 11143 enum HABaseType { 11144 HA_UNKNOWN = 0, 11145 HA_FLOAT, 11146 HA_DOUBLE, 11147 HA_VECT64, 11148 HA_VECT128 11149 }; 11150 11151 static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, 11152 uint64_t &Members) { 11153 if (const StructType *ST = dyn_cast<StructType>(Ty)) { 11154 for (unsigned i = 0; i < ST->getNumElements(); ++i) { 11155 uint64_t SubMembers = 0; 11156 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) 11157 return false; 11158 Members += SubMembers; 11159 } 11160 } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { 11161 uint64_t SubMembers = 0; 11162 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) 11163 return false; 11164 Members += SubMembers * AT->getNumElements(); 11165 } else if (Ty->isFloatTy()) { 11166 if (Base != HA_UNKNOWN && Base != HA_FLOAT) 11167 return false; 11168 Members = 1; 11169 Base = HA_FLOAT; 11170 } else if (Ty->isDoubleTy()) { 11171 if (Base != HA_UNKNOWN && Base != HA_DOUBLE) 11172 return false; 11173 Members = 1; 11174 Base = HA_DOUBLE; 11175 } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) { 11176 Members = 1; 11177 switch (Base) { 11178 case HA_FLOAT: 11179 case HA_DOUBLE: 11180 return false; 11181 case HA_VECT64: 11182 return VT->getBitWidth() == 64; 11183 case HA_VECT128: 11184 return VT->getBitWidth() == 128; 11185 case HA_UNKNOWN: 11186 switch (VT->getBitWidth()) { 11187 case 64: 11188 Base = HA_VECT64; 11189 return true; 11190 case 128: 11191 Base = HA_VECT128; 11192 return true; 11193 default: 11194 return false; 11195 } 11196 } 11197 } 11198 11199 return (Members > 0 && Members <= 4); 11200 } 11201 11202 /// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate. 11203 bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( 11204 Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { 11205 if (getEffectiveCallingConv(CallConv, isVarArg) != 11206 CallingConv::ARM_AAPCS_VFP) 11207 return false; 11208 11209 HABaseType Base = HA_UNKNOWN; 11210 uint64_t Members = 0; 11211 bool result = isHomogeneousAggregate(Ty, Base, Members); 11212 DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump()); 11213 return result; 11214 }
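// ---------------------------------------------------------------------------
// Editorial sketch (not part of the upstream ARMISelLowering.cpp): a small,
// normally-compiled-out helper showing how the HA classification above treats
// a few common aggregate shapes. The struct layouts and the guard macro are
// hypothetical; only isHomogeneousAggregate() and the IR type APIs are real.
// ---------------------------------------------------------------------------
#ifdef ARM_ISEL_LOWERING_HA_EXAMPLE
static void exampleHAClassification(LLVMContext &Ctx) {
  Type *FloatTy = Type::getFloatTy(Ctx);
  Type *DoubleTy = Type::getDoubleTy(Ctx);
  Type *I32Ty = Type::getInt32Ty(Ctx);

  // struct { float x, y, z, w; }  -> HA of four floats (s0-s3 under AAPCS-VFP).
  StructType *Vec4f = StructType::get(FloatTy, FloatTy, FloatTy, FloatTy, NULL);

  // struct { double d[2]; }       -> HA of two doubles (d0-d1).
  StructType *Pair2d = StructType::get(ArrayType::get(DoubleTy, 2), NULL);

  // struct { float f; int i; }    -> mixed base types, not an HA.
  StructType *Mixed = StructType::get(FloatTy, I32Ty, NULL);

  // struct { float f[5]; }        -> five members, too many to be an HA.
  StructType *TooMany = StructType::get(ArrayType::get(FloatTy, 5), NULL);

  Type *Tests[] = { Vec4f, Pair2d, Mixed, TooMany };
  for (Type *Ty : Tests) {
    HABaseType Base = HA_UNKNOWN;
    uint64_t Members = 0;
    bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
    dbgs() << "isHA=" << IsHA << " members=" << Members << "\n";
  }
}
#endif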