LLVM API Documentation
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelLowering.h"
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>

#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-lower"

using namespace llvm;

static unsigned int uniqueCallSite = 0;

static cl::opt<bool> sched4reg(
    "nvptx-sched4reg",
    cl::desc("NVPTX Specific: schedule for register pressure"),
    cl::init(false));

static cl::opt<unsigned>
FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
                    cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
                             " 1: do it 2: do it aggressively)"),
                    cl::init(2));

static bool IsPTXVectorType(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::v2i1:
  case MVT::v4i1:
  case MVT::v2i8:
  case MVT::v4i8:
  case MVT::v2i16:
  case MVT::v4i16:
  case MVT::v2i32:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v4f32:
  case MVT::v2f64:
    return true;
  }
}

/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors
/// into their primitive components.
/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return
/// the same number of types as the Ins/Outs arrays in LowerFormalArguments,
/// LowerCall, and LowerReturn.
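/// For illustration (an assumed example, not part of the original comment):
/// for Ty = <2 x float>, ComputeValueVTs yields a single v2f32 entry, while
/// ComputePTXValueVTs yields {f32, f32} with byte offsets {0, 4}.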
static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
                               SmallVectorImpl<EVT> &ValueVTs,
                               SmallVectorImpl<uint64_t> *Offsets = nullptr,
                               uint64_t StartingOffset = 0) {
  SmallVector<EVT, 16> TempVTs;
  SmallVector<uint64_t, 16> TempOffsets;

  ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset);
  for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
    EVT VT = TempVTs[i];
    uint64_t Off = TempOffsets[i];
    if (VT.isVector())
      for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) {
        ValueVTs.push_back(VT.getVectorElementType());
        if (Offsets)
          Offsets->push_back(Off + j*VT.getVectorElementType().getStoreSize());
      }
    else {
      ValueVTs.push_back(VT);
      if (Offsets)
        Offsets->push_back(Off);
    }
  }
}

// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
    : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
      nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {

  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy, or memmove.
  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Jump is expensive. Don't create extra control flow for 'and' / 'or'
  // condition branches.
  setJumpIsExpensive(true);

  // By default, use Source scheduling.
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
  // Some SIGN_EXTEND_INREG can be done using cvt instructions.
  // For others we will expand to a SHL/SRA pair.
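  // For illustration (an assumed example, not part of this file): expanding
  // SIGN_EXTEND_INREG of an i1 held in an i32 register produces a shift pair,
  // roughly:
  //   %t = shl i32 %v, 31
  //   %r = ashr i32 %t, 31   ; every bit now copies the original low bit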
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);

  if (nvptxSubtarget.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (nvptxSubtarget.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }

  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables jump table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant-related memmove and memcpy
  // intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // Turn FP extload into load/fextend.
  setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  // Turn FP truncstore into trunc + store.
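  // For illustration (an assumed sketch, not part of this file): marking the
  // f32->f16 truncating store Expand means a store of an explicitly rounded
  // value is emitted instead, roughly equivalent to the IR
  //   %h = fptrunc float %f to half
  //   store half %h, half* %p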
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load / store predicate registers.
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i64, MVT::i1, Expand);
  setTruncStoreAction(MVT::i32, MVT::i1, Expand);
  setTruncStoreAction(MVT::i16, MVT::i1, Expand);
  setTruncStoreAction(MVT::i8, MVT::i1, Expand);

  // This is legal in NVPTX.
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);

  // TRAP can be lowered to PTX trap.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::ADDC, MVT::i64, Expand);
  setOperationAction(ISD::ADDE, MVT::i64, Expand);

  // Register custom handling for vector loads/stores.
  for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
       ++i) {
    MVT VT = (MVT::SimpleValueType) i;
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }

  // Custom handling for i8 intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);

  setOperationAction(ISD::CTLZ, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i16, Legal);
  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
  setOperationAction(ISD::CTPOP, MVT::i64, Legal);

  // We have some custom DAG combine patterns for these nodes.
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::FADD);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SHL);

  // Now deduce the information based on the above-mentioned actions.
  computeRegisterProperties();
}

const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return nullptr;
  case NVPTXISD::CALL: return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG";
  case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper";
  case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam: return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall";
  case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam";
  case NVPTXISD::LoadParamV2: return "NVPTXISD::LoadParamV2";
  case NVPTXISD::LoadParamV4: return "NVPTXISD::LoadParamV4";
  case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamV2: return "NVPTXISD::StoreParamV2";
  case NVPTXISD::StoreParamV4: return "NVPTXISD::StoreParamV4";
  case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32";
  case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg: return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal: return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype: return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam";
  case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval";
  case NVPTXISD::StoreRetvalV2: return "NVPTXISD::StoreRetvalV2";
  case NVPTXISD::StoreRetvalV4: return "NVPTXISD::StoreRetvalV4";
  case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
  case NVPTXISD::CallPrototype: return "NVPTXISD::CallPrototype";
  case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2";
  case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4";
  case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2";
  case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4";
  case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2";
  case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4";
  case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2";
  case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4";
  case NVPTXISD::FUN_SHFL_CLAMP: return "NVPTXISD::FUN_SHFL_CLAMP";
  case NVPTXISD::FUN_SHFR_CLAMP: return "NVPTXISD::FUN_SHFR_CLAMP";
  case NVPTXISD::IMAD: return "NVPTXISD::IMAD";
  case NVPTXISD::MUL_WIDE_SIGNED: return "NVPTXISD::MUL_WIDE_SIGNED";
  case NVPTXISD::MUL_WIDE_UNSIGNED: return "NVPTXISD::MUL_WIDE_UNSIGNED";
  case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32";
  case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
  case NVPTXISD::Tex1DFloatFloatLevel: return "NVPTXISD::Tex1DFloatFloatLevel";
  case NVPTXISD::Tex1DFloatFloatGrad: return "NVPTXISD::Tex1DFloatFloatGrad";
  case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32";
  case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float";
  case NVPTXISD::Tex1DS32FloatLevel: return "NVPTXISD::Tex1DS32FloatLevel";
  case NVPTXISD::Tex1DS32FloatGrad: return "NVPTXISD::Tex1DS32FloatGrad";
  case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32";
  case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float";
  case NVPTXISD::Tex1DU32FloatLevel: return "NVPTXISD::Tex1DU32FloatLevel";
  case NVPTXISD::Tex1DU32FloatGrad: return "NVPTXISD::Tex1DU32FloatGrad";
  case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32";
  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
  case NVPTXISD::Tex1DArrayFloatFloatLevel: return "NVPTXISD::Tex1DArrayFloatFloatLevel";
  case NVPTXISD::Tex1DArrayFloatFloatGrad: return "NVPTXISD::Tex1DArrayFloatFloatGrad";
  case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32";
  case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float";
  case NVPTXISD::Tex1DArrayS32FloatLevel: return "NVPTXISD::Tex1DArrayS32FloatLevel";
  case NVPTXISD::Tex1DArrayS32FloatGrad: return "NVPTXISD::Tex1DArrayS32FloatGrad";
  case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32";
  case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float";
  case NVPTXISD::Tex1DArrayU32FloatLevel: return "NVPTXISD::Tex1DArrayU32FloatLevel";
  case NVPTXISD::Tex1DArrayU32FloatGrad: return "NVPTXISD::Tex1DArrayU32FloatGrad";
  case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32";
  case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
  case NVPTXISD::Tex2DFloatFloatLevel: return "NVPTXISD::Tex2DFloatFloatLevel";
  case NVPTXISD::Tex2DFloatFloatGrad: return "NVPTXISD::Tex2DFloatFloatGrad";
  case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32";
  case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float";
  case NVPTXISD::Tex2DS32FloatLevel: return "NVPTXISD::Tex2DS32FloatLevel";
  case NVPTXISD::Tex2DS32FloatGrad: return "NVPTXISD::Tex2DS32FloatGrad";
  case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32";
  case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float";
  case NVPTXISD::Tex2DU32FloatLevel: return "NVPTXISD::Tex2DU32FloatLevel";
  case NVPTXISD::Tex2DU32FloatGrad: return "NVPTXISD::Tex2DU32FloatGrad";
  case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32";
  case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
  case NVPTXISD::Tex2DArrayFloatFloatLevel: return "NVPTXISD::Tex2DArrayFloatFloatLevel";
  case NVPTXISD::Tex2DArrayFloatFloatGrad: return "NVPTXISD::Tex2DArrayFloatFloatGrad";
  case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32";
  case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float";
  case NVPTXISD::Tex2DArrayS32FloatLevel: return "NVPTXISD::Tex2DArrayS32FloatLevel";
  case NVPTXISD::Tex2DArrayS32FloatGrad: return "NVPTXISD::Tex2DArrayS32FloatGrad";
  case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32";
  case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float";
  case NVPTXISD::Tex2DArrayU32FloatLevel: return "NVPTXISD::Tex2DArrayU32FloatLevel";
  case NVPTXISD::Tex2DArrayU32FloatGrad: return "NVPTXISD::Tex2DArrayU32FloatGrad";
  case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32";
  case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
  case NVPTXISD::Tex3DFloatFloatLevel: return "NVPTXISD::Tex3DFloatFloatLevel";
  case NVPTXISD::Tex3DFloatFloatGrad: return "NVPTXISD::Tex3DFloatFloatGrad";
  case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32";
  case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float";
  case NVPTXISD::Tex3DS32FloatLevel: return "NVPTXISD::Tex3DS32FloatLevel";
  case NVPTXISD::Tex3DS32FloatGrad: return "NVPTXISD::Tex3DS32FloatGrad";
  case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32";
  case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float";
  case NVPTXISD::Tex3DU32FloatLevel: return "NVPTXISD::Tex3DU32FloatLevel";
  case NVPTXISD::Tex3DU32FloatGrad: return "NVPTXISD::Tex3DU32FloatGrad";
  case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat";
  case NVPTXISD::TexCubeFloatFloatLevel: return "NVPTXISD::TexCubeFloatFloatLevel";
  case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float";
  case NVPTXISD::TexCubeS32FloatLevel: return "NVPTXISD::TexCubeS32FloatLevel";
  case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float";
  case NVPTXISD::TexCubeU32FloatLevel: return "NVPTXISD::TexCubeU32FloatLevel";
  case NVPTXISD::TexCubeArrayFloatFloat: return "NVPTXISD::TexCubeArrayFloatFloat";
  case NVPTXISD::TexCubeArrayFloatFloatLevel: return "NVPTXISD::TexCubeArrayFloatFloatLevel";
  case NVPTXISD::TexCubeArrayS32Float: return "NVPTXISD::TexCubeArrayS32Float";
  case NVPTXISD::TexCubeArrayS32FloatLevel: return "NVPTXISD::TexCubeArrayS32FloatLevel";
  case NVPTXISD::TexCubeArrayU32Float: return "NVPTXISD::TexCubeArrayU32Float";
  case NVPTXISD::TexCubeArrayU32FloatLevel: return "NVPTXISD::TexCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4R2DFloatFloat: return "NVPTXISD::Tld4R2DFloatFloat";
  case NVPTXISD::Tld4G2DFloatFloat: return "NVPTXISD::Tld4G2DFloatFloat";
  case NVPTXISD::Tld4B2DFloatFloat: return "NVPTXISD::Tld4B2DFloatFloat";
  case NVPTXISD::Tld4A2DFloatFloat: return "NVPTXISD::Tld4A2DFloatFloat";
  case NVPTXISD::Tld4R2DS64Float: return "NVPTXISD::Tld4R2DS64Float";
  case NVPTXISD::Tld4G2DS64Float: return "NVPTXISD::Tld4G2DS64Float";
  case NVPTXISD::Tld4B2DS64Float: return "NVPTXISD::Tld4B2DS64Float";
  case NVPTXISD::Tld4A2DS64Float: return "NVPTXISD::Tld4A2DS64Float";
  case NVPTXISD::Tld4R2DU64Float: return "NVPTXISD::Tld4R2DU64Float";
  case NVPTXISD::Tld4G2DU64Float: return "NVPTXISD::Tld4G2DU64Float";
  case NVPTXISD::Tld4B2DU64Float: return "NVPTXISD::Tld4B2DU64Float";
  case NVPTXISD::Tld4A2DU64Float: return "NVPTXISD::Tld4A2DU64Float";

  case NVPTXISD::TexUnified1DFloatS32: return "NVPTXISD::TexUnified1DFloatS32";
  case NVPTXISD::TexUnified1DFloatFloat: return "NVPTXISD::TexUnified1DFloatFloat";
  case NVPTXISD::TexUnified1DFloatFloatLevel: return "NVPTXISD::TexUnified1DFloatFloatLevel";
  case NVPTXISD::TexUnified1DFloatFloatGrad: return "NVPTXISD::TexUnified1DFloatFloatGrad";
  case NVPTXISD::TexUnified1DS32S32: return "NVPTXISD::TexUnified1DS32S32";
  case NVPTXISD::TexUnified1DS32Float: return "NVPTXISD::TexUnified1DS32Float";
  case NVPTXISD::TexUnified1DS32FloatLevel: return "NVPTXISD::TexUnified1DS32FloatLevel";
  case NVPTXISD::TexUnified1DS32FloatGrad: return "NVPTXISD::TexUnified1DS32FloatGrad";
  case NVPTXISD::TexUnified1DU32S32: return "NVPTXISD::TexUnified1DU32S32";
  case NVPTXISD::TexUnified1DU32Float: return "NVPTXISD::TexUnified1DU32Float";
  case NVPTXISD::TexUnified1DU32FloatLevel: return "NVPTXISD::TexUnified1DU32FloatLevel";
  case NVPTXISD::TexUnified1DU32FloatGrad: return "NVPTXISD::TexUnified1DU32FloatGrad";
  case NVPTXISD::TexUnified1DArrayFloatS32: return "NVPTXISD::TexUnified1DArrayFloatS32";
  case NVPTXISD::TexUnified1DArrayFloatFloat: return "NVPTXISD::TexUnified1DArrayFloatFloat";
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel: return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad: return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified1DArrayS32S32: return "NVPTXISD::TexUnified1DArrayS32S32";
  case NVPTXISD::TexUnified1DArrayS32Float: return "NVPTXISD::TexUnified1DArrayS32Float";
  case NVPTXISD::TexUnified1DArrayS32FloatLevel: return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
  case NVPTXISD::TexUnified1DArrayS32FloatGrad: return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
  case NVPTXISD::TexUnified1DArrayU32S32: return "NVPTXISD::TexUnified1DArrayU32S32";
  case NVPTXISD::TexUnified1DArrayU32Float: return "NVPTXISD::TexUnified1DArrayU32Float";
  case NVPTXISD::TexUnified1DArrayU32FloatLevel: return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
  case NVPTXISD::TexUnified1DArrayU32FloatGrad: return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
  case NVPTXISD::TexUnified2DFloatS32: return "NVPTXISD::TexUnified2DFloatS32";
  case NVPTXISD::TexUnified2DFloatFloat: return "NVPTXISD::TexUnified2DFloatFloat";
  case NVPTXISD::TexUnified2DFloatFloatLevel: return "NVPTXISD::TexUnified2DFloatFloatLevel";
  case NVPTXISD::TexUnified2DFloatFloatGrad: return "NVPTXISD::TexUnified2DFloatFloatGrad";
  case NVPTXISD::TexUnified2DS32S32: return "NVPTXISD::TexUnified2DS32S32";
  case NVPTXISD::TexUnified2DS32Float: return "NVPTXISD::TexUnified2DS32Float";
  case NVPTXISD::TexUnified2DS32FloatLevel: return "NVPTXISD::TexUnified2DS32FloatLevel";
  case NVPTXISD::TexUnified2DS32FloatGrad: return "NVPTXISD::TexUnified2DS32FloatGrad";
  case NVPTXISD::TexUnified2DU32S32: return "NVPTXISD::TexUnified2DU32S32";
  case NVPTXISD::TexUnified2DU32Float: return "NVPTXISD::TexUnified2DU32Float";
  case NVPTXISD::TexUnified2DU32FloatLevel: return "NVPTXISD::TexUnified2DU32FloatLevel";
  case NVPTXISD::TexUnified2DU32FloatGrad: return "NVPTXISD::TexUnified2DU32FloatGrad";
  case NVPTXISD::TexUnified2DArrayFloatS32: return "NVPTXISD::TexUnified2DArrayFloatS32";
  case NVPTXISD::TexUnified2DArrayFloatFloat: return "NVPTXISD::TexUnified2DArrayFloatFloat";
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel: return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad: return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified2DArrayS32S32: return "NVPTXISD::TexUnified2DArrayS32S32";
  case NVPTXISD::TexUnified2DArrayS32Float: return "NVPTXISD::TexUnified2DArrayS32Float";
  case NVPTXISD::TexUnified2DArrayS32FloatLevel: return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
  case NVPTXISD::TexUnified2DArrayS32FloatGrad: return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
  case NVPTXISD::TexUnified2DArrayU32S32: return "NVPTXISD::TexUnified2DArrayU32S32";
  case NVPTXISD::TexUnified2DArrayU32Float: return "NVPTXISD::TexUnified2DArrayU32Float";
  case NVPTXISD::TexUnified2DArrayU32FloatLevel: return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
  case NVPTXISD::TexUnified2DArrayU32FloatGrad: return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
  case NVPTXISD::TexUnified3DFloatS32: return "NVPTXISD::TexUnified3DFloatS32";
  case NVPTXISD::TexUnified3DFloatFloat: return "NVPTXISD::TexUnified3DFloatFloat";
  case NVPTXISD::TexUnified3DFloatFloatLevel: return "NVPTXISD::TexUnified3DFloatFloatLevel";
  case NVPTXISD::TexUnified3DFloatFloatGrad: return "NVPTXISD::TexUnified3DFloatFloatGrad";
  case NVPTXISD::TexUnified3DS32S32: return "NVPTXISD::TexUnified3DS32S32";
  case NVPTXISD::TexUnified3DS32Float: return "NVPTXISD::TexUnified3DS32Float";
  case NVPTXISD::TexUnified3DS32FloatLevel: return "NVPTXISD::TexUnified3DS32FloatLevel";
  case NVPTXISD::TexUnified3DS32FloatGrad: return "NVPTXISD::TexUnified3DS32FloatGrad";
  case NVPTXISD::TexUnified3DU32S32: return "NVPTXISD::TexUnified3DU32S32";
  case NVPTXISD::TexUnified3DU32Float: return "NVPTXISD::TexUnified3DU32Float";
  case NVPTXISD::TexUnified3DU32FloatLevel: return "NVPTXISD::TexUnified3DU32FloatLevel";
  case NVPTXISD::TexUnified3DU32FloatGrad: return "NVPTXISD::TexUnified3DU32FloatGrad";
  case NVPTXISD::TexUnifiedCubeFloatFloat: return "NVPTXISD::TexUnifiedCubeFloatFloat";
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel: return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeS32Float: return "NVPTXISD::TexUnifiedCubeS32Float";
  case NVPTXISD::TexUnifiedCubeS32FloatLevel: return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeU32Float: return "NVPTXISD::TexUnifiedCubeU32Float";
  case NVPTXISD::TexUnifiedCubeU32FloatLevel: return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat: return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayS32Float: return "NVPTXISD::TexUnifiedCubeArrayS32Float";
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayU32Float: return "NVPTXISD::TexUnifiedCubeArrayU32Float";
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4UnifiedR2DFloatFloat: return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
  case NVPTXISD::Tld4UnifiedG2DFloatFloat: return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
  case NVPTXISD::Tld4UnifiedB2DFloatFloat: return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
  case NVPTXISD::Tld4UnifiedA2DFloatFloat: return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
  case NVPTXISD::Tld4UnifiedR2DS64Float: return "NVPTXISD::Tld4UnifiedR2DS64Float";
  case NVPTXISD::Tld4UnifiedG2DS64Float: return "NVPTXISD::Tld4UnifiedG2DS64Float";
  case NVPTXISD::Tld4UnifiedB2DS64Float: return "NVPTXISD::Tld4UnifiedB2DS64Float";
  case NVPTXISD::Tld4UnifiedA2DS64Float: return "NVPTXISD::Tld4UnifiedA2DS64Float";
  case NVPTXISD::Tld4UnifiedR2DU64Float: return "NVPTXISD::Tld4UnifiedR2DU64Float";
  case NVPTXISD::Tld4UnifiedG2DU64Float: return "NVPTXISD::Tld4UnifiedG2DU64Float";
  case NVPTXISD::Tld4UnifiedB2DU64Float: return "NVPTXISD::Tld4UnifiedB2DU64Float";
  case NVPTXISD::Tld4UnifiedA2DU64Float: return "NVPTXISD::Tld4UnifiedA2DU64Float";

  case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp";
  case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp";
  case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp";
  case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp";
  case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp";
  case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp";
  case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp";
  case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp";
  case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp";
  case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp";
  case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp";

  case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp";
  case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp";
  case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp";
  case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp";
  case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
  case NVPTXISD::Suld1DArrayV2I16Clamp: return "NVPTXISD::Suld1DArrayV2I16Clamp";
  case NVPTXISD::Suld1DArrayV2I32Clamp: return "NVPTXISD::Suld1DArrayV2I32Clamp";
  case NVPTXISD::Suld1DArrayV2I64Clamp: return "NVPTXISD::Suld1DArrayV2I64Clamp";
  case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
  case NVPTXISD::Suld1DArrayV4I16Clamp: return "NVPTXISD::Suld1DArrayV4I16Clamp";
  case NVPTXISD::Suld1DArrayV4I32Clamp: return "NVPTXISD::Suld1DArrayV4I32Clamp";

  case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp";
  case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp";
  case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp";
  case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp";
  case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp";
  case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp";
  case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp";
  case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp";
  case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp";
  case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp";
  case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp";

  case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp";
  case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp";
  case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp";
  case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp";
  case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
  case NVPTXISD::Suld2DArrayV2I16Clamp: return "NVPTXISD::Suld2DArrayV2I16Clamp";
  case NVPTXISD::Suld2DArrayV2I32Clamp: return "NVPTXISD::Suld2DArrayV2I32Clamp";
  case NVPTXISD::Suld2DArrayV2I64Clamp: return "NVPTXISD::Suld2DArrayV2I64Clamp";
  case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
  case NVPTXISD::Suld2DArrayV4I16Clamp: return "NVPTXISD::Suld2DArrayV4I16Clamp";
  case NVPTXISD::Suld2DArrayV4I32Clamp: return "NVPTXISD::Suld2DArrayV4I32Clamp";

  case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp";
  case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp";
  case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp";
  case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp";
  case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp";
  case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp";
  case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp";
  case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp";
  case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp";
  case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp";
  case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp";

  case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
  case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
  case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
  case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap";
  case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
  case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
  case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
  case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap";
  case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
  case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
  case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";

  case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
  case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
  case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
  case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap";
  case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
  case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
  case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
  case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap";
  case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
  case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
  case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";

  case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
  case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap";
  case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap";
  case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap";
  case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap";
  case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap";
  case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap";
  case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap";
  case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap";
  case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap";
  case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap";

  case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap";
  case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap";
  case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap";
  case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap";
  case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap";
  case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap";
  case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap";
  case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap";
  case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap";
  case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap";
  case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap";

  case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap";
  case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap";
  case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap";
  case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap";
  case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap";
  case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap";
  case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap";
  case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap";
  case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap";
  case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap";
  case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap";

  case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero";
  case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero";
  case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero";
  case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero";
  case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero";
  case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero";
  case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero";
  case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero";
  case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero";
  case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero";
  case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero";

  case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero";
  case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero";
  case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero";
  case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero";
  case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero";
  case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero";
  case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero";
  case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero";
  case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero";
  case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero";
  case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero";

  case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero";
  case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero";
  case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero";
  case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero";
  case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero";
  case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero";
  case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero";
  case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero";
  case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero";
  case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero";
  case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero";

  case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero";
  case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero";
  case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero";
  case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero";
  case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero";
  case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero";
  case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero";
  case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero";
  case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero";
  case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero";
  case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero";

  case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero";
  case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero";
  case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero";
  case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero";
  case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero";
  case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero";
  case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero";
  case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero";
  case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero";
  case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero";
  case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero";
  }
}

TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
  if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
    return TypeSplitVector;

  return TargetLoweringBase::getPreferredVectorAction(VT);
}

SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
  return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
}
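// For illustration (an assumed example, not part of this file): for a callee
// that takes one i32 argument and returns i32, getPrototype below produces a
// string along the lines of
//   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _);
// (scalar integer parameters and return values narrower than 32 bits are
// widened to .b32, as the size clamping in the function shows).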
std::string
NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  unsigned retAlignment,
                                  const ImmutableCallSite *CS) const {

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return "";

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  if (retTy->getTypeID() == Type::VoidTyID) {
    O << "()";
  } else {
    O << "(";
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
      unsigned size = 0;
      if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
        size = ITy->getBitWidth();
        if (size < 32)
          size = 32;
      } else {
        assert(retTy->isFloatingPointTy() &&
               "Floating point type expected here");
        size = retTy->getPrimitiveSizeInBits();
      }

      O << ".param .b" << size << " _";
    } else if (isa<PointerType>(retTy)) {
      O << ".param .b" << getPointerTy().getSizeInBits() << " _";
    } else {
      if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) {
        O << ".param .align "
          << retAlignment
          << " .b8 _["
          << getDataLayout()->getTypeAllocSize(retTy) << "]";
      } else {
        assert(false && "Unknown return type");
      }
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;
  MVT thePointerTy = getPointerTy();

  unsigned OIdx = 0;
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (Outs[OIdx].Flags.isByVal() == false) {
      if (Ty->isAggregateType() || Ty->isVectorTy()) {
        unsigned align = 0;
        const CallInst *CallI = cast<CallInst>(CS->getInstruction());
        const DataLayout *TD = getDataLayout();
        // +1 because index 0 is reserved for return type alignment
        if (!llvm::getAlign(*CallI, i + 1, align))
          align = TD->getABITypeAlignment(Ty);
        unsigned sz = TD->getTypeAllocSize(Ty);
        O << ".param .align " << align << " .b8 ";
        O << "_";
        O << "[" << sz << "]";
        // update the index for Outs
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*this, Ty, vtparts);
        if (unsigned len = vtparts.size())
          OIdx += len - 1;
        continue;
      }
      // i8 types in IR will be i16 types in SDAG
      assert((getValueType(Ty) == Outs[OIdx].VT ||
              (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
             "type mismatch between callee prototype and arguments");
      // scalar type
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      O << ".param .b" << sz << " ";
      O << "_";
      continue;
    }
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    unsigned align = Outs[OIdx].Flags.getByValAlign();
    unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
    O << ".param .align " << align << " .b8 ";
    O << "_";
    O << "[" << sz << "]";
  }
  O << ");";
  return O.str();
}

unsigned
NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
                                          const ImmutableCallSite *CS,
                                          Type *Ty,
                                          unsigned Idx) const {
  const DataLayout *TD = getDataLayout();
  unsigned Align = 0;
  const Value *DirectCallee = CS->getCalledFunction();

  if (!DirectCallee) {
    // We don't have a direct function symbol, but that may be because of
    // constant cast instructions in the call.
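    // For illustration (an assumed example, not part of this file): in IR
    // such as
    //   call i32 bitcast (i32 (i32)* @f to i32 (i64)*)(i64 %x)
    // the called value is a ConstantExpr cast, and the loop below strips
    // such casts to recover the underlying Function @f.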
    const Instruction *CalleeI = CS->getInstruction();
    assert(CalleeI && "Call target is not a function or derived value?");

    // With bitcast'd call targets, the instruction will be the call
    if (isa<CallInst>(CalleeI)) {
      // Check if we have call alignment metadata
      if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align))
        return Align;

      const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
      // Ignore any bitcast instructions
      while (isa<ConstantExpr>(CalleeV)) {
        const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
        if (!CE->isCast())
          break;
        // Look through the bitcast
        CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
      }

      // We have now looked past all of the bitcasts. Do we finally have a
      // Function?
      if (isa<Function>(CalleeV))
        DirectCallee = CalleeV;
    }
  }

  // Check for function alignment information if we found that the
  // ultimate target is a Function
  if (DirectCallee)
    if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align))
      return Align;

  // Call is indirect or alignment information is not available, fall back to
  // the ABI type alignment
  return TD->getABITypeAlignment(Ty);
}

SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.getArgs();
  Type *retTy = CLI.RetTy;
  ImmutableCallSite *CS = CLI.CS;

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;
  const DataLayout *TD = getDataLayout();
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *F = MF.getFunction();

  SDValue tempChain = Chain;
  Chain =
      DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
                           dl);
  SDValue InFlag = Chain.getValue(1);

  unsigned paramCount = 0;
  // Args.size() and Outs.size() need not match.
  // Outs.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Outs)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Outs.
  // So a different index should be used for indexing into Outs/OutVals.
  // See similar issue in LowerFormalArguments.
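  // For illustration (an assumed example, not part of this file): an argument
  // of IR type {i32, i32} contributes one entry to Args but two entries (one
  // per field) to Outs/OutVals, which is why OIdx below can run ahead of i.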
01072 unsigned OIdx = 0; 01073 // Declare the .params or .reg need to pass values 01074 // to the function 01075 for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { 01076 EVT VT = Outs[OIdx].VT; 01077 Type *Ty = Args[i].Ty; 01078 01079 if (Outs[OIdx].Flags.isByVal() == false) { 01080 if (Ty->isAggregateType()) { 01081 // aggregate 01082 SmallVector<EVT, 16> vtparts; 01083 SmallVector<uint64_t, 16> Offsets; 01084 ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0); 01085 01086 unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); 01087 // declare .param .align <align> .b8 .param<n>[<size>]; 01088 unsigned sz = TD->getTypeAllocSize(Ty); 01089 SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 01090 SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), 01091 DAG.getConstant(paramCount, MVT::i32), 01092 DAG.getConstant(sz, MVT::i32), InFlag }; 01093 Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, 01094 DeclareParamOps); 01095 InFlag = Chain.getValue(1); 01096 for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { 01097 EVT elemtype = vtparts[j]; 01098 unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]); 01099 if (elemtype.isInteger() && (sz < 8)) 01100 sz = 8; 01101 SDValue StVal = OutVals[OIdx]; 01102 if (elemtype.getSizeInBits() < 16) { 01103 StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); 01104 } 01105 SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 01106 SDValue CopyParamOps[] = { Chain, 01107 DAG.getConstant(paramCount, MVT::i32), 01108 DAG.getConstant(Offsets[j], MVT::i32), 01109 StVal, InFlag }; 01110 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, 01111 CopyParamVTs, CopyParamOps, 01112 elemtype, MachinePointerInfo(), 01113 ArgAlign); 01114 InFlag = Chain.getValue(1); 01115 ++OIdx; 01116 } 01117 if (vtparts.size() > 0) 01118 --OIdx; 01119 ++paramCount; 01120 continue; 01121 } 01122 if (Ty->isVectorTy()) { 01123 EVT ObjectVT = getValueType(Ty); 01124 unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); 01125 // declare .param .align <align> .b8 .param<n>[<size>]; 01126 unsigned sz = TD->getTypeAllocSize(Ty); 01127 SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 01128 SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), 01129 DAG.getConstant(paramCount, MVT::i32), 01130 DAG.getConstant(sz, MVT::i32), InFlag }; 01131 Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, 01132 DeclareParamOps); 01133 InFlag = Chain.getValue(1); 01134 unsigned NumElts = ObjectVT.getVectorNumElements(); 01135 EVT EltVT = ObjectVT.getVectorElementType(); 01136 EVT MemVT = EltVT; 01137 bool NeedExtend = false; 01138 if (EltVT.getSizeInBits() < 16) { 01139 NeedExtend = true; 01140 EltVT = MVT::i16; 01141 } 01142 01143 // V1 store 01144 if (NumElts == 1) { 01145 SDValue Elt = OutVals[OIdx++]; 01146 if (NeedExtend) 01147 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt); 01148 01149 SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 01150 SDValue CopyParamOps[] = { Chain, 01151 DAG.getConstant(paramCount, MVT::i32), 01152 DAG.getConstant(0, MVT::i32), Elt, 01153 InFlag }; 01154 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, 01155 CopyParamVTs, CopyParamOps, 01156 MemVT, MachinePointerInfo()); 01157 InFlag = Chain.getValue(1); 01158 } else if (NumElts == 2) { 01159 SDValue Elt0 = OutVals[OIdx++]; 01160 SDValue Elt1 = OutVals[OIdx++]; 01161 if (NeedExtend) { 01162 Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, 
MVT::i16, Elt0); 01163 Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1); 01164 } 01165 01166 SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 01167 SDValue CopyParamOps[] = { Chain, 01168 DAG.getConstant(paramCount, MVT::i32), 01169 DAG.getConstant(0, MVT::i32), Elt0, Elt1, 01170 InFlag }; 01171 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl, 01172 CopyParamVTs, CopyParamOps, 01173 MemVT, MachinePointerInfo()); 01174 InFlag = Chain.getValue(1); 01175 } else { 01176 unsigned curOffset = 0; 01177 // V4 stores 01178 // We have at least 4 elements (<3 x Ty> expands to 4 elements) and 01179 // the 01180 // vector will be expanded to a power of 2 elements, so we know we can 01181 // always round up to the next multiple of 4 when creating the vector 01182 // stores. 01183 // e.g. 4 elem => 1 st.v4 01184 // 6 elem => 2 st.v4 01185 // 8 elem => 2 st.v4 01186 // 11 elem => 3 st.v4 01187 unsigned VecSize = 4; 01188 if (EltVT.getSizeInBits() == 64) 01189 VecSize = 2; 01190 01191 // This is potentially only part of a vector, so assume all elements 01192 // are packed together. 01193 unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize; 01194 01195 for (unsigned i = 0; i < NumElts; i += VecSize) { 01196 // Get values 01197 SDValue StoreVal; 01198 SmallVector<SDValue, 8> Ops; 01199 Ops.push_back(Chain); 01200 Ops.push_back(DAG.getConstant(paramCount, MVT::i32)); 01201 Ops.push_back(DAG.getConstant(curOffset, MVT::i32)); 01202 01203 unsigned Opc = NVPTXISD::StoreParamV2; 01204 01205 StoreVal = OutVals[OIdx++]; 01206 if (NeedExtend) 01207 StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 01208 Ops.push_back(StoreVal); 01209 01210 if (i + 1 < NumElts) { 01211 StoreVal = OutVals[OIdx++]; 01212 if (NeedExtend) 01213 StoreVal = 01214 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 01215 } else { 01216 StoreVal = DAG.getUNDEF(EltVT); 01217 } 01218 Ops.push_back(StoreVal); 01219 01220 if (VecSize == 4) { 01221 Opc = NVPTXISD::StoreParamV4; 01222 if (i + 2 < NumElts) { 01223 StoreVal = OutVals[OIdx++]; 01224 if (NeedExtend) 01225 StoreVal = 01226 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 01227 } else { 01228 StoreVal = DAG.getUNDEF(EltVT); 01229 } 01230 Ops.push_back(StoreVal); 01231 01232 if (i + 3 < NumElts) { 01233 StoreVal = OutVals[OIdx++]; 01234 if (NeedExtend) 01235 StoreVal = 01236 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 01237 } else { 01238 StoreVal = DAG.getUNDEF(EltVT); 01239 } 01240 Ops.push_back(StoreVal); 01241 } 01242 01243 Ops.push_back(InFlag); 01244 01245 SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 01246 Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops, 01247 MemVT, MachinePointerInfo()); 01248 InFlag = Chain.getValue(1); 01249 curOffset += PerStoreOffset; 01250 } 01251 } 01252 ++paramCount; 01253 --OIdx; 01254 continue; 01255 } 01256 // Plain scalar 01257 // for ABI, declare .param .b<size> .param<n>; 01258 unsigned sz = VT.getSizeInBits(); 01259 bool needExtend = false; 01260 if (VT.isInteger()) { 01261 if (sz < 16) 01262 needExtend = true; 01263 if (sz < 32) 01264 sz = 32; 01265 } 01266 SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); 01267 SDValue DeclareParamOps[] = { Chain, 01268 DAG.getConstant(paramCount, MVT::i32), 01269 DAG.getConstant(sz, MVT::i32), 01270 DAG.getConstant(0, MVT::i32), InFlag }; 01271 Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, 01272 DeclareParamOps); 01273 InFlag = Chain.getValue(1); 01274 SDValue OutV 
          = OutVals[OIdx];
      if (needExtend) {
        // zext/sext i1 to i16
        unsigned opc = ISD::ZERO_EXTEND;
        if (Outs[OIdx].Flags.isSExt())
          opc = ISD::SIGN_EXTEND;
        OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                 DAG.getConstant(0, MVT::i32), OutV, InFlag };

      unsigned opcode = NVPTXISD::StoreParam;
      if (Outs[OIdx].Flags.isZExt())
        opcode = NVPTXISD::StoreParamU32;
      else if (Outs[OIdx].Flags.isSExt())
        opcode = NVPTXISD::StoreParamS32;
      Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
                                      VT, MachinePointerInfo());

      InFlag = Chain.getValue(1);
      ++paramCount;
      continue;
    }
    // struct or vector
    SmallVector<EVT, 16> vtparts;
    SmallVector<uint64_t, 16> Offsets;
    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy && "Type of a byval parameter should be pointer");
    ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0);

    // declare .param .align <align> .b8 .param<n>[<size>];
    unsigned sz = Outs[OIdx].Flags.getByValSize();
    SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
    // The ByValAlign in Outs[OIdx].Flags is always set at this point, so we
    // don't need to worry about whether it is the natural alignment or not.
    // See TargetLowering::LowerCallTo().
    SDValue DeclareParamOps[] = {
      Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32),
      DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
      InFlag
    };
    Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                        DeclareParamOps);
    InFlag = Chain.getValue(1);
    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
      EVT elemtype = vtparts[j];
      int curOffset = Offsets[j];
      unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
      SDValue srcAddr =
          DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
                      DAG.getConstant(curOffset, getPointerTy()));
      SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                   MachinePointerInfo(), false, false, false,
                                   PartAlign);
      if (elemtype.getSizeInBits() < 16) {
        theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                 DAG.getConstant(curOffset, MVT::i32), theVal,
                                 InFlag };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                                      CopyParamOps, elemtype,
                                      MachinePointerInfo());

      InFlag = Chain.getValue(1);
    }
    ++paramCount;
  }

  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
  unsigned retAlignment = 0;

  // Handle Result
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> resvtparts;
    ComputeValueVTs(*this, retTy, resvtparts);

    // Declare
    //   .param .align 16 .b8 retval0[<size-in-bytes>], or
    //   .param .b<size-in-bits> retval0
    unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
    if (retTy->isSingleValueType()) {
      // Scalar needs to be at least 32-bit wide
      if (resultsz < 32)
        resultsz = 32;
      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                                  DAG.getConstant(resultsz, MVT::i32),
                                  DAG.getConstant(0, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                          DeclareRetOps);
      InFlag = Chain.getValue(1);
    } else {
      retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareRetOps[] = { Chain,
                                  DAG.getConstant(retAlignment, MVT::i32),
                                  DAG.getConstant(resultsz / 8, MVT::i32),
                                  DAG.getConstant(0, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
                          DeclareRetOps);
      InFlag = Chain.getValue(1);
    }
  }

  if (!Func) {
    // This is the indirect function call case: PTX requires a prototype of
    // the form
    //   proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
    // to be emitted, and the label has to be used as the last arg of the call
    // instruction.
    // The prototype is embedded in a string and put as the operand for a
    // CallPrototype SDNode which will print out to the value of the string.
    SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS);
    const char *ProtoStr =
        nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
    SDValue ProtoOps[] = {
      Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
    };
    Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
    InFlag = Chain.getValue(1);
  }
  // Op to just print "call"
  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue PrintCallOps[] = {
    Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag
  };
  Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
                      dl, PrintCallVTs, PrintCallOps);
  InFlag = Chain.getValue(1);

  // Ops to print out the function name
  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
  InFlag = Chain.getValue(1);

  // Ops to print out the param list
  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgBeginOps[] = { Chain, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
                      CallArgBeginOps);
  InFlag = Chain.getValue(1);

  for (unsigned i = 0, e = paramCount; i != e; ++i) {
    unsigned opcode;
    if (i == (e - 1))
      opcode = NVPTXISD::LastCallArg;
    else
      opcode = NVPTXISD::CallArg;
    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                             DAG.getConstant(i, MVT::i32), InFlag };
    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
    InFlag = Chain.getValue(1);
  }
  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ?
1 : 0, MVT::i32), 01436 InFlag }; 01437 Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); 01438 InFlag = Chain.getValue(1); 01439 01440 if (!Func) { 01441 SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); 01442 SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32), 01443 InFlag }; 01444 Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); 01445 InFlag = Chain.getValue(1); 01446 } 01447 01448 // Generate loads from param memory/moves from registers for result 01449 if (Ins.size() > 0) { 01450 if (retTy && retTy->isVectorTy()) { 01451 EVT ObjectVT = getValueType(retTy); 01452 unsigned NumElts = ObjectVT.getVectorNumElements(); 01453 EVT EltVT = ObjectVT.getVectorElementType(); 01454 assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters( 01455 F->getContext(), ObjectVT) == NumElts && 01456 "Vector was not scalarized"); 01457 unsigned sz = EltVT.getSizeInBits(); 01458 bool needTruncate = sz < 8 ? true : false; 01459 01460 if (NumElts == 1) { 01461 // Just a simple load 01462 SmallVector<EVT, 4> LoadRetVTs; 01463 if (EltVT == MVT::i1 || EltVT == MVT::i8) { 01464 // If loading i1/i8 result, generate 01465 // load.b8 i16 01466 // if i1 01467 // trunc i16 to i1 01468 LoadRetVTs.push_back(MVT::i16); 01469 } else 01470 LoadRetVTs.push_back(EltVT); 01471 LoadRetVTs.push_back(MVT::Other); 01472 LoadRetVTs.push_back(MVT::Glue); 01473 SmallVector<SDValue, 4> LoadRetOps; 01474 LoadRetOps.push_back(Chain); 01475 LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); 01476 LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); 01477 LoadRetOps.push_back(InFlag); 01478 SDValue retval = DAG.getMemIntrinsicNode( 01479 NVPTXISD::LoadParam, dl, 01480 DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); 01481 Chain = retval.getValue(1); 01482 InFlag = retval.getValue(2); 01483 SDValue Ret0 = retval; 01484 if (needTruncate) 01485 Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0); 01486 InVals.push_back(Ret0); 01487 } else if (NumElts == 2) { 01488 // LoadV2 01489 SmallVector<EVT, 4> LoadRetVTs; 01490 if (EltVT == MVT::i1 || EltVT == MVT::i8) { 01491 // If loading i1/i8 result, generate 01492 // load.b8 i16 01493 // if i1 01494 // trunc i16 to i1 01495 LoadRetVTs.push_back(MVT::i16); 01496 LoadRetVTs.push_back(MVT::i16); 01497 } else { 01498 LoadRetVTs.push_back(EltVT); 01499 LoadRetVTs.push_back(EltVT); 01500 } 01501 LoadRetVTs.push_back(MVT::Other); 01502 LoadRetVTs.push_back(MVT::Glue); 01503 SmallVector<SDValue, 4> LoadRetOps; 01504 LoadRetOps.push_back(Chain); 01505 LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); 01506 LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); 01507 LoadRetOps.push_back(InFlag); 01508 SDValue retval = DAG.getMemIntrinsicNode( 01509 NVPTXISD::LoadParamV2, dl, 01510 DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); 01511 Chain = retval.getValue(2); 01512 InFlag = retval.getValue(3); 01513 SDValue Ret0 = retval.getValue(0); 01514 SDValue Ret1 = retval.getValue(1); 01515 if (needTruncate) { 01516 Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0); 01517 InVals.push_back(Ret0); 01518 Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1); 01519 InVals.push_back(Ret1); 01520 } else { 01521 InVals.push_back(Ret0); 01522 InVals.push_back(Ret1); 01523 } 01524 } else { 01525 // Split into N LoadV4 01526 unsigned Ofst = 0; 01527 unsigned VecSize = 4; 01528 unsigned Opc = NVPTXISD::LoadParamV4; 01529 if (EltVT.getSizeInBits() == 64) { 01530 VecSize = 2; 01531 Opc = 
NVPTXISD::LoadParamV2; 01532 } 01533 EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); 01534 for (unsigned i = 0; i < NumElts; i += VecSize) { 01535 SmallVector<EVT, 8> LoadRetVTs; 01536 if (EltVT == MVT::i1 || EltVT == MVT::i8) { 01537 // If loading i1/i8 result, generate 01538 // load.b8 i16 01539 // if i1 01540 // trunc i16 to i1 01541 for (unsigned j = 0; j < VecSize; ++j) 01542 LoadRetVTs.push_back(MVT::i16); 01543 } else { 01544 for (unsigned j = 0; j < VecSize; ++j) 01545 LoadRetVTs.push_back(EltVT); 01546 } 01547 LoadRetVTs.push_back(MVT::Other); 01548 LoadRetVTs.push_back(MVT::Glue); 01549 SmallVector<SDValue, 4> LoadRetOps; 01550 LoadRetOps.push_back(Chain); 01551 LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); 01552 LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32)); 01553 LoadRetOps.push_back(InFlag); 01554 SDValue retval = DAG.getMemIntrinsicNode( 01555 Opc, dl, DAG.getVTList(LoadRetVTs), 01556 LoadRetOps, EltVT, MachinePointerInfo()); 01557 if (VecSize == 2) { 01558 Chain = retval.getValue(2); 01559 InFlag = retval.getValue(3); 01560 } else { 01561 Chain = retval.getValue(4); 01562 InFlag = retval.getValue(5); 01563 } 01564 01565 for (unsigned j = 0; j < VecSize; ++j) { 01566 if (i + j >= NumElts) 01567 break; 01568 SDValue Elt = retval.getValue(j); 01569 if (needTruncate) 01570 Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); 01571 InVals.push_back(Elt); 01572 } 01573 Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 01574 } 01575 } 01576 } else { 01577 SmallVector<EVT, 16> VTs; 01578 SmallVector<uint64_t, 16> Offsets; 01579 ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0); 01580 assert(VTs.size() == Ins.size() && "Bad value decomposition"); 01581 unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0); 01582 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 01583 unsigned sz = VTs[i].getSizeInBits(); 01584 unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]); 01585 bool needTruncate = sz < 8 ? true : false; 01586 if (VTs[i].isInteger() && (sz < 8)) 01587 sz = 8; 01588 01589 SmallVector<EVT, 4> LoadRetVTs; 01590 EVT TheLoadType = VTs[i]; 01591 if (retTy->isIntegerTy() && 01592 TD->getTypeAllocSizeInBits(retTy) < 32) { 01593 // This is for integer types only, and specifically not for 01594 // aggregates. 
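        // The declare path above widened any sub-32-bit scalar return to a
        // 32-bit .param (resultsz is rounded up to 32), so the matching load
        // here must also be done at 32 bits, with the value narrowed back to
        // Ins[i].VT afterwards where needed.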
01595 LoadRetVTs.push_back(MVT::i32); 01596 TheLoadType = MVT::i32; 01597 } else if (sz < 16) { 01598 // If loading i1/i8 result, generate 01599 // load i8 (-> i16) 01600 // trunc i16 to i1/i8 01601 LoadRetVTs.push_back(MVT::i16); 01602 } else 01603 LoadRetVTs.push_back(Ins[i].VT); 01604 LoadRetVTs.push_back(MVT::Other); 01605 LoadRetVTs.push_back(MVT::Glue); 01606 01607 SmallVector<SDValue, 4> LoadRetOps; 01608 LoadRetOps.push_back(Chain); 01609 LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); 01610 LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32)); 01611 LoadRetOps.push_back(InFlag); 01612 SDValue retval = DAG.getMemIntrinsicNode( 01613 NVPTXISD::LoadParam, dl, 01614 DAG.getVTList(LoadRetVTs), LoadRetOps, 01615 TheLoadType, MachinePointerInfo(), AlignI); 01616 Chain = retval.getValue(1); 01617 InFlag = retval.getValue(2); 01618 SDValue Ret0 = retval.getValue(0); 01619 if (needTruncate) 01620 Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0); 01621 InVals.push_back(Ret0); 01622 } 01623 } 01624 } 01625 01626 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), 01627 DAG.getIntPtrConstant(uniqueCallSite + 1, true), 01628 InFlag, dl); 01629 uniqueCallSite++; 01630 01631 // set isTailCall to false for now, until we figure out how to express 01632 // tail call optimization in PTX 01633 isTailCall = false; 01634 return Chain; 01635 } 01636 01637 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() 01638 // (see LegalizeDAG.cpp). This is slow and uses local memory. 01639 // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 01640 SDValue 01641 NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { 01642 SDNode *Node = Op.getNode(); 01643 SDLoc dl(Node); 01644 SmallVector<SDValue, 8> Ops; 01645 unsigned NumOperands = Node->getNumOperands(); 01646 for (unsigned i = 0; i < NumOperands; ++i) { 01647 SDValue SubOp = Node->getOperand(i); 01648 EVT VVT = SubOp.getNode()->getValueType(0); 01649 EVT EltVT = VVT.getVectorElementType(); 01650 unsigned NumSubElem = VVT.getVectorNumElements(); 01651 for (unsigned j = 0; j < NumSubElem; ++j) { 01652 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, 01653 DAG.getIntPtrConstant(j))); 01654 } 01655 } 01656 return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops); 01657 } 01658 01659 /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which 01660 /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift 01661 /// amount, or 01662 /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift 01663 /// amount. 01664 SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, 01665 SelectionDAG &DAG) const { 01666 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 01667 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 01668 01669 EVT VT = Op.getValueType(); 01670 unsigned VTBits = VT.getSizeInBits(); 01671 SDLoc dl(Op); 01672 SDValue ShOpLo = Op.getOperand(0); 01673 SDValue ShOpHi = Op.getOperand(1); 01674 SDValue ShAmt = Op.getOperand(2); 01675 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 01676 01677 if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { 01678 01679 // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 
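    // Illustrative example: with {aHi, aLo} = {0x00000012, 0x34567890} and
    // Amt = 8, the funnel shift produces dLo = 0x12345678 (aLo >> 8 with the
    // low 8 bits of aHi shifted into the top) and dHi = aHi >> 8 = 0.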
01680 // {dHi, dLo} = {aHi, aLo} >> Amt 01681 // dHi = aHi >> Amt 01682 // dLo = shf.r.clamp aLo, aHi, Amt 01683 01684 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 01685 SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, 01686 ShAmt); 01687 01688 SDValue Ops[2] = { Lo, Hi }; 01689 return DAG.getMergeValues(Ops, dl); 01690 } 01691 else { 01692 01693 // {dHi, dLo} = {aHi, aLo} >> Amt 01694 // - if (Amt>=size) then 01695 // dLo = aHi >> (Amt-size) 01696 // dHi = aHi >> Amt (this is either all 0 or all 1) 01697 // else 01698 // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) 01699 // dHi = aHi >> Amt 01700 01701 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 01702 DAG.getConstant(VTBits, MVT::i32), ShAmt); 01703 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 01704 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 01705 DAG.getConstant(VTBits, MVT::i32)); 01706 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 01707 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 01708 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 01709 01710 SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, 01711 DAG.getConstant(VTBits, MVT::i32), ISD::SETGE); 01712 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 01713 SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); 01714 01715 SDValue Ops[2] = { Lo, Hi }; 01716 return DAG.getMergeValues(Ops, dl); 01717 } 01718 } 01719 01720 /// LowerShiftLeftParts - Lower SHL_PARTS, which 01721 /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift 01722 /// amount, or 01723 /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift 01724 /// amount. 01725 SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, 01726 SelectionDAG &DAG) const { 01727 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 01728 assert(Op.getOpcode() == ISD::SHL_PARTS); 01729 01730 EVT VT = Op.getValueType(); 01731 unsigned VTBits = VT.getSizeInBits(); 01732 SDLoc dl(Op); 01733 SDValue ShOpLo = Op.getOperand(0); 01734 SDValue ShOpHi = Op.getOperand(1); 01735 SDValue ShAmt = Op.getOperand(2); 01736 01737 if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { 01738 01739 // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 
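    // Illustrative example: with {aHi, aLo} = {0x00000012, 0x34567890} and
    // Amt = 8, the lowering below produces dHi = (aHi << 8) | (aLo >> 24) =
    // 0x00001234 and dLo = aLo << 8 = 0x56789000.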
    //   {dHi, dLo} = {aHi, aLo} << Amt
    //   dHi = shf.l.clamp aLo, aHi, Amt
    //   dLo = aLo << Amt

    SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
                             ShAmt);
    SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);

    SDValue Ops[2] = { Lo, Hi };
    return DAG.getMergeValues(Ops, dl);
  }
  else {

    // {dHi, dLo} = {aHi, aLo} << Amt
    // - if (Amt>=size) then
    //      dLo = aLo << Amt (all 0)
    //      dHi = aLo << (Amt-size)
    //   else
    //      dLo = aLo << Amt
    //      dHi = (aHi << Amt) | (aLo >> (size-Amt))

    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                   DAG.getConstant(VTBits, MVT::i32), ShAmt);
    SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                     DAG.getConstant(VTBits, MVT::i32));
    SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
    SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
    SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);

    SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
                               DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
    SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
    SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);

    SDValue Ops[2] = { Lo, Hi };
    return DAG.getMergeValues(Ops, dl);
  }
}

SDValue
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::RETURNADDR:
    return SDValue();
  case ISD::FRAMEADDR:
    return SDValue();
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return Op;
  case ISD::BUILD_VECTOR:
  case ISD::EXTRACT_SUBVECTOR:
    return Op;
  case ISD::CONCAT_VECTORS:
    return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::STORE:
    return LowerSTORE(Op, DAG);
  case ISD::LOAD:
    return LowerLOAD(Op, DAG);
  case ISD::SHL_PARTS:
    return LowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:
    return LowerShiftRightParts(Op, DAG);
  default:
    llvm_unreachable("Custom lowering not defined for operation");
  }
}

SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType() == MVT::i1)
    return LowerLOADi1(Op, DAG);
  else
    return SDValue();
}

// v = ld i1* addr
//   =>
// v1 = ld i8* addr (-> i16)
// v = trunc i16 to i1
SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  LoadSDNode *LD = cast<LoadSDNode>(Node);
  SDLoc dl(Node);
  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
  assert(Node->getValueType(0) == MVT::i1 &&
         "Custom lowering for i1 load only");
  SDValue newLD =
      DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
                  LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
                  LD->isInvariant(), LD->getAlignment());
  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
  // The legalizer (the caller) is expecting two values from the legalized
  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
  // in LegalizeDAG.cpp which also uses MergeValues.
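  // The merged pair is, in order, the i1 result and the chain, mirroring the
  // (value, chain) results of the original LOAD node.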
01836 SDValue Ops[] = { result, LD->getChain() }; 01837 return DAG.getMergeValues(Ops, dl); 01838 } 01839 01840 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 01841 EVT ValVT = Op.getOperand(1).getValueType(); 01842 if (ValVT == MVT::i1) 01843 return LowerSTOREi1(Op, DAG); 01844 else if (ValVT.isVector()) 01845 return LowerSTOREVector(Op, DAG); 01846 else 01847 return SDValue(); 01848 } 01849 01850 SDValue 01851 NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { 01852 SDNode *N = Op.getNode(); 01853 SDValue Val = N->getOperand(1); 01854 SDLoc DL(N); 01855 EVT ValVT = Val.getValueType(); 01856 01857 if (ValVT.isVector()) { 01858 // We only handle "native" vector sizes for now, e.g. <4 x double> is not 01859 // legal. We can (and should) split that into 2 stores of <2 x double> here 01860 // but I'm leaving that as a TODO for now. 01861 if (!ValVT.isSimple()) 01862 return SDValue(); 01863 switch (ValVT.getSimpleVT().SimpleTy) { 01864 default: 01865 return SDValue(); 01866 case MVT::v2i8: 01867 case MVT::v2i16: 01868 case MVT::v2i32: 01869 case MVT::v2i64: 01870 case MVT::v2f32: 01871 case MVT::v2f64: 01872 case MVT::v4i8: 01873 case MVT::v4i16: 01874 case MVT::v4i32: 01875 case MVT::v4f32: 01876 // This is a "native" vector type 01877 break; 01878 } 01879 01880 MemSDNode *MemSD = cast<MemSDNode>(N); 01881 const DataLayout *TD = getDataLayout(); 01882 01883 unsigned Align = MemSD->getAlignment(); 01884 unsigned PrefAlign = 01885 TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); 01886 if (Align < PrefAlign) { 01887 // This store is not sufficiently aligned, so bail out and let this vector 01888 // store be scalarized. Note that we may still be able to emit smaller 01889 // vector stores. For example, if we are storing a <4 x float> with an 01890 // alignment of 8, this check will fail but the legalizer will try again 01891 // with 2 x <2 x float>, which will succeed with an alignment of 8. 01892 return SDValue(); 01893 } 01894 01895 unsigned Opcode = 0; 01896 EVT EltVT = ValVT.getVectorElementType(); 01897 unsigned NumElts = ValVT.getVectorNumElements(); 01898 01899 // Since StoreV2 is a target node, we cannot rely on DAG type legalization. 01900 // Therefore, we must ensure the type is legal. For i1 and i8, we set the 01901 // stored type to i16 and propagate the "real" type as the memory type. 
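    // For example, a <4 x i8> store becomes a StoreV4 whose four value
    // operands are any-extended to i16, while the memory VT remains v4i8 so
    // that only one byte per element is actually written.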
01902 bool NeedExt = false; 01903 if (EltVT.getSizeInBits() < 16) 01904 NeedExt = true; 01905 01906 switch (NumElts) { 01907 default: 01908 return SDValue(); 01909 case 2: 01910 Opcode = NVPTXISD::StoreV2; 01911 break; 01912 case 4: { 01913 Opcode = NVPTXISD::StoreV4; 01914 break; 01915 } 01916 } 01917 01918 SmallVector<SDValue, 8> Ops; 01919 01920 // First is the chain 01921 Ops.push_back(N->getOperand(0)); 01922 01923 // Then the split values 01924 for (unsigned i = 0; i < NumElts; ++i) { 01925 SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, 01926 DAG.getIntPtrConstant(i)); 01927 if (NeedExt) 01928 ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); 01929 Ops.push_back(ExtVal); 01930 } 01931 01932 // Then any remaining arguments 01933 for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { 01934 Ops.push_back(N->getOperand(i)); 01935 } 01936 01937 SDValue NewSt = DAG.getMemIntrinsicNode( 01938 Opcode, DL, DAG.getVTList(MVT::Other), Ops, 01939 MemSD->getMemoryVT(), MemSD->getMemOperand()); 01940 01941 //return DCI.CombineTo(N, NewSt, true); 01942 return NewSt; 01943 } 01944 01945 return SDValue(); 01946 } 01947 01948 // st i1 v, addr 01949 // => 01950 // v1 = zxt v to i16 01951 // st.u8 i16, addr 01952 SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { 01953 SDNode *Node = Op.getNode(); 01954 SDLoc dl(Node); 01955 StoreSDNode *ST = cast<StoreSDNode>(Node); 01956 SDValue Tmp1 = ST->getChain(); 01957 SDValue Tmp2 = ST->getBasePtr(); 01958 SDValue Tmp3 = ST->getValue(); 01959 assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); 01960 unsigned Alignment = ST->getAlignment(); 01961 bool isVolatile = ST->isVolatile(); 01962 bool isNonTemporal = ST->isNonTemporal(); 01963 Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); 01964 SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, 01965 ST->getPointerInfo(), MVT::i8, isNonTemporal, 01966 isVolatile, Alignment); 01967 return Result; 01968 } 01969 01970 SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, 01971 int idx, EVT v) const { 01972 std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); 01973 std::stringstream suffix; 01974 suffix << idx; 01975 *name += suffix.str(); 01976 return DAG.getTargetExternalSymbol(name->c_str(), v); 01977 } 01978 01979 SDValue 01980 NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { 01981 std::string ParamSym; 01982 raw_string_ostream ParamStr(ParamSym); 01983 01984 ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; 01985 ParamStr.flush(); 01986 01987 std::string *SavedStr = 01988 nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); 01989 return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); 01990 } 01991 01992 SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { 01993 return getExtSymb(DAG, ".HLPPARAM", idx); 01994 } 01995 01996 // Check to see if the kernel argument is image*_t or sampler_t 01997 01998 bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { 01999 static const char *const specialTypes[] = { "struct._image2d_t", 02000 "struct._image3d_t", 02001 "struct._sampler_t" }; 02002 02003 const Type *Ty = arg->getType(); 02004 const PointerType *PTy = dyn_cast<PointerType>(Ty); 02005 02006 if (!PTy) 02007 return false; 02008 02009 if (!context) 02010 return false; 02011 02012 const StructType *STy = dyn_cast<StructType>(PTy->getElementType()); 02013 const 
  std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";

  for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
    if (TypeName == specialTypes[i])
      return true;

  return false;
}

SDValue NVPTXTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const DataLayout *TD = getDataLayout();

  const Function *F = MF.getFunction();
  const AttributeSet &PAL = F->getAttributes();
  const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering();

  SDValue Root = DAG.getRoot();
  std::vector<SDValue> OutChains;

  bool isKernel = llvm::isKernelFunction(*F);
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I) {
    theArgs.push_back(I);
    argTypes.push_back(I->getType());
  }
  // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
  // Ins.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Ins)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Ins.
  // So a different index should be used for indexing into Ins.
  // See similar issue in LowerCall.
  unsigned InsIdx = 0;

  int idx = 0;
  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
    Type *Ty = argTypes[i];

    // If the kernel argument is image*_t or sampler_t, convert it to
    // an i32 constant holding the parameter position. This can later be
    // matched in the AsmPrinter to output the correct mangled name.
    if (isImageOrSamplerVal(
            theArgs[i],
            (theArgs[i]->getParent() ?
theArgs[i]->getParent()->getParent() 02070 : nullptr))) { 02071 assert(isKernel && "Only kernels can have image/sampler params"); 02072 InVals.push_back(DAG.getConstant(i + 1, MVT::i32)); 02073 continue; 02074 } 02075 02076 if (theArgs[i]->use_empty()) { 02077 // argument is dead 02078 if (Ty->isAggregateType()) { 02079 SmallVector<EVT, 16> vtparts; 02080 02081 ComputePTXValueVTs(*this, Ty, vtparts); 02082 assert(vtparts.size() > 0 && "empty aggregate type not expected"); 02083 for (unsigned parti = 0, parte = vtparts.size(); parti != parte; 02084 ++parti) { 02085 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 02086 ++InsIdx; 02087 } 02088 if (vtparts.size() > 0) 02089 --InsIdx; 02090 continue; 02091 } 02092 if (Ty->isVectorTy()) { 02093 EVT ObjectVT = getValueType(Ty); 02094 unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); 02095 for (unsigned parti = 0; parti < NumRegs; ++parti) { 02096 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 02097 ++InsIdx; 02098 } 02099 if (NumRegs > 0) 02100 --InsIdx; 02101 continue; 02102 } 02103 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 02104 continue; 02105 } 02106 02107 // In the following cases, assign a node order of "idx+1" 02108 // to newly created nodes. The SDNodes for params have to 02109 // appear in the same order as their order of appearance 02110 // in the original function. "idx+1" holds that order. 02111 if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { 02112 if (Ty->isAggregateType()) { 02113 SmallVector<EVT, 16> vtparts; 02114 SmallVector<uint64_t, 16> offsets; 02115 02116 // NOTE: Here, we lose the ability to issue vector loads for vectors 02117 // that are a part of a struct. This should be investigated in the 02118 // future. 02119 ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0); 02120 assert(vtparts.size() > 0 && "empty aggregate type not expected"); 02121 bool aggregateIsPacked = false; 02122 if (StructType *STy = llvm::dyn_cast<StructType>(Ty)) 02123 aggregateIsPacked = STy->isPacked(); 02124 02125 SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); 02126 for (unsigned parti = 0, parte = vtparts.size(); parti != parte; 02127 ++parti) { 02128 EVT partVT = vtparts[parti]; 02129 Value *srcValue = Constant::getNullValue( 02130 PointerType::get(partVT.getTypeForEVT(F->getContext()), 02131 llvm::ADDRESS_SPACE_PARAM)); 02132 SDValue srcAddr = 02133 DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, 02134 DAG.getConstant(offsets[parti], getPointerTy())); 02135 unsigned partAlign = 02136 aggregateIsPacked ? 1 02137 : TD->getABITypeAlignment( 02138 partVT.getTypeForEVT(F->getContext())); 02139 SDValue p; 02140 if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) { 02141 ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 
02142 ISD::SEXTLOAD : ISD::ZEXTLOAD; 02143 p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr, 02144 MachinePointerInfo(srcValue), partVT, false, 02145 false, false, partAlign); 02146 } else { 02147 p = DAG.getLoad(partVT, dl, Root, srcAddr, 02148 MachinePointerInfo(srcValue), false, false, false, 02149 partAlign); 02150 } 02151 if (p.getNode()) 02152 p.getNode()->setIROrder(idx + 1); 02153 InVals.push_back(p); 02154 ++InsIdx; 02155 } 02156 if (vtparts.size() > 0) 02157 --InsIdx; 02158 continue; 02159 } 02160 if (Ty->isVectorTy()) { 02161 EVT ObjectVT = getValueType(Ty); 02162 SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); 02163 unsigned NumElts = ObjectVT.getVectorNumElements(); 02164 assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && 02165 "Vector was not scalarized"); 02166 unsigned Ofst = 0; 02167 EVT EltVT = ObjectVT.getVectorElementType(); 02168 02169 // V1 load 02170 // f32 = load ... 02171 if (NumElts == 1) { 02172 // We only have one element, so just directly load it 02173 Value *SrcValue = Constant::getNullValue(PointerType::get( 02174 EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 02175 SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, 02176 DAG.getConstant(Ofst, getPointerTy())); 02177 SDValue P = DAG.getLoad( 02178 EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, 02179 false, true, 02180 TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); 02181 if (P.getNode()) 02182 P.getNode()->setIROrder(idx + 1); 02183 02184 if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) 02185 P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P); 02186 InVals.push_back(P); 02187 Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext())); 02188 ++InsIdx; 02189 } else if (NumElts == 2) { 02190 // V2 load 02191 // f32,f32 = load ... 02192 EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2); 02193 Value *SrcValue = Constant::getNullValue(PointerType::get( 02194 VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 02195 SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, 02196 DAG.getConstant(Ofst, getPointerTy())); 02197 SDValue P = DAG.getLoad( 02198 VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, 02199 false, true, 02200 TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); 02201 if (P.getNode()) 02202 P.getNode()->setIROrder(idx + 1); 02203 02204 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, 02205 DAG.getIntPtrConstant(0)); 02206 SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, 02207 DAG.getIntPtrConstant(1)); 02208 02209 if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) { 02210 Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0); 02211 Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1); 02212 } 02213 02214 InVals.push_back(Elt0); 02215 InVals.push_back(Elt1); 02216 Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 02217 InsIdx += 2; 02218 } else { 02219 // V4 loads 02220 // We have at least 4 elements (<3 x Ty> expands to 4 elements) and 02221 // the 02222 // vector will be expanded to a power of 2 elements, so we know we can 02223 // always round up to the next multiple of 4 when creating the vector 02224 // loads. 02225 // e.g. 
4 elem => 1 ld.v4 02226 // 6 elem => 2 ld.v4 02227 // 8 elem => 2 ld.v4 02228 // 11 elem => 3 ld.v4 02229 unsigned VecSize = 4; 02230 if (EltVT.getSizeInBits() == 64) { 02231 VecSize = 2; 02232 } 02233 EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); 02234 for (unsigned i = 0; i < NumElts; i += VecSize) { 02235 Value *SrcValue = Constant::getNullValue( 02236 PointerType::get(VecVT.getTypeForEVT(F->getContext()), 02237 llvm::ADDRESS_SPACE_PARAM)); 02238 SDValue SrcAddr = 02239 DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, 02240 DAG.getConstant(Ofst, getPointerTy())); 02241 SDValue P = DAG.getLoad( 02242 VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, 02243 false, true, 02244 TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); 02245 if (P.getNode()) 02246 P.getNode()->setIROrder(idx + 1); 02247 02248 for (unsigned j = 0; j < VecSize; ++j) { 02249 if (i + j >= NumElts) 02250 break; 02251 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, 02252 DAG.getIntPtrConstant(j)); 02253 if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) 02254 Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); 02255 InVals.push_back(Elt); 02256 } 02257 Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 02258 } 02259 InsIdx += NumElts; 02260 } 02261 02262 if (NumElts > 0) 02263 --InsIdx; 02264 continue; 02265 } 02266 // A plain scalar. 02267 EVT ObjectVT = getValueType(Ty); 02268 // If ABI, load from the param symbol 02269 SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); 02270 Value *srcValue = Constant::getNullValue(PointerType::get( 02271 ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 02272 SDValue p; 02273 if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) { 02274 ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 02275 ISD::SEXTLOAD : ISD::ZEXTLOAD; 02276 p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg, 02277 MachinePointerInfo(srcValue), ObjectVT, false, false, 02278 false, 02279 TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); 02280 } else { 02281 p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, 02282 MachinePointerInfo(srcValue), false, false, false, 02283 TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); 02284 } 02285 if (p.getNode()) 02286 p.getNode()->setIROrder(idx + 1); 02287 InVals.push_back(p); 02288 continue; 02289 } 02290 02291 // Param has ByVal attribute 02292 // Return MoveParam(param symbol). 02293 // Ideally, the param symbol can be returned directly, 02294 // but when SDNode builder decides to use it in a CopyToReg(), 02295 // machine instruction fails because TargetExternalSymbol 02296 // (not lowered) is target dependent, and CopyToReg assumes 02297 // the source is lowered. 02298 EVT ObjectVT = getValueType(Ty); 02299 assert(ObjectVT == Ins[InsIdx].VT && 02300 "Ins type did not match function type"); 02301 SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); 02302 SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); 02303 if (p.getNode()) 02304 p.getNode()->setIROrder(idx + 1); 02305 if (isKernel) 02306 InVals.push_back(p); 02307 else { 02308 SDValue p2 = DAG.getNode( 02309 ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, 02310 DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); 02311 InVals.push_back(p2); 02312 } 02313 } 02314 02315 // Clang will check explicit VarArg and issue error if any. However, Clang 02316 // will let code with 02317 // implicit var arg like f() pass. See bug 617733. 
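  // (That is, an unprototyped declaration such as f() whose parameter list is
  // left unspecified, rather than a function that actually consumes a
  // variable argument list.)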
  // We treat this case as if the arg list is empty.
  // if (F.isVarArg()) {
  //   assert(0 && "VarArg not supported yet!");
  // }

  if (!OutChains.empty())
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));

  return Chain;
}


SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 SDLoc dl, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *F = MF.getFunction();
  Type *RetTy = F->getReturnType();
  const DataLayout *TD = getDataLayout();

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
    // If we have a vector type, the OutVals array will be the scalarized
    // components and we have to combine them into 1 or more vector stores.
    unsigned NumElts = VTy->getNumElements();
    assert(NumElts == Outs.size() && "Bad scalarization of return value");

    // const_cast can be removed in later LLVM versions
    EVT EltVT = getValueType(RetTy).getVectorElementType();
    bool NeedExtend = false;
    if (EltVT.getSizeInBits() < 16)
      NeedExtend = true;

    // V1 store
    if (NumElts == 1) {
      SDValue StoreVal = OutVals[0];
      // We only have one element, so just directly store it
      if (NeedExtend)
        StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
      SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
                                      DAG.getVTList(MVT::Other), Ops,
                                      EltVT, MachinePointerInfo());

    } else if (NumElts == 2) {
      // V2 store
      SDValue StoreVal0 = OutVals[0];
      SDValue StoreVal1 = OutVals[1];

      if (NeedExtend) {
        StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0);
        StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
      }

      SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0,
                        StoreVal1 };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
                                      DAG.getVTList(MVT::Other), Ops,
                                      EltVT, MachinePointerInfo());
    } else {
      // V4 stores
      // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
      // vector will be expanded to a power of 2 elements, so we know we can
      // always round up to the next multiple of 4 when creating the vector
      // stores.
      // e.g. 4 elem  => 1 st.v4
      //      6 elem  => 2 st.v4
      //      8 elem  => 2 st.v4
      //      11 elem => 3 st.v4

      unsigned VecSize = 4;
      if (OutVals[0].getValueType().getSizeInBits() == 64)
        VecSize = 2;

      unsigned Offset = 0;

      EVT VecVT =
          EVT::getVectorVT(F->getContext(), EltVT, VecSize);
      unsigned PerStoreOffset =
          TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));

      for (unsigned i = 0; i < NumElts; i += VecSize) {
        // Get values
        SDValue StoreVal;
        SmallVector<SDValue, 8> Ops;
        Ops.push_back(Chain);
        Ops.push_back(DAG.getConstant(Offset, MVT::i32));
        unsigned Opc = NVPTXISD::StoreRetvalV2;
        EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();

        StoreVal = OutVals[i];
        if (NeedExtend)
          StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
        Ops.push_back(StoreVal);

        if (i + 1 < NumElts) {
          StoreVal = OutVals[i + 1];
          if (NeedExtend)
            StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
        } else {
          StoreVal = DAG.getUNDEF(ExtendedVT);
        }
        Ops.push_back(StoreVal);

        if (VecSize == 4) {
          Opc = NVPTXISD::StoreRetvalV4;
          if (i + 2 < NumElts) {
            StoreVal = OutVals[i + 2];
            if (NeedExtend)
              StoreVal =
                  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
          } else {
            StoreVal = DAG.getUNDEF(ExtendedVT);
          }
          Ops.push_back(StoreVal);

          if (i + 3 < NumElts) {
            StoreVal = OutVals[i + 3];
            if (NeedExtend)
              StoreVal =
                  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
          } else {
            StoreVal = DAG.getUNDEF(ExtendedVT);
          }
          Ops.push_back(StoreVal);
        }

        // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
        Chain =
            DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops,
                                    EltVT, MachinePointerInfo());
        Offset += PerStoreOffset;
      }
    }
  } else {
    SmallVector<EVT, 16> ValVTs;
    SmallVector<uint64_t, 16> Offsets;
    ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0);
    assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");

    for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
      SDValue theVal = OutVals[i];
      EVT TheValType = theVal.getValueType();
      unsigned numElems = 1;
      if (TheValType.isVector())
        numElems = TheValType.getVectorNumElements();
      for (unsigned j = 0, je = numElems; j != je; ++j) {
        SDValue TmpVal = theVal;
        if (TheValType.isVector())
          TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                               TheValType.getVectorElementType(), TmpVal,
                               DAG.getIntPtrConstant(j));
        EVT TheStoreType = ValVTs[i];
        if (RetTy->isIntegerTy() &&
            TD->getTypeAllocSizeInBits(RetTy) < 32) {
          // The following zero-extension is for integer types only, and
          // specifically not for aggregates.
          TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
          TheStoreType = MVT::i32;
        }
        else if (TmpVal.getValueType().getSizeInBits() < 16)
          TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);

        SDValue Ops[] = {
          Chain,
          DAG.getConstant(Offsets[i], MVT::i32),
          TmpVal };
        Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
                                        DAG.getVTList(MVT::Other), Ops,
                                        TheStoreType,
                                        MachinePointerInfo());
      }
    }
  }

  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}


void NVPTXTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  if (Constraint.length() > 1)
    return;
  else
    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// NVPTX supports vectors of legal types of any length in Intrinsics because
// the NVPTX-specific type legalizer
// will legalize them to the PTX supported length.
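// For example, a v8f32 intrinsic operand is not itself a legal PTX vector
// type, but its f32 element type is legal, so the check below accepts it and
// leaves the splitting to the NVPTX type legalizer.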
02516 bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { 02517 if (isTypeLegal(VT)) 02518 return true; 02519 if (VT.isVector()) { 02520 MVT eVT = VT.getVectorElementType(); 02521 if (isTypeLegal(eVT)) 02522 return true; 02523 } 02524 return false; 02525 } 02526 02527 static unsigned getOpcForTextureInstr(unsigned Intrinsic) { 02528 switch (Intrinsic) { 02529 default: 02530 return 0; 02531 02532 case Intrinsic::nvvm_tex_1d_v4f32_s32: 02533 return NVPTXISD::Tex1DFloatS32; 02534 case Intrinsic::nvvm_tex_1d_v4f32_f32: 02535 return NVPTXISD::Tex1DFloatFloat; 02536 case Intrinsic::nvvm_tex_1d_level_v4f32_f32: 02537 return NVPTXISD::Tex1DFloatFloatLevel; 02538 case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: 02539 return NVPTXISD::Tex1DFloatFloatGrad; 02540 case Intrinsic::nvvm_tex_1d_v4s32_s32: 02541 return NVPTXISD::Tex1DS32S32; 02542 case Intrinsic::nvvm_tex_1d_v4s32_f32: 02543 return NVPTXISD::Tex1DS32Float; 02544 case Intrinsic::nvvm_tex_1d_level_v4s32_f32: 02545 return NVPTXISD::Tex1DS32FloatLevel; 02546 case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: 02547 return NVPTXISD::Tex1DS32FloatGrad; 02548 case Intrinsic::nvvm_tex_1d_v4u32_s32: 02549 return NVPTXISD::Tex1DU32S32; 02550 case Intrinsic::nvvm_tex_1d_v4u32_f32: 02551 return NVPTXISD::Tex1DU32Float; 02552 case Intrinsic::nvvm_tex_1d_level_v4u32_f32: 02553 return NVPTXISD::Tex1DU32FloatLevel; 02554 case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: 02555 return NVPTXISD::Tex1DU32FloatGrad; 02556 02557 case Intrinsic::nvvm_tex_1d_array_v4f32_s32: 02558 return NVPTXISD::Tex1DArrayFloatS32; 02559 case Intrinsic::nvvm_tex_1d_array_v4f32_f32: 02560 return NVPTXISD::Tex1DArrayFloatFloat; 02561 case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: 02562 return NVPTXISD::Tex1DArrayFloatFloatLevel; 02563 case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: 02564 return NVPTXISD::Tex1DArrayFloatFloatGrad; 02565 case Intrinsic::nvvm_tex_1d_array_v4s32_s32: 02566 return NVPTXISD::Tex1DArrayS32S32; 02567 case Intrinsic::nvvm_tex_1d_array_v4s32_f32: 02568 return NVPTXISD::Tex1DArrayS32Float; 02569 case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: 02570 return NVPTXISD::Tex1DArrayS32FloatLevel; 02571 case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: 02572 return NVPTXISD::Tex1DArrayS32FloatGrad; 02573 case Intrinsic::nvvm_tex_1d_array_v4u32_s32: 02574 return NVPTXISD::Tex1DArrayU32S32; 02575 case Intrinsic::nvvm_tex_1d_array_v4u32_f32: 02576 return NVPTXISD::Tex1DArrayU32Float; 02577 case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: 02578 return NVPTXISD::Tex1DArrayU32FloatLevel; 02579 case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: 02580 return NVPTXISD::Tex1DArrayU32FloatGrad; 02581 02582 case Intrinsic::nvvm_tex_2d_v4f32_s32: 02583 return NVPTXISD::Tex2DFloatS32; 02584 case Intrinsic::nvvm_tex_2d_v4f32_f32: 02585 return NVPTXISD::Tex2DFloatFloat; 02586 case Intrinsic::nvvm_tex_2d_level_v4f32_f32: 02587 return NVPTXISD::Tex2DFloatFloatLevel; 02588 case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: 02589 return NVPTXISD::Tex2DFloatFloatGrad; 02590 case Intrinsic::nvvm_tex_2d_v4s32_s32: 02591 return NVPTXISD::Tex2DS32S32; 02592 case Intrinsic::nvvm_tex_2d_v4s32_f32: 02593 return NVPTXISD::Tex2DS32Float; 02594 case Intrinsic::nvvm_tex_2d_level_v4s32_f32: 02595 return NVPTXISD::Tex2DS32FloatLevel; 02596 case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: 02597 return NVPTXISD::Tex2DS32FloatGrad; 02598 case Intrinsic::nvvm_tex_2d_v4u32_s32: 02599 return NVPTXISD::Tex2DU32S32; 02600 case Intrinsic::nvvm_tex_2d_v4u32_f32: 02601 return NVPTXISD::Tex2DU32Float; 02602 case 
Intrinsic::nvvm_tex_2d_level_v4u32_f32: 02603 return NVPTXISD::Tex2DU32FloatLevel; 02604 case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: 02605 return NVPTXISD::Tex2DU32FloatGrad; 02606 02607 case Intrinsic::nvvm_tex_2d_array_v4f32_s32: 02608 return NVPTXISD::Tex2DArrayFloatS32; 02609 case Intrinsic::nvvm_tex_2d_array_v4f32_f32: 02610 return NVPTXISD::Tex2DArrayFloatFloat; 02611 case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: 02612 return NVPTXISD::Tex2DArrayFloatFloatLevel; 02613 case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: 02614 return NVPTXISD::Tex2DArrayFloatFloatGrad; 02615 case Intrinsic::nvvm_tex_2d_array_v4s32_s32: 02616 return NVPTXISD::Tex2DArrayS32S32; 02617 case Intrinsic::nvvm_tex_2d_array_v4s32_f32: 02618 return NVPTXISD::Tex2DArrayS32Float; 02619 case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: 02620 return NVPTXISD::Tex2DArrayS32FloatLevel; 02621 case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: 02622 return NVPTXISD::Tex2DArrayS32FloatGrad; 02623 case Intrinsic::nvvm_tex_2d_array_v4u32_s32: 02624 return NVPTXISD::Tex2DArrayU32S32; 02625 case Intrinsic::nvvm_tex_2d_array_v4u32_f32: 02626 return NVPTXISD::Tex2DArrayU32Float; 02627 case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: 02628 return NVPTXISD::Tex2DArrayU32FloatLevel; 02629 case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: 02630 return NVPTXISD::Tex2DArrayU32FloatGrad; 02631 02632 case Intrinsic::nvvm_tex_3d_v4f32_s32: 02633 return NVPTXISD::Tex3DFloatS32; 02634 case Intrinsic::nvvm_tex_3d_v4f32_f32: 02635 return NVPTXISD::Tex3DFloatFloat; 02636 case Intrinsic::nvvm_tex_3d_level_v4f32_f32: 02637 return NVPTXISD::Tex3DFloatFloatLevel; 02638 case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: 02639 return NVPTXISD::Tex3DFloatFloatGrad; 02640 case Intrinsic::nvvm_tex_3d_v4s32_s32: 02641 return NVPTXISD::Tex3DS32S32; 02642 case Intrinsic::nvvm_tex_3d_v4s32_f32: 02643 return NVPTXISD::Tex3DS32Float; 02644 case Intrinsic::nvvm_tex_3d_level_v4s32_f32: 02645 return NVPTXISD::Tex3DS32FloatLevel; 02646 case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: 02647 return NVPTXISD::Tex3DS32FloatGrad; 02648 case Intrinsic::nvvm_tex_3d_v4u32_s32: 02649 return NVPTXISD::Tex3DU32S32; 02650 case Intrinsic::nvvm_tex_3d_v4u32_f32: 02651 return NVPTXISD::Tex3DU32Float; 02652 case Intrinsic::nvvm_tex_3d_level_v4u32_f32: 02653 return NVPTXISD::Tex3DU32FloatLevel; 02654 case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: 02655 return NVPTXISD::Tex3DU32FloatGrad; 02656 02657 case Intrinsic::nvvm_tex_cube_v4f32_f32: 02658 return NVPTXISD::TexCubeFloatFloat; 02659 case Intrinsic::nvvm_tex_cube_level_v4f32_f32: 02660 return NVPTXISD::TexCubeFloatFloatLevel; 02661 case Intrinsic::nvvm_tex_cube_v4s32_f32: 02662 return NVPTXISD::TexCubeS32Float; 02663 case Intrinsic::nvvm_tex_cube_level_v4s32_f32: 02664 return NVPTXISD::TexCubeS32FloatLevel; 02665 case Intrinsic::nvvm_tex_cube_v4u32_f32: 02666 return NVPTXISD::TexCubeU32Float; 02667 case Intrinsic::nvvm_tex_cube_level_v4u32_f32: 02668 return NVPTXISD::TexCubeU32FloatLevel; 02669 02670 case Intrinsic::nvvm_tex_cube_array_v4f32_f32: 02671 return NVPTXISD::TexCubeArrayFloatFloat; 02672 case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: 02673 return NVPTXISD::TexCubeArrayFloatFloatLevel; 02674 case Intrinsic::nvvm_tex_cube_array_v4s32_f32: 02675 return NVPTXISD::TexCubeArrayS32Float; 02676 case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: 02677 return NVPTXISD::TexCubeArrayS32FloatLevel; 02678 case Intrinsic::nvvm_tex_cube_array_v4u32_f32: 02679 return NVPTXISD::TexCubeArrayU32Float; 02680 case 
Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: 02681 return NVPTXISD::TexCubeArrayU32FloatLevel; 02682 02683 case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: 02684 return NVPTXISD::Tld4R2DFloatFloat; 02685 case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: 02686 return NVPTXISD::Tld4G2DFloatFloat; 02687 case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: 02688 return NVPTXISD::Tld4B2DFloatFloat; 02689 case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: 02690 return NVPTXISD::Tld4A2DFloatFloat; 02691 case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: 02692 return NVPTXISD::Tld4R2DS64Float; 02693 case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: 02694 return NVPTXISD::Tld4G2DS64Float; 02695 case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: 02696 return NVPTXISD::Tld4B2DS64Float; 02697 case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: 02698 return NVPTXISD::Tld4A2DS64Float; 02699 case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: 02700 return NVPTXISD::Tld4R2DU64Float; 02701 case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: 02702 return NVPTXISD::Tld4G2DU64Float; 02703 case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: 02704 return NVPTXISD::Tld4B2DU64Float; 02705 case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: 02706 return NVPTXISD::Tld4A2DU64Float; 02707 02708 case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: 02709 return NVPTXISD::TexUnified1DFloatS32; 02710 case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: 02711 return NVPTXISD::TexUnified1DFloatFloat; 02712 case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: 02713 return NVPTXISD::TexUnified1DFloatFloatLevel; 02714 case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: 02715 return NVPTXISD::TexUnified1DFloatFloatGrad; 02716 case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: 02717 return NVPTXISD::TexUnified1DS32S32; 02718 case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: 02719 return NVPTXISD::TexUnified1DS32Float; 02720 case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: 02721 return NVPTXISD::TexUnified1DS32FloatLevel; 02722 case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: 02723 return NVPTXISD::TexUnified1DS32FloatGrad; 02724 case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: 02725 return NVPTXISD::TexUnified1DU32S32; 02726 case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: 02727 return NVPTXISD::TexUnified1DU32Float; 02728 case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: 02729 return NVPTXISD::TexUnified1DU32FloatLevel; 02730 case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: 02731 return NVPTXISD::TexUnified1DU32FloatGrad; 02732 02733 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: 02734 return NVPTXISD::TexUnified1DArrayFloatS32; 02735 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: 02736 return NVPTXISD::TexUnified1DArrayFloatFloat; 02737 case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: 02738 return NVPTXISD::TexUnified1DArrayFloatFloatLevel; 02739 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: 02740 return NVPTXISD::TexUnified1DArrayFloatFloatGrad; 02741 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: 02742 return NVPTXISD::TexUnified1DArrayS32S32; 02743 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: 02744 return NVPTXISD::TexUnified1DArrayS32Float; 02745 case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: 02746 return NVPTXISD::TexUnified1DArrayS32FloatLevel; 02747 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: 02748 return NVPTXISD::TexUnified1DArrayS32FloatGrad; 02749 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: 02750 return NVPTXISD::TexUnified1DArrayU32S32; 02751 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: 02752 return NVPTXISD::TexUnified1DArrayU32Float; 
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
    return NVPTXISD::TexUnified1DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
    return NVPTXISD::TexUnified1DArrayU32FloatGrad;

  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
    return NVPTXISD::TexUnified2DFloatS32;
  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
    return NVPTXISD::TexUnified2DFloatFloat;
  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
    return NVPTXISD::TexUnified2DFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
    return NVPTXISD::TexUnified2DFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
    return NVPTXISD::TexUnified2DS32S32;
  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
    return NVPTXISD::TexUnified2DS32Float;
  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
    return NVPTXISD::TexUnified2DS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
    return NVPTXISD::TexUnified2DS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
    return NVPTXISD::TexUnified2DU32S32;
  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
    return NVPTXISD::TexUnified2DU32Float;
  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
    return NVPTXISD::TexUnified2DU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
    return NVPTXISD::TexUnified2DU32FloatGrad;

  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
    return NVPTXISD::TexUnified2DArrayFloatS32;
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
    return NVPTXISD::TexUnified2DArrayFloatFloat;
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
    return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
    return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
    return NVPTXISD::TexUnified2DArrayS32S32;
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
    return NVPTXISD::TexUnified2DArrayS32Float;
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
    return NVPTXISD::TexUnified2DArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
    return NVPTXISD::TexUnified2DArrayS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
    return NVPTXISD::TexUnified2DArrayU32S32;
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
    return NVPTXISD::TexUnified2DArrayU32Float;
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
    return NVPTXISD::TexUnified2DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
    return NVPTXISD::TexUnified2DArrayU32FloatGrad;

  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
    return NVPTXISD::TexUnified3DFloatS32;
  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
    return NVPTXISD::TexUnified3DFloatFloat;
  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
    return NVPTXISD::TexUnified3DFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
    return NVPTXISD::TexUnified3DFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
    return NVPTXISD::TexUnified3DS32S32;
  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
    return NVPTXISD::TexUnified3DS32Float;
  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
    return NVPTXISD::TexUnified3DS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
    return NVPTXISD::TexUnified3DS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
    return NVPTXISD::TexUnified3DU32S32;
  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
    return NVPTXISD::TexUnified3DU32Float;
  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
    return NVPTXISD::TexUnified3DU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
    return NVPTXISD::TexUnified3DU32FloatGrad;

  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
    return NVPTXISD::TexUnifiedCubeFloatFloat;
  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
    return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
    return NVPTXISD::TexUnifiedCubeS32Float;
  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
    return NVPTXISD::TexUnifiedCubeS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
    return NVPTXISD::TexUnifiedCubeU32Float;
  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
    return NVPTXISD::TexUnifiedCubeU32FloatLevel;

  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
    return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
    return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
    return NVPTXISD::TexUnifiedCubeArrayS32Float;
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
    return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
    return NVPTXISD::TexUnifiedCubeArrayU32Float;
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
    return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;

  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
    return NVPTXISD::Tld4UnifiedR2DFloatFloat;
  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
    return NVPTXISD::Tld4UnifiedG2DFloatFloat;
  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
    return NVPTXISD::Tld4UnifiedB2DFloatFloat;
  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
    return NVPTXISD::Tld4UnifiedA2DFloatFloat;
  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
    return NVPTXISD::Tld4UnifiedR2DS64Float;
  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
    return NVPTXISD::Tld4UnifiedG2DS64Float;
  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
    return NVPTXISD::Tld4UnifiedB2DS64Float;
  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
    return NVPTXISD::Tld4UnifiedA2DS64Float;
  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
    return NVPTXISD::Tld4UnifiedR2DU64Float;
  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
    return NVPTXISD::Tld4UnifiedG2DU64Float;
  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
    return NVPTXISD::Tld4UnifiedB2DU64Float;
  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
    return NVPTXISD::Tld4UnifiedA2DU64Float;
  }
}

static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
  switch (Intrinsic) {
  default:
    return 0;
  case Intrinsic::nvvm_suld_1d_i8_clamp:
    return NVPTXISD::Suld1DI8Clamp;
  case Intrinsic::nvvm_suld_1d_i16_clamp:
    return NVPTXISD::Suld1DI16Clamp;
  case Intrinsic::nvvm_suld_1d_i32_clamp:
    return NVPTXISD::Suld1DI32Clamp;
  case Intrinsic::nvvm_suld_1d_i64_clamp:
    return NVPTXISD::Suld1DI64Clamp;
  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
    return NVPTXISD::Suld1DV2I8Clamp;
  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
    return NVPTXISD::Suld1DV2I16Clamp;
  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
    return NVPTXISD::Suld1DV2I32Clamp;
  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
    return NVPTXISD::Suld1DV2I64Clamp;
  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
    return NVPTXISD::Suld1DV4I8Clamp;
  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
    return NVPTXISD::Suld1DV4I16Clamp;
  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
    return NVPTXISD::Suld1DV4I32Clamp;
  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
    return NVPTXISD::Suld1DArrayI8Clamp;
  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
    return NVPTXISD::Suld1DArrayI16Clamp;
  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
    return NVPTXISD::Suld1DArrayI32Clamp;
  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
    return NVPTXISD::Suld1DArrayI64Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
    return NVPTXISD::Suld1DArrayV2I8Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
    return NVPTXISD::Suld1DArrayV2I16Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
    return NVPTXISD::Suld1DArrayV2I32Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
    return NVPTXISD::Suld1DArrayV2I64Clamp;
  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
    return NVPTXISD::Suld1DArrayV4I8Clamp;
  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
    return NVPTXISD::Suld1DArrayV4I16Clamp;
  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
    return NVPTXISD::Suld1DArrayV4I32Clamp;
  case Intrinsic::nvvm_suld_2d_i8_clamp:
    return NVPTXISD::Suld2DI8Clamp;
  case Intrinsic::nvvm_suld_2d_i16_clamp:
    return NVPTXISD::Suld2DI16Clamp;
  case Intrinsic::nvvm_suld_2d_i32_clamp:
    return NVPTXISD::Suld2DI32Clamp;
  case Intrinsic::nvvm_suld_2d_i64_clamp:
    return NVPTXISD::Suld2DI64Clamp;
  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
    return NVPTXISD::Suld2DV2I8Clamp;
  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
    return NVPTXISD::Suld2DV2I16Clamp;
  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
    return NVPTXISD::Suld2DV2I32Clamp;
  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
    return NVPTXISD::Suld2DV2I64Clamp;
  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
    return NVPTXISD::Suld2DV4I8Clamp;
  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
    return NVPTXISD::Suld2DV4I16Clamp;
  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
    return NVPTXISD::Suld2DV4I32Clamp;
  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
    return NVPTXISD::Suld2DArrayI8Clamp;
  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
    return NVPTXISD::Suld2DArrayI16Clamp;
  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
    return NVPTXISD::Suld2DArrayI32Clamp;
  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
    return NVPTXISD::Suld2DArrayI64Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
    return NVPTXISD::Suld2DArrayV2I8Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
    return NVPTXISD::Suld2DArrayV2I16Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
    return NVPTXISD::Suld2DArrayV2I32Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
    return NVPTXISD::Suld2DArrayV2I64Clamp;
  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
    return NVPTXISD::Suld2DArrayV4I8Clamp;
  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
    return NVPTXISD::Suld2DArrayV4I16Clamp;
  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
    return NVPTXISD::Suld2DArrayV4I32Clamp;
  case Intrinsic::nvvm_suld_3d_i8_clamp:
    return NVPTXISD::Suld3DI8Clamp;
  case Intrinsic::nvvm_suld_3d_i16_clamp:
    return NVPTXISD::Suld3DI16Clamp;
  case Intrinsic::nvvm_suld_3d_i32_clamp:
    return NVPTXISD::Suld3DI32Clamp;
  case Intrinsic::nvvm_suld_3d_i64_clamp:
    return NVPTXISD::Suld3DI64Clamp;
  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
    return NVPTXISD::Suld3DV2I8Clamp;
  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
    return NVPTXISD::Suld3DV2I16Clamp;
  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
    return NVPTXISD::Suld3DV2I32Clamp;
  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
    return NVPTXISD::Suld3DV2I64Clamp;
  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
    return NVPTXISD::Suld3DV4I8Clamp;
  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
    return NVPTXISD::Suld3DV4I16Clamp;
  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
    return NVPTXISD::Suld3DV4I32Clamp;
  case Intrinsic::nvvm_suld_1d_i8_trap:
    return NVPTXISD::Suld1DI8Trap;
  case Intrinsic::nvvm_suld_1d_i16_trap:
    return NVPTXISD::Suld1DI16Trap;
  case Intrinsic::nvvm_suld_1d_i32_trap:
    return NVPTXISD::Suld1DI32Trap;
  case Intrinsic::nvvm_suld_1d_i64_trap:
    return NVPTXISD::Suld1DI64Trap;
  case Intrinsic::nvvm_suld_1d_v2i8_trap:
    return NVPTXISD::Suld1DV2I8Trap;
  case Intrinsic::nvvm_suld_1d_v2i16_trap:
    return NVPTXISD::Suld1DV2I16Trap;
  case Intrinsic::nvvm_suld_1d_v2i32_trap:
    return NVPTXISD::Suld1DV2I32Trap;
  case Intrinsic::nvvm_suld_1d_v2i64_trap:
    return NVPTXISD::Suld1DV2I64Trap;
  case Intrinsic::nvvm_suld_1d_v4i8_trap:
    return NVPTXISD::Suld1DV4I8Trap;
  case Intrinsic::nvvm_suld_1d_v4i16_trap:
    return NVPTXISD::Suld1DV4I16Trap;
  case Intrinsic::nvvm_suld_1d_v4i32_trap:
    return NVPTXISD::Suld1DV4I32Trap;
  case Intrinsic::nvvm_suld_1d_array_i8_trap:
    return NVPTXISD::Suld1DArrayI8Trap;
  case Intrinsic::nvvm_suld_1d_array_i16_trap:
    return NVPTXISD::Suld1DArrayI16Trap;
  case Intrinsic::nvvm_suld_1d_array_i32_trap:
    return NVPTXISD::Suld1DArrayI32Trap;
  case Intrinsic::nvvm_suld_1d_array_i64_trap:
    return NVPTXISD::Suld1DArrayI64Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
    return NVPTXISD::Suld1DArrayV2I8Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
    return NVPTXISD::Suld1DArrayV2I16Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
    return NVPTXISD::Suld1DArrayV2I32Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
    return NVPTXISD::Suld1DArrayV2I64Trap;
  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
    return NVPTXISD::Suld1DArrayV4I8Trap;
  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
    return NVPTXISD::Suld1DArrayV4I16Trap;
  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
    return NVPTXISD::Suld1DArrayV4I32Trap;
  case Intrinsic::nvvm_suld_2d_i8_trap:
    return NVPTXISD::Suld2DI8Trap;
  case Intrinsic::nvvm_suld_2d_i16_trap:
    return NVPTXISD::Suld2DI16Trap;
  case Intrinsic::nvvm_suld_2d_i32_trap:
    return NVPTXISD::Suld2DI32Trap;
  case Intrinsic::nvvm_suld_2d_i64_trap:
    return NVPTXISD::Suld2DI64Trap;
  case Intrinsic::nvvm_suld_2d_v2i8_trap:
    return NVPTXISD::Suld2DV2I8Trap;
  case Intrinsic::nvvm_suld_2d_v2i16_trap:
    return NVPTXISD::Suld2DV2I16Trap;
  case Intrinsic::nvvm_suld_2d_v2i32_trap:
    return NVPTXISD::Suld2DV2I32Trap;
  case Intrinsic::nvvm_suld_2d_v2i64_trap:
    return NVPTXISD::Suld2DV2I64Trap;
  case Intrinsic::nvvm_suld_2d_v4i8_trap:
    return NVPTXISD::Suld2DV4I8Trap;
  case Intrinsic::nvvm_suld_2d_v4i16_trap:
    return NVPTXISD::Suld2DV4I16Trap;
  case Intrinsic::nvvm_suld_2d_v4i32_trap:
    return NVPTXISD::Suld2DV4I32Trap;
  case Intrinsic::nvvm_suld_2d_array_i8_trap:
    return NVPTXISD::Suld2DArrayI8Trap;
  case Intrinsic::nvvm_suld_2d_array_i16_trap:
    return NVPTXISD::Suld2DArrayI16Trap;
  case Intrinsic::nvvm_suld_2d_array_i32_trap:
    return NVPTXISD::Suld2DArrayI32Trap;
  case Intrinsic::nvvm_suld_2d_array_i64_trap:
    return NVPTXISD::Suld2DArrayI64Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
    return NVPTXISD::Suld2DArrayV2I8Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
    return NVPTXISD::Suld2DArrayV2I16Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
    return NVPTXISD::Suld2DArrayV2I32Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
    return NVPTXISD::Suld2DArrayV2I64Trap;
  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
    return NVPTXISD::Suld2DArrayV4I8Trap;
  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
    return NVPTXISD::Suld2DArrayV4I16Trap;
  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
    return NVPTXISD::Suld2DArrayV4I32Trap;
  case Intrinsic::nvvm_suld_3d_i8_trap:
    return NVPTXISD::Suld3DI8Trap;
  case Intrinsic::nvvm_suld_3d_i16_trap:
    return NVPTXISD::Suld3DI16Trap;
  case Intrinsic::nvvm_suld_3d_i32_trap:
    return NVPTXISD::Suld3DI32Trap;
  case Intrinsic::nvvm_suld_3d_i64_trap:
    return NVPTXISD::Suld3DI64Trap;
  case Intrinsic::nvvm_suld_3d_v2i8_trap:
    return NVPTXISD::Suld3DV2I8Trap;
  case Intrinsic::nvvm_suld_3d_v2i16_trap:
    return NVPTXISD::Suld3DV2I16Trap;
  case Intrinsic::nvvm_suld_3d_v2i32_trap:
    return NVPTXISD::Suld3DV2I32Trap;
  case Intrinsic::nvvm_suld_3d_v2i64_trap:
    return NVPTXISD::Suld3DV2I64Trap;
  case Intrinsic::nvvm_suld_3d_v4i8_trap:
    return NVPTXISD::Suld3DV4I8Trap;
  case Intrinsic::nvvm_suld_3d_v4i16_trap:
    return NVPTXISD::Suld3DV4I16Trap;
  case Intrinsic::nvvm_suld_3d_v4i32_trap:
    return NVPTXISD::Suld3DV4I32Trap;
  case Intrinsic::nvvm_suld_1d_i8_zero:
    return NVPTXISD::Suld1DI8Zero;
  case Intrinsic::nvvm_suld_1d_i16_zero:
    return NVPTXISD::Suld1DI16Zero;
  case Intrinsic::nvvm_suld_1d_i32_zero:
    return NVPTXISD::Suld1DI32Zero;
  case Intrinsic::nvvm_suld_1d_i64_zero:
    return NVPTXISD::Suld1DI64Zero;
  case Intrinsic::nvvm_suld_1d_v2i8_zero:
    return NVPTXISD::Suld1DV2I8Zero;
  case Intrinsic::nvvm_suld_1d_v2i16_zero:
    return NVPTXISD::Suld1DV2I16Zero;
  case Intrinsic::nvvm_suld_1d_v2i32_zero:
    return NVPTXISD::Suld1DV2I32Zero;
  case Intrinsic::nvvm_suld_1d_v2i64_zero:
    return NVPTXISD::Suld1DV2I64Zero;
  case Intrinsic::nvvm_suld_1d_v4i8_zero:
    return NVPTXISD::Suld1DV4I8Zero;
  case Intrinsic::nvvm_suld_1d_v4i16_zero:
    return NVPTXISD::Suld1DV4I16Zero;
  case Intrinsic::nvvm_suld_1d_v4i32_zero:
    return NVPTXISD::Suld1DV4I32Zero;
  case Intrinsic::nvvm_suld_1d_array_i8_zero:
    return NVPTXISD::Suld1DArrayI8Zero;
  case Intrinsic::nvvm_suld_1d_array_i16_zero:
    return NVPTXISD::Suld1DArrayI16Zero;
  case Intrinsic::nvvm_suld_1d_array_i32_zero:
    return NVPTXISD::Suld1DArrayI32Zero;
  case Intrinsic::nvvm_suld_1d_array_i64_zero:
    return NVPTXISD::Suld1DArrayI64Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
    return NVPTXISD::Suld1DArrayV2I8Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
    return NVPTXISD::Suld1DArrayV2I16Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
    return NVPTXISD::Suld1DArrayV2I32Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
    return NVPTXISD::Suld1DArrayV2I64Zero;
  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
    return NVPTXISD::Suld1DArrayV4I8Zero;
  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
    return NVPTXISD::Suld1DArrayV4I16Zero;
  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
    return NVPTXISD::Suld1DArrayV4I32Zero;
  case Intrinsic::nvvm_suld_2d_i8_zero:
    return NVPTXISD::Suld2DI8Zero;
  case Intrinsic::nvvm_suld_2d_i16_zero:
    return NVPTXISD::Suld2DI16Zero;
  case Intrinsic::nvvm_suld_2d_i32_zero:
    return NVPTXISD::Suld2DI32Zero;
  case Intrinsic::nvvm_suld_2d_i64_zero:
    return NVPTXISD::Suld2DI64Zero;
  case Intrinsic::nvvm_suld_2d_v2i8_zero:
    return NVPTXISD::Suld2DV2I8Zero;
  case Intrinsic::nvvm_suld_2d_v2i16_zero:
    return NVPTXISD::Suld2DV2I16Zero;
  case Intrinsic::nvvm_suld_2d_v2i32_zero:
    return NVPTXISD::Suld2DV2I32Zero;
  case Intrinsic::nvvm_suld_2d_v2i64_zero:
    return NVPTXISD::Suld2DV2I64Zero;
  case Intrinsic::nvvm_suld_2d_v4i8_zero:
    return NVPTXISD::Suld2DV4I8Zero;
  case Intrinsic::nvvm_suld_2d_v4i16_zero:
    return NVPTXISD::Suld2DV4I16Zero;
  case Intrinsic::nvvm_suld_2d_v4i32_zero:
    return NVPTXISD::Suld2DV4I32Zero;
  case Intrinsic::nvvm_suld_2d_array_i8_zero:
    return NVPTXISD::Suld2DArrayI8Zero;
  case Intrinsic::nvvm_suld_2d_array_i16_zero:
    return NVPTXISD::Suld2DArrayI16Zero;
  case Intrinsic::nvvm_suld_2d_array_i32_zero:
    return NVPTXISD::Suld2DArrayI32Zero;
  case Intrinsic::nvvm_suld_2d_array_i64_zero:
    return NVPTXISD::Suld2DArrayI64Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
    return NVPTXISD::Suld2DArrayV2I8Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
    return NVPTXISD::Suld2DArrayV2I16Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
    return NVPTXISD::Suld2DArrayV2I32Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
    return NVPTXISD::Suld2DArrayV2I64Zero;
  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
    return NVPTXISD::Suld2DArrayV4I8Zero;
  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
    return NVPTXISD::Suld2DArrayV4I16Zero;
  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
    return NVPTXISD::Suld2DArrayV4I32Zero;
  case Intrinsic::nvvm_suld_3d_i8_zero:
    return NVPTXISD::Suld3DI8Zero;
  case Intrinsic::nvvm_suld_3d_i16_zero:
    return NVPTXISD::Suld3DI16Zero;
  case Intrinsic::nvvm_suld_3d_i32_zero:
    return NVPTXISD::Suld3DI32Zero;
  case Intrinsic::nvvm_suld_3d_i64_zero:
    return NVPTXISD::Suld3DI64Zero;
  case Intrinsic::nvvm_suld_3d_v2i8_zero:
    return NVPTXISD::Suld3DV2I8Zero;
  case Intrinsic::nvvm_suld_3d_v2i16_zero:
    return NVPTXISD::Suld3DV2I16Zero;
  case Intrinsic::nvvm_suld_3d_v2i32_zero:
    return NVPTXISD::Suld3DV2I32Zero;
  case Intrinsic::nvvm_suld_3d_v2i64_zero:
    return NVPTXISD::Suld3DV2I64Zero;
  case Intrinsic::nvvm_suld_3d_v4i8_zero:
    return NVPTXISD::Suld3DV4I8Zero;
  case Intrinsic::nvvm_suld_3d_v4i16_zero:
    return NVPTXISD::Suld3DV4I16Zero;
  case Intrinsic::nvvm_suld_3d_v4i32_zero:
    return NVPTXISD::Suld3DV4I32Zero;
  }
}

// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic nodes because we need information that is only available
// at the "Value" level of the destination pointer -- in particular, its
// address space.
bool NVPTXTargetLowering::getTgtMemIntrinsic(
    IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;

  case Intrinsic::nvvm_atomic_load_add_f32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::f32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;

  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;

  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
      Info.memVT = getValueType(I.getType());
    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
      Info.memVT = getPointerTy();
    else
      Info.memVT = getValueType(I.getType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
    return true;
  }
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
      Info.memVT = getValueType(I.getType());
    else if (Intrinsic == Intrinsic::nvvm_ldg_global_p)
      Info.memVT = getPointerTy();
    else
      Info.memVT = getValueType(I.getType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
    return true;
  }

  case Intrinsic::nvvm_tex_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_v4f32_s32:
  case Intrinsic::nvvm_tex_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_v4f32_s32:
  case Intrinsic::nvvm_tex_3d_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: {
    Info.opc = getOpcForTextureInstr(Intrinsic);
    Info.memVT = MVT::v4f32;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  case Intrinsic::nvvm_tex_1d_v4s32_s32:
  case Intrinsic::nvvm_tex_1d_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_v4s32_s32:
  case Intrinsic::nvvm_tex_2d_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_v4s32_s32:
  case Intrinsic::nvvm_tex_3d_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_v4u32_s32:
  case Intrinsic::nvvm_tex_1d_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_v4u32_s32:
  case Intrinsic::nvvm_tex_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_v4u32_s32:
  case Intrinsic::nvvm_tex_3d_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: {
    Info.opc = getOpcForTextureInstr(Intrinsic);
    Info.memVT = MVT::v4i32;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  case Intrinsic::nvvm_suld_1d_i8_clamp:
  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
  case Intrinsic::nvvm_suld_2d_i8_clamp:
  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
  case Intrinsic::nvvm_suld_3d_i8_clamp:
  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
  case Intrinsic::nvvm_suld_1d_i8_trap:
  case Intrinsic::nvvm_suld_1d_v2i8_trap:
  case Intrinsic::nvvm_suld_1d_v4i8_trap:
  case Intrinsic::nvvm_suld_1d_array_i8_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
  case Intrinsic::nvvm_suld_2d_i8_trap:
  case Intrinsic::nvvm_suld_2d_v2i8_trap:
  case Intrinsic::nvvm_suld_2d_v4i8_trap:
  case Intrinsic::nvvm_suld_2d_array_i8_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
  case Intrinsic::nvvm_suld_3d_i8_trap:
  case Intrinsic::nvvm_suld_3d_v2i8_trap:
  case Intrinsic::nvvm_suld_3d_v4i8_trap:
  case Intrinsic::nvvm_suld_1d_i8_zero:
  case Intrinsic::nvvm_suld_1d_v2i8_zero:
  case Intrinsic::nvvm_suld_1d_v4i8_zero:
  case Intrinsic::nvvm_suld_1d_array_i8_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
  case Intrinsic::nvvm_suld_2d_i8_zero:
  case Intrinsic::nvvm_suld_2d_v2i8_zero:
  case Intrinsic::nvvm_suld_2d_v4i8_zero:
  case Intrinsic::nvvm_suld_2d_array_i8_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
  case Intrinsic::nvvm_suld_3d_i8_zero:
  case Intrinsic::nvvm_suld_3d_v2i8_zero:
  case Intrinsic::nvvm_suld_3d_v4i8_zero: {
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i8;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  case Intrinsic::nvvm_suld_1d_i16_clamp:
  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
  case Intrinsic::nvvm_suld_2d_i16_clamp:
  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
  case Intrinsic::nvvm_suld_3d_i16_clamp:
  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
  case Intrinsic::nvvm_suld_1d_i16_trap:
  case Intrinsic::nvvm_suld_1d_v2i16_trap:
  case Intrinsic::nvvm_suld_1d_v4i16_trap:
  case Intrinsic::nvvm_suld_1d_array_i16_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
  case Intrinsic::nvvm_suld_2d_i16_trap:
  case Intrinsic::nvvm_suld_2d_v2i16_trap:
  case Intrinsic::nvvm_suld_2d_v4i16_trap:
  case Intrinsic::nvvm_suld_2d_array_i16_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
  case Intrinsic::nvvm_suld_3d_i16_trap:
  case Intrinsic::nvvm_suld_3d_v2i16_trap:
  case Intrinsic::nvvm_suld_3d_v4i16_trap:
  case Intrinsic::nvvm_suld_1d_i16_zero:
  case Intrinsic::nvvm_suld_1d_v2i16_zero:
  case Intrinsic::nvvm_suld_1d_v4i16_zero:
  case Intrinsic::nvvm_suld_1d_array_i16_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
  case Intrinsic::nvvm_suld_2d_i16_zero:
  case Intrinsic::nvvm_suld_2d_v2i16_zero:
  case Intrinsic::nvvm_suld_2d_v4i16_zero:
  case Intrinsic::nvvm_suld_2d_array_i16_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
  case Intrinsic::nvvm_suld_3d_i16_zero:
  case Intrinsic::nvvm_suld_3d_v2i16_zero:
  case Intrinsic::nvvm_suld_3d_v4i16_zero: {
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i16;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  case Intrinsic::nvvm_suld_1d_i32_clamp:
  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
  case Intrinsic::nvvm_suld_2d_i32_clamp:
  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
  case Intrinsic::nvvm_suld_3d_i32_clamp:
  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
  case Intrinsic::nvvm_suld_1d_i32_trap:
  case Intrinsic::nvvm_suld_1d_v2i32_trap:
  case Intrinsic::nvvm_suld_1d_v4i32_trap:
  case Intrinsic::nvvm_suld_1d_array_i32_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
  case Intrinsic::nvvm_suld_2d_i32_trap:
  case Intrinsic::nvvm_suld_2d_v2i32_trap:
  case Intrinsic::nvvm_suld_2d_v4i32_trap:
  case Intrinsic::nvvm_suld_2d_array_i32_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
  case Intrinsic::nvvm_suld_3d_i32_trap:
  case Intrinsic::nvvm_suld_3d_v2i32_trap:
  case Intrinsic::nvvm_suld_3d_v4i32_trap:
  case Intrinsic::nvvm_suld_1d_i32_zero:
  case Intrinsic::nvvm_suld_1d_v2i32_zero:
  case Intrinsic::nvvm_suld_1d_v4i32_zero:
  case Intrinsic::nvvm_suld_1d_array_i32_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
  case Intrinsic::nvvm_suld_2d_i32_zero:
  case Intrinsic::nvvm_suld_2d_v2i32_zero:
  case Intrinsic::nvvm_suld_2d_v4i32_zero:
  case Intrinsic::nvvm_suld_2d_array_i32_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
  case Intrinsic::nvvm_suld_3d_i32_zero:
  case Intrinsic::nvvm_suld_3d_v2i32_zero:
  case Intrinsic::nvvm_suld_3d_v4i32_zero: {
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i32;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  case Intrinsic::nvvm_suld_1d_i64_clamp:
  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
  case Intrinsic::nvvm_suld_2d_i64_clamp:
  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
  case Intrinsic::nvvm_suld_3d_i64_clamp:
  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
  case Intrinsic::nvvm_suld_1d_i64_trap:
  case Intrinsic::nvvm_suld_1d_v2i64_trap:
  case Intrinsic::nvvm_suld_1d_array_i64_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
  case Intrinsic::nvvm_suld_2d_i64_trap:
  case Intrinsic::nvvm_suld_2d_v2i64_trap:
  case Intrinsic::nvvm_suld_2d_array_i64_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
  case Intrinsic::nvvm_suld_3d_i64_trap:
  case Intrinsic::nvvm_suld_3d_v2i64_trap:
  case Intrinsic::nvvm_suld_1d_i64_zero:
  case Intrinsic::nvvm_suld_1d_v2i64_zero:
  case Intrinsic::nvvm_suld_1d_array_i64_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
  case Intrinsic::nvvm_suld_2d_i64_zero:
  case Intrinsic::nvvm_suld_2d_v2i64_zero:
  case Intrinsic::nvvm_suld_2d_array_i64_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
  case Intrinsic::nvvm_suld_3d_i64_zero:
  case Intrinsic::nvvm_suld_3d_v2i64_zero: {
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i64;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  }
  return false;
}

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
/// Used to guide target-specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp).
bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                                Type *Ty) const {
  // AddrMode - This represents an addressing mode of:
  //   BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
  //
  // The legal address modes are
  // - [avar]
  // - [areg]
  // - [areg+immoff]
  // - [immAddr]

  if (AM.BaseGV) {
    if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
      return false;
    return true;
  }

  switch (AM.Scale) {
  case 0: // "r", "r+i" or "i" is allowed
    break;
  case 1:
    if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
      return false;
    // Otherwise we have r+i.
    break;
  default:
    // No scale > 1 is allowed.
    return false;
  }
  return true;
}

//===----------------------------------------------------------------------===//
// NVPTX Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
NVPTXTargetLowering::ConstraintType
NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'b':
    case 'r':
    case 'h':
    case 'c':
    case 'l':
    case 'f':
    case 'd':
    case '0':
    case 'N':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                  MVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'b':
      return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
    case 'c':
      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
    case 'h':
      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
    case 'r':
      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
    case 'l':
    case 'N':
      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
    case 'f':
      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
    case 'd':
      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
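/// Note the log2 encoding: the constant 4 returned below therefore requests
/// 2^4 = 16-byte alignment, and it applies to every function (the Function
/// argument is ignored).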
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
  return 4;
}

//===----------------------------------------------------------------------===//
// NVPTX DAG Combining
//===----------------------------------------------------------------------===//

bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
                                   CodeGenOpt::Level OptLevel) const {
  const Function *F = MF.getFunction();
  const TargetOptions &TO = MF.getTarget().Options;

  // Always honor the command-line argument.
  if (FMAContractLevelOpt.getNumOccurrences() > 0) {
    return FMAContractLevelOpt > 0;
  } else if (OptLevel == 0) {
    // Do not contract if we're not optimizing the code.
    return false;
  } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
    // Honor TargetOptions flags that explicitly say fusion is okay.
    return true;
  } else if (F->hasFnAttribute("unsafe-fp-math")) {
    // Check for unsafe-fp-math=true coming from Clang.
    Attribute Attr = F->getFnAttribute("unsafe-fp-math");
    StringRef Val = Attr.getValueAsString();
    if (Val == "true")
      return true;
  }

  // We did not have a clear indication that fusion is allowed, so assume not.
  return false;
}

/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue
PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                              TargetLowering::DAGCombinerInfo &DCI,
                              const NVPTXSubtarget &Subtarget,
                              CodeGenOpt::Level OptLevel) {
  SelectionDAG &DAG = DCI.DAG;
  // Skip the non-integer, non-scalar case.
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  // fold (add (mul a, b), c) -> (mad a, b, c)
  //
  if (N0.getOpcode() == ISD::MUL) {
    assert(VT.isInteger());
    // For integer:
    // Since integer multiply-add costs the same as integer multiply
    // but is more costly than integer add, do the fusion only when
    // the mul is only used in the add.
    if (OptLevel == CodeGenOpt::None || VT != MVT::i32 ||
        !N0.getNode()->hasOneUse())
      return SDValue();

    // Do the folding.
    return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  } else if (N0.getOpcode() == ISD::FMUL) {
    if (VT == MVT::f32 || VT == MVT::f64) {
      const auto *TLI = static_cast<const NVPTXTargetLowering *>(
          &DAG.getTargetLoweringInfo());
      if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
        return SDValue();

      // For floating point:
      // Do the fusion only when the mul has fewer than 5 uses and all of
      // them are adds.
      // The heuristic is that if a use is not an add, then that use
      // cannot be fused into an fma, so the mul is still needed anyway.
      // If there are more than 4 uses, even if they are all adds, fusing
      // them will increase register pressure.
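      //
      // For example (a sketch in the notation of the fold comment above; the
      // exact PTX depends on the rounding mode the selector picks), with a
      // single-use multiply the transform is simply
      //   (fadd (fmul a, b), c) -> (fma a, b, c)
      // which can be emitted as one "fma.rn.f32" / "fma.rn.f64" instruction.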
      int numUses = 0;
      int nonAddCount = 0;
      for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                                UE = N0.getNode()->use_end();
           UI != UE; ++UI) {
        numUses++;
        SDNode *User = *UI;
        if (User->getOpcode() != ISD::FADD)
          ++nonAddCount;
      }
      if (numUses >= 5)
        return SDValue();
      if (nonAddCount) {
        int orderNo = N->getIROrder();
        int orderNo2 = N0.getNode()->getIROrder();
        // A simple heuristic for estimating potential register pressure:
        // the difference in IR order approximates the distance between the
        // def and this use, and the longer that distance, the more likely
        // the fusion is to cause register pressure.
        if (orderNo - orderNo2 < 500)
          return SDValue();

        // Now, check if at least one of the FMUL's operands is live beyond
        // the node N, which guarantees that the FMA will not increase
        // register pressure at node N.
        bool opIsLive = false;
        const SDNode *left = N0.getOperand(0).getNode();
        const SDNode *right = N0.getOperand(1).getNode();

        if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
          opIsLive = true;

        if (!opIsLive)
          for (SDNode::use_iterator UI = left->use_begin(),
                                    UE = left->use_end();
               UI != UE; ++UI) {
            SDNode *User = *UI;
            int orderNo3 = User->getIROrder();
            if (orderNo3 > orderNo) {
              opIsLive = true;
              break;
            }
          }

        if (!opIsLive)
          for (SDNode::use_iterator UI = right->use_begin(),
                                    UE = right->use_end();
               UI != UE; ++UI) {
            SDNode *User = *UI;
            int orderNo3 = User->getIROrder();
            if (orderNo3 > orderNo) {
              opIsLive = true;
              break;
            }
          }

        if (!opIsLive)
          return SDValue();
      }

      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1), N1);
    }
  }

  return SDValue();
}

/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const NVPTXSubtarget &Subtarget,
                                 CodeGenOpt::Level OptLevel) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // First try with the default operand order.
  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget,
                                                 OptLevel);
  if (Result.getNode())
    return Result;

  // If that didn't work, try again with the operands commuted.
  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
}

static SDValue PerformANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  // The type legalizer turns a vector load of i8 values into a zextload to
  // i16 registers, optionally ANY_EXTENDs it (if the target type is an
  // integer), and ANDs off the high 8 bits. Since we turn this load into a
  // target-specific DAG node, the DAG combiner fails to eliminate these AND
  // nodes. Do that here.
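  //
  // For example (a hand-written sketch of the typical pre-combine DAG, not
  // an actual debug dump):
  //   t1: i16,ch = NVPTXISD::LoadV2<(load v2i8)> ...   ; zero-extending load
  //   t2: i32 = any_extend t1
  //   t3: i32 = and t2, 255
  // The AND with 0xff is redundant because the i8 load already zero-filled
  // the upper bits, so t3 can be replaced by t2 (re-emitted below as a
  // zero_extend for correctness).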
  SDValue Val = N->getOperand(0);
  SDValue Mask = N->getOperand(1);

  if (isa<ConstantSDNode>(Val)) {
    std::swap(Val, Mask);
  }

  SDValue AExt;
  // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
  if (Val.getOpcode() == ISD::ANY_EXTEND) {
    AExt = Val;
    Val = Val->getOperand(0);
  }

  if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
    Val = Val->getOperand(0);
  }

  if (Val->getOpcode() == NVPTXISD::LoadV2 ||
      Val->getOpcode() == NVPTXISD::LoadV4) {
    ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
    if (!MaskCnst) {
      // Not an AND with a constant
      return SDValue();
    }

    uint64_t MaskVal = MaskCnst->getZExtValue();
    if (MaskVal != 0xff) {
      // Not an AND that chops off the top 8 bits
      return SDValue();
    }

    MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
    if (!Mem) {
      // Not a MemSDNode?!?
      return SDValue();
    }

    EVT MemVT = Mem->getMemoryVT();
    if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
      // We only handle the i8 case
      return SDValue();
    }

    unsigned ExtType =
        cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands() - 1))->
            getZExtValue();
    if (ExtType == ISD::SEXTLOAD) {
      // If for some reason the load is a sextload, the and is needed to
      // zero out the high 8 bits.
      return SDValue();
    }

    bool AddTo = false;
    if (AExt.getNode() != nullptr) {
      // Re-insert the ext as a zext.
      Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                            AExt.getValueType(), Val);
      AddTo = true;
    }

    // If we get here, the AND is unnecessary. Just replace it with the load.
    DCI.CombineTo(N, Val, AddTo);
  }

  return SDValue();
}

enum OperandSignedness {
  Signed = 0,
  Unsigned,
  Unknown
};

/// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
/// that can be demoted to \p OptSize bits without loss of information. The
/// signedness of the operand, if determinable, is placed in \p S.
static bool IsMulWideOperandDemotable(SDValue Op,
                                      unsigned OptSize,
                                      OperandSignedness &S) {
  S = Unknown;

  if (Op.getOpcode() == ISD::SIGN_EXTEND ||
      Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT OrigVT = Op.getOperand(0).getValueType();
    if (OrigVT.getSizeInBits() <= OptSize) {
      S = Signed;
      return true;
    }
  } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
    EVT OrigVT = Op.getOperand(0).getValueType();
    if (OrigVT.getSizeInBits() <= OptSize) {
      S = Unsigned;
      return true;
    }
  }

  return false;
}

/// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
/// be demoted to \p OptSize bits without loss of information. If the operands
/// contain a constant, it should appear as the RHS operand. The signedness of
/// the operands is placed in \p IsSigned.
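///
/// For example, with \p OptSize == 16: an LHS of (sext i8 %x to i32) demotes
/// with \p IsSigned == true, an RHS constant of 100 fits either way, and an
/// RHS constant of 40000 fits only in the unsigned case (as a signed value
/// it would need 17 bits).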
static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
                                        unsigned OptSize,
                                        bool &IsSigned) {
  OperandSignedness LHSSign;

  // The LHS operand must be a demotable op
  if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
    return false;

  // We should have been able to determine the signedness from the LHS
  if (LHSSign == Unknown)
    return false;

  IsSigned = (LHSSign == Signed);

  // The RHS can be a demotable op or a constant
  if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
    APInt Val = CI->getAPIntValue();
    if (LHSSign == Unsigned) {
      if (Val.isIntN(OptSize)) {
        return true;
      }
      return false;
    } else {
      if (Val.isSignedIntN(OptSize)) {
        return true;
      }
      return false;
    }
  } else {
    OperandSignedness RHSSign;
    if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
      return false;

    if (LHSSign != RHSSign)
      return false;

    return true;
  }
}

/// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
/// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
/// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
/// amount.
static SDValue TryMULWIDECombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  EVT MulType = N->getValueType(0);
  if (MulType != MVT::i32 && MulType != MVT::i64) {
    return SDValue();
  }

  unsigned OptSize = MulType.getSizeInBits() >> 1;
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Canonicalize the multiply so the constant (if any) is on the right
  if (N->getOpcode() == ISD::MUL) {
    if (isa<ConstantSDNode>(LHS)) {
      std::swap(LHS, RHS);
    }
  }

  // If we have a SHL, determine the actual multiply amount
  if (N->getOpcode() == ISD::SHL) {
    ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
    if (!ShlRHS) {
      return SDValue();
    }

    APInt ShiftAmt = ShlRHS->getAPIntValue();
    unsigned BitWidth = MulType.getSizeInBits();
    if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
      APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
      RHS = DCI.DAG.getConstant(MulVal, MulType);
    } else {
      return SDValue();
    }
  }

  bool Signed;
  // Verify that our operands are demotable
  if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
    return SDValue();
  }

  EVT DemotedVT;
  if (MulType == MVT::i32) {
    DemotedVT = MVT::i16;
  } else {
    DemotedVT = MVT::i32;
  }

  // Truncate the operands to the correct size. Note that these are just for
  // type consistency and will (likely) be eliminated in later phases.
  SDValue TruncLHS =
      DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS);
  SDValue TruncRHS =
      DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS);

  unsigned Opc;
  if (Signed) {
    Opc = NVPTXISD::MUL_WIDE_SIGNED;
  } else {
    Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
  }

  return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS);
}

/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
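///
/// For example, via TryMULWIDECombine above, an i32 multiply whose operands
/// are both sign-extended from i16 can become
///   (MUL_WIDE_SIGNED i16 a, i16 b)
/// which can be selected as a single "mul.wide.s16" producing the full
/// 32-bit product.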
/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
static SDValue PerformMULCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOpt::Level OptLevel) {
  if (OptLevel > 0) {
    // Try mul.wide combining at OptLevel > 0
    SDValue Ret = TryMULWIDECombine(N, DCI);
    if (Ret.getNode())
      return Ret;
  }

  return SDValue();
}

/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
static SDValue PerformSHLCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOpt::Level OptLevel) {
  if (OptLevel > 0) {
    // Try mul.wide combining at OptLevel > 0
    SDValue Ret = TryMULWIDECombine(N, DCI);
    if (Ret.getNode())
      return Ret;
  }

  return SDValue();
}

SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD:
  case ISD::FADD:
    return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel);
  case ISD::MUL:
    return PerformMULCombine(N, DCI, OptLevel);
  case ISD::SHL:
    return PerformSHLCombine(N, DCI, OptLevel);
  case ISD::AND:
    return PerformANDCombine(N, DCI);
  }
  return SDValue();
}
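
// Illustrative sketch (hypothetical helpers, not part of the backend): SHL
// nodes are routed through the same mul.wide rewrite as MUL nodes because a
// left shift by a constant is a multiply by a power of two, e.g.:
#if 0
#include <cstdint>

static uint32_t shlByFour(uint32_t X) { return X << 4; }

// Identical to shlByFour for every X; this equivalence is what lets
// TryMULWIDECombine turn the SHL's shift amount into a multiply constant.
static uint32_t mulBySixteen(uint32_t X) { return X * 16u; }
#endif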
/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
                              const DataLayout *TD,
                              SmallVectorImpl<SDValue> &Results) {
  EVT ResVT = N->getValueType(0);
  SDLoc DL(N);

  assert(ResVT.isVector() && "Vector load must have vector type");

  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
  // legal.  We can (and should) split that into 2 loads of <2 x double> here
  // but I'm leaving that as a TODO for now.
  assert(ResVT.isSimple() && "Can only handle simple types");
  switch (ResVT.getSimpleVT().SimpleTy) {
  default:
    return;
  case MVT::v2i8:
  case MVT::v2i16:
  case MVT::v2i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v2f64:
  case MVT::v4i8:
  case MVT::v4i16:
  case MVT::v4i32:
  case MVT::v4f32:
    // This is a "native" vector type
    break;
  }

  LoadSDNode *LD = cast<LoadSDNode>(N);

  unsigned Align = LD->getAlignment();
  unsigned PrefAlign =
    TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
  if (Align < PrefAlign) {
    // This load is not sufficiently aligned, so bail out and let this vector
    // load be scalarized.  Note that we may still be able to emit smaller
    // vector loads.  For example, if we are loading a <4 x float> with an
    // alignment of 8, this check will fail but the legalizer will try again
    // with 2 x <2 x float>, which will succeed with an alignment of 8.
    return;
  }

  EVT EltVT = ResVT.getVectorElementType();
  unsigned NumElts = ResVT.getVectorNumElements();

  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
  // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
  // loaded type to i16 and propagate the "real" type as the memory type.
  bool NeedTrunc = false;
  if (EltVT.getSizeInBits() < 16) {
    EltVT = MVT::i16;
    NeedTrunc = true;
  }

  unsigned Opcode = 0;
  SDVTList LdResVTs;

  switch (NumElts) {
  default:
    return;
  case 2:
    Opcode = NVPTXISD::LoadV2;
    LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
    break;
  case 4: {
    Opcode = NVPTXISD::LoadV4;
    EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
    LdResVTs = DAG.getVTList(ListVTs);
    break;
  }
  }

  SmallVector<SDValue, 8> OtherOps;

  // Copy regular operands
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    OtherOps.push_back(N->getOperand(i));

  // The select routine does not have access to the LoadSDNode instance, so
  // pass along the extension information
  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));

  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
                                          LD->getMemoryVT(),
                                          LD->getMemOperand());

  SmallVector<SDValue, 4> ScalarRes;

  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Res = NewLD.getValue(i);
    if (NeedTrunc)
      Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
    ScalarRes.push_back(Res);
  }

  SDValue LoadChain = NewLD.getValue(NumElts);

  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);

  Results.push_back(BuildVec);
  Results.push_back(LoadChain);
}
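
// Illustrative sketch (hypothetical helpers, not part of the backend): the
// i1/i8 handling above widens each lane to i16 for the load and truncates
// it back afterwards.  The round trip is lossless, as in this scalar
// analogue:
#if 0
#include <cstdint>

// Widen an 8-bit lane to the legal 16-bit register type...
static uint16_t widenLane(uint8_t V) { return static_cast<uint16_t>(V); }

// ...then truncate back: truncLane(widenLane(V)) == V for every V, so the
// extra TRUNCATE nodes never change the loaded values.
static uint8_t truncLane(uint16_t V) { return static_cast<uint8_t>(V); }
#endif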
static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Chain = N->getOperand(0);
  SDValue Intrin = N->getOperand(1);
  SDLoc DL(N);

  // Get the intrinsic ID
  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
  switch (IntrinNo) {
  default:
    return;
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    EVT ResVT = N->getValueType(0);

    if (ResVT.isVector()) {
      // Vector LDG/LDU

      unsigned NumElts = ResVT.getVectorNumElements();
      EVT EltVT = ResVT.getVectorElementType();

      // Since LDU/LDG are target nodes, we cannot rely on DAG type
      // legalization.  Therefore, we must ensure the type is legal.  For i1
      // and i8, we set the loaded type to i16 and propagate the "real" type
      // as the memory type.
      bool NeedTrunc = false;
      if (EltVT.getSizeInBits() < 16) {
        EltVT = MVT::i16;
        NeedTrunc = true;
      }

      unsigned Opcode = 0;
      SDVTList LdResVTs;

      switch (NumElts) {
      default:
        return;
      case 2:
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV2;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV2;
          break;
        }
        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
        break;
      case 4: {
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV4;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV4;
          break;
        }
        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
        LdResVTs = DAG.getVTList(ListVTs);
        break;
      }
      }

      SmallVector<SDValue, 8> OtherOps;

      // Copy regular operands
      OtherOps.push_back(Chain); // Chain
      // Skip operand 1 (intrinsic ID)
      // Others
      for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
        OtherOps.push_back(N->getOperand(i));

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
                                              MemSD->getMemoryVT(),
                                              MemSD->getMemOperand());

      SmallVector<SDValue, 4> ScalarRes;

      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue Res = NewLD.getValue(i);
        if (NeedTrunc)
          Res =
            DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
        ScalarRes.push_back(Res);
      }

      SDValue LoadChain = NewLD.getValue(NumElts);

      SDValue BuildVec =
        DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);

      Results.push_back(BuildVec);
      Results.push_back(LoadChain);
    } else {
      // i8 LDG/LDU
      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
             "Custom handling of non-i8 ldu/ldg?");

      // Just copy all operands as-is
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        Ops.push_back(N->getOperand(i));

      // Force output to i16
      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // We make sure the memory type is i8, which will be used during isel
      // to select the proper instruction.
      SDValue NewLD =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
                                MVT::i8, MemSD->getMemOperand());

      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
                                    NewLD.getValue(0)));
      Results.push_back(NewLD.getValue(1));
    }
  }
  }
}
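
// Illustrative sketch (hypothetical helper, not part of the backend): a
// vector LDG/LDU node produced above has NumElts scalar results followed by
// the chain, and the original vector value is rebuilt from the scalars with
// ISD::BUILD_VECTOR.  Schematically, for a <4 x float> ldg:
#if 0
#include <array>

// Models NVPTXISD::LDGV4: one read-only global load yielding four scalar
// lanes, which BUILD_VECTOR then reassembles into the <4 x float> value
// the caller expected.
static std::array<float, 4> ldgV4(const float *GlobalPtr) {
  return { GlobalPtr[0], GlobalPtr[1], GlobalPtr[2], GlobalPtr[3] };
}
#endif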
void NVPTXTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    report_fatal_error("Unhandled custom legalization");
  case ISD::LOAD:
    ReplaceLoadVector(N, DAG, getDataLayout(), Results);
    return;
  case ISD::INTRINSIC_W_CHAIN:
    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
    return;
  }
}

// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
void NVPTXSection::anchor() {}

NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
  delete TextSection;
  delete DataSection;
  delete BSSSection;
  delete ReadOnlySection;

  delete StaticCtorSection;
  delete StaticDtorSection;
  delete LSDASection;
  delete EHFrameSection;
  delete DwarfAbbrevSection;
  delete DwarfInfoSection;
  delete DwarfLineSection;
  delete DwarfFrameSection;
  delete DwarfPubTypesSection;
  delete DwarfDebugInlineSection;
  delete DwarfStrSection;
  delete DwarfLocSection;
  delete DwarfARangesSection;
  delete DwarfRangesSection;
  delete DwarfMacroInfoSection;
}
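
// Illustrative sketch (hypothetical class, not part of the backend): the
// out-of-line anchor() above relies on the "key function" idiom.  Defining
// one virtual member function in a single .cpp file gives the compiler a
// home translation unit for the class's vtable, instead of emitting a weak
// copy in every file that uses the class:
#if 0
struct Widget {
  virtual ~Widget() {}
  virtual void anchor(); // declared in the header...
};
void Widget::anchor() {} // ...defined out-of-line in exactly one file.
#endif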