LLVM API Documentation
00001 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines an instruction selector for the NVPTX target. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "NVPTXISelDAGToDAG.h" 00015 #include "llvm/IR/GlobalValue.h" 00016 #include "llvm/IR/Instructions.h" 00017 #include "llvm/Support/CommandLine.h" 00018 #include "llvm/Support/Debug.h" 00019 #include "llvm/Support/ErrorHandling.h" 00020 #include "llvm/Support/raw_ostream.h" 00021 #include "llvm/Target/TargetIntrinsicInfo.h" 00022 00023 using namespace llvm; 00024 00025 #define DEBUG_TYPE "nvptx-isel" 00026 00027 static cl::opt<int> UsePrecDivF32( 00028 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, 00029 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" 00030 " IEEE Compliant F32 div.rnd if available."), 00031 cl::init(2)); 00032 00033 static cl::opt<bool> 00034 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden, 00035 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), 00036 cl::init(true)); 00037 00038 static cl::opt<bool> 00039 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, 00040 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), 00041 cl::init(false)); 00042 00043 00044 /// createNVPTXISelDag - This pass converts a legalized DAG into a 00045 /// NVPTX-specific DAG, ready for instruction scheduling. 
00046 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, 00047 llvm::CodeGenOpt::Level OptLevel) { 00048 return new NVPTXDAGToDAGISel(TM, OptLevel); 00049 } 00050 00051 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, 00052 CodeGenOpt::Level OptLevel) 00053 : SelectionDAGISel(tm, OptLevel), 00054 Subtarget(tm.getSubtarget<NVPTXSubtarget>()) { 00055 doMulWide = (OptLevel > 0); 00056 } 00057 00058 int NVPTXDAGToDAGISel::getDivF32Level() const { 00059 if (UsePrecDivF32.getNumOccurrences() > 0) { 00060 // If nvptx-prec-div32=N is used on the command-line, always honor it 00061 return UsePrecDivF32; 00062 } else { 00063 // Otherwise, use div.approx if fast math is enabled 00064 if (TM.Options.UnsafeFPMath) 00065 return 0; 00066 else 00067 return 2; 00068 } 00069 } 00070 00071 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const { 00072 if (UsePrecSqrtF32.getNumOccurrences() > 0) { 00073 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it 00074 return UsePrecSqrtF32; 00075 } else { 00076 // Otherwise, use sqrt.approx if fast math is enabled 00077 if (TM.Options.UnsafeFPMath) 00078 return false; 00079 else 00080 return true; 00081 } 00082 } 00083 00084 bool NVPTXDAGToDAGISel::useF32FTZ() const { 00085 if (FtzEnabled.getNumOccurrences() > 0) { 00086 // If nvptx-f32ftz is used on the command-line, always honor it 00087 return FtzEnabled; 00088 } else { 00089 const Function *F = MF->getFunction(); 00090 // Otherwise, check for an nvptx-f32ftz attribute on the function 00091 if (F->hasFnAttribute("nvptx-f32ftz")) 00092 return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex, 00093 "nvptx-f32ftz") 00094 .getValueAsString() == "true"); 00095 else 00096 return false; 00097 } 00098 } 00099 00100 bool NVPTXDAGToDAGISel::allowFMA() const { 00101 const NVPTXTargetLowering *TL = Subtarget.getTargetLowering(); 00102 return TL->allowFMA(*MF, OptLevel); 00103 } 00104 00105 /// Select - Select instructions not customized! 
Used for 00106 /// expanded, promoted and normal instructions. 00107 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { 00108 00109 if (N->isMachineOpcode()) { 00110 N->setNodeId(-1); 00111 return nullptr; // Already selected. 00112 } 00113 00114 SDNode *ResNode = nullptr; 00115 switch (N->getOpcode()) { 00116 case ISD::LOAD: 00117 ResNode = SelectLoad(N); 00118 break; 00119 case ISD::STORE: 00120 ResNode = SelectStore(N); 00121 break; 00122 case NVPTXISD::LoadV2: 00123 case NVPTXISD::LoadV4: 00124 ResNode = SelectLoadVector(N); 00125 break; 00126 case NVPTXISD::LDGV2: 00127 case NVPTXISD::LDGV4: 00128 case NVPTXISD::LDUV2: 00129 case NVPTXISD::LDUV4: 00130 ResNode = SelectLDGLDU(N); 00131 break; 00132 case NVPTXISD::StoreV2: 00133 case NVPTXISD::StoreV4: 00134 ResNode = SelectStoreVector(N); 00135 break; 00136 case NVPTXISD::LoadParam: 00137 case NVPTXISD::LoadParamV2: 00138 case NVPTXISD::LoadParamV4: 00139 ResNode = SelectLoadParam(N); 00140 break; 00141 case NVPTXISD::StoreRetval: 00142 case NVPTXISD::StoreRetvalV2: 00143 case NVPTXISD::StoreRetvalV4: 00144 ResNode = SelectStoreRetval(N); 00145 break; 00146 case NVPTXISD::StoreParam: 00147 case NVPTXISD::StoreParamV2: 00148 case NVPTXISD::StoreParamV4: 00149 case NVPTXISD::StoreParamS32: 00150 case NVPTXISD::StoreParamU32: 00151 ResNode = SelectStoreParam(N); 00152 break; 00153 case ISD::INTRINSIC_WO_CHAIN: 00154 ResNode = SelectIntrinsicNoChain(N); 00155 break; 00156 case ISD::INTRINSIC_W_CHAIN: 00157 ResNode = SelectIntrinsicChain(N); 00158 break; 00159 case NVPTXISD::Tex1DFloatS32: 00160 case NVPTXISD::Tex1DFloatFloat: 00161 case NVPTXISD::Tex1DFloatFloatLevel: 00162 case NVPTXISD::Tex1DFloatFloatGrad: 00163 case NVPTXISD::Tex1DS32S32: 00164 case NVPTXISD::Tex1DS32Float: 00165 case NVPTXISD::Tex1DS32FloatLevel: 00166 case NVPTXISD::Tex1DS32FloatGrad: 00167 case NVPTXISD::Tex1DU32S32: 00168 case NVPTXISD::Tex1DU32Float: 00169 case NVPTXISD::Tex1DU32FloatLevel: 00170 case NVPTXISD::Tex1DU32FloatGrad: 00171 case 
NVPTXISD::Tex1DArrayFloatS32: 00172 case NVPTXISD::Tex1DArrayFloatFloat: 00173 case NVPTXISD::Tex1DArrayFloatFloatLevel: 00174 case NVPTXISD::Tex1DArrayFloatFloatGrad: 00175 case NVPTXISD::Tex1DArrayS32S32: 00176 case NVPTXISD::Tex1DArrayS32Float: 00177 case NVPTXISD::Tex1DArrayS32FloatLevel: 00178 case NVPTXISD::Tex1DArrayS32FloatGrad: 00179 case NVPTXISD::Tex1DArrayU32S32: 00180 case NVPTXISD::Tex1DArrayU32Float: 00181 case NVPTXISD::Tex1DArrayU32FloatLevel: 00182 case NVPTXISD::Tex1DArrayU32FloatGrad: 00183 case NVPTXISD::Tex2DFloatS32: 00184 case NVPTXISD::Tex2DFloatFloat: 00185 case NVPTXISD::Tex2DFloatFloatLevel: 00186 case NVPTXISD::Tex2DFloatFloatGrad: 00187 case NVPTXISD::Tex2DS32S32: 00188 case NVPTXISD::Tex2DS32Float: 00189 case NVPTXISD::Tex2DS32FloatLevel: 00190 case NVPTXISD::Tex2DS32FloatGrad: 00191 case NVPTXISD::Tex2DU32S32: 00192 case NVPTXISD::Tex2DU32Float: 00193 case NVPTXISD::Tex2DU32FloatLevel: 00194 case NVPTXISD::Tex2DU32FloatGrad: 00195 case NVPTXISD::Tex2DArrayFloatS32: 00196 case NVPTXISD::Tex2DArrayFloatFloat: 00197 case NVPTXISD::Tex2DArrayFloatFloatLevel: 00198 case NVPTXISD::Tex2DArrayFloatFloatGrad: 00199 case NVPTXISD::Tex2DArrayS32S32: 00200 case NVPTXISD::Tex2DArrayS32Float: 00201 case NVPTXISD::Tex2DArrayS32FloatLevel: 00202 case NVPTXISD::Tex2DArrayS32FloatGrad: 00203 case NVPTXISD::Tex2DArrayU32S32: 00204 case NVPTXISD::Tex2DArrayU32Float: 00205 case NVPTXISD::Tex2DArrayU32FloatLevel: 00206 case NVPTXISD::Tex2DArrayU32FloatGrad: 00207 case NVPTXISD::Tex3DFloatS32: 00208 case NVPTXISD::Tex3DFloatFloat: 00209 case NVPTXISD::Tex3DFloatFloatLevel: 00210 case NVPTXISD::Tex3DFloatFloatGrad: 00211 case NVPTXISD::Tex3DS32S32: 00212 case NVPTXISD::Tex3DS32Float: 00213 case NVPTXISD::Tex3DS32FloatLevel: 00214 case NVPTXISD::Tex3DS32FloatGrad: 00215 case NVPTXISD::Tex3DU32S32: 00216 case NVPTXISD::Tex3DU32Float: 00217 case NVPTXISD::Tex3DU32FloatLevel: 00218 case NVPTXISD::Tex3DU32FloatGrad: 00219 case NVPTXISD::TexCubeFloatFloat: 00220 
case NVPTXISD::TexCubeFloatFloatLevel: 00221 case NVPTXISD::TexCubeS32Float: 00222 case NVPTXISD::TexCubeS32FloatLevel: 00223 case NVPTXISD::TexCubeU32Float: 00224 case NVPTXISD::TexCubeU32FloatLevel: 00225 case NVPTXISD::TexCubeArrayFloatFloat: 00226 case NVPTXISD::TexCubeArrayFloatFloatLevel: 00227 case NVPTXISD::TexCubeArrayS32Float: 00228 case NVPTXISD::TexCubeArrayS32FloatLevel: 00229 case NVPTXISD::TexCubeArrayU32Float: 00230 case NVPTXISD::TexCubeArrayU32FloatLevel: 00231 case NVPTXISD::Tld4R2DFloatFloat: 00232 case NVPTXISD::Tld4G2DFloatFloat: 00233 case NVPTXISD::Tld4B2DFloatFloat: 00234 case NVPTXISD::Tld4A2DFloatFloat: 00235 case NVPTXISD::Tld4R2DS64Float: 00236 case NVPTXISD::Tld4G2DS64Float: 00237 case NVPTXISD::Tld4B2DS64Float: 00238 case NVPTXISD::Tld4A2DS64Float: 00239 case NVPTXISD::Tld4R2DU64Float: 00240 case NVPTXISD::Tld4G2DU64Float: 00241 case NVPTXISD::Tld4B2DU64Float: 00242 case NVPTXISD::Tld4A2DU64Float: 00243 case NVPTXISD::TexUnified1DFloatS32: 00244 case NVPTXISD::TexUnified1DFloatFloat: 00245 case NVPTXISD::TexUnified1DFloatFloatLevel: 00246 case NVPTXISD::TexUnified1DFloatFloatGrad: 00247 case NVPTXISD::TexUnified1DS32S32: 00248 case NVPTXISD::TexUnified1DS32Float: 00249 case NVPTXISD::TexUnified1DS32FloatLevel: 00250 case NVPTXISD::TexUnified1DS32FloatGrad: 00251 case NVPTXISD::TexUnified1DU32S32: 00252 case NVPTXISD::TexUnified1DU32Float: 00253 case NVPTXISD::TexUnified1DU32FloatLevel: 00254 case NVPTXISD::TexUnified1DU32FloatGrad: 00255 case NVPTXISD::TexUnified1DArrayFloatS32: 00256 case NVPTXISD::TexUnified1DArrayFloatFloat: 00257 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 00258 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 00259 case NVPTXISD::TexUnified1DArrayS32S32: 00260 case NVPTXISD::TexUnified1DArrayS32Float: 00261 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 00262 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 00263 case NVPTXISD::TexUnified1DArrayU32S32: 00264 case NVPTXISD::TexUnified1DArrayU32Float: 00265 case 
NVPTXISD::TexUnified1DArrayU32FloatLevel: 00266 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 00267 case NVPTXISD::TexUnified2DFloatS32: 00268 case NVPTXISD::TexUnified2DFloatFloat: 00269 case NVPTXISD::TexUnified2DFloatFloatLevel: 00270 case NVPTXISD::TexUnified2DFloatFloatGrad: 00271 case NVPTXISD::TexUnified2DS32S32: 00272 case NVPTXISD::TexUnified2DS32Float: 00273 case NVPTXISD::TexUnified2DS32FloatLevel: 00274 case NVPTXISD::TexUnified2DS32FloatGrad: 00275 case NVPTXISD::TexUnified2DU32S32: 00276 case NVPTXISD::TexUnified2DU32Float: 00277 case NVPTXISD::TexUnified2DU32FloatLevel: 00278 case NVPTXISD::TexUnified2DU32FloatGrad: 00279 case NVPTXISD::TexUnified2DArrayFloatS32: 00280 case NVPTXISD::TexUnified2DArrayFloatFloat: 00281 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 00282 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 00283 case NVPTXISD::TexUnified2DArrayS32S32: 00284 case NVPTXISD::TexUnified2DArrayS32Float: 00285 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 00286 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 00287 case NVPTXISD::TexUnified2DArrayU32S32: 00288 case NVPTXISD::TexUnified2DArrayU32Float: 00289 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 00290 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 00291 case NVPTXISD::TexUnified3DFloatS32: 00292 case NVPTXISD::TexUnified3DFloatFloat: 00293 case NVPTXISD::TexUnified3DFloatFloatLevel: 00294 case NVPTXISD::TexUnified3DFloatFloatGrad: 00295 case NVPTXISD::TexUnified3DS32S32: 00296 case NVPTXISD::TexUnified3DS32Float: 00297 case NVPTXISD::TexUnified3DS32FloatLevel: 00298 case NVPTXISD::TexUnified3DS32FloatGrad: 00299 case NVPTXISD::TexUnified3DU32S32: 00300 case NVPTXISD::TexUnified3DU32Float: 00301 case NVPTXISD::TexUnified3DU32FloatLevel: 00302 case NVPTXISD::TexUnified3DU32FloatGrad: 00303 case NVPTXISD::TexUnifiedCubeFloatFloat: 00304 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 00305 case NVPTXISD::TexUnifiedCubeS32Float: 00306 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 00307 case 
NVPTXISD::TexUnifiedCubeU32Float: 00308 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 00309 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 00310 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 00311 case NVPTXISD::TexUnifiedCubeArrayS32Float: 00312 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 00313 case NVPTXISD::TexUnifiedCubeArrayU32Float: 00314 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 00315 case NVPTXISD::Tld4UnifiedR2DFloatFloat: 00316 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 00317 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 00318 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 00319 case NVPTXISD::Tld4UnifiedR2DS64Float: 00320 case NVPTXISD::Tld4UnifiedG2DS64Float: 00321 case NVPTXISD::Tld4UnifiedB2DS64Float: 00322 case NVPTXISD::Tld4UnifiedA2DS64Float: 00323 case NVPTXISD::Tld4UnifiedR2DU64Float: 00324 case NVPTXISD::Tld4UnifiedG2DU64Float: 00325 case NVPTXISD::Tld4UnifiedB2DU64Float: 00326 case NVPTXISD::Tld4UnifiedA2DU64Float: 00327 ResNode = SelectTextureIntrinsic(N); 00328 break; 00329 case NVPTXISD::Suld1DI8Clamp: 00330 case NVPTXISD::Suld1DI16Clamp: 00331 case NVPTXISD::Suld1DI32Clamp: 00332 case NVPTXISD::Suld1DI64Clamp: 00333 case NVPTXISD::Suld1DV2I8Clamp: 00334 case NVPTXISD::Suld1DV2I16Clamp: 00335 case NVPTXISD::Suld1DV2I32Clamp: 00336 case NVPTXISD::Suld1DV2I64Clamp: 00337 case NVPTXISD::Suld1DV4I8Clamp: 00338 case NVPTXISD::Suld1DV4I16Clamp: 00339 case NVPTXISD::Suld1DV4I32Clamp: 00340 case NVPTXISD::Suld1DArrayI8Clamp: 00341 case NVPTXISD::Suld1DArrayI16Clamp: 00342 case NVPTXISD::Suld1DArrayI32Clamp: 00343 case NVPTXISD::Suld1DArrayI64Clamp: 00344 case NVPTXISD::Suld1DArrayV2I8Clamp: 00345 case NVPTXISD::Suld1DArrayV2I16Clamp: 00346 case NVPTXISD::Suld1DArrayV2I32Clamp: 00347 case NVPTXISD::Suld1DArrayV2I64Clamp: 00348 case NVPTXISD::Suld1DArrayV4I8Clamp: 00349 case NVPTXISD::Suld1DArrayV4I16Clamp: 00350 case NVPTXISD::Suld1DArrayV4I32Clamp: 00351 case NVPTXISD::Suld2DI8Clamp: 00352 case NVPTXISD::Suld2DI16Clamp: 00353 case 
NVPTXISD::Suld2DI32Clamp: 00354 case NVPTXISD::Suld2DI64Clamp: 00355 case NVPTXISD::Suld2DV2I8Clamp: 00356 case NVPTXISD::Suld2DV2I16Clamp: 00357 case NVPTXISD::Suld2DV2I32Clamp: 00358 case NVPTXISD::Suld2DV2I64Clamp: 00359 case NVPTXISD::Suld2DV4I8Clamp: 00360 case NVPTXISD::Suld2DV4I16Clamp: 00361 case NVPTXISD::Suld2DV4I32Clamp: 00362 case NVPTXISD::Suld2DArrayI8Clamp: 00363 case NVPTXISD::Suld2DArrayI16Clamp: 00364 case NVPTXISD::Suld2DArrayI32Clamp: 00365 case NVPTXISD::Suld2DArrayI64Clamp: 00366 case NVPTXISD::Suld2DArrayV2I8Clamp: 00367 case NVPTXISD::Suld2DArrayV2I16Clamp: 00368 case NVPTXISD::Suld2DArrayV2I32Clamp: 00369 case NVPTXISD::Suld2DArrayV2I64Clamp: 00370 case NVPTXISD::Suld2DArrayV4I8Clamp: 00371 case NVPTXISD::Suld2DArrayV4I16Clamp: 00372 case NVPTXISD::Suld2DArrayV4I32Clamp: 00373 case NVPTXISD::Suld3DI8Clamp: 00374 case NVPTXISD::Suld3DI16Clamp: 00375 case NVPTXISD::Suld3DI32Clamp: 00376 case NVPTXISD::Suld3DI64Clamp: 00377 case NVPTXISD::Suld3DV2I8Clamp: 00378 case NVPTXISD::Suld3DV2I16Clamp: 00379 case NVPTXISD::Suld3DV2I32Clamp: 00380 case NVPTXISD::Suld3DV2I64Clamp: 00381 case NVPTXISD::Suld3DV4I8Clamp: 00382 case NVPTXISD::Suld3DV4I16Clamp: 00383 case NVPTXISD::Suld3DV4I32Clamp: 00384 case NVPTXISD::Suld1DI8Trap: 00385 case NVPTXISD::Suld1DI16Trap: 00386 case NVPTXISD::Suld1DI32Trap: 00387 case NVPTXISD::Suld1DI64Trap: 00388 case NVPTXISD::Suld1DV2I8Trap: 00389 case NVPTXISD::Suld1DV2I16Trap: 00390 case NVPTXISD::Suld1DV2I32Trap: 00391 case NVPTXISD::Suld1DV2I64Trap: 00392 case NVPTXISD::Suld1DV4I8Trap: 00393 case NVPTXISD::Suld1DV4I16Trap: 00394 case NVPTXISD::Suld1DV4I32Trap: 00395 case NVPTXISD::Suld1DArrayI8Trap: 00396 case NVPTXISD::Suld1DArrayI16Trap: 00397 case NVPTXISD::Suld1DArrayI32Trap: 00398 case NVPTXISD::Suld1DArrayI64Trap: 00399 case NVPTXISD::Suld1DArrayV2I8Trap: 00400 case NVPTXISD::Suld1DArrayV2I16Trap: 00401 case NVPTXISD::Suld1DArrayV2I32Trap: 00402 case NVPTXISD::Suld1DArrayV2I64Trap: 00403 case 
NVPTXISD::Suld1DArrayV4I8Trap: 00404 case NVPTXISD::Suld1DArrayV4I16Trap: 00405 case NVPTXISD::Suld1DArrayV4I32Trap: 00406 case NVPTXISD::Suld2DI8Trap: 00407 case NVPTXISD::Suld2DI16Trap: 00408 case NVPTXISD::Suld2DI32Trap: 00409 case NVPTXISD::Suld2DI64Trap: 00410 case NVPTXISD::Suld2DV2I8Trap: 00411 case NVPTXISD::Suld2DV2I16Trap: 00412 case NVPTXISD::Suld2DV2I32Trap: 00413 case NVPTXISD::Suld2DV2I64Trap: 00414 case NVPTXISD::Suld2DV4I8Trap: 00415 case NVPTXISD::Suld2DV4I16Trap: 00416 case NVPTXISD::Suld2DV4I32Trap: 00417 case NVPTXISD::Suld2DArrayI8Trap: 00418 case NVPTXISD::Suld2DArrayI16Trap: 00419 case NVPTXISD::Suld2DArrayI32Trap: 00420 case NVPTXISD::Suld2DArrayI64Trap: 00421 case NVPTXISD::Suld2DArrayV2I8Trap: 00422 case NVPTXISD::Suld2DArrayV2I16Trap: 00423 case NVPTXISD::Suld2DArrayV2I32Trap: 00424 case NVPTXISD::Suld2DArrayV2I64Trap: 00425 case NVPTXISD::Suld2DArrayV4I8Trap: 00426 case NVPTXISD::Suld2DArrayV4I16Trap: 00427 case NVPTXISD::Suld2DArrayV4I32Trap: 00428 case NVPTXISD::Suld3DI8Trap: 00429 case NVPTXISD::Suld3DI16Trap: 00430 case NVPTXISD::Suld3DI32Trap: 00431 case NVPTXISD::Suld3DI64Trap: 00432 case NVPTXISD::Suld3DV2I8Trap: 00433 case NVPTXISD::Suld3DV2I16Trap: 00434 case NVPTXISD::Suld3DV2I32Trap: 00435 case NVPTXISD::Suld3DV2I64Trap: 00436 case NVPTXISD::Suld3DV4I8Trap: 00437 case NVPTXISD::Suld3DV4I16Trap: 00438 case NVPTXISD::Suld3DV4I32Trap: 00439 case NVPTXISD::Suld1DI8Zero: 00440 case NVPTXISD::Suld1DI16Zero: 00441 case NVPTXISD::Suld1DI32Zero: 00442 case NVPTXISD::Suld1DI64Zero: 00443 case NVPTXISD::Suld1DV2I8Zero: 00444 case NVPTXISD::Suld1DV2I16Zero: 00445 case NVPTXISD::Suld1DV2I32Zero: 00446 case NVPTXISD::Suld1DV2I64Zero: 00447 case NVPTXISD::Suld1DV4I8Zero: 00448 case NVPTXISD::Suld1DV4I16Zero: 00449 case NVPTXISD::Suld1DV4I32Zero: 00450 case NVPTXISD::Suld1DArrayI8Zero: 00451 case NVPTXISD::Suld1DArrayI16Zero: 00452 case NVPTXISD::Suld1DArrayI32Zero: 00453 case NVPTXISD::Suld1DArrayI64Zero: 00454 case 
NVPTXISD::Suld1DArrayV2I8Zero: 00455 case NVPTXISD::Suld1DArrayV2I16Zero: 00456 case NVPTXISD::Suld1DArrayV2I32Zero: 00457 case NVPTXISD::Suld1DArrayV2I64Zero: 00458 case NVPTXISD::Suld1DArrayV4I8Zero: 00459 case NVPTXISD::Suld1DArrayV4I16Zero: 00460 case NVPTXISD::Suld1DArrayV4I32Zero: 00461 case NVPTXISD::Suld2DI8Zero: 00462 case NVPTXISD::Suld2DI16Zero: 00463 case NVPTXISD::Suld2DI32Zero: 00464 case NVPTXISD::Suld2DI64Zero: 00465 case NVPTXISD::Suld2DV2I8Zero: 00466 case NVPTXISD::Suld2DV2I16Zero: 00467 case NVPTXISD::Suld2DV2I32Zero: 00468 case NVPTXISD::Suld2DV2I64Zero: 00469 case NVPTXISD::Suld2DV4I8Zero: 00470 case NVPTXISD::Suld2DV4I16Zero: 00471 case NVPTXISD::Suld2DV4I32Zero: 00472 case NVPTXISD::Suld2DArrayI8Zero: 00473 case NVPTXISD::Suld2DArrayI16Zero: 00474 case NVPTXISD::Suld2DArrayI32Zero: 00475 case NVPTXISD::Suld2DArrayI64Zero: 00476 case NVPTXISD::Suld2DArrayV2I8Zero: 00477 case NVPTXISD::Suld2DArrayV2I16Zero: 00478 case NVPTXISD::Suld2DArrayV2I32Zero: 00479 case NVPTXISD::Suld2DArrayV2I64Zero: 00480 case NVPTXISD::Suld2DArrayV4I8Zero: 00481 case NVPTXISD::Suld2DArrayV4I16Zero: 00482 case NVPTXISD::Suld2DArrayV4I32Zero: 00483 case NVPTXISD::Suld3DI8Zero: 00484 case NVPTXISD::Suld3DI16Zero: 00485 case NVPTXISD::Suld3DI32Zero: 00486 case NVPTXISD::Suld3DI64Zero: 00487 case NVPTXISD::Suld3DV2I8Zero: 00488 case NVPTXISD::Suld3DV2I16Zero: 00489 case NVPTXISD::Suld3DV2I32Zero: 00490 case NVPTXISD::Suld3DV2I64Zero: 00491 case NVPTXISD::Suld3DV4I8Zero: 00492 case NVPTXISD::Suld3DV4I16Zero: 00493 case NVPTXISD::Suld3DV4I32Zero: 00494 ResNode = SelectSurfaceIntrinsic(N); 00495 break; 00496 case ISD::AND: 00497 case ISD::SRA: 00498 case ISD::SRL: 00499 // Try to select BFE 00500 ResNode = SelectBFE(N); 00501 break; 00502 case ISD::ADDRSPACECAST: 00503 ResNode = SelectAddrSpaceCast(N); 00504 break; 00505 default: 00506 break; 00507 } 00508 if (ResNode) 00509 return ResNode; 00510 return SelectCode(N); 00511 } 00512 00513 SDNode 
*NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) { 00514 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 00515 switch (IID) { 00516 default: 00517 return NULL; 00518 case Intrinsic::nvvm_ldg_global_f: 00519 case Intrinsic::nvvm_ldg_global_i: 00520 case Intrinsic::nvvm_ldg_global_p: 00521 case Intrinsic::nvvm_ldu_global_f: 00522 case Intrinsic::nvvm_ldu_global_i: 00523 case Intrinsic::nvvm_ldu_global_p: 00524 return SelectLDGLDU(N); 00525 } 00526 } 00527 00528 static unsigned int getCodeAddrSpace(MemSDNode *N, 00529 const NVPTXSubtarget &Subtarget) { 00530 const Value *Src = N->getMemOperand()->getValue(); 00531 00532 if (!Src) 00533 return NVPTX::PTXLdStInstCode::GENERIC; 00534 00535 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) { 00536 switch (PT->getAddressSpace()) { 00537 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; 00538 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; 00539 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; 00540 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; 00541 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; 00542 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT; 00543 default: break; 00544 } 00545 } 00546 return NVPTX::PTXLdStInstCode::GENERIC; 00547 } 00548 00549 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) { 00550 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 00551 switch (IID) { 00552 default: 00553 return nullptr; 00554 case Intrinsic::nvvm_texsurf_handle_internal: 00555 return SelectTexSurfHandle(N); 00556 } 00557 } 00558 00559 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) { 00560 // Op 0 is the intrinsic ID 00561 SDValue Wrapper = N->getOperand(1); 00562 SDValue GlobalVal = Wrapper.getOperand(0); 00563 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64, 00564 GlobalVal); 
00565 } 00566 00567 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { 00568 SDValue Src = N->getOperand(0); 00569 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N); 00570 unsigned SrcAddrSpace = CastN->getSrcAddressSpace(); 00571 unsigned DstAddrSpace = CastN->getDestAddressSpace(); 00572 00573 assert(SrcAddrSpace != DstAddrSpace && 00574 "addrspacecast must be between different address spaces"); 00575 00576 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) { 00577 // Specific to generic 00578 unsigned Opc; 00579 switch (SrcAddrSpace) { 00580 default: report_fatal_error("Bad address space in addrspacecast"); 00581 case ADDRESS_SPACE_GLOBAL: 00582 Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64 00583 : NVPTX::cvta_global_yes; 00584 break; 00585 case ADDRESS_SPACE_SHARED: 00586 Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64 00587 : NVPTX::cvta_shared_yes; 00588 break; 00589 case ADDRESS_SPACE_CONST: 00590 Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64 00591 : NVPTX::cvta_const_yes; 00592 break; 00593 case ADDRESS_SPACE_LOCAL: 00594 Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64 00595 : NVPTX::cvta_local_yes; 00596 break; 00597 } 00598 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src); 00599 } else { 00600 // Generic to specific 00601 if (SrcAddrSpace != 0) 00602 report_fatal_error("Cannot cast between two non-generic address spaces"); 00603 unsigned Opc; 00604 switch (DstAddrSpace) { 00605 default: report_fatal_error("Bad address space in addrspacecast"); 00606 case ADDRESS_SPACE_GLOBAL: 00607 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64 00608 : NVPTX::cvta_to_global_yes; 00609 break; 00610 case ADDRESS_SPACE_SHARED: 00611 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64 00612 : NVPTX::cvta_to_shared_yes; 00613 break; 00614 case ADDRESS_SPACE_CONST: 00615 Opc = Subtarget.is64Bit() ? 
NVPTX::cvta_to_const_yes_64 00616 : NVPTX::cvta_to_const_yes; 00617 break; 00618 case ADDRESS_SPACE_LOCAL: 00619 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64 00620 : NVPTX::cvta_to_local_yes; 00621 break; 00622 } 00623 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src); 00624 } 00625 } 00626 00627 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { 00628 SDLoc dl(N); 00629 LoadSDNode *LD = cast<LoadSDNode>(N); 00630 EVT LoadedVT = LD->getMemoryVT(); 00631 SDNode *NVPTXLD = nullptr; 00632 00633 // do not support pre/post inc/dec 00634 if (LD->isIndexed()) 00635 return nullptr; 00636 00637 if (!LoadedVT.isSimple()) 00638 return nullptr; 00639 00640 // Address Space Setting 00641 unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget); 00642 00643 // Volatile Setting 00644 // - .volatile is only availalble for .global and .shared 00645 bool isVolatile = LD->isVolatile(); 00646 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 00647 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 00648 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 00649 isVolatile = false; 00650 00651 // Vector Setting 00652 MVT SimpleVT = LoadedVT.getSimpleVT(); 00653 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; 00654 if (SimpleVT.isVector()) { 00655 unsigned num = SimpleVT.getVectorNumElements(); 00656 if (num == 2) 00657 vecType = NVPTX::PTXLdStInstCode::V2; 00658 else if (num == 4) 00659 vecType = NVPTX::PTXLdStInstCode::V4; 00660 else 00661 return nullptr; 00662 } 00663 00664 // Type Setting: fromType + fromTypeWidth 00665 // 00666 // Sign : ISD::SEXTLOAD 00667 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the 00668 // type is integer 00669 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float 00670 MVT ScalarVT = SimpleVT.getScalarType(); 00671 // Read at least 8 bits (predicates are stored as 8-bit values) 00672 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits()); 00673 unsigned int fromType; 00674 if 
((LD->getExtensionType() == ISD::SEXTLOAD)) 00675 fromType = NVPTX::PTXLdStInstCode::Signed; 00676 else if (ScalarVT.isFloatingPoint()) 00677 fromType = NVPTX::PTXLdStInstCode::Float; 00678 else 00679 fromType = NVPTX::PTXLdStInstCode::Unsigned; 00680 00681 // Create the machine instruction DAG 00682 SDValue Chain = N->getOperand(0); 00683 SDValue N1 = N->getOperand(1); 00684 SDValue Addr; 00685 SDValue Offset, Base; 00686 unsigned Opcode; 00687 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy; 00688 00689 if (SelectDirectAddr(N1, Addr)) { 00690 switch (TargetVT) { 00691 case MVT::i8: 00692 Opcode = NVPTX::LD_i8_avar; 00693 break; 00694 case MVT::i16: 00695 Opcode = NVPTX::LD_i16_avar; 00696 break; 00697 case MVT::i32: 00698 Opcode = NVPTX::LD_i32_avar; 00699 break; 00700 case MVT::i64: 00701 Opcode = NVPTX::LD_i64_avar; 00702 break; 00703 case MVT::f32: 00704 Opcode = NVPTX::LD_f32_avar; 00705 break; 00706 case MVT::f64: 00707 Opcode = NVPTX::LD_f64_avar; 00708 break; 00709 default: 00710 return nullptr; 00711 } 00712 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), 00713 getI32Imm(vecType), getI32Imm(fromType), 00714 getI32Imm(fromTypeWidth), Addr, Chain }; 00715 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 00716 } else if (Subtarget.is64Bit() 00717 ? 
SelectADDRsi64(N1.getNode(), N1, Base, Offset) 00718 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { 00719 switch (TargetVT) { 00720 case MVT::i8: 00721 Opcode = NVPTX::LD_i8_asi; 00722 break; 00723 case MVT::i16: 00724 Opcode = NVPTX::LD_i16_asi; 00725 break; 00726 case MVT::i32: 00727 Opcode = NVPTX::LD_i32_asi; 00728 break; 00729 case MVT::i64: 00730 Opcode = NVPTX::LD_i64_asi; 00731 break; 00732 case MVT::f32: 00733 Opcode = NVPTX::LD_f32_asi; 00734 break; 00735 case MVT::f64: 00736 Opcode = NVPTX::LD_f64_asi; 00737 break; 00738 default: 00739 return nullptr; 00740 } 00741 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), 00742 getI32Imm(vecType), getI32Imm(fromType), 00743 getI32Imm(fromTypeWidth), Base, Offset, Chain }; 00744 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 00745 } else if (Subtarget.is64Bit() 00746 ? SelectADDRri64(N1.getNode(), N1, Base, Offset) 00747 : SelectADDRri(N1.getNode(), N1, Base, Offset)) { 00748 if (Subtarget.is64Bit()) { 00749 switch (TargetVT) { 00750 case MVT::i8: 00751 Opcode = NVPTX::LD_i8_ari_64; 00752 break; 00753 case MVT::i16: 00754 Opcode = NVPTX::LD_i16_ari_64; 00755 break; 00756 case MVT::i32: 00757 Opcode = NVPTX::LD_i32_ari_64; 00758 break; 00759 case MVT::i64: 00760 Opcode = NVPTX::LD_i64_ari_64; 00761 break; 00762 case MVT::f32: 00763 Opcode = NVPTX::LD_f32_ari_64; 00764 break; 00765 case MVT::f64: 00766 Opcode = NVPTX::LD_f64_ari_64; 00767 break; 00768 default: 00769 return nullptr; 00770 } 00771 } else { 00772 switch (TargetVT) { 00773 case MVT::i8: 00774 Opcode = NVPTX::LD_i8_ari; 00775 break; 00776 case MVT::i16: 00777 Opcode = NVPTX::LD_i16_ari; 00778 break; 00779 case MVT::i32: 00780 Opcode = NVPTX::LD_i32_ari; 00781 break; 00782 case MVT::i64: 00783 Opcode = NVPTX::LD_i64_ari; 00784 break; 00785 case MVT::f32: 00786 Opcode = NVPTX::LD_f32_ari; 00787 break; 00788 case MVT::f64: 00789 Opcode = NVPTX::LD_f64_ari; 00790 break; 00791 default: 00792 return nullptr; 00793 
} 00794 } 00795 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), 00796 getI32Imm(vecType), getI32Imm(fromType), 00797 getI32Imm(fromTypeWidth), Base, Offset, Chain }; 00798 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 00799 } else { 00800 if (Subtarget.is64Bit()) { 00801 switch (TargetVT) { 00802 case MVT::i8: 00803 Opcode = NVPTX::LD_i8_areg_64; 00804 break; 00805 case MVT::i16: 00806 Opcode = NVPTX::LD_i16_areg_64; 00807 break; 00808 case MVT::i32: 00809 Opcode = NVPTX::LD_i32_areg_64; 00810 break; 00811 case MVT::i64: 00812 Opcode = NVPTX::LD_i64_areg_64; 00813 break; 00814 case MVT::f32: 00815 Opcode = NVPTX::LD_f32_areg_64; 00816 break; 00817 case MVT::f64: 00818 Opcode = NVPTX::LD_f64_areg_64; 00819 break; 00820 default: 00821 return nullptr; 00822 } 00823 } else { 00824 switch (TargetVT) { 00825 case MVT::i8: 00826 Opcode = NVPTX::LD_i8_areg; 00827 break; 00828 case MVT::i16: 00829 Opcode = NVPTX::LD_i16_areg; 00830 break; 00831 case MVT::i32: 00832 Opcode = NVPTX::LD_i32_areg; 00833 break; 00834 case MVT::i64: 00835 Opcode = NVPTX::LD_i64_areg; 00836 break; 00837 case MVT::f32: 00838 Opcode = NVPTX::LD_f32_areg; 00839 break; 00840 case MVT::f64: 00841 Opcode = NVPTX::LD_f64_areg; 00842 break; 00843 default: 00844 return nullptr; 00845 } 00846 } 00847 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), 00848 getI32Imm(vecType), getI32Imm(fromType), 00849 getI32Imm(fromTypeWidth), N1, Chain }; 00850 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 00851 } 00852 00853 if (NVPTXLD) { 00854 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 00855 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 00856 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1); 00857 } 00858 00859 return NVPTXLD; 00860 } 00861 00862 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { 00863 00864 SDValue Chain = N->getOperand(0); 00865 SDValue Op1 = N->getOperand(1); 
00866 SDValue Addr, Offset, Base; 00867 unsigned Opcode; 00868 SDLoc DL(N); 00869 SDNode *LD; 00870 MemSDNode *MemSD = cast<MemSDNode>(N); 00871 EVT LoadedVT = MemSD->getMemoryVT(); 00872 00873 if (!LoadedVT.isSimple()) 00874 return nullptr; 00875 00876 // Address Space Setting 00877 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); 00878 00879 // Volatile Setting 00880 // - .volatile is only availalble for .global and .shared 00881 bool IsVolatile = MemSD->isVolatile(); 00882 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 00883 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 00884 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 00885 IsVolatile = false; 00886 00887 // Vector Setting 00888 MVT SimpleVT = LoadedVT.getSimpleVT(); 00889 00890 // Type Setting: fromType + fromTypeWidth 00891 // 00892 // Sign : ISD::SEXTLOAD 00893 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the 00894 // type is integer 00895 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float 00896 MVT ScalarVT = SimpleVT.getScalarType(); 00897 // Read at least 8 bits (predicates are stored as 8-bit values) 00898 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits()); 00899 unsigned int FromType; 00900 // The last operand holds the original LoadSDNode::getExtensionType() value 00901 unsigned ExtensionType = cast<ConstantSDNode>( 00902 N->getOperand(N->getNumOperands() - 1))->getZExtValue(); 00903 if (ExtensionType == ISD::SEXTLOAD) 00904 FromType = NVPTX::PTXLdStInstCode::Signed; 00905 else if (ScalarVT.isFloatingPoint()) 00906 FromType = NVPTX::PTXLdStInstCode::Float; 00907 else 00908 FromType = NVPTX::PTXLdStInstCode::Unsigned; 00909 00910 unsigned VecType; 00911 00912 switch (N->getOpcode()) { 00913 case NVPTXISD::LoadV2: 00914 VecType = NVPTX::PTXLdStInstCode::V2; 00915 break; 00916 case NVPTXISD::LoadV4: 00917 VecType = NVPTX::PTXLdStInstCode::V4; 00918 break; 00919 default: 00920 return nullptr; 00921 } 00922 00923 EVT EltVT = 
N->getValueType(0); 00924 00925 if (SelectDirectAddr(Op1, Addr)) { 00926 switch (N->getOpcode()) { 00927 default: 00928 return nullptr; 00929 case NVPTXISD::LoadV2: 00930 switch (EltVT.getSimpleVT().SimpleTy) { 00931 default: 00932 return nullptr; 00933 case MVT::i8: 00934 Opcode = NVPTX::LDV_i8_v2_avar; 00935 break; 00936 case MVT::i16: 00937 Opcode = NVPTX::LDV_i16_v2_avar; 00938 break; 00939 case MVT::i32: 00940 Opcode = NVPTX::LDV_i32_v2_avar; 00941 break; 00942 case MVT::i64: 00943 Opcode = NVPTX::LDV_i64_v2_avar; 00944 break; 00945 case MVT::f32: 00946 Opcode = NVPTX::LDV_f32_v2_avar; 00947 break; 00948 case MVT::f64: 00949 Opcode = NVPTX::LDV_f64_v2_avar; 00950 break; 00951 } 00952 break; 00953 case NVPTXISD::LoadV4: 00954 switch (EltVT.getSimpleVT().SimpleTy) { 00955 default: 00956 return nullptr; 00957 case MVT::i8: 00958 Opcode = NVPTX::LDV_i8_v4_avar; 00959 break; 00960 case MVT::i16: 00961 Opcode = NVPTX::LDV_i16_v4_avar; 00962 break; 00963 case MVT::i32: 00964 Opcode = NVPTX::LDV_i32_v4_avar; 00965 break; 00966 case MVT::f32: 00967 Opcode = NVPTX::LDV_f32_v4_avar; 00968 break; 00969 } 00970 break; 00971 } 00972 00973 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), 00974 getI32Imm(VecType), getI32Imm(FromType), 00975 getI32Imm(FromTypeWidth), Addr, Chain }; 00976 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 00977 } else if (Subtarget.is64Bit() 00978 ? 
SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) 00979 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { 00980 switch (N->getOpcode()) { 00981 default: 00982 return nullptr; 00983 case NVPTXISD::LoadV2: 00984 switch (EltVT.getSimpleVT().SimpleTy) { 00985 default: 00986 return nullptr; 00987 case MVT::i8: 00988 Opcode = NVPTX::LDV_i8_v2_asi; 00989 break; 00990 case MVT::i16: 00991 Opcode = NVPTX::LDV_i16_v2_asi; 00992 break; 00993 case MVT::i32: 00994 Opcode = NVPTX::LDV_i32_v2_asi; 00995 break; 00996 case MVT::i64: 00997 Opcode = NVPTX::LDV_i64_v2_asi; 00998 break; 00999 case MVT::f32: 01000 Opcode = NVPTX::LDV_f32_v2_asi; 01001 break; 01002 case MVT::f64: 01003 Opcode = NVPTX::LDV_f64_v2_asi; 01004 break; 01005 } 01006 break; 01007 case NVPTXISD::LoadV4: 01008 switch (EltVT.getSimpleVT().SimpleTy) { 01009 default: 01010 return nullptr; 01011 case MVT::i8: 01012 Opcode = NVPTX::LDV_i8_v4_asi; 01013 break; 01014 case MVT::i16: 01015 Opcode = NVPTX::LDV_i16_v4_asi; 01016 break; 01017 case MVT::i32: 01018 Opcode = NVPTX::LDV_i32_v4_asi; 01019 break; 01020 case MVT::f32: 01021 Opcode = NVPTX::LDV_f32_v4_asi; 01022 break; 01023 } 01024 break; 01025 } 01026 01027 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), 01028 getI32Imm(VecType), getI32Imm(FromType), 01029 getI32Imm(FromTypeWidth), Base, Offset, Chain }; 01030 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 01031 } else if (Subtarget.is64Bit() 01032 ? 
SelectADDRri64(Op1.getNode(), Op1, Base, Offset) 01033 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { 01034 if (Subtarget.is64Bit()) { 01035 switch (N->getOpcode()) { 01036 default: 01037 return nullptr; 01038 case NVPTXISD::LoadV2: 01039 switch (EltVT.getSimpleVT().SimpleTy) { 01040 default: 01041 return nullptr; 01042 case MVT::i8: 01043 Opcode = NVPTX::LDV_i8_v2_ari_64; 01044 break; 01045 case MVT::i16: 01046 Opcode = NVPTX::LDV_i16_v2_ari_64; 01047 break; 01048 case MVT::i32: 01049 Opcode = NVPTX::LDV_i32_v2_ari_64; 01050 break; 01051 case MVT::i64: 01052 Opcode = NVPTX::LDV_i64_v2_ari_64; 01053 break; 01054 case MVT::f32: 01055 Opcode = NVPTX::LDV_f32_v2_ari_64; 01056 break; 01057 case MVT::f64: 01058 Opcode = NVPTX::LDV_f64_v2_ari_64; 01059 break; 01060 } 01061 break; 01062 case NVPTXISD::LoadV4: 01063 switch (EltVT.getSimpleVT().SimpleTy) { 01064 default: 01065 return nullptr; 01066 case MVT::i8: 01067 Opcode = NVPTX::LDV_i8_v4_ari_64; 01068 break; 01069 case MVT::i16: 01070 Opcode = NVPTX::LDV_i16_v4_ari_64; 01071 break; 01072 case MVT::i32: 01073 Opcode = NVPTX::LDV_i32_v4_ari_64; 01074 break; 01075 case MVT::f32: 01076 Opcode = NVPTX::LDV_f32_v4_ari_64; 01077 break; 01078 } 01079 break; 01080 } 01081 } else { 01082 switch (N->getOpcode()) { 01083 default: 01084 return nullptr; 01085 case NVPTXISD::LoadV2: 01086 switch (EltVT.getSimpleVT().SimpleTy) { 01087 default: 01088 return nullptr; 01089 case MVT::i8: 01090 Opcode = NVPTX::LDV_i8_v2_ari; 01091 break; 01092 case MVT::i16: 01093 Opcode = NVPTX::LDV_i16_v2_ari; 01094 break; 01095 case MVT::i32: 01096 Opcode = NVPTX::LDV_i32_v2_ari; 01097 break; 01098 case MVT::i64: 01099 Opcode = NVPTX::LDV_i64_v2_ari; 01100 break; 01101 case MVT::f32: 01102 Opcode = NVPTX::LDV_f32_v2_ari; 01103 break; 01104 case MVT::f64: 01105 Opcode = NVPTX::LDV_f64_v2_ari; 01106 break; 01107 } 01108 break; 01109 case NVPTXISD::LoadV4: 01110 switch (EltVT.getSimpleVT().SimpleTy) { 01111 default: 01112 return nullptr; 01113 case 
MVT::i8: 01114 Opcode = NVPTX::LDV_i8_v4_ari; 01115 break; 01116 case MVT::i16: 01117 Opcode = NVPTX::LDV_i16_v4_ari; 01118 break; 01119 case MVT::i32: 01120 Opcode = NVPTX::LDV_i32_v4_ari; 01121 break; 01122 case MVT::f32: 01123 Opcode = NVPTX::LDV_f32_v4_ari; 01124 break; 01125 } 01126 break; 01127 } 01128 } 01129 01130 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), 01131 getI32Imm(VecType), getI32Imm(FromType), 01132 getI32Imm(FromTypeWidth), Base, Offset, Chain }; 01133 01134 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 01135 } else { 01136 if (Subtarget.is64Bit()) { 01137 switch (N->getOpcode()) { 01138 default: 01139 return nullptr; 01140 case NVPTXISD::LoadV2: 01141 switch (EltVT.getSimpleVT().SimpleTy) { 01142 default: 01143 return nullptr; 01144 case MVT::i8: 01145 Opcode = NVPTX::LDV_i8_v2_areg_64; 01146 break; 01147 case MVT::i16: 01148 Opcode = NVPTX::LDV_i16_v2_areg_64; 01149 break; 01150 case MVT::i32: 01151 Opcode = NVPTX::LDV_i32_v2_areg_64; 01152 break; 01153 case MVT::i64: 01154 Opcode = NVPTX::LDV_i64_v2_areg_64; 01155 break; 01156 case MVT::f32: 01157 Opcode = NVPTX::LDV_f32_v2_areg_64; 01158 break; 01159 case MVT::f64: 01160 Opcode = NVPTX::LDV_f64_v2_areg_64; 01161 break; 01162 } 01163 break; 01164 case NVPTXISD::LoadV4: 01165 switch (EltVT.getSimpleVT().SimpleTy) { 01166 default: 01167 return nullptr; 01168 case MVT::i8: 01169 Opcode = NVPTX::LDV_i8_v4_areg_64; 01170 break; 01171 case MVT::i16: 01172 Opcode = NVPTX::LDV_i16_v4_areg_64; 01173 break; 01174 case MVT::i32: 01175 Opcode = NVPTX::LDV_i32_v4_areg_64; 01176 break; 01177 case MVT::f32: 01178 Opcode = NVPTX::LDV_f32_v4_areg_64; 01179 break; 01180 } 01181 break; 01182 } 01183 } else { 01184 switch (N->getOpcode()) { 01185 default: 01186 return nullptr; 01187 case NVPTXISD::LoadV2: 01188 switch (EltVT.getSimpleVT().SimpleTy) { 01189 default: 01190 return nullptr; 01191 case MVT::i8: 01192 Opcode = NVPTX::LDV_i8_v2_areg; 01193 break; 01194 case 
MVT::i16: 01195 Opcode = NVPTX::LDV_i16_v2_areg; 01196 break; 01197 case MVT::i32: 01198 Opcode = NVPTX::LDV_i32_v2_areg; 01199 break; 01200 case MVT::i64: 01201 Opcode = NVPTX::LDV_i64_v2_areg; 01202 break; 01203 case MVT::f32: 01204 Opcode = NVPTX::LDV_f32_v2_areg; 01205 break; 01206 case MVT::f64: 01207 Opcode = NVPTX::LDV_f64_v2_areg; 01208 break; 01209 } 01210 break; 01211 case NVPTXISD::LoadV4: 01212 switch (EltVT.getSimpleVT().SimpleTy) { 01213 default: 01214 return nullptr; 01215 case MVT::i8: 01216 Opcode = NVPTX::LDV_i8_v4_areg; 01217 break; 01218 case MVT::i16: 01219 Opcode = NVPTX::LDV_i16_v4_areg; 01220 break; 01221 case MVT::i32: 01222 Opcode = NVPTX::LDV_i32_v4_areg; 01223 break; 01224 case MVT::f32: 01225 Opcode = NVPTX::LDV_f32_v4_areg; 01226 break; 01227 } 01228 break; 01229 } 01230 } 01231 01232 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), 01233 getI32Imm(VecType), getI32Imm(FromType), 01234 getI32Imm(FromTypeWidth), Op1, Chain }; 01235 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 01236 } 01237 01238 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 01239 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 01240 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); 01241 01242 return LD; 01243 } 01244 01245 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { 01246 01247 SDValue Chain = N->getOperand(0); 01248 SDValue Op1; 01249 MemSDNode *Mem; 01250 bool IsLDG = true; 01251 01252 // If this is an LDG intrinsic, the address is the third operand. 
Its its an 01253 // LDG/LDU SD node (from custom vector handling), then its the second operand 01254 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 01255 Op1 = N->getOperand(2); 01256 Mem = cast<MemIntrinsicSDNode>(N); 01257 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 01258 switch (IID) { 01259 default: 01260 return NULL; 01261 case Intrinsic::nvvm_ldg_global_f: 01262 case Intrinsic::nvvm_ldg_global_i: 01263 case Intrinsic::nvvm_ldg_global_p: 01264 IsLDG = true; 01265 break; 01266 case Intrinsic::nvvm_ldu_global_f: 01267 case Intrinsic::nvvm_ldu_global_i: 01268 case Intrinsic::nvvm_ldu_global_p: 01269 IsLDG = false; 01270 break; 01271 } 01272 } else { 01273 Op1 = N->getOperand(1); 01274 Mem = cast<MemSDNode>(N); 01275 } 01276 01277 unsigned Opcode; 01278 SDLoc DL(N); 01279 SDNode *LD; 01280 SDValue Base, Offset, Addr; 01281 01282 EVT EltVT = Mem->getMemoryVT(); 01283 if (EltVT.isVector()) { 01284 EltVT = EltVT.getVectorElementType(); 01285 } 01286 01287 if (SelectDirectAddr(Op1, Addr)) { 01288 switch (N->getOpcode()) { 01289 default: 01290 return nullptr; 01291 case ISD::INTRINSIC_W_CHAIN: 01292 if (IsLDG) { 01293 switch (EltVT.getSimpleVT().SimpleTy) { 01294 default: 01295 return nullptr; 01296 case MVT::i8: 01297 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar; 01298 break; 01299 case MVT::i16: 01300 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar; 01301 break; 01302 case MVT::i32: 01303 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar; 01304 break; 01305 case MVT::i64: 01306 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar; 01307 break; 01308 case MVT::f32: 01309 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar; 01310 break; 01311 case MVT::f64: 01312 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar; 01313 break; 01314 } 01315 } else { 01316 switch (EltVT.getSimpleVT().SimpleTy) { 01317 default: 01318 return nullptr; 01319 case MVT::i8: 01320 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar; 01321 break; 01322 case MVT::i16: 01323 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar; 
01324 break; 01325 case MVT::i32: 01326 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar; 01327 break; 01328 case MVT::i64: 01329 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar; 01330 break; 01331 case MVT::f32: 01332 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar; 01333 break; 01334 case MVT::f64: 01335 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar; 01336 break; 01337 } 01338 } 01339 break; 01340 case NVPTXISD::LDGV2: 01341 switch (EltVT.getSimpleVT().SimpleTy) { 01342 default: 01343 return nullptr; 01344 case MVT::i8: 01345 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar; 01346 break; 01347 case MVT::i16: 01348 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar; 01349 break; 01350 case MVT::i32: 01351 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar; 01352 break; 01353 case MVT::i64: 01354 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar; 01355 break; 01356 case MVT::f32: 01357 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar; 01358 break; 01359 case MVT::f64: 01360 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar; 01361 break; 01362 } 01363 break; 01364 case NVPTXISD::LDUV2: 01365 switch (EltVT.getSimpleVT().SimpleTy) { 01366 default: 01367 return nullptr; 01368 case MVT::i8: 01369 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar; 01370 break; 01371 case MVT::i16: 01372 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar; 01373 break; 01374 case MVT::i32: 01375 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar; 01376 break; 01377 case MVT::i64: 01378 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar; 01379 break; 01380 case MVT::f32: 01381 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar; 01382 break; 01383 case MVT::f64: 01384 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar; 01385 break; 01386 } 01387 break; 01388 case NVPTXISD::LDGV4: 01389 switch (EltVT.getSimpleVT().SimpleTy) { 01390 default: 01391 return nullptr; 01392 case MVT::i8: 01393 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar; 01394 break; 01395 case MVT::i16: 01396 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar; 01397 break; 01398 case MVT::i32: 01399 Opcode = 
NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar; 01400 break; 01401 case MVT::f32: 01402 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar; 01403 break; 01404 } 01405 break; 01406 case NVPTXISD::LDUV4: 01407 switch (EltVT.getSimpleVT().SimpleTy) { 01408 default: 01409 return nullptr; 01410 case MVT::i8: 01411 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar; 01412 break; 01413 case MVT::i16: 01414 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar; 01415 break; 01416 case MVT::i32: 01417 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar; 01418 break; 01419 case MVT::f32: 01420 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar; 01421 break; 01422 } 01423 break; 01424 } 01425 01426 SDValue Ops[] = { Addr, Chain }; 01427 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 01428 } else if (Subtarget.is64Bit() 01429 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) 01430 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { 01431 if (Subtarget.is64Bit()) { 01432 switch (N->getOpcode()) { 01433 default: 01434 return nullptr; 01435 case ISD::INTRINSIC_W_CHAIN: 01436 if (IsLDG) { 01437 switch (EltVT.getSimpleVT().SimpleTy) { 01438 default: 01439 return nullptr; 01440 case MVT::i8: 01441 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64; 01442 break; 01443 case MVT::i16: 01444 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64; 01445 break; 01446 case MVT::i32: 01447 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64; 01448 break; 01449 case MVT::i64: 01450 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64; 01451 break; 01452 case MVT::f32: 01453 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64; 01454 break; 01455 case MVT::f64: 01456 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64; 01457 break; 01458 } 01459 } else { 01460 switch (EltVT.getSimpleVT().SimpleTy) { 01461 default: 01462 return nullptr; 01463 case MVT::i8: 01464 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64; 01465 break; 01466 case MVT::i16: 01467 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64; 01468 break; 01469 case MVT::i32: 01470 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64; 
01471 break; 01472 case MVT::i64: 01473 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64; 01474 break; 01475 case MVT::f32: 01476 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64; 01477 break; 01478 case MVT::f64: 01479 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64; 01480 break; 01481 } 01482 } 01483 break; 01484 case NVPTXISD::LDGV2: 01485 switch (EltVT.getSimpleVT().SimpleTy) { 01486 default: 01487 return nullptr; 01488 case MVT::i8: 01489 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64; 01490 break; 01491 case MVT::i16: 01492 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64; 01493 break; 01494 case MVT::i32: 01495 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64; 01496 break; 01497 case MVT::i64: 01498 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64; 01499 break; 01500 case MVT::f32: 01501 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64; 01502 break; 01503 case MVT::f64: 01504 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64; 01505 break; 01506 } 01507 break; 01508 case NVPTXISD::LDUV2: 01509 switch (EltVT.getSimpleVT().SimpleTy) { 01510 default: 01511 return nullptr; 01512 case MVT::i8: 01513 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64; 01514 break; 01515 case MVT::i16: 01516 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64; 01517 break; 01518 case MVT::i32: 01519 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64; 01520 break; 01521 case MVT::i64: 01522 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64; 01523 break; 01524 case MVT::f32: 01525 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64; 01526 break; 01527 case MVT::f64: 01528 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64; 01529 break; 01530 } 01531 break; 01532 case NVPTXISD::LDGV4: 01533 switch (EltVT.getSimpleVT().SimpleTy) { 01534 default: 01535 return nullptr; 01536 case MVT::i8: 01537 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64; 01538 break; 01539 case MVT::i16: 01540 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64; 01541 break; 01542 case MVT::i32: 01543 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64; 01544 break; 01545 case MVT::f32: 01546 
Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64; 01547 break; 01548 } 01549 break; 01550 case NVPTXISD::LDUV4: 01551 switch (EltVT.getSimpleVT().SimpleTy) { 01552 default: 01553 return nullptr; 01554 case MVT::i8: 01555 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64; 01556 break; 01557 case MVT::i16: 01558 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64; 01559 break; 01560 case MVT::i32: 01561 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64; 01562 break; 01563 case MVT::f32: 01564 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64; 01565 break; 01566 } 01567 break; 01568 } 01569 } else { 01570 switch (N->getOpcode()) { 01571 default: 01572 return nullptr; 01573 case ISD::INTRINSIC_W_CHAIN: 01574 if (IsLDG) { 01575 switch (EltVT.getSimpleVT().SimpleTy) { 01576 default: 01577 return nullptr; 01578 case MVT::i8: 01579 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari; 01580 break; 01581 case MVT::i16: 01582 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari; 01583 break; 01584 case MVT::i32: 01585 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari; 01586 break; 01587 case MVT::i64: 01588 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari; 01589 break; 01590 case MVT::f32: 01591 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari; 01592 break; 01593 case MVT::f64: 01594 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari; 01595 break; 01596 } 01597 } else { 01598 switch (EltVT.getSimpleVT().SimpleTy) { 01599 default: 01600 return nullptr; 01601 case MVT::i8: 01602 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari; 01603 break; 01604 case MVT::i16: 01605 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari; 01606 break; 01607 case MVT::i32: 01608 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari; 01609 break; 01610 case MVT::i64: 01611 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari; 01612 break; 01613 case MVT::f32: 01614 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari; 01615 break; 01616 case MVT::f64: 01617 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari; 01618 break; 01619 } 01620 } 01621 break; 01622 case NVPTXISD::LDGV2: 01623 switch (EltVT.getSimpleVT().SimpleTy) { 01624 
default: 01625 return nullptr; 01626 case MVT::i8: 01627 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32; 01628 break; 01629 case MVT::i16: 01630 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32; 01631 break; 01632 case MVT::i32: 01633 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32; 01634 break; 01635 case MVT::i64: 01636 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32; 01637 break; 01638 case MVT::f32: 01639 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32; 01640 break; 01641 case MVT::f64: 01642 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32; 01643 break; 01644 } 01645 break; 01646 case NVPTXISD::LDUV2: 01647 switch (EltVT.getSimpleVT().SimpleTy) { 01648 default: 01649 return nullptr; 01650 case MVT::i8: 01651 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32; 01652 break; 01653 case MVT::i16: 01654 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32; 01655 break; 01656 case MVT::i32: 01657 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32; 01658 break; 01659 case MVT::i64: 01660 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32; 01661 break; 01662 case MVT::f32: 01663 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32; 01664 break; 01665 case MVT::f64: 01666 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32; 01667 break; 01668 } 01669 break; 01670 case NVPTXISD::LDGV4: 01671 switch (EltVT.getSimpleVT().SimpleTy) { 01672 default: 01673 return nullptr; 01674 case MVT::i8: 01675 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32; 01676 break; 01677 case MVT::i16: 01678 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32; 01679 break; 01680 case MVT::i32: 01681 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32; 01682 break; 01683 case MVT::f32: 01684 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32; 01685 break; 01686 } 01687 break; 01688 case NVPTXISD::LDUV4: 01689 switch (EltVT.getSimpleVT().SimpleTy) { 01690 default: 01691 return nullptr; 01692 case MVT::i8: 01693 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32; 01694 break; 01695 case MVT::i16: 01696 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32; 01697 break; 01698 case 
MVT::i32: 01699 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32; 01700 break; 01701 case MVT::f32: 01702 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32; 01703 break; 01704 } 01705 break; 01706 } 01707 } 01708 01709 SDValue Ops[] = { Base, Offset, Chain }; 01710 01711 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 01712 } else { 01713 if (Subtarget.is64Bit()) { 01714 switch (N->getOpcode()) { 01715 default: 01716 return nullptr; 01717 case ISD::INTRINSIC_W_CHAIN: 01718 if (IsLDG) { 01719 switch (EltVT.getSimpleVT().SimpleTy) { 01720 default: 01721 return nullptr; 01722 case MVT::i8: 01723 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64; 01724 break; 01725 case MVT::i16: 01726 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64; 01727 break; 01728 case MVT::i32: 01729 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64; 01730 break; 01731 case MVT::i64: 01732 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64; 01733 break; 01734 case MVT::f32: 01735 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64; 01736 break; 01737 case MVT::f64: 01738 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64; 01739 break; 01740 } 01741 } else { 01742 switch (EltVT.getSimpleVT().SimpleTy) { 01743 default: 01744 return nullptr; 01745 case MVT::i8: 01746 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64; 01747 break; 01748 case MVT::i16: 01749 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64; 01750 break; 01751 case MVT::i32: 01752 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64; 01753 break; 01754 case MVT::i64: 01755 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64; 01756 break; 01757 case MVT::f32: 01758 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64; 01759 break; 01760 case MVT::f64: 01761 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64; 01762 break; 01763 } 01764 } 01765 break; 01766 case NVPTXISD::LDGV2: 01767 switch (EltVT.getSimpleVT().SimpleTy) { 01768 default: 01769 return nullptr; 01770 case MVT::i8: 01771 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64; 01772 break; 01773 case MVT::i16: 01774 Opcode = 
NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64; 01775 break; 01776 case MVT::i32: 01777 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64; 01778 break; 01779 case MVT::i64: 01780 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64; 01781 break; 01782 case MVT::f32: 01783 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64; 01784 break; 01785 case MVT::f64: 01786 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64; 01787 break; 01788 } 01789 break; 01790 case NVPTXISD::LDUV2: 01791 switch (EltVT.getSimpleVT().SimpleTy) { 01792 default: 01793 return nullptr; 01794 case MVT::i8: 01795 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64; 01796 break; 01797 case MVT::i16: 01798 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64; 01799 break; 01800 case MVT::i32: 01801 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64; 01802 break; 01803 case MVT::i64: 01804 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64; 01805 break; 01806 case MVT::f32: 01807 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64; 01808 break; 01809 case MVT::f64: 01810 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64; 01811 break; 01812 } 01813 break; 01814 case NVPTXISD::LDGV4: 01815 switch (EltVT.getSimpleVT().SimpleTy) { 01816 default: 01817 return nullptr; 01818 case MVT::i8: 01819 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64; 01820 break; 01821 case MVT::i16: 01822 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64; 01823 break; 01824 case MVT::i32: 01825 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64; 01826 break; 01827 case MVT::f32: 01828 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64; 01829 break; 01830 } 01831 break; 01832 case NVPTXISD::LDUV4: 01833 switch (EltVT.getSimpleVT().SimpleTy) { 01834 default: 01835 return nullptr; 01836 case MVT::i8: 01837 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64; 01838 break; 01839 case MVT::i16: 01840 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64; 01841 break; 01842 case MVT::i32: 01843 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64; 01844 break; 01845 case MVT::f32: 01846 Opcode = 
NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64; 01847 break; 01848 } 01849 break; 01850 } 01851 } else { 01852 switch (N->getOpcode()) { 01853 default: 01854 return nullptr; 01855 case ISD::INTRINSIC_W_CHAIN: 01856 if (IsLDG) { 01857 switch (EltVT.getSimpleVT().SimpleTy) { 01858 default: 01859 return nullptr; 01860 case MVT::i8: 01861 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg; 01862 break; 01863 case MVT::i16: 01864 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg; 01865 break; 01866 case MVT::i32: 01867 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg; 01868 break; 01869 case MVT::i64: 01870 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg; 01871 break; 01872 case MVT::f32: 01873 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg; 01874 break; 01875 case MVT::f64: 01876 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg; 01877 break; 01878 } 01879 } else { 01880 switch (EltVT.getSimpleVT().SimpleTy) { 01881 default: 01882 return nullptr; 01883 case MVT::i8: 01884 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg; 01885 break; 01886 case MVT::i16: 01887 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg; 01888 break; 01889 case MVT::i32: 01890 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg; 01891 break; 01892 case MVT::i64: 01893 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg; 01894 break; 01895 case MVT::f32: 01896 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg; 01897 break; 01898 case MVT::f64: 01899 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg; 01900 break; 01901 } 01902 } 01903 break; 01904 case NVPTXISD::LDGV2: 01905 switch (EltVT.getSimpleVT().SimpleTy) { 01906 default: 01907 return nullptr; 01908 case MVT::i8: 01909 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32; 01910 break; 01911 case MVT::i16: 01912 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32; 01913 break; 01914 case MVT::i32: 01915 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32; 01916 break; 01917 case MVT::i64: 01918 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32; 01919 break; 01920 case MVT::f32: 01921 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32; 01922 break; 01923 
case MVT::f64: 01924 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32; 01925 break; 01926 } 01927 break; 01928 case NVPTXISD::LDUV2: 01929 switch (EltVT.getSimpleVT().SimpleTy) { 01930 default: 01931 return nullptr; 01932 case MVT::i8: 01933 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32; 01934 break; 01935 case MVT::i16: 01936 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32; 01937 break; 01938 case MVT::i32: 01939 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32; 01940 break; 01941 case MVT::i64: 01942 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32; 01943 break; 01944 case MVT::f32: 01945 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32; 01946 break; 01947 case MVT::f64: 01948 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32; 01949 break; 01950 } 01951 break; 01952 case NVPTXISD::LDGV4: 01953 switch (EltVT.getSimpleVT().SimpleTy) { 01954 default: 01955 return nullptr; 01956 case MVT::i8: 01957 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32; 01958 break; 01959 case MVT::i16: 01960 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32; 01961 break; 01962 case MVT::i32: 01963 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32; 01964 break; 01965 case MVT::f32: 01966 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32; 01967 break; 01968 } 01969 break; 01970 case NVPTXISD::LDUV4: 01971 switch (EltVT.getSimpleVT().SimpleTy) { 01972 default: 01973 return nullptr; 01974 case MVT::i8: 01975 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32; 01976 break; 01977 case MVT::i16: 01978 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32; 01979 break; 01980 case MVT::i32: 01981 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32; 01982 break; 01983 case MVT::f32: 01984 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32; 01985 break; 01986 } 01987 break; 01988 } 01989 } 01990 01991 SDValue Ops[] = { Op1, Chain }; 01992 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 01993 } 01994 01995 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 01996 MemRefs0[0] = Mem->getMemOperand(); 01997 
cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); 01998 01999 return LD; 02000 } 02001

/// Picks the opcode that corresponds to a scalar value type.
///
/// The candidates are given in the fixed order i8, i16, i32, i64, f32, f64.
///
/// \param VT      Value type to look up.
/// \param Opcode  Receives the chosen opcode; left untouched on failure.
/// \returns true if \p VT is one of the six listed types, false otherwise.
static bool pickOpcodeForEltVT(MVT::SimpleValueType VT, unsigned OpI8,
                               unsigned OpI16, unsigned OpI32, unsigned OpI64,
                               unsigned OpF32, unsigned OpF64,
                               unsigned &Opcode) {
  switch (VT) {
  default:
    return false;
  case MVT::i8:
    Opcode = OpI8;
    return true;
  case MVT::i16:
    Opcode = OpI16;
    return true;
  case MVT::i32:
    Opcode = OpI32;
    return true;
  case MVT::i64:
    Opcode = OpI64;
    return true;
  case MVT::f32:
    Opcode = OpF32;
    return true;
  case MVT::f64:
    Opcode = OpF64;
    return true;
  }
}

/// Variant of pickOpcodeForEltVT for the four-element forms, which only have
/// i8, i16, i32 and f32 opcodes. i64 and f64 are rejected.
///
/// \returns true and sets \p Opcode if \p VT is i8, i16, i32 or f32.
static bool pickOpcodeForV4EltVT(MVT::SimpleValueType VT, unsigned OpI8,
                                 unsigned OpI16, unsigned OpI32,
                                 unsigned OpF32, unsigned &Opcode) {
  switch (VT) {
  default:
    return false;
  case MVT::i8:
    Opcode = OpI8;
    return true;
  case MVT::i16:
    Opcode = OpI16;
    return true;
  case MVT::i32:
    Opcode = OpI32;
    return true;
  case MVT::f32:
    Opcode = OpF32;
    return true;
  }
}

/// Returns the memory type of \p Mem as used for param/retval opcode
/// selection. An i1 is mapped to i8 so that it picks the 8-bit opcode.
static MVT::SimpleValueType getParamMemTy(MemSDNode *Mem) {
  MVT::SimpleValueType Ty = Mem->getMemoryVT().getSimpleVT().SimpleTy;
  if (Ty == MVT::i1)
    return MVT::i8;
  return Ty;
}

/// Selects a machine store for the StoreSDNode \p N.
///
/// Operand 0 of \p N is the chain, operand 1 the stored value and operand 2
/// the address. The machine node receives: value, volatile flag, address
/// space code, vector type, value kind (float/unsigned), scalar width, the
/// address operand(s) and the chain. The memory operand of \p N is attached
/// to the new node.
///
/// \returns the new machine node, or nullptr if the store is indexed, has a
/// non-simple memory type, has a vector width other than 2 or 4, or stores a
/// value type other than i8/i16/i32/i64/f32/f64.
SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  SDLoc dl(N);
  StoreSDNode *ST = cast<StoreSDNode>(N);
  EVT StoreVT = ST->getMemoryVT();

  // Pre/post inc/dec addressing is not supported, and neither are memory
  // types that are not simple.
  if (ST->isIndexed() || !StoreVT.isSimple())
    return nullptr;

  // Address Space Setting
  unsigned CodeAddrSpace = getCodeAddrSpace(ST, Subtarget);

  // Volatile Setting
  // - .volatile is kept only for the global, shared and generic address
  //   spaces; it is dropped for every other space.
  bool IsVolatile = ST->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned VecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    switch (SimpleVT.getVectorNumElements()) {
    case 2:
      VecType = NVPTX::PTXLdStInstCode::V2;
      break;
    case 4:
      VecType = NVPTX::PTXLdStInstCode::V4;
      break;
    default:
      return nullptr;
    }
  }

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
  unsigned ToType = NVPTX::PTXLdStInstCode::Unsigned;
  if (ScalarVT.isFloatingPoint())
    ToType = NVPTX::PTXLdStInstCode::Float;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue Value = N->getOperand(1);
  SDValue Ptr = N->getOperand(2);
  MVT::SimpleValueType SourceVT =
      Value.getNode()->getSimpleValueType(0).SimpleTy;
  const bool Is64Bit = Subtarget.is64Bit();

  // Try the addressing modes in order: direct address, symbol+immediate,
  // register+immediate, and finally a plain register.
  SDValue Addr, Base, Offset;
  SmallVector<SDValue, 2> AddrOps;
  unsigned Opcode = 0;
  bool Picked = false;
  if (SelectDirectAddr(Ptr, Addr)) {
    Picked = pickOpcodeForEltVT(SourceVT, NVPTX::ST_i8_avar,
                                NVPTX::ST_i16_avar, NVPTX::ST_i32_avar,
                                NVPTX::ST_i64_avar, NVPTX::ST_f32_avar,
                                NVPTX::ST_f64_avar, Opcode);
    AddrOps.push_back(Addr);
  } else if (Is64Bit ? SelectADDRsi64(Ptr.getNode(), Ptr, Base, Offset)
                     : SelectADDRsi(Ptr.getNode(), Ptr, Base, Offset)) {
    Picked = pickOpcodeForEltVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                                NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                                NVPTX::ST_f32_asi, NVPTX::ST_f64_asi, Opcode);
    AddrOps.push_back(Base);
    AddrOps.push_back(Offset);
  } else if (Is64Bit ? SelectADDRri64(Ptr.getNode(), Ptr, Base, Offset)
                     : SelectADDRri(Ptr.getNode(), Ptr, Base, Offset)) {
    Picked = Is64Bit
                 ? pickOpcodeForEltVT(SourceVT, NVPTX::ST_i8_ari_64,
                                      NVPTX::ST_i16_ari_64,
                                      NVPTX::ST_i32_ari_64,
                                      NVPTX::ST_i64_ari_64,
                                      NVPTX::ST_f32_ari_64,
                                      NVPTX::ST_f64_ari_64, Opcode)
                 : pickOpcodeForEltVT(SourceVT, NVPTX::ST_i8_ari,
                                      NVPTX::ST_i16_ari, NVPTX::ST_i32_ari,
                                      NVPTX::ST_i64_ari, NVPTX::ST_f32_ari,
                                      NVPTX::ST_f64_ari, Opcode);
    AddrOps.push_back(Base);
    AddrOps.push_back(Offset);
  } else {
    Picked = Is64Bit
                 ? pickOpcodeForEltVT(SourceVT, NVPTX::ST_i8_areg_64,
                                      NVPTX::ST_i16_areg_64,
                                      NVPTX::ST_i32_areg_64,
                                      NVPTX::ST_i64_areg_64,
                                      NVPTX::ST_f32_areg_64,
                                      NVPTX::ST_f64_areg_64, Opcode)
                 : pickOpcodeForEltVT(SourceVT, NVPTX::ST_i8_areg,
                                      NVPTX::ST_i16_areg, NVPTX::ST_i32_areg,
                                      NVPTX::ST_i64_areg, NVPTX::ST_f32_areg,
                                      NVPTX::ST_f64_areg, Opcode);
    AddrOps.push_back(Ptr);
  }
  if (!Picked)
    return nullptr;

  SmallVector<SDValue, 9> Ops;
  Ops.push_back(Value);
  Ops.push_back(getI32Imm(IsVolatile));
  Ops.push_back(getI32Imm(CodeAddrSpace));
  Ops.push_back(getI32Imm(VecType));
  Ops.push_back(getI32Imm(ToType));
  Ops.push_back(getI32Imm(ToTypeWidth));
  Ops.append(AddrOps.begin(), AddrOps.end());
  Ops.push_back(Chain);
  SDNode *NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);

  // Carry the memory operand of the original store over to the new node.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return NVPTXST;
}

/// Selects a machine vector store for an NVPTXISD::StoreV2 / StoreV4 node.
///
/// Operand 0 of \p N is the chain; it is followed by the 2 or 4 element
/// values and then the address. The machine node receives: the element
/// values, volatile flag, address space code, vector type, value kind
/// (float/unsigned), scalar width, the address operand(s) and the chain. The
/// memory operand of \p N is attached to the new node.
///
/// Storing to the constant address space is reported as a fatal error.
///
/// \returns the new machine node, or nullptr if \p N is neither StoreV2 nor
/// StoreV4 or the element type has no matching opcode (i8/i16/i32/i64/f32/f64
/// for v2; i8/i16/i32/f32 for v4).
SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDLoc DL(N);
  EVT EltVT = Op1.getValueType();
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT StoreVT = MemSD->getMemoryVT();

  // Address Space Setting
  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);

  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
    report_fatal_error("Cannot store to pointer that points to constant "
                       "memory space");
  }

  // Volatile Setting
  // - .volatile is kept only for the global, shared and generic address
  //   spaces; it is dropped for every other space.
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  assert(StoreVT.isSimple() && "Store value is not simple");
  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
  unsigned ToType = NVPTX::PTXLdStInstCode::Unsigned;
  if (ScalarVT.isFloatingPoint())
    ToType = NVPTX::PTXLdStInstCode::Float;

  // Collect the element values and locate the address operand, which
  // follows them.
  SmallVector<SDValue, 12> StOps;
  SDValue Ptr;
  unsigned VecType;
  bool IsV2;
  switch (N->getOpcode()) {
  case NVPTXISD::StoreV2:
    IsV2 = true;
    VecType = NVPTX::PTXLdStInstCode::V2;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    Ptr = N->getOperand(3);
    break;
  case NVPTXISD::StoreV4:
    IsV2 = false;
    VecType = NVPTX::PTXLdStInstCode::V4;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    StOps.push_back(N->getOperand(3));
    StOps.push_back(N->getOperand(4));
    Ptr = N->getOperand(5);
    break;
  default:
    return nullptr;
  }

  StOps.push_back(getI32Imm(IsVolatile));
  StOps.push_back(getI32Imm(CodeAddrSpace));
  StOps.push_back(getI32Imm(VecType));
  StOps.push_back(getI32Imm(ToType));
  StOps.push_back(getI32Imm(ToTypeWidth));

  const MVT::SimpleValueType EltTy = EltVT.getSimpleVT().SimpleTy;
  const bool Is64Bit = Subtarget.is64Bit();

  // Try the addressing modes in order: direct address, symbol+immediate,
  // register+immediate, and finally a plain register.
  SDValue Addr, Base, Offset;
  unsigned Opcode = 0;
  bool Picked = false;
  if (SelectDirectAddr(Ptr, Addr)) {
    Picked = IsV2 ? pickOpcodeForEltVT(EltTy, NVPTX::STV_i8_v2_avar,
                                       NVPTX::STV_i16_v2_avar,
                                       NVPTX::STV_i32_v2_avar,
                                       NVPTX::STV_i64_v2_avar,
                                       NVPTX::STV_f32_v2_avar,
                                       NVPTX::STV_f64_v2_avar, Opcode)
                  : pickOpcodeForV4EltVT(EltTy, NVPTX::STV_i8_v4_avar,
                                         NVPTX::STV_i16_v4_avar,
                                         NVPTX::STV_i32_v4_avar,
                                         NVPTX::STV_f32_v4_avar, Opcode);
    StOps.push_back(Addr);
  } else if (Is64Bit ? SelectADDRsi64(Ptr.getNode(), Ptr, Base, Offset)
                     : SelectADDRsi(Ptr.getNode(), Ptr, Base, Offset)) {
    Picked = IsV2 ? pickOpcodeForEltVT(EltTy, NVPTX::STV_i8_v2_asi,
                                       NVPTX::STV_i16_v2_asi,
                                       NVPTX::STV_i32_v2_asi,
                                       NVPTX::STV_i64_v2_asi,
                                       NVPTX::STV_f32_v2_asi,
                                       NVPTX::STV_f64_v2_asi, Opcode)
                  : pickOpcodeForV4EltVT(EltTy, NVPTX::STV_i8_v4_asi,
                                         NVPTX::STV_i16_v4_asi,
                                         NVPTX::STV_i32_v4_asi,
                                         NVPTX::STV_f32_v4_asi, Opcode);
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else if (Is64Bit ? SelectADDRri64(Ptr.getNode(), Ptr, Base, Offset)
                     : SelectADDRri(Ptr.getNode(), Ptr, Base, Offset)) {
    if (Is64Bit)
      Picked = IsV2 ? pickOpcodeForEltVT(EltTy, NVPTX::STV_i8_v2_ari_64,
                                         NVPTX::STV_i16_v2_ari_64,
                                         NVPTX::STV_i32_v2_ari_64,
                                         NVPTX::STV_i64_v2_ari_64,
                                         NVPTX::STV_f32_v2_ari_64,
                                         NVPTX::STV_f64_v2_ari_64, Opcode)
                    : pickOpcodeForV4EltVT(EltTy, NVPTX::STV_i8_v4_ari_64,
                                           NVPTX::STV_i16_v4_ari_64,
                                           NVPTX::STV_i32_v4_ari_64,
                                           NVPTX::STV_f32_v4_ari_64, Opcode);
    else
      Picked = IsV2 ? pickOpcodeForEltVT(EltTy, NVPTX::STV_i8_v2_ari,
                                         NVPTX::STV_i16_v2_ari,
                                         NVPTX::STV_i32_v2_ari,
                                         NVPTX::STV_i64_v2_ari,
                                         NVPTX::STV_f32_v2_ari,
                                         NVPTX::STV_f64_v2_ari, Opcode)
                    : pickOpcodeForV4EltVT(EltTy, NVPTX::STV_i8_v4_ari,
                                           NVPTX::STV_i16_v4_ari,
                                           NVPTX::STV_i32_v4_ari,
                                           NVPTX::STV_f32_v4_ari, Opcode);
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else {
    if (Is64Bit)
      Picked = IsV2 ? pickOpcodeForEltVT(EltTy, NVPTX::STV_i8_v2_areg_64,
                                         NVPTX::STV_i16_v2_areg_64,
                                         NVPTX::STV_i32_v2_areg_64,
                                         NVPTX::STV_i64_v2_areg_64,
                                         NVPTX::STV_f32_v2_areg_64,
                                         NVPTX::STV_f64_v2_areg_64, Opcode)
                    : pickOpcodeForV4EltVT(EltTy, NVPTX::STV_i8_v4_areg_64,
                                           NVPTX::STV_i16_v4_areg_64,
                                           NVPTX::STV_i32_v4_areg_64,
                                           NVPTX::STV_f32_v4_areg_64, Opcode);
    else
      Picked = IsV2 ? pickOpcodeForEltVT(EltTy, NVPTX::STV_i8_v2_areg,
                                         NVPTX::STV_i16_v2_areg,
                                         NVPTX::STV_i32_v2_areg,
                                         NVPTX::STV_i64_v2_areg,
                                         NVPTX::STV_f32_v2_areg,
                                         NVPTX::STV_f64_v2_areg, Opcode)
                    : pickOpcodeForV4EltVT(EltTy, NVPTX::STV_i8_v4_areg,
                                           NVPTX::STV_i16_v4_areg,
                                           NVPTX::STV_i32_v4_areg,
                                           NVPTX::STV_f32_v4_areg, Opcode);
    StOps.push_back(Ptr);
  }
  if (!Picked)
    return nullptr;

  StOps.push_back(Chain);

  SDNode *ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);

  // Carry the memory operand of the original node over to the new node.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return ST;
}

/// Selects a machine node for NVPTXISD::LoadParam / LoadParamV2 /
/// LoadParamV4.
///
/// Operand 0 of \p Node is the chain, operand 2 the constant offset and
/// operand 3 the glue value. The machine node takes (offset, chain, glue) and
/// produces 1, 2 or 4 results of the node's first result type, followed by a
/// chain and a glue result. The opcode is chosen from the memory type; i1 is
/// loaded with the 8-bit opcode.
///
/// \returns the new machine node, or nullptr if the opcode or memory type is
/// not handled here.
SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
  SDValue Chain = Node->getOperand(0);
  SDValue Offset = Node->getOperand(2);
  SDValue Flag = Node->getOperand(3);
  SDLoc DL(Node);
  MemSDNode *Mem = cast<MemSDNode>(Node);

  unsigned VecSize;
  switch (Node->getOpcode()) {
  default:
    return nullptr;
  case NVPTXISD::LoadParam:
    VecSize = 1;
    break;
  case NVPTXISD::LoadParamV2:
    VecSize = 2;
    break;
  case NVPTXISD::LoadParamV4:
    VecSize = 4;
    break;
  }

  EVT EltVT = Node->getValueType(0);
  const MVT::SimpleValueType MemTy = getParamMemTy(Mem);

  unsigned Opc = 0;
  bool Picked = false;
  switch (VecSize) {
  case 1:
    Picked = pickOpcodeForEltVT(MemTy, NVPTX::LoadParamMemI8,
                                NVPTX::LoadParamMemI16,
                                NVPTX::LoadParamMemI32,
                                NVPTX::LoadParamMemI64,
                                NVPTX::LoadParamMemF32,
                                NVPTX::LoadParamMemF64, Opc);
    break;
  case 2:
    Picked = pickOpcodeForEltVT(MemTy, NVPTX::LoadParamMemV2I8,
                                NVPTX::LoadParamMemV2I16,
                                NVPTX::LoadParamMemV2I32,
                                NVPTX::LoadParamMemV2I64,
                                NVPTX::LoadParamMemV2F32,
                                NVPTX::LoadParamMemV2F64, Opc);
    break;
  default: // VecSize == 4
    Picked = pickOpcodeForV4EltVT(MemTy, NVPTX::LoadParamMemV4I8,
                                  NVPTX::LoadParamMemV4I16,
                                  NVPTX::LoadParamMemV4I32,
                                  NVPTX::LoadParamMemV4F32, Opc);
    break;
  }
  if (!Picked)
    return nullptr;

  // One result per element, then the chain and the glue.
  SDVTList VTs;
  switch (VecSize) {
  case 1:
    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
    break;
  case 2:
    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
    break;
  default: { // VecSize == 4
    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
    VTs = CurDAG->getVTList(EVTs);
    break;
  }
  }

  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();

  // Exactly three operands are pushed, so size the inline storage to match.
  SmallVector<SDValue, 3> Ops;
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
  Ops.push_back(Chain);
  Ops.push_back(Flag);

  return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
}

/// Selects a machine node for NVPTXISD::StoreRetval / StoreRetvalV2 /
/// StoreRetvalV4.
///
/// Operand 0 of \p N is the chain, operand 1 the constant offset, and the
/// 1, 2 or 4 values to store follow from operand 2. The machine node receives
/// the values, the offset as a target constant and the chain. The memory
/// operand of \p N is attached to the new node.
///
/// \returns the new machine node, or nullptr if the opcode or memory type is
/// not handled here.
SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Offset = N->getOperand(1);
  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
  MemSDNode *Mem = cast<MemSDNode>(N);

  // How many elements do we have?
  unsigned NumElts;
  switch (N->getOpcode()) {
  default:
    return nullptr;
  case NVPTXISD::StoreRetval:
    NumElts = 1;
    break;
  case NVPTXISD::StoreRetvalV2:
    NumElts = 2;
    break;
  case NVPTXISD::StoreRetvalV4:
    NumElts = 4;
    break;
  }

  // Build vector of operands
  SmallVector<SDValue, 6> Ops;
  for (unsigned i = 0; i < NumElts; ++i)
    Ops.push_back(N->getOperand(i + 2));
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
  Ops.push_back(Chain);

  // Determine target opcode
  // If we have an i1, use an 8-bit store. The lowering code in
  // NVPTXISelLowering will have already emitted an upcast.
  const MVT::SimpleValueType MemTy = getParamMemTy(Mem);
  unsigned Opcode = 0;
  bool Picked = false;
  switch (NumElts) {
  case 1:
    Picked = pickOpcodeForEltVT(MemTy, NVPTX::StoreRetvalI8,
                                NVPTX::StoreRetvalI16, NVPTX::StoreRetvalI32,
                                NVPTX::StoreRetvalI64, NVPTX::StoreRetvalF32,
                                NVPTX::StoreRetvalF64, Opcode);
    break;
  case 2:
    Picked = pickOpcodeForEltVT(MemTy, NVPTX::StoreRetvalV2I8,
                                NVPTX::StoreRetvalV2I16,
                                NVPTX::StoreRetvalV2I32,
                                NVPTX::StoreRetvalV2I64,
                                NVPTX::StoreRetvalV2F32,
                                NVPTX::StoreRetvalV2F64, Opcode);
    break;
  default: // NumElts == 4
    Picked = pickOpcodeForV4EltVT(MemTy, NVPTX::StoreRetvalV4I8,
                                  NVPTX::StoreRetvalV4I16,
                                  NVPTX::StoreRetvalV4I32,
                                  NVPTX::StoreRetvalV4F32, Opcode);
    break;
  }
  if (!Picked)
    return nullptr;

  SDNode *Ret = CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);

  // Carry the memory operand of the original node over to the new node.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return Ret;
}

/// Selects a machine node for the NVPTXISD::StoreParam family (StoreParam,
/// StoreParamV2, StoreParamV4, StoreParamU32, StoreParamS32).
///
/// Operand 0 of \p N is the chain, operand 1 the constant parameter number,
/// operand 2 the constant offset, the 1, 2 or 4 values follow from operand 3,
/// and the last operand is the glue value. The machine node receives the
/// values, parameter number, offset, chain and glue, and produces a chain and
/// a glue result. The memory operand of \p N is attached to the new node.
///
/// For StoreParamU32 / StoreParamS32 the value is first run through a
/// CVT_u32_u16 / CVT_s32_s16 conversion and then stored with StoreParamI32.
///
/// \returns the new machine node, or nullptr if the opcode or memory type is
/// not handled here.
SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Param = N->getOperand(1);
  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
  SDValue Offset = N->getOperand(2);
  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
  MemSDNode *Mem = cast<MemSDNode>(N);
  SDValue Flag = N->getOperand(N->getNumOperands() - 1);

  // How many elements do we have?
  const unsigned NodeOpc = N->getOpcode();
  unsigned NumElts;
  switch (NodeOpc) {
  default:
    return nullptr;
  case NVPTXISD::StoreParamU32:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParam:
    NumElts = 1;
    break;
  case NVPTXISD::StoreParamV2:
    NumElts = 2;
    break;
  case NVPTXISD::StoreParamV4:
    NumElts = 4;
    break;
  }

  // Build vector of operands
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0; i < NumElts; ++i)
    Ops.push_back(N->getOperand(i + 3));
  Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
  Ops.push_back(Chain);
  Ops.push_back(Flag);

  // Determine target opcode
  unsigned Opcode = 0;
  if (NodeOpc == NVPTXISD::StoreParamU32 ||
      NodeOpc == NVPTXISD::StoreParamS32) {
    // Special case: if we have a sign-extend/zero-extend node, insert the
    // conversion instruction first, and use that as the value operand to
    // the selected StoreParam node.
    unsigned CvtOpc;
    if (NodeOpc == NVPTXISD::StoreParamU32)
      CvtOpc = NVPTX::CVT_u32_u16;
    else
      CvtOpc = NVPTX::CVT_s32_s16;
    Opcode = NVPTX::StoreParamI32;
    SDValue CvtNone =
        CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, MVT::i32);
    SDNode *Cvt =
        CurDAG->getMachineNode(CvtOpc, DL, MVT::i32, Ops[0], CvtNone);
    Ops[0] = SDValue(Cvt, 0);
  } else {
    // If we have an i1, use an 8-bit store. The lowering code in
    // NVPTXISelLowering will have already emitted an upcast.
    const MVT::SimpleValueType MemTy = getParamMemTy(Mem);
    bool Picked = false;
    switch (NumElts) {
    case 1:
      Picked = pickOpcodeForEltVT(MemTy, NVPTX::StoreParamI8,
                                  NVPTX::StoreParamI16, NVPTX::StoreParamI32,
                                  NVPTX::StoreParamI64, NVPTX::StoreParamF32,
                                  NVPTX::StoreParamF64, Opcode);
      break;
    case 2:
      Picked = pickOpcodeForEltVT(MemTy, NVPTX::StoreParamV2I8,
                                  NVPTX::StoreParamV2I16,
                                  NVPTX::StoreParamV2I32,
                                  NVPTX::StoreParamV2I64,
                                  NVPTX::StoreParamV2F32,
                                  NVPTX::StoreParamV2F64, Opcode);
      break;
    default: // NumElts == 4
      Picked = pickOpcodeForV4EltVT(MemTy, NVPTX::StoreParamV4I8,
                                    NVPTX::StoreParamV4I16,
                                    NVPTX::StoreParamV4I32,
                                    NVPTX::StoreParamV4F32, Opcode);
      break;
    }
    if (!Picked)
      return nullptr;
  }

  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
  SDNode *Ret = CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);

  // Carry the memory operand of the original node over to the new node.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return Ret;
}
03015 03016 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { 03017 SDValue Chain = N->getOperand(0); 03018 SDNode *Ret = nullptr; 03019 unsigned Opc = 0; 03020 SmallVector<SDValue, 8> Ops; 03021 03022 switch (N->getOpcode()) { 03023 default: return nullptr; 03024 case NVPTXISD::Tex1DFloatS32: 03025 Opc = NVPTX::TEX_1D_F32_S32; 03026 break; 03027 case NVPTXISD::Tex1DFloatFloat: 03028 Opc = NVPTX::TEX_1D_F32_F32; 03029 break; 03030 case NVPTXISD::Tex1DFloatFloatLevel: 03031 Opc = NVPTX::TEX_1D_F32_F32_LEVEL; 03032 break; 03033 case NVPTXISD::Tex1DFloatFloatGrad: 03034 Opc = NVPTX::TEX_1D_F32_F32_GRAD; 03035 break; 03036 case NVPTXISD::Tex1DS32S32: 03037 Opc = NVPTX::TEX_1D_S32_S32; 03038 break; 03039 case NVPTXISD::Tex1DS32Float: 03040 Opc = NVPTX::TEX_1D_S32_F32; 03041 break; 03042 case NVPTXISD::Tex1DS32FloatLevel: 03043 Opc =
NVPTX::TEX_1D_S32_F32_LEVEL; 03044 break; 03045 case NVPTXISD::Tex1DS32FloatGrad: 03046 Opc = NVPTX::TEX_1D_S32_F32_GRAD; 03047 break; 03048 case NVPTXISD::Tex1DU32S32: 03049 Opc = NVPTX::TEX_1D_U32_S32; 03050 break; 03051 case NVPTXISD::Tex1DU32Float: 03052 Opc = NVPTX::TEX_1D_U32_F32; 03053 break; 03054 case NVPTXISD::Tex1DU32FloatLevel: 03055 Opc = NVPTX::TEX_1D_U32_F32_LEVEL; 03056 break; 03057 case NVPTXISD::Tex1DU32FloatGrad: 03058 Opc = NVPTX::TEX_1D_U32_F32_GRAD; 03059 break; 03060 case NVPTXISD::Tex1DArrayFloatS32: 03061 Opc = NVPTX::TEX_1D_ARRAY_F32_S32; 03062 break; 03063 case NVPTXISD::Tex1DArrayFloatFloat: 03064 Opc = NVPTX::TEX_1D_ARRAY_F32_F32; 03065 break; 03066 case NVPTXISD::Tex1DArrayFloatFloatLevel: 03067 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; 03068 break; 03069 case NVPTXISD::Tex1DArrayFloatFloatGrad: 03070 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; 03071 break; 03072 case NVPTXISD::Tex1DArrayS32S32: 03073 Opc = NVPTX::TEX_1D_ARRAY_S32_S32; 03074 break; 03075 case NVPTXISD::Tex1DArrayS32Float: 03076 Opc = NVPTX::TEX_1D_ARRAY_S32_F32; 03077 break; 03078 case NVPTXISD::Tex1DArrayS32FloatLevel: 03079 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL; 03080 break; 03081 case NVPTXISD::Tex1DArrayS32FloatGrad: 03082 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD; 03083 break; 03084 case NVPTXISD::Tex1DArrayU32S32: 03085 Opc = NVPTX::TEX_1D_ARRAY_U32_S32; 03086 break; 03087 case NVPTXISD::Tex1DArrayU32Float: 03088 Opc = NVPTX::TEX_1D_ARRAY_U32_F32; 03089 break; 03090 case NVPTXISD::Tex1DArrayU32FloatLevel: 03091 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL; 03092 break; 03093 case NVPTXISD::Tex1DArrayU32FloatGrad: 03094 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD; 03095 break; 03096 case NVPTXISD::Tex2DFloatS32: 03097 Opc = NVPTX::TEX_2D_F32_S32; 03098 break; 03099 case NVPTXISD::Tex2DFloatFloat: 03100 Opc = NVPTX::TEX_2D_F32_F32; 03101 break; 03102 case NVPTXISD::Tex2DFloatFloatLevel: 03103 Opc = NVPTX::TEX_2D_F32_F32_LEVEL; 03104 break; 03105 case NVPTXISD::Tex2DFloatFloatGrad: 
03106 Opc = NVPTX::TEX_2D_F32_F32_GRAD; 03107 break; 03108 case NVPTXISD::Tex2DS32S32: 03109 Opc = NVPTX::TEX_2D_S32_S32; 03110 break; 03111 case NVPTXISD::Tex2DS32Float: 03112 Opc = NVPTX::TEX_2D_S32_F32; 03113 break; 03114 case NVPTXISD::Tex2DS32FloatLevel: 03115 Opc = NVPTX::TEX_2D_S32_F32_LEVEL; 03116 break; 03117 case NVPTXISD::Tex2DS32FloatGrad: 03118 Opc = NVPTX::TEX_2D_S32_F32_GRAD; 03119 break; 03120 case NVPTXISD::Tex2DU32S32: 03121 Opc = NVPTX::TEX_2D_U32_S32; 03122 break; 03123 case NVPTXISD::Tex2DU32Float: 03124 Opc = NVPTX::TEX_2D_U32_F32; 03125 break; 03126 case NVPTXISD::Tex2DU32FloatLevel: 03127 Opc = NVPTX::TEX_2D_U32_F32_LEVEL; 03128 break; 03129 case NVPTXISD::Tex2DU32FloatGrad: 03130 Opc = NVPTX::TEX_2D_U32_F32_GRAD; 03131 break; 03132 case NVPTXISD::Tex2DArrayFloatS32: 03133 Opc = NVPTX::TEX_2D_ARRAY_F32_S32; 03134 break; 03135 case NVPTXISD::Tex2DArrayFloatFloat: 03136 Opc = NVPTX::TEX_2D_ARRAY_F32_F32; 03137 break; 03138 case NVPTXISD::Tex2DArrayFloatFloatLevel: 03139 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; 03140 break; 03141 case NVPTXISD::Tex2DArrayFloatFloatGrad: 03142 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; 03143 break; 03144 case NVPTXISD::Tex2DArrayS32S32: 03145 Opc = NVPTX::TEX_2D_ARRAY_S32_S32; 03146 break; 03147 case NVPTXISD::Tex2DArrayS32Float: 03148 Opc = NVPTX::TEX_2D_ARRAY_S32_F32; 03149 break; 03150 case NVPTXISD::Tex2DArrayS32FloatLevel: 03151 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL; 03152 break; 03153 case NVPTXISD::Tex2DArrayS32FloatGrad: 03154 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD; 03155 break; 03156 case NVPTXISD::Tex2DArrayU32S32: 03157 Opc = NVPTX::TEX_2D_ARRAY_U32_S32; 03158 break; 03159 case NVPTXISD::Tex2DArrayU32Float: 03160 Opc = NVPTX::TEX_2D_ARRAY_U32_F32; 03161 break; 03162 case NVPTXISD::Tex2DArrayU32FloatLevel: 03163 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL; 03164 break; 03165 case NVPTXISD::Tex2DArrayU32FloatGrad: 03166 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD; 03167 break; 03168 case NVPTXISD::Tex3DFloatS32: 
03169 Opc = NVPTX::TEX_3D_F32_S32; 03170 break; 03171 case NVPTXISD::Tex3DFloatFloat: 03172 Opc = NVPTX::TEX_3D_F32_F32; 03173 break; 03174 case NVPTXISD::Tex3DFloatFloatLevel: 03175 Opc = NVPTX::TEX_3D_F32_F32_LEVEL; 03176 break; 03177 case NVPTXISD::Tex3DFloatFloatGrad: 03178 Opc = NVPTX::TEX_3D_F32_F32_GRAD; 03179 break; 03180 case NVPTXISD::Tex3DS32S32: 03181 Opc = NVPTX::TEX_3D_S32_S32; 03182 break; 03183 case NVPTXISD::Tex3DS32Float: 03184 Opc = NVPTX::TEX_3D_S32_F32; 03185 break; 03186 case NVPTXISD::Tex3DS32FloatLevel: 03187 Opc = NVPTX::TEX_3D_S32_F32_LEVEL; 03188 break; 03189 case NVPTXISD::Tex3DS32FloatGrad: 03190 Opc = NVPTX::TEX_3D_S32_F32_GRAD; 03191 break; 03192 case NVPTXISD::Tex3DU32S32: 03193 Opc = NVPTX::TEX_3D_U32_S32; 03194 break; 03195 case NVPTXISD::Tex3DU32Float: 03196 Opc = NVPTX::TEX_3D_U32_F32; 03197 break; 03198 case NVPTXISD::Tex3DU32FloatLevel: 03199 Opc = NVPTX::TEX_3D_U32_F32_LEVEL; 03200 break; 03201 case NVPTXISD::Tex3DU32FloatGrad: 03202 Opc = NVPTX::TEX_3D_U32_F32_GRAD; 03203 break; 03204 case NVPTXISD::TexCubeFloatFloat: 03205 Opc = NVPTX::TEX_CUBE_F32_F32; 03206 break; 03207 case NVPTXISD::TexCubeFloatFloatLevel: 03208 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL; 03209 break; 03210 case NVPTXISD::TexCubeS32Float: 03211 Opc = NVPTX::TEX_CUBE_S32_F32; 03212 break; 03213 case NVPTXISD::TexCubeS32FloatLevel: 03214 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL; 03215 break; 03216 case NVPTXISD::TexCubeU32Float: 03217 Opc = NVPTX::TEX_CUBE_U32_F32; 03218 break; 03219 case NVPTXISD::TexCubeU32FloatLevel: 03220 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL; 03221 break; 03222 case NVPTXISD::TexCubeArrayFloatFloat: 03223 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32; 03224 break; 03225 case NVPTXISD::TexCubeArrayFloatFloatLevel: 03226 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL; 03227 break; 03228 case NVPTXISD::TexCubeArrayS32Float: 03229 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32; 03230 break; 03231 case NVPTXISD::TexCubeArrayS32FloatLevel: 03232 Opc = 
NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL; 03233 break; 03234 case NVPTXISD::TexCubeArrayU32Float: 03235 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32; 03236 break; 03237 case NVPTXISD::TexCubeArrayU32FloatLevel: 03238 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL; 03239 break; 03240 case NVPTXISD::Tld4R2DFloatFloat: 03241 Opc = NVPTX::TLD4_R_2D_F32_F32; 03242 break; 03243 case NVPTXISD::Tld4G2DFloatFloat: 03244 Opc = NVPTX::TLD4_G_2D_F32_F32; 03245 break; 03246 case NVPTXISD::Tld4B2DFloatFloat: 03247 Opc = NVPTX::TLD4_B_2D_F32_F32; 03248 break; 03249 case NVPTXISD::Tld4A2DFloatFloat: 03250 Opc = NVPTX::TLD4_A_2D_F32_F32; 03251 break; 03252 case NVPTXISD::Tld4R2DS64Float: 03253 Opc = NVPTX::TLD4_R_2D_S32_F32; 03254 break; 03255 case NVPTXISD::Tld4G2DS64Float: 03256 Opc = NVPTX::TLD4_G_2D_S32_F32; 03257 break; 03258 case NVPTXISD::Tld4B2DS64Float: 03259 Opc = NVPTX::TLD4_B_2D_S32_F32; 03260 break; 03261 case NVPTXISD::Tld4A2DS64Float: 03262 Opc = NVPTX::TLD4_A_2D_S32_F32; 03263 break; 03264 case NVPTXISD::Tld4R2DU64Float: 03265 Opc = NVPTX::TLD4_R_2D_U32_F32; 03266 break; 03267 case NVPTXISD::Tld4G2DU64Float: 03268 Opc = NVPTX::TLD4_G_2D_U32_F32; 03269 break; 03270 case NVPTXISD::Tld4B2DU64Float: 03271 Opc = NVPTX::TLD4_B_2D_U32_F32; 03272 break; 03273 case NVPTXISD::Tld4A2DU64Float: 03274 Opc = NVPTX::TLD4_A_2D_U32_F32; 03275 break; 03276 case NVPTXISD::TexUnified1DFloatS32: 03277 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32; 03278 break; 03279 case NVPTXISD::TexUnified1DFloatFloat: 03280 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32; 03281 break; 03282 case NVPTXISD::TexUnified1DFloatFloatLevel: 03283 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL; 03284 break; 03285 case NVPTXISD::TexUnified1DFloatFloatGrad: 03286 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD; 03287 break; 03288 case NVPTXISD::TexUnified1DS32S32: 03289 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32; 03290 break; 03291 case NVPTXISD::TexUnified1DS32Float: 03292 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32; 03293 break; 03294 case 
NVPTXISD::TexUnified1DS32FloatLevel: 03295 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL; 03296 break; 03297 case NVPTXISD::TexUnified1DS32FloatGrad: 03298 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD; 03299 break; 03300 case NVPTXISD::TexUnified1DU32S32: 03301 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32; 03302 break; 03303 case NVPTXISD::TexUnified1DU32Float: 03304 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32; 03305 break; 03306 case NVPTXISD::TexUnified1DU32FloatLevel: 03307 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL; 03308 break; 03309 case NVPTXISD::TexUnified1DU32FloatGrad: 03310 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD; 03311 break; 03312 case NVPTXISD::TexUnified1DArrayFloatS32: 03313 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32; 03314 break; 03315 case NVPTXISD::TexUnified1DArrayFloatFloat: 03316 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32; 03317 break; 03318 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 03319 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL; 03320 break; 03321 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 03322 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD; 03323 break; 03324 case NVPTXISD::TexUnified1DArrayS32S32: 03325 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32; 03326 break; 03327 case NVPTXISD::TexUnified1DArrayS32Float: 03328 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32; 03329 break; 03330 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 03331 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL; 03332 break; 03333 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 03334 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD; 03335 break; 03336 case NVPTXISD::TexUnified1DArrayU32S32: 03337 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32; 03338 break; 03339 case NVPTXISD::TexUnified1DArrayU32Float: 03340 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32; 03341 break; 03342 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 03343 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL; 03344 break; 03345 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 03346 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD; 03347 
break; 03348 case NVPTXISD::TexUnified2DFloatS32: 03349 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32; 03350 break; 03351 case NVPTXISD::TexUnified2DFloatFloat: 03352 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32; 03353 break; 03354 case NVPTXISD::TexUnified2DFloatFloatLevel: 03355 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL; 03356 break; 03357 case NVPTXISD::TexUnified2DFloatFloatGrad: 03358 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD; 03359 break; 03360 case NVPTXISD::TexUnified2DS32S32: 03361 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32; 03362 break; 03363 case NVPTXISD::TexUnified2DS32Float: 03364 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32; 03365 break; 03366 case NVPTXISD::TexUnified2DS32FloatLevel: 03367 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL; 03368 break; 03369 case NVPTXISD::TexUnified2DS32FloatGrad: 03370 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD; 03371 break; 03372 case NVPTXISD::TexUnified2DU32S32: 03373 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32; 03374 break; 03375 case NVPTXISD::TexUnified2DU32Float: 03376 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32; 03377 break; 03378 case NVPTXISD::TexUnified2DU32FloatLevel: 03379 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL; 03380 break; 03381 case NVPTXISD::TexUnified2DU32FloatGrad: 03382 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD; 03383 break; 03384 case NVPTXISD::TexUnified2DArrayFloatS32: 03385 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32; 03386 break; 03387 case NVPTXISD::TexUnified2DArrayFloatFloat: 03388 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32; 03389 break; 03390 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 03391 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL; 03392 break; 03393 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 03394 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD; 03395 break; 03396 case NVPTXISD::TexUnified2DArrayS32S32: 03397 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32; 03398 break; 03399 case NVPTXISD::TexUnified2DArrayS32Float: 03400 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32; 03401 break; 03402 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 03403 
Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL; 03404 break; 03405 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 03406 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD; 03407 break; 03408 case NVPTXISD::TexUnified2DArrayU32S32: 03409 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32; 03410 break; 03411 case NVPTXISD::TexUnified2DArrayU32Float: 03412 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32; 03413 break; 03414 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 03415 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL; 03416 break; 03417 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 03418 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD; 03419 break; 03420 case NVPTXISD::TexUnified3DFloatS32: 03421 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32; 03422 break; 03423 case NVPTXISD::TexUnified3DFloatFloat: 03424 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32; 03425 break; 03426 case NVPTXISD::TexUnified3DFloatFloatLevel: 03427 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL; 03428 break; 03429 case NVPTXISD::TexUnified3DFloatFloatGrad: 03430 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD; 03431 break; 03432 case NVPTXISD::TexUnified3DS32S32: 03433 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32; 03434 break; 03435 case NVPTXISD::TexUnified3DS32Float: 03436 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32; 03437 break; 03438 case NVPTXISD::TexUnified3DS32FloatLevel: 03439 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL; 03440 break; 03441 case NVPTXISD::TexUnified3DS32FloatGrad: 03442 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD; 03443 break; 03444 case NVPTXISD::TexUnified3DU32S32: 03445 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32; 03446 break; 03447 case NVPTXISD::TexUnified3DU32Float: 03448 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32; 03449 break; 03450 case NVPTXISD::TexUnified3DU32FloatLevel: 03451 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL; 03452 break; 03453 case NVPTXISD::TexUnified3DU32FloatGrad: 03454 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD; 03455 break; 03456 case NVPTXISD::TexUnifiedCubeFloatFloat: 03457 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32; 03458 break; 03459 
case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 03460 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL; 03461 break; 03462 case NVPTXISD::TexUnifiedCubeS32Float: 03463 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32; 03464 break; 03465 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 03466 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL; 03467 break; 03468 case NVPTXISD::TexUnifiedCubeU32Float: 03469 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32; 03470 break; 03471 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 03472 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL; 03473 break; 03474 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 03475 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32; 03476 break; 03477 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 03478 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL; 03479 break; 03480 case NVPTXISD::TexUnifiedCubeArrayS32Float: 03481 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32; 03482 break; 03483 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 03484 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL; 03485 break; 03486 case NVPTXISD::TexUnifiedCubeArrayU32Float: 03487 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32; 03488 break; 03489 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 03490 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL; 03491 break; 03492 case NVPTXISD::Tld4UnifiedR2DFloatFloat: 03493 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32; 03494 break; 03495 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 03496 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32; 03497 break; 03498 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 03499 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32; 03500 break; 03501 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 03502 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32; 03503 break; 03504 case NVPTXISD::Tld4UnifiedR2DS64Float: 03505 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32; 03506 break; 03507 case NVPTXISD::Tld4UnifiedG2DS64Float: 03508 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32; 03509 break; 03510 case NVPTXISD::Tld4UnifiedB2DS64Float: 03511 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32; 03512 
break; 03513 case NVPTXISD::Tld4UnifiedA2DS64Float: 03514 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32; 03515 break; 03516 case NVPTXISD::Tld4UnifiedR2DU64Float: 03517 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32; 03518 break; 03519 case NVPTXISD::Tld4UnifiedG2DU64Float: 03520 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32; 03521 break; 03522 case NVPTXISD::Tld4UnifiedB2DU64Float: 03523 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32; 03524 break; 03525 case NVPTXISD::Tld4UnifiedA2DU64Float: 03526 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32; 03527 break; 03528 } 03529 03530 // Copy over operands 03531 for (unsigned i = 1; i < N->getNumOperands(); ++i) { 03532 Ops.push_back(N->getOperand(i)); 03533 } 03534 03535 Ops.push_back(Chain); 03536 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 03537 return Ret; 03538 } 03539 03540 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { 03541 SDValue Chain = N->getOperand(0); 03542 SDValue TexHandle = N->getOperand(1); 03543 SDNode *Ret = nullptr; 03544 unsigned Opc = 0; 03545 SmallVector<SDValue, 8> Ops; 03546 switch (N->getOpcode()) { 03547 default: return nullptr; 03548 case NVPTXISD::Suld1DI8Clamp: 03549 Opc = NVPTX::SULD_1D_I8_CLAMP; 03550 Ops.push_back(TexHandle); 03551 Ops.push_back(N->getOperand(2)); 03552 Ops.push_back(Chain); 03553 break; 03554 case NVPTXISD::Suld1DI16Clamp: 03555 Opc = NVPTX::SULD_1D_I16_CLAMP; 03556 Ops.push_back(TexHandle); 03557 Ops.push_back(N->getOperand(2)); 03558 Ops.push_back(Chain); 03559 break; 03560 case NVPTXISD::Suld1DI32Clamp: 03561 Opc = NVPTX::SULD_1D_I32_CLAMP; 03562 Ops.push_back(TexHandle); 03563 Ops.push_back(N->getOperand(2)); 03564 Ops.push_back(Chain); 03565 break; 03566 case NVPTXISD::Suld1DI64Clamp: 03567 Opc = NVPTX::SULD_1D_I64_CLAMP; 03568 Ops.push_back(TexHandle); 03569 Ops.push_back(N->getOperand(2)); 03570 Ops.push_back(Chain); 03571 break; 03572 case NVPTXISD::Suld1DV2I8Clamp: 03573 Opc = NVPTX::SULD_1D_V2I8_CLAMP; 03574 Ops.push_back(TexHandle); 03575 
Ops.push_back(N->getOperand(2)); 03576 Ops.push_back(Chain); 03577 break; 03578 case NVPTXISD::Suld1DV2I16Clamp: 03579 Opc = NVPTX::SULD_1D_V2I16_CLAMP; 03580 Ops.push_back(TexHandle); 03581 Ops.push_back(N->getOperand(2)); 03582 Ops.push_back(Chain); 03583 break; 03584 case NVPTXISD::Suld1DV2I32Clamp: 03585 Opc = NVPTX::SULD_1D_V2I32_CLAMP; 03586 Ops.push_back(TexHandle); 03587 Ops.push_back(N->getOperand(2)); 03588 Ops.push_back(Chain); 03589 break; 03590 case NVPTXISD::Suld1DV2I64Clamp: 03591 Opc = NVPTX::SULD_1D_V2I64_CLAMP; 03592 Ops.push_back(TexHandle); 03593 Ops.push_back(N->getOperand(2)); 03594 Ops.push_back(Chain); 03595 break; 03596 case NVPTXISD::Suld1DV4I8Clamp: 03597 Opc = NVPTX::SULD_1D_V4I8_CLAMP; 03598 Ops.push_back(TexHandle); 03599 Ops.push_back(N->getOperand(2)); 03600 Ops.push_back(Chain); 03601 break; 03602 case NVPTXISD::Suld1DV4I16Clamp: 03603 Opc = NVPTX::SULD_1D_V4I16_CLAMP; 03604 Ops.push_back(TexHandle); 03605 Ops.push_back(N->getOperand(2)); 03606 Ops.push_back(Chain); 03607 break; 03608 case NVPTXISD::Suld1DV4I32Clamp: 03609 Opc = NVPTX::SULD_1D_V4I32_CLAMP; 03610 Ops.push_back(TexHandle); 03611 Ops.push_back(N->getOperand(2)); 03612 Ops.push_back(Chain); 03613 break; 03614 case NVPTXISD::Suld1DArrayI8Clamp: 03615 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP; 03616 Ops.push_back(TexHandle); 03617 Ops.push_back(N->getOperand(2)); 03618 Ops.push_back(N->getOperand(3)); 03619 Ops.push_back(Chain); 03620 break; 03621 case NVPTXISD::Suld1DArrayI16Clamp: 03622 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP; 03623 Ops.push_back(TexHandle); 03624 Ops.push_back(N->getOperand(2)); 03625 Ops.push_back(N->getOperand(3)); 03626 Ops.push_back(Chain); 03627 break; 03628 case NVPTXISD::Suld1DArrayI32Clamp: 03629 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP; 03630 Ops.push_back(TexHandle); 03631 Ops.push_back(N->getOperand(2)); 03632 Ops.push_back(N->getOperand(3)); 03633 Ops.push_back(Chain); 03634 break; 03635 case NVPTXISD::Suld1DArrayI64Clamp: 03636 Opc = 
NVPTX::SULD_1D_ARRAY_I64_CLAMP; 03637 Ops.push_back(TexHandle); 03638 Ops.push_back(N->getOperand(2)); 03639 Ops.push_back(N->getOperand(3)); 03640 Ops.push_back(Chain); 03641 break; 03642 case NVPTXISD::Suld1DArrayV2I8Clamp: 03643 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP; 03644 Ops.push_back(TexHandle); 03645 Ops.push_back(N->getOperand(2)); 03646 Ops.push_back(N->getOperand(3)); 03647 Ops.push_back(Chain); 03648 break; 03649 case NVPTXISD::Suld1DArrayV2I16Clamp: 03650 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP; 03651 Ops.push_back(TexHandle); 03652 Ops.push_back(N->getOperand(2)); 03653 Ops.push_back(N->getOperand(3)); 03654 Ops.push_back(Chain); 03655 break; 03656 case NVPTXISD::Suld1DArrayV2I32Clamp: 03657 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP; 03658 Ops.push_back(TexHandle); 03659 Ops.push_back(N->getOperand(2)); 03660 Ops.push_back(N->getOperand(3)); 03661 Ops.push_back(Chain); 03662 break; 03663 case NVPTXISD::Suld1DArrayV2I64Clamp: 03664 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP; 03665 Ops.push_back(TexHandle); 03666 Ops.push_back(N->getOperand(2)); 03667 Ops.push_back(N->getOperand(3)); 03668 Ops.push_back(Chain); 03669 break; 03670 case NVPTXISD::Suld1DArrayV4I8Clamp: 03671 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP; 03672 Ops.push_back(TexHandle); 03673 Ops.push_back(N->getOperand(2)); 03674 Ops.push_back(N->getOperand(3)); 03675 Ops.push_back(Chain); 03676 break; 03677 case NVPTXISD::Suld1DArrayV4I16Clamp: 03678 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP; 03679 Ops.push_back(TexHandle); 03680 Ops.push_back(N->getOperand(2)); 03681 Ops.push_back(N->getOperand(3)); 03682 Ops.push_back(Chain); 03683 break; 03684 case NVPTXISD::Suld1DArrayV4I32Clamp: 03685 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP; 03686 Ops.push_back(TexHandle); 03687 Ops.push_back(N->getOperand(2)); 03688 Ops.push_back(N->getOperand(3)); 03689 Ops.push_back(Chain); 03690 break; 03691 case NVPTXISD::Suld2DI8Clamp: 03692 Opc = NVPTX::SULD_2D_I8_CLAMP; 03693 Ops.push_back(TexHandle); 03694 
Ops.push_back(N->getOperand(2)); 03695 Ops.push_back(N->getOperand(3)); 03696 Ops.push_back(Chain); 03697 break; 03698 case NVPTXISD::Suld2DI16Clamp: 03699 Opc = NVPTX::SULD_2D_I16_CLAMP; 03700 Ops.push_back(TexHandle); 03701 Ops.push_back(N->getOperand(2)); 03702 Ops.push_back(N->getOperand(3)); 03703 Ops.push_back(Chain); 03704 break; 03705 case NVPTXISD::Suld2DI32Clamp: 03706 Opc = NVPTX::SULD_2D_I32_CLAMP; 03707 Ops.push_back(TexHandle); 03708 Ops.push_back(N->getOperand(2)); 03709 Ops.push_back(N->getOperand(3)); 03710 Ops.push_back(Chain); 03711 break; 03712 case NVPTXISD::Suld2DI64Clamp: 03713 Opc = NVPTX::SULD_2D_I64_CLAMP; 03714 Ops.push_back(TexHandle); 03715 Ops.push_back(N->getOperand(2)); 03716 Ops.push_back(N->getOperand(3)); 03717 Ops.push_back(Chain); 03718 break; 03719 case NVPTXISD::Suld2DV2I8Clamp: 03720 Opc = NVPTX::SULD_2D_V2I8_CLAMP; 03721 Ops.push_back(TexHandle); 03722 Ops.push_back(N->getOperand(2)); 03723 Ops.push_back(N->getOperand(3)); 03724 Ops.push_back(Chain); 03725 break; 03726 case NVPTXISD::Suld2DV2I16Clamp: 03727 Opc = NVPTX::SULD_2D_V2I16_CLAMP; 03728 Ops.push_back(TexHandle); 03729 Ops.push_back(N->getOperand(2)); 03730 Ops.push_back(N->getOperand(3)); 03731 Ops.push_back(Chain); 03732 break; 03733 case NVPTXISD::Suld2DV2I32Clamp: 03734 Opc = NVPTX::SULD_2D_V2I32_CLAMP; 03735 Ops.push_back(TexHandle); 03736 Ops.push_back(N->getOperand(2)); 03737 Ops.push_back(N->getOperand(3)); 03738 Ops.push_back(Chain); 03739 break; 03740 case NVPTXISD::Suld2DV2I64Clamp: 03741 Opc = NVPTX::SULD_2D_V2I64_CLAMP; 03742 Ops.push_back(TexHandle); 03743 Ops.push_back(N->getOperand(2)); 03744 Ops.push_back(N->getOperand(3)); 03745 Ops.push_back(Chain); 03746 break; 03747 case NVPTXISD::Suld2DV4I8Clamp: 03748 Opc = NVPTX::SULD_2D_V4I8_CLAMP; 03749 Ops.push_back(TexHandle); 03750 Ops.push_back(N->getOperand(2)); 03751 Ops.push_back(N->getOperand(3)); 03752 Ops.push_back(Chain); 03753 break; 03754 case NVPTXISD::Suld2DV4I16Clamp: 03755 Opc = 
NVPTX::SULD_2D_V4I16_CLAMP; 03756 Ops.push_back(TexHandle); 03757 Ops.push_back(N->getOperand(2)); 03758 Ops.push_back(N->getOperand(3)); 03759 Ops.push_back(Chain); 03760 break; 03761 case NVPTXISD::Suld2DV4I32Clamp: 03762 Opc = NVPTX::SULD_2D_V4I32_CLAMP; 03763 Ops.push_back(TexHandle); 03764 Ops.push_back(N->getOperand(2)); 03765 Ops.push_back(N->getOperand(3)); 03766 Ops.push_back(Chain); 03767 break; 03768 case NVPTXISD::Suld2DArrayI8Clamp: 03769 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP; 03770 Ops.push_back(TexHandle); 03771 Ops.push_back(N->getOperand(2)); 03772 Ops.push_back(N->getOperand(3)); 03773 Ops.push_back(N->getOperand(4)); 03774 Ops.push_back(Chain); 03775 break; 03776 case NVPTXISD::Suld2DArrayI16Clamp: 03777 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP; 03778 Ops.push_back(TexHandle); 03779 Ops.push_back(N->getOperand(2)); 03780 Ops.push_back(N->getOperand(3)); 03781 Ops.push_back(N->getOperand(4)); 03782 Ops.push_back(Chain); 03783 break; 03784 case NVPTXISD::Suld2DArrayI32Clamp: 03785 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP; 03786 Ops.push_back(TexHandle); 03787 Ops.push_back(N->getOperand(2)); 03788 Ops.push_back(N->getOperand(3)); 03789 Ops.push_back(N->getOperand(4)); 03790 Ops.push_back(Chain); 03791 break; 03792 case NVPTXISD::Suld2DArrayI64Clamp: 03793 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP; 03794 Ops.push_back(TexHandle); 03795 Ops.push_back(N->getOperand(2)); 03796 Ops.push_back(N->getOperand(3)); 03797 Ops.push_back(N->getOperand(4)); 03798 Ops.push_back(Chain); 03799 break; 03800 case NVPTXISD::Suld2DArrayV2I8Clamp: 03801 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP; 03802 Ops.push_back(TexHandle); 03803 Ops.push_back(N->getOperand(2)); 03804 Ops.push_back(N->getOperand(3)); 03805 Ops.push_back(N->getOperand(4)); 03806 Ops.push_back(Chain); 03807 break; 03808 case NVPTXISD::Suld2DArrayV2I16Clamp: 03809 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP; 03810 Ops.push_back(TexHandle); 03811 Ops.push_back(N->getOperand(2)); 03812 Ops.push_back(N->getOperand(3)); 03813 
Ops.push_back(N->getOperand(4)); 03814 Ops.push_back(Chain); 03815 break; 03816 case NVPTXISD::Suld2DArrayV2I32Clamp: 03817 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP; 03818 Ops.push_back(TexHandle); 03819 Ops.push_back(N->getOperand(2)); 03820 Ops.push_back(N->getOperand(3)); 03821 Ops.push_back(N->getOperand(4)); 03822 Ops.push_back(Chain); 03823 break; 03824 case NVPTXISD::Suld2DArrayV2I64Clamp: 03825 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP; 03826 Ops.push_back(TexHandle); 03827 Ops.push_back(N->getOperand(2)); 03828 Ops.push_back(N->getOperand(3)); 03829 Ops.push_back(N->getOperand(4)); 03830 Ops.push_back(Chain); 03831 break; 03832 case NVPTXISD::Suld2DArrayV4I8Clamp: 03833 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP; 03834 Ops.push_back(TexHandle); 03835 Ops.push_back(N->getOperand(2)); 03836 Ops.push_back(N->getOperand(3)); 03837 Ops.push_back(N->getOperand(4)); 03838 Ops.push_back(Chain); 03839 break; 03840 case NVPTXISD::Suld2DArrayV4I16Clamp: 03841 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP; 03842 Ops.push_back(TexHandle); 03843 Ops.push_back(N->getOperand(2)); 03844 Ops.push_back(N->getOperand(3)); 03845 Ops.push_back(N->getOperand(4)); 03846 Ops.push_back(Chain); 03847 break; 03848 case NVPTXISD::Suld2DArrayV4I32Clamp: 03849 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP; 03850 Ops.push_back(TexHandle); 03851 Ops.push_back(N->getOperand(2)); 03852 Ops.push_back(N->getOperand(3)); 03853 Ops.push_back(N->getOperand(4)); 03854 Ops.push_back(Chain); 03855 break; 03856 case NVPTXISD::Suld3DI8Clamp: 03857 Opc = NVPTX::SULD_3D_I8_CLAMP; 03858 Ops.push_back(TexHandle); 03859 Ops.push_back(N->getOperand(2)); 03860 Ops.push_back(N->getOperand(3)); 03861 Ops.push_back(N->getOperand(4)); 03862 Ops.push_back(Chain); 03863 break; 03864 case NVPTXISD::Suld3DI16Clamp: 03865 Opc = NVPTX::SULD_3D_I16_CLAMP; 03866 Ops.push_back(TexHandle); 03867 Ops.push_back(N->getOperand(2)); 03868 Ops.push_back(N->getOperand(3)); 03869 Ops.push_back(N->getOperand(4)); 03870 Ops.push_back(Chain); 03871 break; 
03872 case NVPTXISD::Suld3DI32Clamp: 03873 Opc = NVPTX::SULD_3D_I32_CLAMP; 03874 Ops.push_back(TexHandle); 03875 Ops.push_back(N->getOperand(2)); 03876 Ops.push_back(N->getOperand(3)); 03877 Ops.push_back(N->getOperand(4)); 03878 Ops.push_back(Chain); 03879 break; 03880 case NVPTXISD::Suld3DI64Clamp: 03881 Opc = NVPTX::SULD_3D_I64_CLAMP; 03882 Ops.push_back(TexHandle); 03883 Ops.push_back(N->getOperand(2)); 03884 Ops.push_back(N->getOperand(3)); 03885 Ops.push_back(N->getOperand(4)); 03886 Ops.push_back(Chain); 03887 break; 03888 case NVPTXISD::Suld3DV2I8Clamp: 03889 Opc = NVPTX::SULD_3D_V2I8_CLAMP; 03890 Ops.push_back(TexHandle); 03891 Ops.push_back(N->getOperand(2)); 03892 Ops.push_back(N->getOperand(3)); 03893 Ops.push_back(N->getOperand(4)); 03894 Ops.push_back(Chain); 03895 break; 03896 case NVPTXISD::Suld3DV2I16Clamp: 03897 Opc = NVPTX::SULD_3D_V2I16_CLAMP; 03898 Ops.push_back(TexHandle); 03899 Ops.push_back(N->getOperand(2)); 03900 Ops.push_back(N->getOperand(3)); 03901 Ops.push_back(N->getOperand(4)); 03902 Ops.push_back(Chain); 03903 break; 03904 case NVPTXISD::Suld3DV2I32Clamp: 03905 Opc = NVPTX::SULD_3D_V2I32_CLAMP; 03906 Ops.push_back(TexHandle); 03907 Ops.push_back(N->getOperand(2)); 03908 Ops.push_back(N->getOperand(3)); 03909 Ops.push_back(N->getOperand(4)); 03910 Ops.push_back(Chain); 03911 break; 03912 case NVPTXISD::Suld3DV2I64Clamp: 03913 Opc = NVPTX::SULD_3D_V2I64_CLAMP; 03914 Ops.push_back(TexHandle); 03915 Ops.push_back(N->getOperand(2)); 03916 Ops.push_back(N->getOperand(3)); 03917 Ops.push_back(N->getOperand(4)); 03918 Ops.push_back(Chain); 03919 break; 03920 case NVPTXISD::Suld3DV4I8Clamp: 03921 Opc = NVPTX::SULD_3D_V4I8_CLAMP; 03922 Ops.push_back(TexHandle); 03923 Ops.push_back(N->getOperand(2)); 03924 Ops.push_back(N->getOperand(3)); 03925 Ops.push_back(N->getOperand(4)); 03926 Ops.push_back(Chain); 03927 break; 03928 case NVPTXISD::Suld3DV4I16Clamp: 03929 Opc = NVPTX::SULD_3D_V4I16_CLAMP; 03930 Ops.push_back(TexHandle); 03931 
Ops.push_back(N->getOperand(2)); 03932 Ops.push_back(N->getOperand(3)); 03933 Ops.push_back(N->getOperand(4)); 03934 Ops.push_back(Chain); 03935 break; 03936 case NVPTXISD::Suld3DV4I32Clamp: 03937 Opc = NVPTX::SULD_3D_V4I32_CLAMP; 03938 Ops.push_back(TexHandle); 03939 Ops.push_back(N->getOperand(2)); 03940 Ops.push_back(N->getOperand(3)); 03941 Ops.push_back(N->getOperand(4)); 03942 Ops.push_back(Chain); 03943 break; 03944 case NVPTXISD::Suld1DI8Trap: 03945 Opc = NVPTX::SULD_1D_I8_TRAP; 03946 Ops.push_back(TexHandle); 03947 Ops.push_back(N->getOperand(2)); 03948 Ops.push_back(Chain); 03949 break; 03950 case NVPTXISD::Suld1DI16Trap: 03951 Opc = NVPTX::SULD_1D_I16_TRAP; 03952 Ops.push_back(TexHandle); 03953 Ops.push_back(N->getOperand(2)); 03954 Ops.push_back(Chain); 03955 break; 03956 case NVPTXISD::Suld1DI32Trap: 03957 Opc = NVPTX::SULD_1D_I32_TRAP; 03958 Ops.push_back(TexHandle); 03959 Ops.push_back(N->getOperand(2)); 03960 Ops.push_back(Chain); 03961 break; 03962 case NVPTXISD::Suld1DI64Trap: 03963 Opc = NVPTX::SULD_1D_I64_TRAP; 03964 Ops.push_back(TexHandle); 03965 Ops.push_back(N->getOperand(2)); 03966 Ops.push_back(Chain); 03967 break; 03968 case NVPTXISD::Suld1DV2I8Trap: 03969 Opc = NVPTX::SULD_1D_V2I8_TRAP; 03970 Ops.push_back(TexHandle); 03971 Ops.push_back(N->getOperand(2)); 03972 Ops.push_back(Chain); 03973 break; 03974 case NVPTXISD::Suld1DV2I16Trap: 03975 Opc = NVPTX::SULD_1D_V2I16_TRAP; 03976 Ops.push_back(TexHandle); 03977 Ops.push_back(N->getOperand(2)); 03978 Ops.push_back(Chain); 03979 break; 03980 case NVPTXISD::Suld1DV2I32Trap: 03981 Opc = NVPTX::SULD_1D_V2I32_TRAP; 03982 Ops.push_back(TexHandle); 03983 Ops.push_back(N->getOperand(2)); 03984 Ops.push_back(Chain); 03985 break; 03986 case NVPTXISD::Suld1DV2I64Trap: 03987 Opc = NVPTX::SULD_1D_V2I64_TRAP; 03988 Ops.push_back(TexHandle); 03989 Ops.push_back(N->getOperand(2)); 03990 Ops.push_back(Chain); 03991 break; 03992 case NVPTXISD::Suld1DV4I8Trap: 03993 Opc = NVPTX::SULD_1D_V4I8_TRAP; 03994 
Ops.push_back(TexHandle); 03995 Ops.push_back(N->getOperand(2)); 03996 Ops.push_back(Chain); 03997 break; 03998 case NVPTXISD::Suld1DV4I16Trap: 03999 Opc = NVPTX::SULD_1D_V4I16_TRAP; 04000 Ops.push_back(TexHandle); 04001 Ops.push_back(N->getOperand(2)); 04002 Ops.push_back(Chain); 04003 break; 04004 case NVPTXISD::Suld1DV4I32Trap: 04005 Opc = NVPTX::SULD_1D_V4I32_TRAP; 04006 Ops.push_back(TexHandle); 04007 Ops.push_back(N->getOperand(2)); 04008 Ops.push_back(Chain); 04009 break; 04010 case NVPTXISD::Suld1DArrayI8Trap: 04011 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; 04012 Ops.push_back(TexHandle); 04013 Ops.push_back(N->getOperand(2)); 04014 Ops.push_back(N->getOperand(3)); 04015 Ops.push_back(Chain); 04016 break; 04017 case NVPTXISD::Suld1DArrayI16Trap: 04018 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; 04019 Ops.push_back(TexHandle); 04020 Ops.push_back(N->getOperand(2)); 04021 Ops.push_back(N->getOperand(3)); 04022 Ops.push_back(Chain); 04023 break; 04024 case NVPTXISD::Suld1DArrayI32Trap: 04025 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; 04026 Ops.push_back(TexHandle); 04027 Ops.push_back(N->getOperand(2)); 04028 Ops.push_back(N->getOperand(3)); 04029 Ops.push_back(Chain); 04030 break; 04031 case NVPTXISD::Suld1DArrayI64Trap: 04032 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP; 04033 Ops.push_back(TexHandle); 04034 Ops.push_back(N->getOperand(2)); 04035 Ops.push_back(N->getOperand(3)); 04036 Ops.push_back(Chain); 04037 break; 04038 case NVPTXISD::Suld1DArrayV2I8Trap: 04039 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; 04040 Ops.push_back(TexHandle); 04041 Ops.push_back(N->getOperand(2)); 04042 Ops.push_back(N->getOperand(3)); 04043 Ops.push_back(Chain); 04044 break; 04045 case NVPTXISD::Suld1DArrayV2I16Trap: 04046 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; 04047 Ops.push_back(TexHandle); 04048 Ops.push_back(N->getOperand(2)); 04049 Ops.push_back(N->getOperand(3)); 04050 Ops.push_back(Chain); 04051 break; 04052 case NVPTXISD::Suld1DArrayV2I32Trap: 04053 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; 04054 
Ops.push_back(TexHandle); 04055 Ops.push_back(N->getOperand(2)); 04056 Ops.push_back(N->getOperand(3)); 04057 Ops.push_back(Chain); 04058 break; 04059 case NVPTXISD::Suld1DArrayV2I64Trap: 04060 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP; 04061 Ops.push_back(TexHandle); 04062 Ops.push_back(N->getOperand(2)); 04063 Ops.push_back(N->getOperand(3)); 04064 Ops.push_back(Chain); 04065 break; 04066 case NVPTXISD::Suld1DArrayV4I8Trap: 04067 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; 04068 Ops.push_back(TexHandle); 04069 Ops.push_back(N->getOperand(2)); 04070 Ops.push_back(N->getOperand(3)); 04071 Ops.push_back(Chain); 04072 break; 04073 case NVPTXISD::Suld1DArrayV4I16Trap: 04074 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; 04075 Ops.push_back(TexHandle); 04076 Ops.push_back(N->getOperand(2)); 04077 Ops.push_back(N->getOperand(3)); 04078 Ops.push_back(Chain); 04079 break; 04080 case NVPTXISD::Suld1DArrayV4I32Trap: 04081 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; 04082 Ops.push_back(TexHandle); 04083 Ops.push_back(N->getOperand(2)); 04084 Ops.push_back(N->getOperand(3)); 04085 Ops.push_back(Chain); 04086 break; 04087 case NVPTXISD::Suld2DI8Trap: 04088 Opc = NVPTX::SULD_2D_I8_TRAP; 04089 Ops.push_back(TexHandle); 04090 Ops.push_back(N->getOperand(2)); 04091 Ops.push_back(N->getOperand(3)); 04092 Ops.push_back(Chain); 04093 break; 04094 case NVPTXISD::Suld2DI16Trap: 04095 Opc = NVPTX::SULD_2D_I16_TRAP; 04096 Ops.push_back(TexHandle); 04097 Ops.push_back(N->getOperand(2)); 04098 Ops.push_back(N->getOperand(3)); 04099 Ops.push_back(Chain); 04100 break; 04101 case NVPTXISD::Suld2DI32Trap: 04102 Opc = NVPTX::SULD_2D_I32_TRAP; 04103 Ops.push_back(TexHandle); 04104 Ops.push_back(N->getOperand(2)); 04105 Ops.push_back(N->getOperand(3)); 04106 Ops.push_back(Chain); 04107 break; 04108 case NVPTXISD::Suld2DI64Trap: 04109 Opc = NVPTX::SULD_2D_I64_TRAP; 04110 Ops.push_back(TexHandle); 04111 Ops.push_back(N->getOperand(2)); 04112 Ops.push_back(N->getOperand(3)); 04113 Ops.push_back(Chain); 04114 break; 04115 
case NVPTXISD::Suld2DV2I8Trap: 04116 Opc = NVPTX::SULD_2D_V2I8_TRAP; 04117 Ops.push_back(TexHandle); 04118 Ops.push_back(N->getOperand(2)); 04119 Ops.push_back(N->getOperand(3)); 04120 Ops.push_back(Chain); 04121 break; 04122 case NVPTXISD::Suld2DV2I16Trap: 04123 Opc = NVPTX::SULD_2D_V2I16_TRAP; 04124 Ops.push_back(TexHandle); 04125 Ops.push_back(N->getOperand(2)); 04126 Ops.push_back(N->getOperand(3)); 04127 Ops.push_back(Chain); 04128 break; 04129 case NVPTXISD::Suld2DV2I32Trap: 04130 Opc = NVPTX::SULD_2D_V2I32_TRAP; 04131 Ops.push_back(TexHandle); 04132 Ops.push_back(N->getOperand(2)); 04133 Ops.push_back(N->getOperand(3)); 04134 Ops.push_back(Chain); 04135 break; 04136 case NVPTXISD::Suld2DV2I64Trap: 04137 Opc = NVPTX::SULD_2D_V2I64_TRAP; 04138 Ops.push_back(TexHandle); 04139 Ops.push_back(N->getOperand(2)); 04140 Ops.push_back(N->getOperand(3)); 04141 Ops.push_back(Chain); 04142 break; 04143 case NVPTXISD::Suld2DV4I8Trap: 04144 Opc = NVPTX::SULD_2D_V4I8_TRAP; 04145 Ops.push_back(TexHandle); 04146 Ops.push_back(N->getOperand(2)); 04147 Ops.push_back(N->getOperand(3)); 04148 Ops.push_back(Chain); 04149 break; 04150 case NVPTXISD::Suld2DV4I16Trap: 04151 Opc = NVPTX::SULD_2D_V4I16_TRAP; 04152 Ops.push_back(TexHandle); 04153 Ops.push_back(N->getOperand(2)); 04154 Ops.push_back(N->getOperand(3)); 04155 Ops.push_back(Chain); 04156 break; 04157 case NVPTXISD::Suld2DV4I32Trap: 04158 Opc = NVPTX::SULD_2D_V4I32_TRAP; 04159 Ops.push_back(TexHandle); 04160 Ops.push_back(N->getOperand(2)); 04161 Ops.push_back(N->getOperand(3)); 04162 Ops.push_back(Chain); 04163 break; 04164 case NVPTXISD::Suld2DArrayI8Trap: 04165 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; 04166 Ops.push_back(TexHandle); 04167 Ops.push_back(N->getOperand(2)); 04168 Ops.push_back(N->getOperand(3)); 04169 Ops.push_back(N->getOperand(4)); 04170 Ops.push_back(Chain); 04171 break; 04172 case NVPTXISD::Suld2DArrayI16Trap: 04173 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; 04174 Ops.push_back(TexHandle); 04175 
Ops.push_back(N->getOperand(2)); 04176 Ops.push_back(N->getOperand(3)); 04177 Ops.push_back(N->getOperand(4)); 04178 Ops.push_back(Chain); 04179 break; 04180 case NVPTXISD::Suld2DArrayI32Trap: 04181 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; 04182 Ops.push_back(TexHandle); 04183 Ops.push_back(N->getOperand(2)); 04184 Ops.push_back(N->getOperand(3)); 04185 Ops.push_back(N->getOperand(4)); 04186 Ops.push_back(Chain); 04187 break; 04188 case NVPTXISD::Suld2DArrayI64Trap: 04189 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP; 04190 Ops.push_back(TexHandle); 04191 Ops.push_back(N->getOperand(2)); 04192 Ops.push_back(N->getOperand(3)); 04193 Ops.push_back(N->getOperand(4)); 04194 Ops.push_back(Chain); 04195 break; 04196 case NVPTXISD::Suld2DArrayV2I8Trap: 04197 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; 04198 Ops.push_back(TexHandle); 04199 Ops.push_back(N->getOperand(2)); 04200 Ops.push_back(N->getOperand(3)); 04201 Ops.push_back(N->getOperand(4)); 04202 Ops.push_back(Chain); 04203 break; 04204 case NVPTXISD::Suld2DArrayV2I16Trap: 04205 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; 04206 Ops.push_back(TexHandle); 04207 Ops.push_back(N->getOperand(2)); 04208 Ops.push_back(N->getOperand(3)); 04209 Ops.push_back(N->getOperand(4)); 04210 Ops.push_back(Chain); 04211 break; 04212 case NVPTXISD::Suld2DArrayV2I32Trap: 04213 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; 04214 Ops.push_back(TexHandle); 04215 Ops.push_back(N->getOperand(2)); 04216 Ops.push_back(N->getOperand(3)); 04217 Ops.push_back(N->getOperand(4)); 04218 Ops.push_back(Chain); 04219 break; 04220 case NVPTXISD::Suld2DArrayV2I64Trap: 04221 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP; 04222 Ops.push_back(TexHandle); 04223 Ops.push_back(N->getOperand(2)); 04224 Ops.push_back(N->getOperand(3)); 04225 Ops.push_back(N->getOperand(4)); 04226 Ops.push_back(Chain); 04227 break; 04228 case NVPTXISD::Suld2DArrayV4I8Trap: 04229 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; 04230 Ops.push_back(TexHandle); 04231 Ops.push_back(N->getOperand(2)); 04232 
Ops.push_back(N->getOperand(3)); 04233 Ops.push_back(N->getOperand(4)); 04234 Ops.push_back(Chain); 04235 break; 04236 case NVPTXISD::Suld2DArrayV4I16Trap: 04237 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; 04238 Ops.push_back(TexHandle); 04239 Ops.push_back(N->getOperand(2)); 04240 Ops.push_back(N->getOperand(3)); 04241 Ops.push_back(N->getOperand(4)); 04242 Ops.push_back(Chain); 04243 break; 04244 case NVPTXISD::Suld2DArrayV4I32Trap: 04245 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; 04246 Ops.push_back(TexHandle); 04247 Ops.push_back(N->getOperand(2)); 04248 Ops.push_back(N->getOperand(3)); 04249 Ops.push_back(N->getOperand(4)); 04250 Ops.push_back(Chain); 04251 break; 04252 case NVPTXISD::Suld3DI8Trap: 04253 Opc = NVPTX::SULD_3D_I8_TRAP; 04254 Ops.push_back(TexHandle); 04255 Ops.push_back(N->getOperand(2)); 04256 Ops.push_back(N->getOperand(3)); 04257 Ops.push_back(N->getOperand(4)); 04258 Ops.push_back(Chain); 04259 break; 04260 case NVPTXISD::Suld3DI16Trap: 04261 Opc = NVPTX::SULD_3D_I16_TRAP; 04262 Ops.push_back(TexHandle); 04263 Ops.push_back(N->getOperand(2)); 04264 Ops.push_back(N->getOperand(3)); 04265 Ops.push_back(N->getOperand(4)); 04266 Ops.push_back(Chain); 04267 break; 04268 case NVPTXISD::Suld3DI32Trap: 04269 Opc = NVPTX::SULD_3D_I32_TRAP; 04270 Ops.push_back(TexHandle); 04271 Ops.push_back(N->getOperand(2)); 04272 Ops.push_back(N->getOperand(3)); 04273 Ops.push_back(N->getOperand(4)); 04274 Ops.push_back(Chain); 04275 break; 04276 case NVPTXISD::Suld3DI64Trap: 04277 Opc = NVPTX::SULD_3D_I64_TRAP; 04278 Ops.push_back(TexHandle); 04279 Ops.push_back(N->getOperand(2)); 04280 Ops.push_back(N->getOperand(3)); 04281 Ops.push_back(N->getOperand(4)); 04282 Ops.push_back(Chain); 04283 break; 04284 case NVPTXISD::Suld3DV2I8Trap: 04285 Opc = NVPTX::SULD_3D_V2I8_TRAP; 04286 Ops.push_back(TexHandle); 04287 Ops.push_back(N->getOperand(2)); 04288 Ops.push_back(N->getOperand(3)); 04289 Ops.push_back(N->getOperand(4)); 04290 Ops.push_back(Chain); 04291 break; 04292 case 
NVPTXISD::Suld3DV2I16Trap: 04293 Opc = NVPTX::SULD_3D_V2I16_TRAP; 04294 Ops.push_back(TexHandle); 04295 Ops.push_back(N->getOperand(2)); 04296 Ops.push_back(N->getOperand(3)); 04297 Ops.push_back(N->getOperand(4)); 04298 Ops.push_back(Chain); 04299 break; 04300 case NVPTXISD::Suld3DV2I32Trap: 04301 Opc = NVPTX::SULD_3D_V2I32_TRAP; 04302 Ops.push_back(TexHandle); 04303 Ops.push_back(N->getOperand(2)); 04304 Ops.push_back(N->getOperand(3)); 04305 Ops.push_back(N->getOperand(4)); 04306 Ops.push_back(Chain); 04307 break; 04308 case NVPTXISD::Suld3DV2I64Trap: 04309 Opc = NVPTX::SULD_3D_V2I64_TRAP; 04310 Ops.push_back(TexHandle); 04311 Ops.push_back(N->getOperand(2)); 04312 Ops.push_back(N->getOperand(3)); 04313 Ops.push_back(N->getOperand(4)); 04314 Ops.push_back(Chain); 04315 break; 04316 case NVPTXISD::Suld3DV4I8Trap: 04317 Opc = NVPTX::SULD_3D_V4I8_TRAP; 04318 Ops.push_back(TexHandle); 04319 Ops.push_back(N->getOperand(2)); 04320 Ops.push_back(N->getOperand(3)); 04321 Ops.push_back(N->getOperand(4)); 04322 Ops.push_back(Chain); 04323 break; 04324 case NVPTXISD::Suld3DV4I16Trap: 04325 Opc = NVPTX::SULD_3D_V4I16_TRAP; 04326 Ops.push_back(TexHandle); 04327 Ops.push_back(N->getOperand(2)); 04328 Ops.push_back(N->getOperand(3)); 04329 Ops.push_back(N->getOperand(4)); 04330 Ops.push_back(Chain); 04331 break; 04332 case NVPTXISD::Suld3DV4I32Trap: 04333 Opc = NVPTX::SULD_3D_V4I32_TRAP; 04334 Ops.push_back(TexHandle); 04335 Ops.push_back(N->getOperand(2)); 04336 Ops.push_back(N->getOperand(3)); 04337 Ops.push_back(N->getOperand(4)); 04338 Ops.push_back(Chain); 04339 break; 04340 case NVPTXISD::Suld1DI8Zero: 04341 Opc = NVPTX::SULD_1D_I8_ZERO; 04342 Ops.push_back(TexHandle); 04343 Ops.push_back(N->getOperand(2)); 04344 Ops.push_back(Chain); 04345 break; 04346 case NVPTXISD::Suld1DI16Zero: 04347 Opc = NVPTX::SULD_1D_I16_ZERO; 04348 Ops.push_back(TexHandle); 04349 Ops.push_back(N->getOperand(2)); 04350 Ops.push_back(Chain); 04351 break; 04352 case NVPTXISD::Suld1DI32Zero: 04353 
Opc = NVPTX::SULD_1D_I32_ZERO; 04354 Ops.push_back(TexHandle); 04355 Ops.push_back(N->getOperand(2)); 04356 Ops.push_back(Chain); 04357 break; 04358 case NVPTXISD::Suld1DI64Zero: 04359 Opc = NVPTX::SULD_1D_I64_ZERO; 04360 Ops.push_back(TexHandle); 04361 Ops.push_back(N->getOperand(2)); 04362 Ops.push_back(Chain); 04363 break; 04364 case NVPTXISD::Suld1DV2I8Zero: 04365 Opc = NVPTX::SULD_1D_V2I8_ZERO; 04366 Ops.push_back(TexHandle); 04367 Ops.push_back(N->getOperand(2)); 04368 Ops.push_back(Chain); 04369 break; 04370 case NVPTXISD::Suld1DV2I16Zero: 04371 Opc = NVPTX::SULD_1D_V2I16_ZERO; 04372 Ops.push_back(TexHandle); 04373 Ops.push_back(N->getOperand(2)); 04374 Ops.push_back(Chain); 04375 break; 04376 case NVPTXISD::Suld1DV2I32Zero: 04377 Opc = NVPTX::SULD_1D_V2I32_ZERO; 04378 Ops.push_back(TexHandle); 04379 Ops.push_back(N->getOperand(2)); 04380 Ops.push_back(Chain); 04381 break; 04382 case NVPTXISD::Suld1DV2I64Zero: 04383 Opc = NVPTX::SULD_1D_V2I64_ZERO; 04384 Ops.push_back(TexHandle); 04385 Ops.push_back(N->getOperand(2)); 04386 Ops.push_back(Chain); 04387 break; 04388 case NVPTXISD::Suld1DV4I8Zero: 04389 Opc = NVPTX::SULD_1D_V4I8_ZERO; 04390 Ops.push_back(TexHandle); 04391 Ops.push_back(N->getOperand(2)); 04392 Ops.push_back(Chain); 04393 break; 04394 case NVPTXISD::Suld1DV4I16Zero: 04395 Opc = NVPTX::SULD_1D_V4I16_ZERO; 04396 Ops.push_back(TexHandle); 04397 Ops.push_back(N->getOperand(2)); 04398 Ops.push_back(Chain); 04399 break; 04400 case NVPTXISD::Suld1DV4I32Zero: 04401 Opc = NVPTX::SULD_1D_V4I32_ZERO; 04402 Ops.push_back(TexHandle); 04403 Ops.push_back(N->getOperand(2)); 04404 Ops.push_back(Chain); 04405 break; 04406 case NVPTXISD::Suld1DArrayI8Zero: 04407 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO; 04408 Ops.push_back(TexHandle); 04409 Ops.push_back(N->getOperand(2)); 04410 Ops.push_back(N->getOperand(3)); 04411 Ops.push_back(Chain); 04412 break; 04413 case NVPTXISD::Suld1DArrayI16Zero: 04414 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO; 04415 Ops.push_back(TexHandle); 
04416 Ops.push_back(N->getOperand(2)); 04417 Ops.push_back(N->getOperand(3)); 04418 Ops.push_back(Chain); 04419 break; 04420 case NVPTXISD::Suld1DArrayI32Zero: 04421 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO; 04422 Ops.push_back(TexHandle); 04423 Ops.push_back(N->getOperand(2)); 04424 Ops.push_back(N->getOperand(3)); 04425 Ops.push_back(Chain); 04426 break; 04427 case NVPTXISD::Suld1DArrayI64Zero: 04428 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO; 04429 Ops.push_back(TexHandle); 04430 Ops.push_back(N->getOperand(2)); 04431 Ops.push_back(N->getOperand(3)); 04432 Ops.push_back(Chain); 04433 break; 04434 case NVPTXISD::Suld1DArrayV2I8Zero: 04435 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO; 04436 Ops.push_back(TexHandle); 04437 Ops.push_back(N->getOperand(2)); 04438 Ops.push_back(N->getOperand(3)); 04439 Ops.push_back(Chain); 04440 break; 04441 case NVPTXISD::Suld1DArrayV2I16Zero: 04442 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO; 04443 Ops.push_back(TexHandle); 04444 Ops.push_back(N->getOperand(2)); 04445 Ops.push_back(N->getOperand(3)); 04446 Ops.push_back(Chain); 04447 break; 04448 case NVPTXISD::Suld1DArrayV2I32Zero: 04449 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO; 04450 Ops.push_back(TexHandle); 04451 Ops.push_back(N->getOperand(2)); 04452 Ops.push_back(N->getOperand(3)); 04453 Ops.push_back(Chain); 04454 break; 04455 case NVPTXISD::Suld1DArrayV2I64Zero: 04456 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO; 04457 Ops.push_back(TexHandle); 04458 Ops.push_back(N->getOperand(2)); 04459 Ops.push_back(N->getOperand(3)); 04460 Ops.push_back(Chain); 04461 break; 04462 case NVPTXISD::Suld1DArrayV4I8Zero: 04463 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO; 04464 Ops.push_back(TexHandle); 04465 Ops.push_back(N->getOperand(2)); 04466 Ops.push_back(N->getOperand(3)); 04467 Ops.push_back(Chain); 04468 break; 04469 case NVPTXISD::Suld1DArrayV4I16Zero: 04470 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO; 04471 Ops.push_back(TexHandle); 04472 Ops.push_back(N->getOperand(2)); 04473 Ops.push_back(N->getOperand(3)); 04474 
Ops.push_back(Chain); 04475 break; 04476 case NVPTXISD::Suld1DArrayV4I32Zero: 04477 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO; 04478 Ops.push_back(TexHandle); 04479 Ops.push_back(N->getOperand(2)); 04480 Ops.push_back(N->getOperand(3)); 04481 Ops.push_back(Chain); 04482 break; 04483 case NVPTXISD::Suld2DI8Zero: 04484 Opc = NVPTX::SULD_2D_I8_ZERO; 04485 Ops.push_back(TexHandle); 04486 Ops.push_back(N->getOperand(2)); 04487 Ops.push_back(N->getOperand(3)); 04488 Ops.push_back(Chain); 04489 break; 04490 case NVPTXISD::Suld2DI16Zero: 04491 Opc = NVPTX::SULD_2D_I16_ZERO; 04492 Ops.push_back(TexHandle); 04493 Ops.push_back(N->getOperand(2)); 04494 Ops.push_back(N->getOperand(3)); 04495 Ops.push_back(Chain); 04496 break; 04497 case NVPTXISD::Suld2DI32Zero: 04498 Opc = NVPTX::SULD_2D_I32_ZERO; 04499 Ops.push_back(TexHandle); 04500 Ops.push_back(N->getOperand(2)); 04501 Ops.push_back(N->getOperand(3)); 04502 Ops.push_back(Chain); 04503 break; 04504 case NVPTXISD::Suld2DI64Zero: 04505 Opc = NVPTX::SULD_2D_I64_ZERO; 04506 Ops.push_back(TexHandle); 04507 Ops.push_back(N->getOperand(2)); 04508 Ops.push_back(N->getOperand(3)); 04509 Ops.push_back(Chain); 04510 break; 04511 case NVPTXISD::Suld2DV2I8Zero: 04512 Opc = NVPTX::SULD_2D_V2I8_ZERO; 04513 Ops.push_back(TexHandle); 04514 Ops.push_back(N->getOperand(2)); 04515 Ops.push_back(N->getOperand(3)); 04516 Ops.push_back(Chain); 04517 break; 04518 case NVPTXISD::Suld2DV2I16Zero: 04519 Opc = NVPTX::SULD_2D_V2I16_ZERO; 04520 Ops.push_back(TexHandle); 04521 Ops.push_back(N->getOperand(2)); 04522 Ops.push_back(N->getOperand(3)); 04523 Ops.push_back(Chain); 04524 break; 04525 case NVPTXISD::Suld2DV2I32Zero: 04526 Opc = NVPTX::SULD_2D_V2I32_ZERO; 04527 Ops.push_back(TexHandle); 04528 Ops.push_back(N->getOperand(2)); 04529 Ops.push_back(N->getOperand(3)); 04530 Ops.push_back(Chain); 04531 break; 04532 case NVPTXISD::Suld2DV2I64Zero: 04533 Opc = NVPTX::SULD_2D_V2I64_ZERO; 04534 Ops.push_back(TexHandle); 04535 Ops.push_back(N->getOperand(2)); 
04536 Ops.push_back(N->getOperand(3)); 04537 Ops.push_back(Chain); 04538 break; 04539 case NVPTXISD::Suld2DV4I8Zero: 04540 Opc = NVPTX::SULD_2D_V4I8_ZERO; 04541 Ops.push_back(TexHandle); 04542 Ops.push_back(N->getOperand(2)); 04543 Ops.push_back(N->getOperand(3)); 04544 Ops.push_back(Chain); 04545 break; 04546 case NVPTXISD::Suld2DV4I16Zero: 04547 Opc = NVPTX::SULD_2D_V4I16_ZERO; 04548 Ops.push_back(TexHandle); 04549 Ops.push_back(N->getOperand(2)); 04550 Ops.push_back(N->getOperand(3)); 04551 Ops.push_back(Chain); 04552 break; 04553 case NVPTXISD::Suld2DV4I32Zero: 04554 Opc = NVPTX::SULD_2D_V4I32_ZERO; 04555 Ops.push_back(TexHandle); 04556 Ops.push_back(N->getOperand(2)); 04557 Ops.push_back(N->getOperand(3)); 04558 Ops.push_back(Chain); 04559 break; 04560 case NVPTXISD::Suld2DArrayI8Zero: 04561 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO; 04562 Ops.push_back(TexHandle); 04563 Ops.push_back(N->getOperand(2)); 04564 Ops.push_back(N->getOperand(3)); 04565 Ops.push_back(N->getOperand(4)); 04566 Ops.push_back(Chain); 04567 break; 04568 case NVPTXISD::Suld2DArrayI16Zero: 04569 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO; 04570 Ops.push_back(TexHandle); 04571 Ops.push_back(N->getOperand(2)); 04572 Ops.push_back(N->getOperand(3)); 04573 Ops.push_back(N->getOperand(4)); 04574 Ops.push_back(Chain); 04575 break; 04576 case NVPTXISD::Suld2DArrayI32Zero: 04577 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO; 04578 Ops.push_back(TexHandle); 04579 Ops.push_back(N->getOperand(2)); 04580 Ops.push_back(N->getOperand(3)); 04581 Ops.push_back(N->getOperand(4)); 04582 Ops.push_back(Chain); 04583 break; 04584 case NVPTXISD::Suld2DArrayI64Zero: 04585 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO; 04586 Ops.push_back(TexHandle); 04587 Ops.push_back(N->getOperand(2)); 04588 Ops.push_back(N->getOperand(3)); 04589 Ops.push_back(N->getOperand(4)); 04590 Ops.push_back(Chain); 04591 break; 04592 case NVPTXISD::Suld2DArrayV2I8Zero: 04593 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO; 04594 Ops.push_back(TexHandle); 04595 
Ops.push_back(N->getOperand(2)); 04596 Ops.push_back(N->getOperand(3)); 04597 Ops.push_back(N->getOperand(4)); 04598 Ops.push_back(Chain); 04599 break; 04600 case NVPTXISD::Suld2DArrayV2I16Zero: 04601 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO; 04602 Ops.push_back(TexHandle); 04603 Ops.push_back(N->getOperand(2)); 04604 Ops.push_back(N->getOperand(3)); 04605 Ops.push_back(N->getOperand(4)); 04606 Ops.push_back(Chain); 04607 break; 04608 case NVPTXISD::Suld2DArrayV2I32Zero: 04609 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO; 04610 Ops.push_back(TexHandle); 04611 Ops.push_back(N->getOperand(2)); 04612 Ops.push_back(N->getOperand(3)); 04613 Ops.push_back(N->getOperand(4)); 04614 Ops.push_back(Chain); 04615 break; 04616 case NVPTXISD::Suld2DArrayV2I64Zero: 04617 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO; 04618 Ops.push_back(TexHandle); 04619 Ops.push_back(N->getOperand(2)); 04620 Ops.push_back(N->getOperand(3)); 04621 Ops.push_back(N->getOperand(4)); 04622 Ops.push_back(Chain); 04623 break; 04624 case NVPTXISD::Suld2DArrayV4I8Zero: 04625 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO; 04626 Ops.push_back(TexHandle); 04627 Ops.push_back(N->getOperand(2)); 04628 Ops.push_back(N->getOperand(3)); 04629 Ops.push_back(N->getOperand(4)); 04630 Ops.push_back(Chain); 04631 break; 04632 case NVPTXISD::Suld2DArrayV4I16Zero: 04633 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO; 04634 Ops.push_back(TexHandle); 04635 Ops.push_back(N->getOperand(2)); 04636 Ops.push_back(N->getOperand(3)); 04637 Ops.push_back(N->getOperand(4)); 04638 Ops.push_back(Chain); 04639 break; 04640 case NVPTXISD::Suld2DArrayV4I32Zero: 04641 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO; 04642 Ops.push_back(TexHandle); 04643 Ops.push_back(N->getOperand(2)); 04644 Ops.push_back(N->getOperand(3)); 04645 Ops.push_back(N->getOperand(4)); 04646 Ops.push_back(Chain); 04647 break; 04648 case NVPTXISD::Suld3DI8Zero: 04649 Opc = NVPTX::SULD_3D_I8_ZERO; 04650 Ops.push_back(TexHandle); 04651 Ops.push_back(N->getOperand(2)); 04652 Ops.push_back(N->getOperand(3)); 
04653 Ops.push_back(N->getOperand(4)); 04654 Ops.push_back(Chain); 04655 break; 04656 case NVPTXISD::Suld3DI16Zero: 04657 Opc = NVPTX::SULD_3D_I16_ZERO; 04658 Ops.push_back(TexHandle); 04659 Ops.push_back(N->getOperand(2)); 04660 Ops.push_back(N->getOperand(3)); 04661 Ops.push_back(N->getOperand(4)); 04662 Ops.push_back(Chain); 04663 break; 04664 case NVPTXISD::Suld3DI32Zero: 04665 Opc = NVPTX::SULD_3D_I32_ZERO; 04666 Ops.push_back(TexHandle); 04667 Ops.push_back(N->getOperand(2)); 04668 Ops.push_back(N->getOperand(3)); 04669 Ops.push_back(N->getOperand(4)); 04670 Ops.push_back(Chain); 04671 break; 04672 case NVPTXISD::Suld3DI64Zero: 04673 Opc = NVPTX::SULD_3D_I64_ZERO; 04674 Ops.push_back(TexHandle); 04675 Ops.push_back(N->getOperand(2)); 04676 Ops.push_back(N->getOperand(3)); 04677 Ops.push_back(N->getOperand(4)); 04678 Ops.push_back(Chain); 04679 break; 04680 case NVPTXISD::Suld3DV2I8Zero: 04681 Opc = NVPTX::SULD_3D_V2I8_ZERO; 04682 Ops.push_back(TexHandle); 04683 Ops.push_back(N->getOperand(2)); 04684 Ops.push_back(N->getOperand(3)); 04685 Ops.push_back(N->getOperand(4)); 04686 Ops.push_back(Chain); 04687 break; 04688 case NVPTXISD::Suld3DV2I16Zero: 04689 Opc = NVPTX::SULD_3D_V2I16_ZERO; 04690 Ops.push_back(TexHandle); 04691 Ops.push_back(N->getOperand(2)); 04692 Ops.push_back(N->getOperand(3)); 04693 Ops.push_back(N->getOperand(4)); 04694 Ops.push_back(Chain); 04695 break; 04696 case NVPTXISD::Suld3DV2I32Zero: 04697 Opc = NVPTX::SULD_3D_V2I32_ZERO; 04698 Ops.push_back(TexHandle); 04699 Ops.push_back(N->getOperand(2)); 04700 Ops.push_back(N->getOperand(3)); 04701 Ops.push_back(N->getOperand(4)); 04702 Ops.push_back(Chain); 04703 break; 04704 case NVPTXISD::Suld3DV2I64Zero: 04705 Opc = NVPTX::SULD_3D_V2I64_ZERO; 04706 Ops.push_back(TexHandle); 04707 Ops.push_back(N->getOperand(2)); 04708 Ops.push_back(N->getOperand(3)); 04709 Ops.push_back(N->getOperand(4)); 04710 Ops.push_back(Chain); 04711 break; 04712 case NVPTXISD::Suld3DV4I8Zero: 04713 Opc = 
NVPTX::SULD_3D_V4I8_ZERO; 04714 Ops.push_back(TexHandle); 04715 Ops.push_back(N->getOperand(2)); 04716 Ops.push_back(N->getOperand(3)); 04717 Ops.push_back(N->getOperand(4)); 04718 Ops.push_back(Chain); 04719 break; 04720 case NVPTXISD::Suld3DV4I16Zero: 04721 Opc = NVPTX::SULD_3D_V4I16_ZERO; 04722 Ops.push_back(TexHandle); 04723 Ops.push_back(N->getOperand(2)); 04724 Ops.push_back(N->getOperand(3)); 04725 Ops.push_back(N->getOperand(4)); 04726 Ops.push_back(Chain); 04727 break; 04728 case NVPTXISD::Suld3DV4I32Zero: 04729 Opc = NVPTX::SULD_3D_V4I32_ZERO; 04730 Ops.push_back(TexHandle); 04731 Ops.push_back(N->getOperand(2)); 04732 Ops.push_back(N->getOperand(3)); 04733 Ops.push_back(N->getOperand(4)); 04734 Ops.push_back(Chain); 04735 break; 04736 } 04737 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 04738 return Ret; 04739 }

/// SelectBFE - Look for instruction sequences that can be made more efficient
/// by using the 'bfe' (bit-field extract) PTX instruction.
///
/// Three shapes are recognized:
///   (and (srl|sra val, start), lowmask)   -> unsigned bfe
///   (srl|sra (and val, mask), shift)      -> unsigned bfe, or signed bfe when
///                                            the node is an sra and the mask
///                                            reaches the sign bit of val
///   (srl|sra (shl val, inner), outer)     -> unsigned bfe for srl, signed bfe
///                                            for sra
///
/// \param N  the AND / SRL / SRA node to examine.
/// \returns the newly created BFE machine node, or nullptr when N does not
///          match a supported (and profitable) pattern or the value type is
///          neither i32 nor i64.
SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Len;
  SDValue Start;
  SDValue Val;
  bool IsSigned = false;

  if (N->getOpcode() == ISD::AND) {
    // Canonicalize the operands
    // We want 'and %val, %mask'
    if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
      std::swap(LHS, RHS);
    }

    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
    if (!Mask) {
      // We need a constant mask on the RHS of the AND
      return nullptr;
    }

    // Extract the mask bits
    uint64_t MaskVal = Mask->getZExtValue();
    if (!isMask_64(MaskVal)) {
      // We *could* handle shifted masks here, but doing so would require an
      // 'and' operation to fix up the low-order bits so we would trade
      // shr+and for bfe+and, which has the same throughput
      return nullptr;
    }

    // How many bits are in our mask?
    uint64_t NumBits = CountTrailingOnes_64(MaskVal);

    if (LHS.getOpcode() != ISD::SRL && LHS.getOpcode() != ISD::SRA) {
      // Do not handle the case where the LHS of the and is not a shift. While
      // it would be trivial to handle this case, it would just transform
      // 'and' -> 'bfe', but 'and' has higher-throughput.
      return nullptr;
    }

    // We have a 'srl/and' pair, extract the effective start bit and length
    Val = LHS.getNode()->getOperand(0);
    ConstantSDNode *StartConst =
        dyn_cast<ConstantSDNode>(LHS.getNode()->getOperand(1));
    if (!StartConst) {
      // Do not handle the case where the shift amount is not constant. We
      // could handle this case, but it would require run-time logic that
      // would be more expensive than just emitting the srl/and pair.
      return nullptr;
    }

    uint64_t StartVal = StartConst->getZExtValue();
    // The width that matters is that of the value being shifted; the
    // shift-amount operand's own type need not match it.
    uint64_t ValBits = Val.getValueType().getSizeInBits();
    if (StartVal >= ValBits) {
      // Nothing of the original value is left after the shift; bail out
      // before the subtraction below could wrap around.
      return nullptr;
    }

    // How many "good" bits do we have left?  "good" is defined here as bits
    // that exist in the original value, not shifted in.
    uint64_t GoodBits = ValBits - StartVal;
    if (NumBits > GoodBits) {
      // Do not handle the case where bits have been shifted in. In theory
      // we could handle this, but the cost is likely higher than just
      // emitting the srl/and pair.
      return nullptr;
    }

    Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
    Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
    if (LHS->getOpcode() == ISD::AND) {
      ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
      if (!ShiftCnst) {
        // Shift amount must be constant
        return nullptr;
      }

      uint64_t ShiftAmt = ShiftCnst->getZExtValue();

      SDValue AndLHS = LHS->getOperand(0);
      SDValue AndRHS = LHS->getOperand(1);

      // Canonicalize the AND to have the mask on the RHS
      if (isa<ConstantSDNode>(AndLHS)) {
        std::swap(AndLHS, AndRHS);
      }

      ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
      if (!MaskCnst) {
        // Mask must be constant
        return nullptr;
      }

      uint64_t MaskVal = MaskCnst->getZExtValue();
      // NumZeros: number of clear bits below the mask.
      // FieldEnd: index one past the highest set bit of the mask.
      uint64_t NumZeros;
      uint64_t FieldEnd;
      if (isMask_64(MaskVal)) {
        NumZeros = 0;
        FieldEnd = CountTrailingOnes_64(MaskVal);
      } else if (isShiftedMask_64(MaskVal)) {
        NumZeros = countTrailingZeros(MaskVal);
        FieldEnd = NumZeros + CountTrailingOnes_64(MaskVal >> NumZeros);
      } else {
        // This is not a mask we can handle
        return nullptr;
      }

      if (ShiftAmt < NumZeros) {
        // Handling this case would require extra logic that would make this
        // transformation non-profitable
        return nullptr;
      }

      if (ShiftAmt >= FieldEnd) {
        // The shift discards every bit the mask kept, so there is no
        // bitfield to extract (and the length below would wrap around).
        return nullptr;
      }

      // The number of bits in the result bitfield is the extent of the mask
      // minus the number of bits we shift off.
      uint64_t NumBits = FieldEnd - ShiftAmt;

      Val = AndLHS;
      Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
      Len = CurDAG->getTargetConstant(NumBits, MVT::i32);

      if (N->getOpcode() == ISD::SRA) {
        // If the mask keeps the sign bit of the value, the arithmetic shift
        // replicates that bit into the upper bits of the result, so the
        // signed bfe variant is required. If the sign bit is masked off, the
        // shifted-in bits are zero and the unsigned variant is correct.
        IsSigned = (FieldEnd == Val.getValueType().getSizeInBits());
      }
    } else if (LHS->getOpcode() == ISD::SHL) {
      // Here, we have a pattern like:
      //
      // (sra (shl val, NN), MM)
      // or
      // (srl (shl val, NN), MM)
      //
      // If MM >= NN, we can efficiently optimize this with bfe
      Val = LHS->getOperand(0);

      ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
      if (!ShlCnst) {
        // Shift amount must be constant
        return nullptr;
      }
      uint64_t InnerShiftAmt = ShlCnst->getZExtValue();

      ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(RHS);
      if (!ShrCnst) {
        // Shift amount must be constant
        return nullptr;
      }
      uint64_t OuterShiftAmt = ShrCnst->getZExtValue();

      // To avoid extra codegen and be profitable, we need Outer >= Inner
      if (OuterShiftAmt < InnerShiftAmt) {
        return nullptr;
      }

      // If the outer shift is more than the type size, we have no bitfield to
      // extract (since we also check that the inner shift is <= the outer
      // shift then this also implies that the inner shift is < the type size)
      uint64_t ValBits = Val.getValueType().getSizeInBits();
      if (OuterShiftAmt >= ValBits) {
        return nullptr;
      }

      Start =
          CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
      Len = CurDAG->getTargetConstant(ValBits - OuterShiftAmt, MVT::i32);

      if (N->getOpcode() == ISD::SRA) {
        // If we have an arithmetic right shift, we need to use the signed bfe
        // variant
        IsSigned = true;
      }
    } else {
      // No can do...
      return nullptr;
    }
  } else {
    // No can do...
    return nullptr;
  }

  // Pick the opcode from the value width and the signedness decided above.
  unsigned Opc;
  if (Val.getValueType() == MVT::i32) {
    Opc = IsSigned ? NVPTX::BFE_S32rii : NVPTX::BFE_U32rii;
  } else if (Val.getValueType() == MVT::i64) {
    Opc = IsSigned ? NVPTX::BFE_S64rii : NVPTX::BFE_U64rii;
  } else {
    // We cannot handle this type
    return nullptr;
  }

  SDValue Ops[] = {
    Val, Start, Len
  };

  return CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
}

// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
//
// On success the matched node is stored in Address and true is returned;
// Address is left untouched when false is returned.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
  // Return true if TGA or ES.
  if (N.getOpcode() == ISD::TargetGlobalAddress ||
      N.getOpcode() == ISD::TargetExternalSymbol) {
    Address = N;
    return true;
  }
  // A Wrapper node: the address is its first operand.
  if (N.getOpcode() == NVPTXISD::Wrapper) {
    Address = N.getOperand(0);
    return true;
  }
  // nvvm_ptr_gen_to_param applied to a MoveParam: look through both and try
  // to match the MoveParam's operand as a direct address.
  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
  }
  return false;
}

// symbol+offset
//
// Matches (add <direct address>, <constant>). On success Base receives the
// direct address and Offset a target constant of type mvt holding the
// constant operand. OpNode is not used.
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
  if (Addr.getOpcode() != ISD::ADD)
    return false;

  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
  if (!CN)
    return false;

  if (!SelectDirectAddr(Addr.getOperand(0), Base))
    return false;

  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
  return true;
}

// symbol+offset (offset emitted as an i32 target constant)
bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
}

// symbol+offset (offset emitted as an i64 target constant)
bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
}

// register+offset
//
// Matches either a bare frame index (offset 0) or (add <base>, <constant>)
// where <base> is not a direct address. A frame-index base is rewritten to a
// target frame index of type mvt; any other base is used as is. OpNode is not
// used.
bool NVPTXDAGToDAGISel::SelectADDRri_imp(
    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
    Offset = CurDAG->getTargetConstant(0, mvt);
    return true;
  }
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress)
    return false; // direct calls.

  if (Addr.getOpcode() != ISD::ADD)
    return false;

  // An add whose first operand is a direct address is not matched here.
  // The matched address itself is not needed, so it goes to a scratch value.
  SDValue Scratch;
  if (SelectDirectAddr(Addr.getOperand(0), Scratch))
    return false;

  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
  if (!CN)
    return false;

  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
    // Constant offset from frame ref.
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
  } else {
    Base = Addr.getOperand(0);
  }
  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
  return true;
}

// register+offset (base/offset emitted with type i32)
bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
}

// register+offset (base/offset emitted with type i64)
bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
}

/// ChkMemSDNodeAddressSpace - Check whether the memory operand of N refers to
/// address space spN.
///
/// Returns true when N is a MemSDNode and either
///  - spN is 0 and the memory operand carries a pseudo source value, or
///  - the memory operand's IR value has pointer type in address space spN.
/// Returns false in every other case, including when N is not a MemSDNode or
/// its memory operand has no IR value.
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
                                                 unsigned int spN) const {
  MemSDNode *mN = dyn_cast<MemSDNode>(N);
  if (!mN)
    return false;

  if (spN == 0 && mN->getMemOperand()->getPseudoValue())
    return true;

  const Value *Src = mN->getMemOperand()->getValue();
  if (!Src)
    return false;

  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
    return (PT->getAddressSpace() == spN);
  return false;
}

/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
///
/// Only the 'm' (memory) constraint is handled. On a match, two operands are
/// appended to OutOps -- either (direct address, i32 0) or the (base, offset)
/// pair from SelectADDRri -- and false is returned. True is returned when the
/// operand could not be selected or the constraint code is not handled.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1;
  switch (ConstraintCode) {
  default:
    return true;
  case 'm': // memory
    if (SelectDirectAddr(Op, Op0)) {
      OutOps.push_back(Op0);
      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
      return false;
    }
    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
      OutOps.push_back(Op0);
      OutOps.push_back(Op1);
      return false;
    }
    break;
  }
  return true;
}