LLVM API Documentation
//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget &Subtarget;
public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();

  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  inline SDValue getSmallIPtrImm(unsigned Imm);
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isFlatStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isFlatLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  /// \returns True if the current basic block being selected is at control
  /// flow depth 0, meaning that the current block dominates the exit block.
  bool isCFDepth0() const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &Offset) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  SDNode *SelectAddrSpaceCast(SDNode *N);
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;

  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}
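// Illustrative sketch (not part of the original file): analyzeImmediate
// returns 0 exactly when a constant can be encoded as an inline operand
// rather than needing a separate literal. As a rough, assumption-labeled
// approximation of the SI rules, small integers and a few FP constants
// qualify:
//
//   bool looksInline(int64_t Imm) { return Imm >= -16 && Imm <= 64; }
//   // plus FP values such as 0.5, 1.0, 2.0, 4.0 and their negations
//
// The authoritative check is SITargetLowering::analyzeImmediate.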
/// \brief Determine the register class for \p OpNo.
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo, or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        TM.getSubtargetImpl()->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return TM.getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
        SuperRC, SubRegIdx);
  }
  }
}

SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
  return CurDAG->getTargetConstant(Imm, MVT::i32);
}

bool AMDGPUDAGToDAGISel::SelectADDRParam(
    SDValue Addr, SDValue &R1, SDValue &R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}

bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i64);
  }
  return true;
}
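// Illustrative note (not from the original file): these complex-pattern
// selectors all decompose an address into a (base, offset)-style pair.
// SelectADDRParam, for example, behaves roughly like this on the three
// shapes it recognizes:
//
//   (add x, y)       ->  R1 = x,              R2 = y
//   (FrameIndex fi)  ->  R1 = TargetFI(fi),   R2 = 0
//   anything else    ->  R1 = Addr,           R2 = 0
//
// Returning false (as SelectADDR does for target globals and external
// symbols) tells the TableGen-generated matcher that the pattern does not
// apply to this node.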
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo *>(
        TM.getSubtargetImpl()->getRegisterInfo());
    const SIRegisterInfo *SIRI = static_cast<const SIRegisterInfo *>(
        TM.getSubtargetImpl()->getRegisterInfo());
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      bool UseVReg = true;
      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
           U != E; ++U) {
        if (!U->isMachineOpcode()) {
          continue;
        }
        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
        if (!RC) {
          continue;
        }
        if (SIRI->isSGPRClass(RC)) {
          UseVReg = false;
        }
      }
      switch (NumVectorElts) {
      case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
                                     AMDGPU::SReg_32RegClassID;
        break;
      case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
                                     AMDGPU::SReg_64RegClassID;
        break;
      case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
                                     AMDGPU::SReg_128RegClassID;
        break;
      case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
                                     AMDGPU::SReg_256RegClassID;
        break;
      case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
                                      AMDGPU::SReg_512RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // sequence that adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch (NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     SDLoc(N), EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
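  // Illustrative note (not from the original file): a REG_SEQUENCE node
  // takes the register class ID followed by (value, subreg-index) pairs.
  // For a 2-element vector built from v0 and v1, the argument array filled
  // above looks like:
  //
  //   RegSeqArgs = { RegClassID, v0, sub0, v1, sub1 }
  //
  // i.e. element i lands at index 1 + 2*i and its subregister index at
  // 1 + 2*i + 1, matching the comment on the SmallVector size above.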
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
                                  N->getValueType(0), Ops);
  }
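  // Worked example (illustrative, not from the original file): a non-inline
  // 64-bit immediate such as 0x123456789ABCDEF0 is materialized by the case
  // above as two 32-bit scalar moves glued back together:
  //
  //   S_MOV_B32  lo, 0x9ABCDEF0    // Imm & 0xFFFFFFFF
  //   S_MOV_B32  hi, 0x12345678    // Imm >> 32
  //   REG_SEQUENCE SReg_64, lo, sub0, hi, sub1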
  case AMDGPUISD::REGISTER_LOAD: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;

    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
    const SDValue Ops[] = {
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
                                  CurDAG->getVTList(MVT::i32, MVT::i64,
                                                    MVT::Other),
                                  Ops);
  }
  case AMDGPUISD::REGISTER_STORE: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
    const SDValue Ops[] = {
      N->getOperand(1),
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
                                  CurDAG->getVTList(MVT::Other),
                                  Ops);
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has a separate operand for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move
    // to the scalar version if the offsets are constant, so that we can try
    // to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    // Pack the offset and width of the BFE into the format expected by
    // S_BFE_I32 / S_BFE_U32: in the second source operand, bits [5:0]
    // contain the offset and bits [22:16] the width. (See the worked
    // example after this function.)

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    uint32_t PackedVal = OffsetVal | WidthVal << 16;

    SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
    return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32
                                         : AMDGPU::S_BFE_U32,
                                  SDLoc(N),
                                  MVT::i32,
                                  N->getOperand(0),
                                  PackedOffsetWidth);

  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);
  }
  return SelectCode(N);
}
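// Worked example for the S_BFE packing above (illustrative, not from the
// original file): extracting a 5-bit field starting at bit 8 yields
//
//   uint32_t PackedVal = 8 | (5 << 16);   // == 0x00050008
//
// so bits [5:0] of the second source hold the offset (8) and bits [22:16]
// hold the width (5).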
bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32)) {
      return true;
    }
  }
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isCFDepth0() const {
  // FIXME: Figure out a way to use DominatorTree analysis here.
  const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
  const Function *Fn = FuncInfo->Fn;
  return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
}

const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, true);
    return true;
  }
  return false;
}
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
  }

  return true;
}

SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  if (!isCFDepth0()) {
    Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32;
    CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
  }

  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
      = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                               SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
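// Worked example (illustrative, not from the original file): the split above
// builds a 64-bit add out of two 32-bit halves exactly like schoolbook
// addition with a carry; in plain C++ terms:
//
//   uint32_t Lo = ALo + BLo;
//   uint32_t Carry = Lo < ALo;          // S_ADD_U32 records this in SCC
//   uint32_t Hi = AHi + BHi + Carry;    // S_ADDC_U32 consumes SCC
//
// The VALU forms (V_ADD_I32_e32 / V_ADDC_U32_e32) are used when the block
// may sit inside divergent control flow, where the scalar carry bit cannot
// be relied on.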
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);

  SDValue Ops[] = {
    N->getOperand(0),
    N->getOperand(1),
    N->getOperand(2),
    Zero,
    Zero,
    Zero,
    Zero
  };

  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}
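// Worked example (illustrative, not from the original file): DS instructions
// take an unsigned immediate offset, 16 bits wide for the single-offset
// forms and 8 bits per offset for the two-offset forms, so:
//
//   isUInt<16>(65535)   // true  -> foldable for SelectDS1Addr1Offset
//   isUInt<16>(65536)   // false -> the offset must stay in the address
//
// On Southern Islands the base additionally needs a provably clear sign bit
// (SignBitIsZero) before an offset may be folded.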
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = N1;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, MVT::i8);
  return true;
}

static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
  return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
                                     Ptr), 0);
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  SDLoc DL(Addr);

  GLC = CurDAG->getTargetConstant(0, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (isLegalMUBUFImmOffset(C1)) {

      if (N0.getOpcode() == ISD::ADD) {
        // (add (add N2, N3), C1) -> addr64
        SDValue N2 = N0.getOperand(0);
        SDValue N3 = N0.getOperand(1);
        Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
        Ptr = N2;
        VAddr = N3;
        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
        return;
      }

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, MVT::i32);
      Ptr = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
      return;
    }
  }
  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, MVT::i16);
    return;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i16);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr,
                                           SDValue &Offset) const {
  SDValue Ptr, SOffset, Offen, Idxen, Addr64, GLC, SLC, TFE;

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);
    SRsrc = wrapAddr64Rsrc(CurDAG, DL, Ptr);
    return true;
  }
  return false;
}

static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
                         uint32_t RsrcDword1, uint64_t RsrcDword2And3) {

  SDValue PtrLo = DAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
  SDValue PtrHi = DAG->getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
  if (RsrcDword1)
    PtrHi = SDValue(DAG->getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
                                        DAG->getConstant(RsrcDword1, MVT::i32)),
                    0);

  SDValue DataLo = DAG->getTargetConstant(
      RsrcDword2And3 & APInt::getAllOnesValue(32).getZExtValue(), MVT::i32);
  SDValue DataHi = DAG->getTargetConstant(RsrcDword2And3 >> 32, MVT::i32);

  const SDValue Ops[] = { PtrLo, PtrHi, DataLo, DataHi };
  return SDValue(DAG->getMachineNode(AMDGPU::SI_BUFFER_RSRC, DL,
                                     MVT::v4i32, Ops), 0);
}
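// Illustrative note (not from the original file): buildRSRC assembles the
// four 32-bit dwords of an SI buffer resource descriptor from a 64-bit base
// pointer and a 64-bit "data" half. The split of RsrcDword2And3 is plain
// bit slicing:
//
//   uint32_t DataLo = Rsrc & 0xffffffff;   // dword 2 (e.g. buffer size)
//   uint32_t DataHi = Rsrc >> 32;          // dword 3 (format / flags)
//
// giving the descriptor layout { PtrLo, PtrHi | RsrcDword1, DataLo, DataHi }.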
/// \brief Return a resource descriptor with the 'Add TID' bit enabled.
/// The TID (Thread ID) is multiplied by the stride value (bits [61:48]
/// of the resource descriptor) to create an offset, which is added to
/// the resource pointer.
static SDValue buildScratchRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {

  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
                  0xffffffff; // Size

  return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  unsigned ScratchPtrReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
  unsigned ScratchOffsetReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
  Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
                                ScratchOffsetReg, MVT::i32);

  Rsrc = buildScratchRSRC(CurDAG, DL,
                          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                                                 MRI.getLiveInVirtReg(ScratchPtrReg),
                                                 MVT::i64));
  SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                                   MRI.getLiveInVirtReg(ScratchOffsetReg),
                                   MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (isLegalMUBUFImmOffset(C1)) {
      VAddr = Addr.getOperand(0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
      return true;
    }
  }

  // (add FI, n0)
  if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
      isa<FrameIndexSDNode>(Addr.getOperand(0))) {
    VAddr = Addr.getOperand(1);
    ImmOffset = Addr.getOperand(0);
    return true;
  }

  // (FI)
  if (isa<FrameIndexSDNode>(Addr)) {
    VAddr = SDValue(CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32,
                                           CurDAG->getConstant(0, MVT::i32)),
                    0);
    ImmOffset = Addr;
    return true;
  }

  // (node)
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);
    SRsrc = buildRSRC(CurDAG, DL, Ptr, 0, Rsrc);
    return true;
  }
  return false;
}
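// Worked example (illustrative, not from the original file): MUBUF
// instructions carry a 12-bit unsigned immediate offset, which is what
// isLegalMUBUFImmOffset checks:
//
//   isUInt<12>(4095)   // true  -> folded into the instruction's offset field
//   isUInt<12>(4096)   // false -> the add stays in the address
//
// Larger or variable offsets therefore end up in VAddr (addr64) or SOffset
// instead.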
// FIXME: This is incorrect and only enough to be able to compile.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
  SDLoc DL(N);

  assert(Subtarget.hasFlatAddressSpace() &&
         "addrspacecast only supported with flat address space!");

  assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
          ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
         "Cannot cast address space to / from constant address!");

  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
         "Can only cast to / from flat address space!");

  // The flat instructions read the address from the VGPR holding it, so an
  // address-space cast only reinterprets the base VGPR; it is enough to
  // insert a trunc / bitcast / zext.

  SDValue Src = ASC->getOperand(0);
  EVT DestVT = ASC->getValueType(0);
  EVT SrcVT = Src.getValueType();

  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned DestSize = DestVT.getSizeInBits();

  if (SrcSize > DestSize) {
    assert(SrcSize == 64 && DestSize == 32);
    return CurDAG->getMachineNode(
        TargetOpcode::EXTRACT_SUBREG,
        DL,
        DestVT,
        Src,
        CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32));
  }

  if (DestSize > SrcSize) {
    assert(SrcSize == 32 && DestSize == 64);

    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);

    const SDValue Ops[] = {
      RC,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                     CurDAG->getConstant(0, MVT::i32)), 0),
      CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  assert(SrcSize == 64 && DestSize == 64);
  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {

  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, MVT::i32);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  // FIXME: Handle Clamp and Omod
  Clamp = CurDAG->getTargetConstant(0, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}
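// Illustrative note (not from the original file): SelectVOP3Mods peels
// source modifiers off a value so they can be encoded in the VOP3
// instruction word instead of as separate operations. For example:
//
//   (fneg (fabs x))  ->  Src = x,  Mods = SISrcMods::NEG | SISrcMods::ABS
//   (fabs x)         ->  Src = x,  Mods = SISrcMods::ABS
//   x                ->  Src = x,  Mods = 0
//
// Note the order: an fabs wrapped inside an fneg is fully matched, but an
// fneg nested inside an fabs is not, since FNEG is checked first.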
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more.
    for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {

      SDNode *Node = I;

      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}