LLVM API Documentation
00001 //===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 /// \file 00011 /// \brief Interface definition of the TargetLowering class that is common 00012 /// to all AMD GPUs. 00013 // 00014 //===----------------------------------------------------------------------===// 00015 00016 #ifndef LLVM_LIB_TARGET_R600_AMDGPUISELLOWERING_H 00017 #define LLVM_LIB_TARGET_R600_AMDGPUISELLOWERING_H 00018 00019 #include "llvm/Target/TargetLowering.h" 00020 00021 namespace llvm { 00022 00023 class AMDGPUMachineFunction; 00024 class AMDGPUSubtarget; 00025 class MachineRegisterInfo; 00026 00027 class AMDGPUTargetLowering : public TargetLowering { 00028 protected: 00029 const AMDGPUSubtarget *Subtarget; 00030 00031 private: 00032 SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV, 00033 const SDValue &InitPtr, 00034 SDValue Chain, 00035 SelectionDAG &DAG) const; 00036 SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; 00037 SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; 00038 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; 00039 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; 00040 /// \brief Lower vector stores by merging the vector elements into an integer 00041 /// of the same bitwidth. 00042 SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const; 00043 /// \brief Split a vector store into multiple scalar stores. 00044 /// \returns The resulting chain. 00045 00046 SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; 00047 SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const; 00048 SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const; 00049 SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const; 00050 SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; 00051 SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const; 00052 SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; 00053 00054 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 00055 00056 SDValue ExpandSIGN_EXTEND_INREG(SDValue Op, 00057 unsigned BitsDiff, 00058 SelectionDAG &DAG) const; 00059 SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; 00060 00061 SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; 00062 SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const; 00063 00064 protected: 00065 static EVT getEquivalentMemType(LLVMContext &Context, EVT VT); 00066 static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT); 00067 00068 virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, 00069 SelectionDAG &DAG) const; 00070 00071 /// \brief Split a vector load into a scalar load of each component. 00072 SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const; 00073 00074 /// \brief Split a vector load into 2 loads of half the vector. 00075 SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const; 00076 00077 /// \brief Split a vector store into a scalar store of each component. 00078 SDValue ScalarizeVectorStore(SDValue Op, SelectionDAG &DAG) const; 00079 00080 /// \brief Split a vector store into 2 stores of half the vector. 00081 SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const; 00082 00083 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; 00084 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; 00085 SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; 00086 SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const; 00087 bool isHWTrueValue(SDValue Op) const; 00088 bool isHWFalseValue(SDValue Op) const; 00089 00090 /// The SelectionDAGBuilder will automatically promote function arguments 00091 /// with illegal types. However, this does not work for the AMDGPU targets 00092 /// since the function arguments are stored in memory as these illegal types. 00093 /// In order to handle this properly we need to get the origianl types sizes 00094 /// from the LLVM IR Function and fixup the ISD:InputArg values before 00095 /// passing them to AnalyzeFormalArguments() 00096 void getOriginalFunctionArgs(SelectionDAG &DAG, 00097 const Function *F, 00098 const SmallVectorImpl<ISD::InputArg> &Ins, 00099 SmallVectorImpl<ISD::InputArg> &OrigIns) const; 00100 void AnalyzeFormalArguments(CCState &State, 00101 const SmallVectorImpl<ISD::InputArg> &Ins) const; 00102 00103 public: 00104 AMDGPUTargetLowering(TargetMachine &TM); 00105 00106 bool isFAbsFree(EVT VT) const override; 00107 bool isFNegFree(EVT VT) const override; 00108 bool isTruncateFree(EVT Src, EVT Dest) const override; 00109 bool isTruncateFree(Type *Src, Type *Dest) const override; 00110 00111 bool isZExtFree(Type *Src, Type *Dest) const override; 00112 bool isZExtFree(EVT Src, EVT Dest) const override; 00113 bool isZExtFree(SDValue Val, EVT VT2) const override; 00114 00115 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; 00116 00117 MVT getVectorIdxTy() const override; 00118 bool isSelectSupported(SelectSupportKind) const override; 00119 00120 bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; 00121 bool ShouldShrinkFPConstant(EVT VT) const override; 00122 00123 bool isLoadBitCastBeneficial(EVT, EVT) const override; 00124 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, 00125 bool isVarArg, 00126 const SmallVectorImpl<ISD::OutputArg> &Outs, 00127 const SmallVectorImpl<SDValue> &OutVals, 00128 SDLoc DL, SelectionDAG &DAG) const override; 00129 SDValue LowerCall(CallLoweringInfo &CLI, 00130 SmallVectorImpl<SDValue> &InVals) const override; 00131 00132 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 00133 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 00134 void ReplaceNodeResults(SDNode * N, 00135 SmallVectorImpl<SDValue> &Results, 00136 SelectionDAG &DAG) const override; 00137 00138 SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; 00139 SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const; 00140 SDValue CombineMinMax(SDNode *N, SelectionDAG &DAG) const; 00141 const char* getTargetNodeName(unsigned Opcode) const override; 00142 00143 virtual SDNode *PostISelFolding(MachineSDNode *N, 00144 SelectionDAG &DAG) const { 00145 return N; 00146 } 00147 00148 /// \brief Determine which of the bits specified in \p Mask are known to be 00149 /// either zero or one and return them in the \p KnownZero and \p KnownOne 00150 /// bitsets. 00151 void computeKnownBitsForTargetNode(const SDValue Op, 00152 APInt &KnownZero, 00153 APInt &KnownOne, 00154 const SelectionDAG &DAG, 00155 unsigned Depth = 0) const override; 00156 00157 unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG, 00158 unsigned Depth = 0) const override; 00159 00160 /// \brief Helper function that adds Reg to the LiveIn list of the DAG's 00161 /// MachineFunction. 00162 /// 00163 /// \returns a RegisterSDNode representing Reg. 00164 virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, 00165 const TargetRegisterClass *RC, 00166 unsigned Reg, EVT VT) const; 00167 }; 00168 00169 namespace AMDGPUISD { 00170 00171 enum { 00172 // AMDIL ISD Opcodes 00173 FIRST_NUMBER = ISD::BUILTIN_OP_END, 00174 CALL, // Function call based on a single integer 00175 UMUL, // 32bit unsigned multiplication 00176 RET_FLAG, 00177 BRANCH_COND, 00178 // End AMDIL ISD Opcodes 00179 DWORDADDR, 00180 FRACT, 00181 CLAMP, 00182 MAD, // Multiply + add with same result as the separate operations. 00183 00184 // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi. 00185 // Denormals handled on some parts. 00186 COS_HW, 00187 SIN_HW, 00188 FMAX, 00189 SMAX, 00190 UMAX, 00191 FMIN, 00192 SMIN, 00193 UMIN, 00194 URECIP, 00195 DIV_SCALE, 00196 DIV_FMAS, 00197 DIV_FIXUP, 00198 TRIG_PREOP, // 1 ULP max error for f64 00199 00200 // RCP, RSQ - For f32, 1 ULP max error, no denormal handling. 00201 // For f64, max error 2^29 ULP, handles denormals. 00202 RCP, 00203 RSQ, 00204 RSQ_LEGACY, 00205 RSQ_CLAMPED, 00206 LDEXP, 00207 DOT4, 00208 BFE_U32, // Extract range of bits with zero extension to 32-bits. 00209 BFE_I32, // Extract range of bits with sign extension to 32-bits. 00210 BFI, // (src0 & src1) | (~src0 & src2) 00211 BFM, // Insert a range of bits into a 32-bit word. 00212 BREV, // Reverse bits. 00213 MUL_U24, 00214 MUL_I24, 00215 MAD_U24, 00216 MAD_I24, 00217 TEXTURE_FETCH, 00218 EXPORT, 00219 CONST_ADDRESS, 00220 REGISTER_LOAD, 00221 REGISTER_STORE, 00222 LOAD_INPUT, 00223 SAMPLE, 00224 SAMPLEB, 00225 SAMPLED, 00226 SAMPLEL, 00227 00228 // These cvt_f32_ubyte* nodes need to remain consecutive and in order. 00229 CVT_F32_UBYTE0, 00230 CVT_F32_UBYTE1, 00231 CVT_F32_UBYTE2, 00232 CVT_F32_UBYTE3, 00233 /// This node is for VLIW targets and it is used to represent a vector 00234 /// that is stored in consecutive registers with the same channel. 00235 /// For example: 00236 /// |X |Y|Z|W| 00237 /// T0|v.x| | | | 00238 /// T1|v.y| | | | 00239 /// T2|v.z| | | | 00240 /// T3|v.w| | | | 00241 BUILD_VERTICAL_VECTOR, 00242 /// Pointer to the start of the shader's constant data. 00243 CONST_DATA_PTR, 00244 FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, 00245 STORE_MSKOR, 00246 LOAD_CONSTANT, 00247 TBUFFER_STORE_FORMAT, 00248 LAST_AMDGPU_ISD_NUMBER 00249 }; 00250 00251 00252 } // End namespace AMDGPUISD 00253 00254 } // End namespace llvm 00255 00256 #endif