LLVM API Documentation

AMDGPUInstrInfo.cpp
//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

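// Pull in the TableGen-generated pieces of AMDGPUInstrInfo: the
// constructor/destructor bodies, the named-operand lookup tables, and the
// instruction mapping tables.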
#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
  : AMDGPUGenInstrInfo(-1, -1), RI(st), ST(st) { }

const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
  return RI;
}

bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                            unsigned &SrcReg, unsigned &DstReg,
                                            unsigned &SubIdx) const {
  // TODO: Implement this function
  return false;
}

unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                              int &FrameIndex) const {
  // TODO: Implement this function
  return 0;
}

unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                    int &FrameIndex) const {
  // TODO: Implement this function
  return 0;
}

bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
                                           const MachineMemOperand *&MMO,
                                           int &FrameIndex) const {
  // TODO: Implement this function
  return false;
}

unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
                                               int &FrameIndex) const {
  // TODO: Implement this function
  return 0;
}

unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
                                                     int &FrameIndex) const {
  // TODO: Implement this function
  return 0;
}

bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
                                            const MachineMemOperand *&MMO,
                                            int &FrameIndex) const {
  // TODO: Implement this function
  return false;
}

MachineInstr *
AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                       MachineBasicBlock::iterator &MBBI,
                                       LiveVariables *LV) const {
  // TODO: Implement this function
  return nullptr;
}
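
// Advance iter to the next branch instruction (BRANCH or BRANCH_COND_*) in
// MBB. Returns true and leaves iter pointing at the branch if one is found,
// or false if the end of the block is reached first.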
bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
                                         MachineBasicBlock &MBB) const {
  while (iter != MBB.end()) {
    switch (iter->getOpcode()) {
    default:
      break;
    case AMDGPU::BRANCH_COND_i32:
    case AMDGPU::BRANCH_COND_f32:
    case AMDGPU::BRANCH:
      return true;
    }
    ++iter;
  }
  return false;
}

void
AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     unsigned SrcReg, bool isKill,
                                     int FrameIndex,
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const {
  llvm_unreachable("Not Implemented");
}

void
AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned DestReg, int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  llvm_unreachable("Not Implemented");
}

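// Lower the RegisterLoad/RegisterStore pseudo instructions that model
// indirect register addressing. When the offset operand is the constant
// INDIRECT_BASE_ADDR the access becomes a plain move to/from the indirectly
// addressable register class; otherwise it is expanded to a target-specific
// indirect read or write.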
bool AMDGPUInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock *MBB = MI->getParent();
  int OffsetOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                               AMDGPU::OpName::addr);
  // addr is a custom operand with multiple MI operands, and only the
  // first MI operand is given a name.
  int RegOpIdx = OffsetOpIdx + 1;
  int ChanOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                             AMDGPU::OpName::chan);
  if (isRegisterLoad(*MI)) {
    int DstOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                              AMDGPU::OpName::dst);
    unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
    unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
    unsigned Address = calculateIndirectAddress(RegIndex, Channel);
    unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
    if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
      buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
                    getIndirectAddrRegClass()->getRegister(Address));
    } else {
      buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
                        Address, OffsetReg);
    }
  } else if (isRegisterStore(*MI)) {
    int ValOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                              AMDGPU::OpName::val);
    unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
    unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
    unsigned Address = calculateIndirectAddress(RegIndex, Channel);
    unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
    if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
      buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
                    MI->getOperand(ValOpIdx).getReg());
    } else {
      buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(),
                         calculateIndirectAddress(RegIndex, Channel),
                         OffsetReg);
    }
  } else {
    return false;
  }

  MBB->erase(MI);
  return true;
}

MachineInstr *
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                       MachineInstr *MI,
                                       const SmallVectorImpl<unsigned> &Ops,
                                       int FrameIndex) const {
  // TODO: Implement this function
  return nullptr;
}

MachineInstr *
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                       MachineInstr *MI,
                                       const SmallVectorImpl<unsigned> &Ops,
                                       MachineInstr *LoadMI) const {
  // TODO: Implement this function
  return nullptr;
}

bool
AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
                                      const SmallVectorImpl<unsigned> &Ops) const {
  // TODO: Implement this function
  return false;
}

bool
AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                     unsigned Reg, bool UnfoldLoad,
                                     bool UnfoldStore,
                                     SmallVectorImpl<MachineInstr*> &NewMIs) const {
  // TODO: Implement this function
  return false;
}

bool
AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                                     SmallVectorImpl<SDNode*> &NewNodes) const {
  // TODO: Implement this function
  return false;
}

unsigned
AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
                                            bool UnfoldLoad, bool UnfoldStore,
                                            unsigned *LoadRegIndex) const {
  // TODO: Implement this function
  return 0;
}

bool AMDGPUInstrInfo::enableClusterLoads() const {
  return true;
}

// FIXME: This behaves strangely. If, for example, you have 32 loads and
// stores, the first 16 loads will be interleaved with the stores, and the
// next 16 will be clustered as expected. It should really split them into
// two batches of 16.
//
// Loads are clustered until this returns false, rather than trying to
// schedule groups of stores. This also means we have to deal with saying
// that loads from different address spaces should be clustered, and ones
// which might cause bank conflicts.
//
// This might be deprecated, so it might not be worth much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have 16 or fewer loads in a row and the offsets are within 64
  // bytes, schedule them together.

  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

bool
AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
  const {
  // TODO: Implement this function
  return true;
}

void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI) const {
  // TODO: Implement this function
}

bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
  // TODO: Implement this function
  return false;
}

bool
AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                   const SmallVectorImpl<MachineOperand> &Pred2)
  const {
  // TODO: Implement this function
  return false;
}

bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
                                       std::vector<MachineOperand> &Pred) const {
  // TODO: Implement this function
  return false;
}

bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
  // TODO: Implement this function
  return MI->getDesc().isPredicable();
}

bool
AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  // TODO: Implement this function
  return true;
}

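// RegisterStore/RegisterLoad pseudos are identified through target-specific
// flags (TSFlags) set on the instruction descriptions in TableGen.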
bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
}

bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
}

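// Return the first register index in the indirect address register class that
// may be used for indirect addressing: one past the highest index occupied by
// a live-in register of that class, 0 if there is no such live-in, or -1 if
// the function has no stack objects at all.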
int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = -1;

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
                                            LE = MRI.livein_end();
                                            LI != LE; ++LI) {
    unsigned Reg = LI->first;
    if (TargetRegisterInfo::isVirtualRegister(Reg) ||
        !IndirectRC->contains(Reg))
      continue;

    unsigned RegIndex;
    unsigned RegEnd;
    for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
                                                          ++RegIndex) {
      if (IndirectRC->getRegister(RegIndex) == Reg)
        break;
    }
    Offset = std::max(Offset, (int)RegIndex);
  }

  return Offset + 1;
}

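// Return the end of the indirect register range: the begin index plus the
// offset the target's frame lowering reports for frame index -1, or -1 if the
// function has no stack objects.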
int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // Variable sized objects are not supported.
  assert(!MFI->hasVarSizedObjects());

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  Offset = MF.getTarget()
               .getSubtargetImpl()
               ->getFrameLowering()
               ->getFrameIndexOffset(MF, -1);

  return getIndirectIndexBegin(MF) + Offset;
}

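// Map an MIMG opcode to the variant that writes only the requested number of
// channels; opcodes are returned unchanged for four channels (or any other
// unhandled count).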
int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
  switch (Channels) {
  default: return Opcode;
  case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
  case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
  case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
  }
}

// Wrapper for the TableGen'd function. enum Subtarget is not defined in any
// header file, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcode(Opcode);
}
}
}