//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"

R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
  : AMDGPUInstrInfo(st),
    RI(st)
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const {
  unsigned VectorComponents = 0;
  if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
       AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
      (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
       AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
    VectorComponents = 4;
  } else if ((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
              AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
             (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
              AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
    VectorComponents = 2;
  }

  if (VectorComponents > 0) {
    for (unsigned I = 0; I < VectorComponents; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                              RI.getSubReg(DestReg, SubRegIndex),
                              RI.getSubReg(SrcReg, SubRegIndex))
                              .addReg(DestReg,
                                      RegState::Define | RegState::Implicit);
    }
  } else {
    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                                                  DestReg, SrcReg);
    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
                      .setIsKill(KillSrc);
  }
}

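// A rough sketch of what copyPhysReg above emits for a 128-bit copy (register
// names here are illustrative, not taken from a real compilation): the copy
// T1 <- T0 becomes four single-channel MOVs, each also marking the whole
// vector register as an implicit def so liveness of the super-register is
// preserved:
//   MOV T1.X, T0.X   ; implicit-def T1
//   MOV T1.Y, T0.Y   ; implicit-def T1
//   MOV T1.Z, T0.Z   ; implicit-def T1
//   MOV T1.W, T0.W   ; implicit-def T1
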
/// \returns true if \p MBBI can be moved into a new basic block.
bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) const {
  for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
                                        E = MBBI->operands_end(); I != E; ++I) {
    if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
        I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
      return false;
  }
  return true;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as place holders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::RETURN:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
  return false;
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return (TargetFlags & R600_InstFlag::ALU_INST);
}

bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::OP1) |
          (TargetFlags & R600_InstFlag::OP2) |
          (TargetFlags & R600_InstFlag::OP3));
}

bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::LDS_1A) |
          (TargetFlags & R600_InstFlag::LDS_1A1D) |
          (TargetFlags & R600_InstFlag::LDS_1A2D));
}

bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
}

bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
}

bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
  if (isALUInstr(MI->getOpcode()))
    return true;
  if (isVector(*MI) || isCubeOp(MI->getOpcode()))
    return true;
  switch (MI->getOpcode()) {
  case AMDGPU::PRED_X:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::COPY:
  case AMDGPU::DOT_4:
    return true;
  default:
    return false;
  }
}

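// Background for the two predicates below (a summary, not authoritative
// hardware documentation): R600-family VLIW instruction groups have four
// vector slots (X, Y, Z, W) plus a scalar "Trans" slot, and some opcodes are
// restricted to one kind of slot, which is encoded via their scheduling
// class. Cayman drops the Trans unit, which is why isTransOnly bails out
// early when the subtarget has the Cayman ISA.
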
bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
  if (ST.hasCaymanISA())
    return false;
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
}

bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
  return isTransOnly(MI->getOpcode());
}

bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
}

bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
  return isVectorOnly(MI->getOpcode());
}

bool R600InstrInfo::isExport(unsigned Opcode) const {
  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
}

bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
  return ST.hasVertexCache() && IS_VTX(get(Opcode));
}

bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
  const MachineFunction *MF = MI->getParent()->getParent();
  const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
  return MFI->getShaderType() != ShaderType::COMPUTE &&
         usesVertexCache(MI->getOpcode());
}

bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}

bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
  const MachineFunction *MF = MI->getParent()->getParent();
  const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
  return (MFI->getShaderType() == ShaderType::COMPUTE &&
          usesVertexCache(MI->getOpcode())) ||
         usesTextureCache(MI->getOpcode());
}

bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::KILLGT:
  case AMDGPU::GROUP_BARRIER:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
  return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
  return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
  if (!isALUInstr(MI->getOpcode())) {
    return false;
  }
  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
                                        E = MI->operands_end(); I != E; ++I) {
    if (!I->isReg() || !I->isUse() ||
        TargetRegisterInfo::isVirtualRegister(I->getReg()))
      continue;

    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
      return true;
  }
  return false;
}

int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
  static const unsigned OpTable[] = {
    AMDGPU::OpName::src0,
    AMDGPU::OpName::src1,
    AMDGPU::OpName::src2
  };

  assert(SrcNum < 3);
  return getOperandIdx(Opcode, OpTable[SrcNum]);
}

#define SRC_SEL_ROWS 11
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
  static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
  };

  for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
    if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
      return getOperandIdx(Opcode, SrcSelTable[i][1]);
    }
  }
  return -1;
}
#undef SRC_SEL_ROWS

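// getSrcs below pairs each source operand with the immediate that gives it
// meaning: the selector from the matching *_sel operand for ALU_CONST
// sources, the literal bits for ALU_LITERAL_X sources, and 0 for plain
// register sources. As an illustrative sketch (not a real instruction dump),
// for "MUL dst, ALU_CONST, ALU_LITERAL_X" it would return
// {(&src0, <src0_sel imm>), (&src1, <literal imm>)}.
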
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    static const unsigned OpTable[8][2] = {
      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
    };

    for (unsigned j = 0; j < 8; j++) {
      MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                        OpTable[j][0]));
      unsigned Reg = MO.getReg();
      if (Reg == AMDGPU::ALU_CONST) {
        unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                    OpTable[j][1])).getImm();
        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
        continue;
      }
    }
    return Result;
  }

  static const unsigned OpTable[3][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
  };

  for (unsigned j = 0; j < 3; j++) {
    int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
    if (SrcIdx < 0)
      break;
    MachineOperand &MO = MI->getOperand(SrcIdx);
    unsigned Reg = MI->getOperand(SrcIdx).getReg();
    if (Reg == AMDGPU::ALU_CONST) {
      unsigned Sel = MI->getOperand(
          getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
      continue;
    }
    if (Reg == AMDGPU::ALU_LITERAL_X) {
      unsigned Imm = MI->getOperand(
          getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
      continue;
    }
    Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
  }
  return Result;
}

std::vector<std::pair<int, unsigned> >
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
                           const DenseMap<unsigned, unsigned> &PV,
                           unsigned &ConstCount) const {
  ConstCount = 0;
  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
  const std::pair<int, unsigned> DummyPair(-1, 0);
  std::vector<std::pair<int, unsigned> > Result;
  unsigned i = 0;
  for (unsigned n = Srcs.size(); i < n; ++i) {
    unsigned Reg = Srcs[i].first->getReg();
    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
    if (Reg == AMDGPU::OQAP) {
      Result.push_back(std::pair<int, unsigned>(Index, 0));
    }
    if (PV.find(Reg) != PV.end()) {
      // 255 is used to tell it's a PS/PV reg
      Result.push_back(std::pair<int, unsigned>(255, 0));
      continue;
    }
    if (Index > 127) {
      ConstCount++;
      Result.push_back(DummyPair);
      continue;
    }
    unsigned Chan = RI.getHWRegChan(Reg);
    Result.push_back(std::pair<int, unsigned>(Index, Chan));
  }
  for (; i < 3; ++i)
    Result.push_back(DummyPair);
  return Result;
}

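// In the (index, channel) pairs produced by ExtractSrcs above, index -1
// stands for an unused source, 255 for a PS/PV forwarding register (exempt
// from bank checks), and indices above 127 for constants, which are tallied
// separately in ConstCount. The helpers below permute and check those pairs
// against the read-port model.
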
static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
        R600InstrInfo::BankSwizzle Swz) {
  if (Src[0] == Src[1])
    Src[1].first = -1;
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210:
    break;
  case R600InstrInfo::ALU_VEC_021_SCL_122:
    std::swap(Src[1], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_102_SCL_221:
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_120_SCL_212:
    std::swap(Src[0], Src[1]);
    std::swap(Src[0], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_201:
    std::swap(Src[0], Src[2]);
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_210:
    std::swap(Src[0], Src[2]);
    break;
  }
  return Src;
}

static unsigned
getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210: {
    unsigned Cycles[3] = { 2, 1, 0};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_021_SCL_122: {
    unsigned Cycles[3] = { 1, 2, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_120_SCL_212: {
    unsigned Cycles[3] = { 2, 1, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_102_SCL_221: {
    unsigned Cycles[3] = { 2, 2, 1};
    return Cycles[Op];
  }
  default:
    llvm_unreachable("Wrong Swizzle for Trans Slot");
    return 0;
  }
}

/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
/// in the same Instruction Group while meeting read port limitations given a
/// Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  int Vector[4][3];
  memset(Vector, -1, sizeof(Vector));
  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
    const std::vector<std::pair<int, unsigned> > &Srcs =
        Swizzle(IGSrcs[i], Swz[i]);
    for (unsigned j = 0; j < 3; j++) {
      const std::pair<int, unsigned> &Src = Srcs[j];
      if (Src.first < 0 || Src.first == 255)
        continue;
      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
          // The value from output queue A (denoted by register OQAP) can
          // only be fetched during the first cycle.
          return 0;
        }
        // OQAP does not count towards the normal read port restrictions
        continue;
      }
      if (Vector[Src.second][j] < 0)
        Vector[Src.second][j] = Src.first;
      if (Vector[Src.second][j] != Src.first)
        return i;
    }
  }
  // Now check Trans Alu
  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransSrcs[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (Src.first == 255)
      continue;
    if (Vector[Src.second][Cycle] < 0)
      Vector[Src.second][Cycle] = Src.first;
    if (Vector[Src.second][Cycle] != Src.first)
      return IGSrcs.size() - 1;
  }
  return IGSrcs.size();
}

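// The search below treats the per-instruction bank_swizzle values as digits
// of a counter: NextPossibleSolution bumps the digit at the first failing
// instruction and resets everything after it, so whole subtrees of doomed
// assignments are skipped. An illustrative step: with a failure at index 1,
// the sequence [ALU_VEC_012_SCL_210, ALU_VEC_210, ALU_VEC_210] advances to
// [ALU_VEC_021_SCL_122, ALU_VEC_012_SCL_210, ALU_VEC_012_SCL_210].
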
/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
/// (in lexicographic terms) swizzle sequence assuming that all swizzles after
/// Idx can be skipped.
static bool
NextPossibleSolution(
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    unsigned Idx) {
  assert(Idx < SwzCandidate.size());
  int ResetIdx = Idx;
  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
    ResetIdx--;
  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
  }
  if (ResetIdx == -1)
    return false;
  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
  return true;
}

/// Enumerate all possible swizzle sequences to find one that meets all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  unsigned ValidUpTo = 0;
  do {
    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
    if (ValidUpTo == IGSrcs.size())
      return true;
  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
  return false;
}

/// Instructions in the Trans slot can't read a gpr at cycle 0 if they also
/// read a const, and can't read a gpr at cycle 1 if they read two consts.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                  const std::vector<std::pair<int, unsigned> > &TransOps,
                  unsigned ConstCount) {
  // TransALU can't read 3 constants
  if (ConstCount > 2)
    return false;
  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransOps[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (ConstCount > 0 && Cycle == 0)
      return false;
    if (ConstCount > 1 && Cycle == 1)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
                                       const DenseMap<unsigned, unsigned> &PV,
                                       std::vector<BankSwizzle> &ValidSwizzle,
                                       bool isLastAluTrans)
    const {
  // TODO: support shared src0 - src1 operand

  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
  ValidSwizzle.clear();
  unsigned ConstCount;
  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
                                AMDGPU::OpName::bank_swizzle);
    ValidSwizzle.push_back((R600InstrInfo::BankSwizzle)
                           IG[i]->getOperand(Op).getImm());
  }
  std::vector<std::pair<int, unsigned> > TransOps;
  if (!isLastAluTrans)
    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);

  TransOps = IGSrcs.back();
  IGSrcs.pop_back();
  ValidSwizzle.pop_back();

  static const R600InstrInfo::BankSwizzle TransSwz[] = {
    ALU_VEC_012_SCL_210,
    ALU_VEC_021_SCL_122,
    ALU_VEC_120_SCL_212,
    ALU_VEC_102_SCL_221
  };
  for (unsigned i = 0; i < 4; i++) {
    TransBS = TransSwz[i];
    if (!isConstCompatible(TransBS, TransOps, ConstCount))
      continue;
    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
                                           TransBS);
    if (Result) {
      ValidSwizzle.push_back(TransBS);
      return true;
    }
  }

  return false;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    const {
  assert(Consts.size() <= 12 && "Too many operands in instructions group");
  unsigned Pair1 = 0, Pair2 = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    unsigned ReadConstHalf = Consts[i] & 2;
    unsigned ReadConstIndex = Consts[i] & (~3);
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    if (!Pair1) {
      Pair1 = ReadHalfConst;
      continue;
    }
    if (Pair1 == ReadHalfConst)
      continue;
    if (!Pair2) {
      Pair2 = ReadHalfConst;
      continue;
    }
    if (Pair2 != ReadHalfConst)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
    const {
  std::vector<unsigned> Consts;
  SmallSet<int64_t, 4> Literals;
  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
    MachineInstr *MI = MIs[i];
    if (!isALUInstr(MI->getOpcode()))
      continue;

    const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
        getSrcs(MI);

    for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
      std::pair<MachineOperand *, unsigned> Src = Srcs[j];
      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
        Literals.insert(Src.second);
      if (Literals.size() > 4)
        return false;
      if (Src.first->getReg() == AMDGPU::ALU_CONST)
        Consts.push_back(Src.second);
      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
        Consts.push_back((Index << 2) | Chan);
      }
    }
  }
  return fitsConstReadLimitations(Consts);
}

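// Taken together, the two overloads above encode each constant read as
// (index << 2) | channel and then check that the whole instruction group
// touches at most two distinct constant half-pairs and at most four distinct
// literals; that is how the kcache banking limits are modelled here.
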
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
                                                        const ScheduleDAG *DAG) const {
  const InstrItineraryData *II =
      TM->getSubtargetImpl()->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return nullptr;
}

static
bool isJump(unsigned Opcode) {
  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}

static bool isBranch(unsigned Opcode) {
  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
         Opcode == AMDGPU::BRANCH_COND_f32;
}

bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const {
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  // AMDGPU::BRANCH* instructions are only available after isel and are not
  // handled
  if (isBranch(I->getOpcode()))
    return true;
  if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
    return false;
  }

  // Remove successive JUMPs
  while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
    MachineBasicBlock::iterator PriorI = std::prev(I);
    if (AllowModify)
      I->removeFromParent();
    I = PriorI;
  }
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
    if (LastOpc == AMDGPU::JUMP) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (LastOpc == AMDGPU::JUMP_COND) {
      MachineInstr *predSet = I;
      while (!isPredicateSetter(predSet->getOpcode())) {
        predSet = --I;
      }
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(predSet->getOperand(1));
      Cond.push_back(predSet->getOperand(2));
      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

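// The condition vector built above (and consumed by InsertBranch and
// ReverseBranchCondition below) has a fixed shape as used in this file:
// Cond[0] carries PRED_X's operand 1, Cond[1] the comparison-opcode immediate
// (the value ReverseBranchCondition flips between OPCODE_IS_ZERO* and
// OPCODE_IS_NOT_ZERO*), and Cond[2] a PRED_SEL register that selects the
// polarity of the predicate.
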
static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
       It != E; ++It) {
    if (It->getOpcode() == AMDGPU::CF_ALU ||
        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
      return std::prev(It.base());
  }
  return MBB.end();
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const {
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate!");
      addFlag(PredSet, 0, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
          .addMBB(TBB)
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
      if (CfAlu == MBB.end())
        return 1;
      assert(CfAlu->getOpcode() == AMDGPU::CF_ALU);
      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate!");
    addFlag(PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
        .addMBB(TBB)
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      return 2;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU);
    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    return 2;
  }
}

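// Note the CF_ALU <-> CF_ALU_PUSH_BEFORE pairing: inserting a conditional
// jump retags the last ALU clause as CF_ALU_PUSH_BEFORE so the state is
// pushed before the branch, and RemoveBranch below undoes both the retagging
// and the MO_FLAG_PUSH on the predicate setter when the jump goes away.
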
unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  // Note : we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated. Until we have proper support for instruction clauses in the
  // backend, we will mark KILL* instructions as unpredicable.

  if (MI->getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
    // If the clause starts in the middle of the MBB, then the MBB contains
    // more than a single clause; we are unable to predicate several clauses.
    if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
      return false;
    // TODO: We don't support KC merging atm
    if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
      return false;
    return true;
  } else if (isVector(*MI)) {
    return false;
  } else {
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCyles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCyles,
                                         const BranchProbability &Probability)
    const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}

bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI->getOpcode());
}

bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                 const SmallVectorImpl<MachineOperand> &Pred2) const {
  return false;
}

bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                                    const SmallVectorImpl<MachineOperand> &Pred) const {
  int PIdx = MI->findFirstPredOperandIdx();

  if (MI->getOpcode() == AMDGPU::CF_ALU) {
    MI->getOperand(8).setImm(0);
    return true;
  }

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
        .setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

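// PredicateInstruction above expects the same three-operand condition shape
// produced by AnalyzeBranch: it only ever reads Pred[2], the PRED_SEL
// register, which it copies into the instruction's pred_sel operand (or into
// all four per-slot pred_sel operands of a DOT_4) alongside an implicit
// PREDICATE_BIT use.
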
unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
  return 2;
}

unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr *MI,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
  case AMDGPU::R600_EXTRACT_ELT_V2:
  case AMDGPU::R600_EXTRACT_ELT_V4:
    buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(),
                      RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address
                      MI->getOperand(2).getReg(),
                      RI.getHWRegChan(MI->getOperand(1).getReg()));
    break;
  case AMDGPU::R600_INSERT_ELT_V2:
  case AMDGPU::R600_INSERT_ELT_V4:
    buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value
                       RI.getHWRegIndex(MI->getOperand(1).getReg()),  // Address
                       MI->getOperand(3).getReg(),                    // Offset
                       RI.getHWRegChan(MI->getOperand(1).getReg()));  // Channel
    break;
  }
  MI->eraseFromParent();
  return true;
}

void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                             const MachineFunction &MF) const {
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
      MF.getSubtarget().getFrameLowering());

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  if (End == -1)
    return;

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
    Reserved.set(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      Reserved.set(Reg);
    }
  }
}

unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                 unsigned Channel) const {
  // XXX: Remove when we support a stack width > 2
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::R600_TReg32_XRegClass;
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
  default: llvm_unreachable("Invalid Channel");
  case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
  case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
  case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
  case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                                    AddrReg, ValueReg)
                                .addReg(AMDGPU::AR_X,
                                        RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

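// Indirect writes are thus a two-instruction idiom: MOVA_INT_eg loads the
// offset into the address register AR_X (with its own register write
// disabled), and the following MOV stores ValueReg with dst_rel set, i.e.
// the destination is AddrReg relatively indexed by AR_X. buildIndirectRead
// below mirrors this with src0_rel on the source side instead.
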
MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
  default: llvm_unreachable("Invalid Channel");
  case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
  case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
  case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
  case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X,
                                               OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                                    ValueReg,
                                                    AddrReg)
                                .addReg(AMDGPU::AR_X,
                                        RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}

unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
                                    DstReg);           // $dst

  if (Src1Reg) {
    MIB.addImm(0)     // $update_exec_mask
       .addImm(0);    // $update_predicate
  }
  MIB.addImm(1)        // $write
     .addImm(0)        // $omod
     .addImm(0)        // $dst_rel
     .addImm(0)        // $dst_clamp
     .addReg(Src0Reg)  // $src0
     .addImm(0)        // $src0_neg
     .addImm(0)        // $src0_rel
     .addImm(0)        // $src0_abs
     .addImm(-1);      // $src0_sel

  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);     // $src1_sel
  }

  // XXX: The r600g finalizer expects this to be 1; once we've moved the
  // scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)                     // $last
     .addReg(AMDGPU::PRED_SEL_OFF)  // $pred_sel
     .addImm(0)                     // $literal
     .addImm(0);                    // $bank_swizzle

  return MIB;
}

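// A minimal usage sketch of buildDefaultInstruction, mirroring buildMovImm
// further down: emit "MOV DstReg, ALU_LITERAL_X" and then patch the literal
// operand, relying on the defaults above for every modifier slot. The names
// are the ones used in this file; MBB, I, DstReg, and Imm come from the
// caller.
//   MachineInstr *Mov = buildDefaultInstruction(MBB, I, AMDGPU::MOV,
//                                               DstReg, AMDGPU::ALU_LITERAL_X);
//   setImmOperand(Mov, AMDGPU::OpName::literal, Imm);
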
#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert(MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  static const unsigned Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
      .setReg(MO.getReg());

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert(MO.isImm());
    setImmOperand(MIB, Operands[i], MO.getImm());
  }
  MIB->getOperand(20).setImm(0);
  return MIB;
}

MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                 AMDGPU::ALU_LITERAL_X);
  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned DstReg, unsigned SrcReg) const {
  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}

int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}

int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return AMDGPU::getNamedOperandIdx(Opcode, Op);
}

void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(*MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI->getOperand(Idx).isImm());
  MI->getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}

MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
    // that uses native encoding.
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
      case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

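// For instructions without native modifier operands, flags are packed into a
// single immediate: operand i's flags occupy the bit range starting at
// NUM_MO_FLAGS * i, so addFlag above ORs in (Flag << (NUM_MO_FLAGS * Operand))
// and clearFlag below masks the same bits back out.
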
void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}