LLVM API Documentation
00001 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 /// \file 00011 /// This pass implements instructions packetization for R600. It unsets isLast 00012 /// bit of instructions inside a bundle and substitutes src register with 00013 /// PreviousVector when applicable. 00014 // 00015 //===----------------------------------------------------------------------===// 00016 00017 #include "llvm/Support/Debug.h" 00018 #include "AMDGPU.h" 00019 #include "AMDGPUSubtarget.h" 00020 #include "R600InstrInfo.h" 00021 #include "llvm/CodeGen/DFAPacketizer.h" 00022 #include "llvm/CodeGen/MachineDominators.h" 00023 #include "llvm/CodeGen/MachineFunctionPass.h" 00024 #include "llvm/CodeGen/MachineLoopInfo.h" 00025 #include "llvm/CodeGen/Passes.h" 00026 #include "llvm/CodeGen/ScheduleDAG.h" 00027 #include "llvm/Support/raw_ostream.h" 00028 00029 using namespace llvm; 00030 00031 #define DEBUG_TYPE "packets" 00032 00033 namespace { 00034 00035 class R600Packetizer : public MachineFunctionPass { 00036 00037 public: 00038 static char ID; 00039 R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {} 00040 00041 void getAnalysisUsage(AnalysisUsage &AU) const override { 00042 AU.setPreservesCFG(); 00043 AU.addRequired<MachineDominatorTree>(); 00044 AU.addPreserved<MachineDominatorTree>(); 00045 AU.addRequired<MachineLoopInfo>(); 00046 AU.addPreserved<MachineLoopInfo>(); 00047 MachineFunctionPass::getAnalysisUsage(AU); 00048 } 00049 00050 const char *getPassName() const override { 00051 return "R600 Packetizer"; 00052 } 00053 00054 bool runOnMachineFunction(MachineFunction &Fn) override; 00055 }; 00056 char R600Packetizer::ID = 0; 00057 00058 class R600PacketizerList : public VLIWPacketizerList { 00059 00060 private: 00061 const R600InstrInfo *TII; 00062 const R600RegisterInfo &TRI; 00063 bool VLIW5; 00064 bool ConsideredInstUsesAlreadyWrittenVectorElement; 00065 00066 unsigned getSlot(const MachineInstr *MI) const { 00067 return TRI.getHWRegChan(MI->getOperand(0).getReg()); 00068 } 00069 00070 /// \returns register to PV chan mapping for bundle/single instructions that 00071 /// immediately precedes I. 00072 DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) 00073 const { 00074 DenseMap<unsigned, unsigned> Result; 00075 I--; 00076 if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()) 00077 return Result; 00078 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 00079 if (I->isBundle()) 00080 BI++; 00081 int LastDstChan = -1; 00082 do { 00083 bool isTrans = false; 00084 int BISlot = getSlot(BI); 00085 if (LastDstChan >= BISlot) 00086 isTrans = true; 00087 LastDstChan = BISlot; 00088 if (TII->isPredicated(BI)) 00089 continue; 00090 int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); 00091 if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) 00092 continue; 00093 int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst); 00094 if (DstIdx == -1) { 00095 continue; 00096 } 00097 unsigned Dst = BI->getOperand(DstIdx).getReg(); 00098 if (isTrans || TII->isTransOnly(BI)) { 00099 Result[Dst] = AMDGPU::PS; 00100 continue; 00101 } 00102 if (BI->getOpcode() == AMDGPU::DOT4_r600 || 00103 BI->getOpcode() == AMDGPU::DOT4_eg) { 00104 Result[Dst] = AMDGPU::PV_X; 00105 continue; 00106 } 00107 if (Dst == AMDGPU::OQAP) { 00108 continue; 00109 } 00110 unsigned PVReg = 0; 00111 switch (TRI.getHWRegChan(Dst)) { 00112 case 0: 00113 PVReg = AMDGPU::PV_X; 00114 break; 00115 case 1: 00116 PVReg = AMDGPU::PV_Y; 00117 break; 00118 case 2: 00119 PVReg = AMDGPU::PV_Z; 00120 break; 00121 case 3: 00122 PVReg = AMDGPU::PV_W; 00123 break; 00124 default: 00125 llvm_unreachable("Invalid Chan"); 00126 } 00127 Result[Dst] = PVReg; 00128 } while ((++BI)->isBundledWithPred()); 00129 return Result; 00130 } 00131 00132 void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs) 00133 const { 00134 unsigned Ops[] = { 00135 AMDGPU::OpName::src0, 00136 AMDGPU::OpName::src1, 00137 AMDGPU::OpName::src2 00138 }; 00139 for (unsigned i = 0; i < 3; i++) { 00140 int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]); 00141 if (OperandIdx < 0) 00142 continue; 00143 unsigned Src = MI->getOperand(OperandIdx).getReg(); 00144 const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); 00145 if (It != PVs.end()) 00146 MI->getOperand(OperandIdx).setReg(It->second); 00147 } 00148 } 00149 public: 00150 // Ctor. 00151 R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI) 00152 : VLIWPacketizerList(MF, MLI, true), 00153 TII(static_cast<const R600InstrInfo *>( 00154 MF.getSubtarget().getInstrInfo())), 00155 TRI(TII->getRegisterInfo()) { 00156 VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA(); 00157 } 00158 00159 // initPacketizerState - initialize some internal flags. 00160 void initPacketizerState() override { 00161 ConsideredInstUsesAlreadyWrittenVectorElement = false; 00162 } 00163 00164 // ignorePseudoInstruction - Ignore bundling of pseudo instructions. 00165 bool ignorePseudoInstruction(MachineInstr *MI, 00166 MachineBasicBlock *MBB) override { 00167 return false; 00168 } 00169 00170 // isSoloInstruction - return true if instruction MI can not be packetized 00171 // with any other instruction, which means that MI itself is a packet. 00172 bool isSoloInstruction(MachineInstr *MI) override { 00173 if (TII->isVector(*MI)) 00174 return true; 00175 if (!TII->isALUInstr(MI->getOpcode())) 00176 return true; 00177 if (MI->getOpcode() == AMDGPU::GROUP_BARRIER) 00178 return true; 00179 // XXX: This can be removed once the packetizer properly handles all the 00180 // LDS instruction group restrictions. 00181 if (TII->isLDSInstr(MI->getOpcode())) 00182 return true; 00183 return false; 00184 } 00185 00186 // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ 00187 // together. 00188 bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override { 00189 MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); 00190 if (getSlot(MII) == getSlot(MIJ)) 00191 ConsideredInstUsesAlreadyWrittenVectorElement = true; 00192 // Does MII and MIJ share the same pred_sel ? 00193 int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel), 00194 OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel); 00195 unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0, 00196 PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0; 00197 if (PredI != PredJ) 00198 return false; 00199 if (SUJ->isSucc(SUI)) { 00200 for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) { 00201 const SDep &Dep = SUJ->Succs[i]; 00202 if (Dep.getSUnit() != SUI) 00203 continue; 00204 if (Dep.getKind() == SDep::Anti) 00205 continue; 00206 if (Dep.getKind() == SDep::Output) 00207 if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg()) 00208 continue; 00209 return false; 00210 } 00211 } 00212 00213 bool ARDef = TII->definesAddressRegister(MII) || 00214 TII->definesAddressRegister(MIJ); 00215 bool ARUse = TII->usesAddressRegister(MII) || 00216 TII->usesAddressRegister(MIJ); 00217 if (ARDef && ARUse) 00218 return false; 00219 00220 return true; 00221 } 00222 00223 // isLegalToPruneDependencies - Is it legal to prune dependece between SUI 00224 // and SUJ. 00225 bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override { 00226 return false; 00227 } 00228 00229 void setIsLastBit(MachineInstr *MI, unsigned Bit) const { 00230 unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last); 00231 MI->getOperand(LastOp).setImm(Bit); 00232 } 00233 00234 bool isBundlableWithCurrentPMI(MachineInstr *MI, 00235 const DenseMap<unsigned, unsigned> &PV, 00236 std::vector<R600InstrInfo::BankSwizzle> &BS, 00237 bool &isTransSlot) { 00238 isTransSlot = TII->isTransOnly(MI); 00239 assert (!isTransSlot || VLIW5); 00240 00241 // Is the dst reg sequence legal ? 00242 if (!isTransSlot && !CurrentPacketMIs.empty()) { 00243 if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) { 00244 if (ConsideredInstUsesAlreadyWrittenVectorElement && 00245 !TII->isVectorOnly(MI) && VLIW5) { 00246 isTransSlot = true; 00247 DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump();); 00248 } 00249 else 00250 return false; 00251 } 00252 } 00253 00254 // Are the Constants limitations met ? 00255 CurrentPacketMIs.push_back(MI); 00256 if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { 00257 DEBUG( 00258 dbgs() << "Couldn't pack :\n"; 00259 MI->dump(); 00260 dbgs() << "with the following packets :\n"; 00261 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 00262 CurrentPacketMIs[i]->dump(); 00263 dbgs() << "\n"; 00264 } 00265 dbgs() << "because of Consts read limitations\n"; 00266 ); 00267 CurrentPacketMIs.pop_back(); 00268 return false; 00269 } 00270 00271 // Is there a BankSwizzle set that meet Read Port limitations ? 00272 if (!TII->fitsReadPortLimitations(CurrentPacketMIs, 00273 PV, BS, isTransSlot)) { 00274 DEBUG( 00275 dbgs() << "Couldn't pack :\n"; 00276 MI->dump(); 00277 dbgs() << "with the following packets :\n"; 00278 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 00279 CurrentPacketMIs[i]->dump(); 00280 dbgs() << "\n"; 00281 } 00282 dbgs() << "because of Read port limitations\n"; 00283 ); 00284 CurrentPacketMIs.pop_back(); 00285 return false; 00286 } 00287 00288 // We cannot read LDS source registrs from the Trans slot. 00289 if (isTransSlot && TII->readsLDSSrcReg(MI)) 00290 return false; 00291 00292 CurrentPacketMIs.pop_back(); 00293 return true; 00294 } 00295 00296 MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override { 00297 MachineBasicBlock::iterator FirstInBundle = 00298 CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front(); 00299 const DenseMap<unsigned, unsigned> &PV = 00300 getPreviousVector(FirstInBundle); 00301 std::vector<R600InstrInfo::BankSwizzle> BS; 00302 bool isTransSlot; 00303 00304 if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) { 00305 for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { 00306 MachineInstr *MI = CurrentPacketMIs[i]; 00307 unsigned Op = TII->getOperandIdx(MI->getOpcode(), 00308 AMDGPU::OpName::bank_swizzle); 00309 MI->getOperand(Op).setImm(BS[i]); 00310 } 00311 unsigned Op = TII->getOperandIdx(MI->getOpcode(), 00312 AMDGPU::OpName::bank_swizzle); 00313 MI->getOperand(Op).setImm(BS.back()); 00314 if (!CurrentPacketMIs.empty()) 00315 setIsLastBit(CurrentPacketMIs.back(), 0); 00316 substitutePV(MI, PV); 00317 MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI); 00318 if (isTransSlot) { 00319 endPacket(std::next(It)->getParent(), std::next(It)); 00320 } 00321 return It; 00322 } 00323 endPacket(MI->getParent(), MI); 00324 if (TII->isTransOnly(MI)) 00325 return MI; 00326 return VLIWPacketizerList::addToPacket(MI); 00327 } 00328 }; 00329 00330 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { 00331 const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); 00332 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); 00333 00334 // Instantiate the packetizer. 00335 R600PacketizerList Packetizer(Fn, MLI); 00336 00337 // DFA state table should not be empty. 00338 assert(Packetizer.getResourceTracker() && "Empty DFA table!"); 00339 00340 // 00341 // Loop over all basic blocks and remove KILL pseudo-instructions 00342 // These instructions confuse the dependence analysis. Consider: 00343 // D0 = ... (Insn 0) 00344 // R0 = KILL R0, D0 (Insn 1) 00345 // R0 = ... (Insn 2) 00346 // Here, Insn 1 will result in the dependence graph not emitting an output 00347 // dependence between Insn 0 and Insn 2. This can lead to incorrect 00348 // packetization 00349 // 00350 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 00351 MBB != MBBe; ++MBB) { 00352 MachineBasicBlock::iterator End = MBB->end(); 00353 MachineBasicBlock::iterator MI = MBB->begin(); 00354 while (MI != End) { 00355 if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF || 00356 (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) { 00357 MachineBasicBlock::iterator DeleteMI = MI; 00358 ++MI; 00359 MBB->erase(DeleteMI); 00360 End = MBB->end(); 00361 continue; 00362 } 00363 ++MI; 00364 } 00365 } 00366 00367 // Loop over all of the basic blocks. 00368 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 00369 MBB != MBBe; ++MBB) { 00370 // Find scheduling regions and schedule / packetize each region. 00371 unsigned RemainingCount = MBB->size(); 00372 for(MachineBasicBlock::iterator RegionEnd = MBB->end(); 00373 RegionEnd != MBB->begin();) { 00374 // The next region starts above the previous region. Look backward in the 00375 // instruction stream until we find the nearest boundary. 00376 MachineBasicBlock::iterator I = RegionEnd; 00377 for(;I != MBB->begin(); --I, --RemainingCount) { 00378 if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn)) 00379 break; 00380 } 00381 I = MBB->begin(); 00382 00383 // Skip empty scheduling regions. 00384 if (I == RegionEnd) { 00385 RegionEnd = std::prev(RegionEnd); 00386 --RemainingCount; 00387 continue; 00388 } 00389 // Skip regions with one instruction. 00390 if (I == std::prev(RegionEnd)) { 00391 RegionEnd = std::prev(RegionEnd); 00392 continue; 00393 } 00394 00395 Packetizer.PacketizeMIs(MBB, I, RegionEnd); 00396 RegionEnd = I; 00397 } 00398 } 00399 00400 return true; 00401 00402 } 00403 00404 } // end anonymous namespace 00405 00406 llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) { 00407 return new R600Packetizer(tm); 00408 }