LLVM API Documentation
//===--------------------- R600MergeVectorRegisters.cpp -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregisters, relying on the
/// swizzle abilities of the using instructions.
///
/// For instance, consider the following pseudo code:
///   vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
///   ...
///   vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3
///   (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3
///
/// is turned into:
///   vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
///   ...
///   vreg7<def> = INSERT_SUBREG vreg4, sub3
///   (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3
///
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce the MOV count.
00028 //===----------------------------------------------------------------------===// 00029 00030 #include "llvm/Support/Debug.h" 00031 #include "AMDGPU.h" 00032 #include "R600InstrInfo.h" 00033 #include "AMDGPUSubtarget.h" 00034 #include "llvm/CodeGen/DFAPacketizer.h" 00035 #include "llvm/CodeGen/MachineDominators.h" 00036 #include "llvm/CodeGen/MachineFunctionPass.h" 00037 #include "llvm/CodeGen/MachineInstrBuilder.h" 00038 #include "llvm/CodeGen/MachineLoopInfo.h" 00039 #include "llvm/CodeGen/MachineRegisterInfo.h" 00040 #include "llvm/CodeGen/Passes.h" 00041 #include "llvm/Support/raw_ostream.h" 00042 00043 using namespace llvm; 00044 00045 #define DEBUG_TYPE "vec-merger" 00046 00047 namespace { 00048 00049 static bool 00050 isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { 00051 for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg), 00052 E = MRI.def_instr_end(); It != E; ++It) { 00053 return (*It).isImplicitDef(); 00054 } 00055 if (MRI.isReserved(Reg)) { 00056 return false; 00057 } 00058 llvm_unreachable("Reg without a def"); 00059 return false; 00060 } 00061 00062 class RegSeqInfo { 00063 public: 00064 MachineInstr *Instr; 00065 DenseMap<unsigned, unsigned> RegToChan; 00066 std::vector<unsigned> UndefReg; 00067 RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { 00068 assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE); 00069 for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { 00070 MachineOperand &MO = Instr->getOperand(i); 00071 unsigned Chan = Instr->getOperand(i + 1).getImm(); 00072 if (isImplicitlyDef(MRI, MO.getReg())) 00073 UndefReg.push_back(Chan); 00074 else 00075 RegToChan[MO.getReg()] = Chan; 00076 } 00077 } 00078 RegSeqInfo() {} 00079 00080 bool operator==(const RegSeqInfo &RSI) const { 00081 return RSI.Instr == Instr; 00082 } 00083 }; 00084 00085 class R600VectorRegMerger : public MachineFunctionPass { 00086 private: 00087 MachineRegisterInfo *MRI; 00088 const R600InstrInfo *TII; 00089 bool 
canSwizzle(const MachineInstr &) const; 00090 bool areAllUsesSwizzeable(unsigned Reg) const; 00091 void SwizzleInput(MachineInstr &, 00092 const std::vector<std::pair<unsigned, unsigned> > &) const; 00093 bool tryMergeVector(const RegSeqInfo *, RegSeqInfo *, 00094 std::vector<std::pair<unsigned, unsigned> > &Remap) const; 00095 bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 00096 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 00097 bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 00098 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 00099 MachineInstr *RebuildVector(RegSeqInfo *MI, 00100 const RegSeqInfo *BaseVec, 00101 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const; 00102 void RemoveMI(MachineInstr *); 00103 void trackRSI(const RegSeqInfo &RSI); 00104 00105 typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap; 00106 DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; 00107 InstructionSetMap PreviousRegSeqByReg; 00108 InstructionSetMap PreviousRegSeqByUndefCount; 00109 public: 00110 static char ID; 00111 R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), 00112 TII(nullptr) { } 00113 00114 void getAnalysisUsage(AnalysisUsage &AU) const override { 00115 AU.setPreservesCFG(); 00116 AU.addRequired<MachineDominatorTree>(); 00117 AU.addPreserved<MachineDominatorTree>(); 00118 AU.addRequired<MachineLoopInfo>(); 00119 AU.addPreserved<MachineLoopInfo>(); 00120 MachineFunctionPass::getAnalysisUsage(AU); 00121 } 00122 00123 const char *getPassName() const override { 00124 return "R600 Vector Registers Merge Pass"; 00125 } 00126 00127 bool runOnMachineFunction(MachineFunction &Fn) override; 00128 }; 00129 00130 char R600VectorRegMerger::ID = 0; 00131 00132 bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) 00133 const { 00134 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 00135 return true; 00136 switch (MI.getOpcode()) { 
00137 case AMDGPU::R600_ExportSwz: 00138 case AMDGPU::EG_ExportSwz: 00139 return true; 00140 default: 00141 return false; 00142 } 00143 } 00144 00145 bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, 00146 RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap) 00147 const { 00148 unsigned CurrentUndexIdx = 0; 00149 for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(), 00150 E = ToMerge->RegToChan.end(); It != E; ++It) { 00151 DenseMap<unsigned, unsigned>::const_iterator PosInUntouched = 00152 Untouched->RegToChan.find((*It).first); 00153 if (PosInUntouched != Untouched->RegToChan.end()) { 00154 Remap.push_back(std::pair<unsigned, unsigned> 00155 ((*It).second, (*PosInUntouched).second)); 00156 continue; 00157 } 00158 if (CurrentUndexIdx >= Untouched->UndefReg.size()) 00159 return false; 00160 Remap.push_back(std::pair<unsigned, unsigned> 00161 ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); 00162 } 00163 00164 return true; 00165 } 00166 00167 static 00168 unsigned getReassignedChan( 00169 const std::vector<std::pair<unsigned, unsigned> > &RemapChan, 00170 unsigned Chan) { 00171 for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { 00172 if (RemapChan[j].first == Chan) 00173 return RemapChan[j].second; 00174 } 00175 llvm_unreachable("Chan wasn't reassigned"); 00176 } 00177 00178 MachineInstr *R600VectorRegMerger::RebuildVector( 00179 RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, 00180 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 00181 unsigned Reg = RSI->Instr->getOperand(0).getReg(); 00182 MachineBasicBlock::iterator Pos = RSI->Instr; 00183 MachineBasicBlock &MBB = *Pos->getParent(); 00184 DebugLoc DL = Pos->getDebugLoc(); 00185 00186 unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); 00187 DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; 00188 std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg; 00189 for (DenseMap<unsigned, 
unsigned>::iterator It = RSI->RegToChan.begin(), 00190 E = RSI->RegToChan.end(); It != E; ++It) { 00191 unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 00192 unsigned SubReg = (*It).first; 00193 unsigned Swizzle = (*It).second; 00194 unsigned Chan = getReassignedChan(RemapChan, Swizzle); 00195 00196 MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), 00197 DstReg) 00198 .addReg(SrcVec) 00199 .addReg(SubReg) 00200 .addImm(Chan); 00201 UpdatedRegToChan[SubReg] = Chan; 00202 std::vector<unsigned>::iterator ChanPos = 00203 std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan); 00204 if (ChanPos != UpdatedUndef.end()) 00205 UpdatedUndef.erase(ChanPos); 00206 assert(std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan) == 00207 UpdatedUndef.end() && 00208 "UpdatedUndef shouldn't contain Chan more than once!"); 00209 DEBUG(dbgs() << " ->"; Tmp->dump();); 00210 (void)Tmp; 00211 SrcVec = DstReg; 00212 } 00213 Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg) 00214 .addReg(SrcVec); 00215 DEBUG(dbgs() << " ->"; Pos->dump();); 00216 00217 DEBUG(dbgs() << " Updating Swizzle:\n"); 00218 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 00219 E = MRI->use_instr_end(); It != E; ++It) { 00220 DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->"); 00221 SwizzleInput(*It, RemapChan); 00222 DEBUG((*It).dump()); 00223 } 00224 RSI->Instr->eraseFromParent(); 00225 00226 // Update RSI 00227 RSI->Instr = Pos; 00228 RSI->RegToChan = UpdatedRegToChan; 00229 RSI->UndefReg = UpdatedUndef; 00230 00231 return Pos; 00232 } 00233 00234 void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { 00235 for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(), 00236 E = PreviousRegSeqByReg.end(); It != E; ++It) { 00237 std::vector<MachineInstr *> &MIs = (*It).second; 00238 MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 00239 } 00240 for (InstructionSetMap::iterator It = 
PreviousRegSeqByUndefCount.begin(), 00241 E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { 00242 std::vector<MachineInstr *> &MIs = (*It).second; 00243 MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 00244 } 00245 } 00246 00247 void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, 00248 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 00249 unsigned Offset; 00250 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 00251 Offset = 2; 00252 else 00253 Offset = 3; 00254 for (unsigned i = 0; i < 4; i++) { 00255 unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1; 00256 for (unsigned j = 0, e = RemapChan.size(); j < e; j++) { 00257 if (RemapChan[j].first == Swizzle) { 00258 MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); 00259 break; 00260 } 00261 } 00262 } 00263 } 00264 00265 bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const { 00266 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 00267 E = MRI->use_instr_end(); It != E; ++It) { 00268 if (!canSwizzle(*It)) 00269 return false; 00270 } 00271 return true; 00272 } 00273 00274 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, 00275 RegSeqInfo &CompatibleRSI, 00276 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 00277 for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), 00278 MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { 00279 if (!MOp->isReg()) 00280 continue; 00281 if (PreviousRegSeqByReg[MOp->getReg()].empty()) 00282 continue; 00283 std::vector<MachineInstr *> MIs = PreviousRegSeqByReg[MOp->getReg()]; 00284 for (unsigned i = 0, e = MIs.size(); i < e; i++) { 00285 CompatibleRSI = PreviousRegSeq[MIs[i]]; 00286 if (RSI == CompatibleRSI) 00287 continue; 00288 if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) 00289 return true; 00290 } 00291 } 00292 return false; 00293 } 00294 00295 bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, 00296 
RegSeqInfo &CompatibleRSI, 00297 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 00298 unsigned NeededUndefs = 4 - RSI.UndefReg.size(); 00299 if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) 00300 return false; 00301 std::vector<MachineInstr *> &MIs = 00302 PreviousRegSeqByUndefCount[NeededUndefs]; 00303 CompatibleRSI = PreviousRegSeq[MIs.back()]; 00304 tryMergeVector(&CompatibleRSI, &RSI, RemapChan); 00305 return true; 00306 } 00307 00308 void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { 00309 for (DenseMap<unsigned, unsigned>::const_iterator 00310 It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { 00311 PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); 00312 } 00313 PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); 00314 PreviousRegSeq[RSI.Instr] = RSI; 00315 } 00316 00317 bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { 00318 TII = static_cast<const R600InstrInfo *>(Fn.getSubtarget().getInstrInfo()); 00319 MRI = &(Fn.getRegInfo()); 00320 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 00321 MBB != MBBe; ++MBB) { 00322 MachineBasicBlock *MB = MBB; 00323 PreviousRegSeq.clear(); 00324 PreviousRegSeqByReg.clear(); 00325 PreviousRegSeqByUndefCount.clear(); 00326 00327 for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); 00328 MII != MIIE; ++MII) { 00329 MachineInstr *MI = MII; 00330 if (MI->getOpcode() != AMDGPU::REG_SEQUENCE) { 00331 if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { 00332 unsigned Reg = MI->getOperand(1).getReg(); 00333 for (MachineRegisterInfo::def_instr_iterator 00334 It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end(); 00335 It != E; ++It) { 00336 RemoveMI(&(*It)); 00337 } 00338 } 00339 continue; 00340 } 00341 00342 00343 RegSeqInfo RSI(*MRI, MI); 00344 00345 // All uses of MI are swizzeable ? 
00346 unsigned Reg = MI->getOperand(0).getReg(); 00347 if (!areAllUsesSwizzeable(Reg)) 00348 continue; 00349 00350 DEBUG (dbgs() << "Trying to optimize "; 00351 MI->dump(); 00352 ); 00353 00354 RegSeqInfo CandidateRSI; 00355 std::vector<std::pair<unsigned, unsigned> > RemapChan; 00356 DEBUG(dbgs() << "Using common slots...\n";); 00357 if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { 00358 // Remove CandidateRSI mapping 00359 RemoveMI(CandidateRSI.Instr); 00360 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 00361 trackRSI(RSI); 00362 continue; 00363 } 00364 DEBUG(dbgs() << "Using free slots...\n";); 00365 RemapChan.clear(); 00366 if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { 00367 RemoveMI(CandidateRSI.Instr); 00368 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 00369 trackRSI(RSI); 00370 continue; 00371 } 00372 //Failed to merge 00373 trackRSI(RSI); 00374 } 00375 } 00376 return false; 00377 } 00378 00379 } 00380 00381 llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { 00382 return new R600VectorRegMerger(tm); 00383 }