LLVM API Documentation
00001 //===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 /// \file 00011 /// Copies from VGPR to SGPR registers are illegal and the register coalescer 00012 /// will sometimes generate these illegal copies in situations like this: 00013 /// 00014 /// Register Class <vsrc> is the union of <vgpr> and <sgpr> 00015 /// 00016 /// BB0: 00017 /// %vreg0 <sgpr> = SCALAR_INST 00018 /// %vreg1 <vsrc> = COPY %vreg0 <sgpr> 00019 /// ... 00020 /// BRANCH %cond BB1, BB2 00021 /// BB1: 00022 /// %vreg2 <vgpr> = VECTOR_INST 00023 /// %vreg3 <vsrc> = COPY %vreg2 <vgpr> 00024 /// BB2: 00025 /// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vrsc>, <BB#1> 00026 /// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc> 00027 /// 00028 /// 00029 /// The coalescer will begin at BB0 and eliminate its copy, then the resulting 00030 /// code will look like this: 00031 /// 00032 /// BB0: 00033 /// %vreg0 <sgpr> = SCALAR_INST 00034 /// ... 00035 /// BRANCH %cond BB1, BB2 00036 /// BB1: 00037 /// %vreg2 <vgpr> = VECTOR_INST 00038 /// %vreg3 <vsrc> = COPY %vreg2 <vgpr> 00039 /// BB2: 00040 /// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1> 00041 /// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr> 00042 /// 00043 /// Now that the result of the PHI instruction is an SGPR, the register 00044 /// allocator is now forced to constrain the register class of %vreg3 to 00045 /// <sgpr> so we end up with final code like this: 00046 /// 00047 /// BB0: 00048 /// %vreg0 <sgpr> = SCALAR_INST 00049 /// ... 00050 /// BRANCH %cond BB1, BB2 00051 /// BB1: 00052 /// %vreg2 <vgpr> = VECTOR_INST 00053 /// %vreg3 <sgpr> = COPY %vreg2 <vgpr> 00054 /// BB2: 00055 /// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1> 00056 /// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr> 00057 /// 00058 /// Now this code contains an illegal copy from a VGPR to an SGPR. 00059 /// 00060 /// In order to avoid this problem, this pass searches for PHI instructions 00061 /// which define a <vsrc> register and constrains its definition class to 00062 /// <vgpr> if the user of the PHI's definition register is a vector instruction. 00063 /// If the PHI's definition class is constrained to <vgpr> then the coalescer 00064 /// will be unable to perform the COPY removal from the above example which 00065 /// ultimately led to the creation of an illegal COPY. 00066 //===----------------------------------------------------------------------===// 00067 00068 #include "AMDGPU.h" 00069 #include "AMDGPUSubtarget.h" 00070 #include "SIInstrInfo.h" 00071 #include "llvm/CodeGen/MachineFunctionPass.h" 00072 #include "llvm/CodeGen/MachineInstrBuilder.h" 00073 #include "llvm/CodeGen/MachineRegisterInfo.h" 00074 #include "llvm/Support/Debug.h" 00075 #include "llvm/Support/raw_ostream.h" 00076 #include "llvm/Target/TargetMachine.h" 00077 00078 using namespace llvm; 00079 00080 #define DEBUG_TYPE "sgpr-copies" 00081 00082 namespace { 00083 00084 class SIFixSGPRCopies : public MachineFunctionPass { 00085 00086 private: 00087 static char ID; 00088 const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI, 00089 const MachineRegisterInfo &MRI, 00090 unsigned Reg, 00091 unsigned SubReg) const; 00092 const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI, 00093 const MachineRegisterInfo &MRI, 00094 unsigned Reg, 00095 unsigned SubReg) const; 00096 bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI, 00097 const MachineRegisterInfo &MRI) const; 00098 00099 public: 00100 SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } 00101 00102 bool runOnMachineFunction(MachineFunction &MF) override; 00103 00104 const char *getPassName() const override { 00105 return "SI Fix SGPR copies"; 00106 } 00107 00108 }; 00109 00110 } // End anonymous namespace 00111 00112 char SIFixSGPRCopies::ID = 0; 00113 00114 FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) { 00115 return new SIFixSGPRCopies(tm); 00116 } 00117 00118 static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { 00119 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 00120 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 00121 if (!MI.getOperand(i).isReg() || 00122 !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) 00123 continue; 00124 00125 if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg()))) 00126 return true; 00127 } 00128 return false; 00129 } 00130 00131 /// This functions walks the use list of Reg until it finds an Instruction 00132 /// that isn't a COPY returns the register class of that instruction. 00133 /// \return The register defined by the first non-COPY instruction. 00134 const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses( 00135 const SIRegisterInfo *TRI, 00136 const MachineRegisterInfo &MRI, 00137 unsigned Reg, 00138 unsigned SubReg) const { 00139 // The Reg parameter to the function must always be defined by either a PHI 00140 // or a COPY, therefore it cannot be a physical register. 00141 assert(TargetRegisterInfo::isVirtualRegister(Reg) && 00142 "Reg cannot be a physical register"); 00143 00144 const TargetRegisterClass *RC = MRI.getRegClass(Reg); 00145 RC = TRI->getSubRegClass(RC, SubReg); 00146 for (MachineRegisterInfo::use_instr_iterator 00147 I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) { 00148 switch (I->getOpcode()) { 00149 case AMDGPU::COPY: 00150 RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI, 00151 I->getOperand(0).getReg(), 00152 I->getOperand(0).getSubReg())); 00153 break; 00154 } 00155 } 00156 00157 return RC; 00158 } 00159 00160 const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef( 00161 const SIRegisterInfo *TRI, 00162 const MachineRegisterInfo &MRI, 00163 unsigned Reg, 00164 unsigned SubReg) const { 00165 if (!TargetRegisterInfo::isVirtualRegister(Reg)) { 00166 const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); 00167 return TRI->getSubRegClass(RC, SubReg); 00168 } 00169 MachineInstr *Def = MRI.getVRegDef(Reg); 00170 if (Def->getOpcode() != AMDGPU::COPY) { 00171 return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg); 00172 } 00173 00174 return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(), 00175 Def->getOperand(1).getSubReg()); 00176 } 00177 00178 bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy, 00179 const SIRegisterInfo *TRI, 00180 const MachineRegisterInfo &MRI) const { 00181 00182 unsigned DstReg = Copy.getOperand(0).getReg(); 00183 unsigned SrcReg = Copy.getOperand(1).getReg(); 00184 unsigned SrcSubReg = Copy.getOperand(1).getSubReg(); 00185 const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); 00186 const TargetRegisterClass *SrcRC; 00187 00188 if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || 00189 DstRC == &AMDGPU::M0RegRegClass || 00190 MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass) 00191 return false; 00192 00193 SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg); 00194 return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC); 00195 } 00196 00197 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { 00198 MachineRegisterInfo &MRI = MF.getRegInfo(); 00199 const SIRegisterInfo *TRI = 00200 static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); 00201 const SIInstrInfo *TII = 00202 static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); 00203 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 00204 BI != BE; ++BI) { 00205 00206 MachineBasicBlock &MBB = *BI; 00207 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 00208 I != E; ++I) { 00209 MachineInstr &MI = *I; 00210 if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) { 00211 DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n"); 00212 DEBUG(MI.print(dbgs())); 00213 TII->moveToVALU(MI); 00214 00215 } 00216 00217 switch (MI.getOpcode()) { 00218 default: continue; 00219 case AMDGPU::PHI: { 00220 DEBUG(dbgs() << " Fixing PHI:\n"); 00221 DEBUG(MI.print(dbgs())); 00222 00223 for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { 00224 unsigned Reg = MI.getOperand(i).getReg(); 00225 const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg, 00226 MI.getOperand(0).getSubReg()); 00227 MRI.constrainRegClass(Reg, RC); 00228 } 00229 unsigned Reg = MI.getOperand(0).getReg(); 00230 const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg, 00231 MI.getOperand(0).getSubReg()); 00232 if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) { 00233 MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass); 00234 } 00235 00236 if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) 00237 break; 00238 00239 // If a PHI node defines an SGPR and any of its operands are VGPRs, 00240 // then we need to move it to the VALU. 00241 for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { 00242 unsigned Reg = MI.getOperand(i).getReg(); 00243 if (TRI->hasVGPRs(MRI.getRegClass(Reg))) { 00244 TII->moveToVALU(MI); 00245 break; 00246 } 00247 } 00248 00249 break; 00250 } 00251 case AMDGPU::REG_SEQUENCE: { 00252 if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) || 00253 !hasVGPROperands(MI, TRI)) 00254 continue; 00255 00256 DEBUG(dbgs() << "Fixing REG_SEQUENCE:\n"); 00257 DEBUG(MI.print(dbgs())); 00258 00259 TII->moveToVALU(MI); 00260 break; 00261 } 00262 case AMDGPU::INSERT_SUBREG: { 00263 const TargetRegisterClass *DstRC, *Src0RC, *Src1RC; 00264 DstRC = MRI.getRegClass(MI.getOperand(0).getReg()); 00265 Src0RC = MRI.getRegClass(MI.getOperand(1).getReg()); 00266 Src1RC = MRI.getRegClass(MI.getOperand(2).getReg()); 00267 if (TRI->isSGPRClass(DstRC) && 00268 (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) { 00269 DEBUG(dbgs() << " Fixing INSERT_SUBREG:\n"); 00270 DEBUG(MI.print(dbgs())); 00271 TII->moveToVALU(MI); 00272 } 00273 break; 00274 } 00275 } 00276 } 00277 } 00278 return false; 00279 }