LLVM API Documentation
//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// The pass tries to use the 32-bit encoding for instructions when possible.
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-shrink-instructions"

STATISTIC(NumInstructionsShrunk,
          "Number of 64-bit instructions reduced to 32-bit.");
STATISTIC(NumLiteralConstantsFolded,
          "Number of literal constants folded into 32-bit instructions.");

namespace llvm {
  void initializeSIShrinkInstructionsPass(PassRegistry&);
}

using namespace llvm;

namespace {

class SIShrinkInstructions : public MachineFunctionPass {
public:
  static char ID;

public:
  SIShrinkInstructions() : MachineFunctionPass(ID) {
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Shrink Instructions";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIShrinkInstructions, DEBUG_TYPE,
                      "SI Shrink Instructions", false, false)
INITIALIZE_PASS_END(SIShrinkInstructions, DEBUG_TYPE,
                    "SI Shrink Instructions", false, false)

char SIShrinkInstructions::ID = 0;

FunctionPass *llvm::createSIShrinkInstructionsPass() {
  return new SIShrinkInstructions();
}

static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
                   const MachineRegisterInfo &MRI) {
  if (!MO->isReg())
    return false;

  if (TargetRegisterInfo::isVirtualRegister(MO->getReg()))
    return TRI.hasVGPRs(MRI.getRegClass(MO->getReg()));

  return TRI.hasVGPRs(TRI.getPhysRegClass(MO->getReg()));
}

static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
                      const SIRegisterInfo &TRI,
                      const MachineRegisterInfo &MRI) {

  const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
  // Can't shrink instructions with three operands.
  if (Src2)
    return false;

  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
  const MachineOperand *Src1Mod =
      TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);

  if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
    return false;

  // We don't need to check src0, all input types are legal, so just make
  // sure src0 isn't using any modifiers.
  const MachineOperand *Src0Mod =
      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
  if (Src0Mod && Src0Mod->getImm() != 0)
    return false;

  // Check output modifiers.
  const MachineOperand *Omod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
  if (Omod && Omod->getImm() != 0)
    return false;

  const MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
  return !Clamp || Clamp->getImm() == 0;
}
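// Illustrative example (assumed MIR, not taken from this file): canShrink()
// accepts a two-operand VALU op whose modifier operands are all zero and
// whose src1, if present, is a VGPR, e.g. something like
//
//   %vreg2 = V_ADD_F32_e64 src0_mods=0, %vreg0, src1_mods=0, %vreg1,
//            clamp=0, omod=0
//
// which the pass may later re-encode as
//
//   %vreg2 = V_ADD_F32_e32 %vreg0, %vreg1
//
// The same op with a non-zero modifier (say, negation on src1) or with an
// SGPR in src1 fails the checks above and keeps its 64-bit encoding.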
/// \brief This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
/// and will only fold literal constants if we are still in SSA.
static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
                           MachineRegisterInfo &MRI, bool TryToCommute = true) {

  if (!MRI.isSSA())
    return;

  assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) ||
         TII->isVOPC(MI.getOpcode()));

  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);

  // Only one literal constant is allowed per instruction, so if src0 is a
  // literal constant then we can't do any folding.
  if (Src0->isImm() && TII->isLiteralConstant(*Src0))
    return;

  // Literal constants and SGPRs can only be used in src0, so if src0 is an
  // SGPR, we cannot commute the instruction, so we can't fold any literal
  // constants.
  if (Src0->isReg() && !isVGPR(Src0, TRI, MRI))
    return;

  // Try to fold src0.
  if (Src0->isReg()) {
    unsigned Reg = Src0->getReg();
    MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
    if (Def && Def->isMoveImmediate()) {
      MachineOperand &MovSrc = Def->getOperand(1);
      bool ConstantFolded = false;

      if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
        Src0->ChangeToImmediate(MovSrc.getImm());
        ConstantFolded = true;
      } else if (MovSrc.isFPImm()) {
        const APFloat &APF = MovSrc.getFPImm()->getValueAPF();
        if (&APF.getSemantics() == &APFloat::IEEEsingle) {
          MRI.removeRegOperandFromUseList(Src0);
          Src0->ChangeToImmediate(APF.bitcastToAPInt().getZExtValue());
          ConstantFolded = true;
        }
      }
      if (ConstantFolded) {
        if (MRI.use_empty(Reg))
          Def->eraseFromParent();
        ++NumLiteralConstantsFolded;
        return;
      }
    }
  }

  // We have failed to fold src0, so commute the instruction and try again.
  if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(&MI))
    foldImmediates(MI, TII, MRI, false);
}
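// Worked example (assumed MIR; the registers and constant are invented for
// illustration): given SSA input such as
//
//   %vreg0 = V_MOV_B32_e32 0x41200000      ; literal 10.0f
//   %vreg2 = V_ADD_F32_e32 %vreg0, %vreg1
//
// foldImmediates() finds the unique move-immediate def of src0, rewrites
// the use to
//
//   %vreg2 = V_ADD_F32_e32 0x41200000, %vreg1
//
// and erases the now-dead V_MOV_B32_e32. If src0 cannot be folded, the
// operands are commuted once and the fold is retried with
// TryToCommute == false, which bounds the recursion to a single retry.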
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      if (!canShrink(MI, TII, TRI, MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
            !canShrink(MI, TII, TRI, MRI))
          continue;
      }

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      // Op32 could be -1 here if we started with an instruction that had a
      // 32-bit encoding and then commuted it to an instruction that did not.
      if (Op32 == -1)
        continue;

      if (TII->isVOPC(Op32)) {
        unsigned DstReg = MI.getOperand(0).getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
          // VOPC instructions can only write to the VCC register. We can't
          // force them to use VCC here, because the register allocator has
          // trouble with sequences like this, which cause the allocator to
          // run out of registers if vreg0 and vreg1 belong to the VCCReg
          // register class:
          //   vreg0 = VOPC;
          //   vreg1 = VOPC;
          //   S_AND_B64 vreg0, vreg1
          //
          // So, instead of forcing the instruction to write to VCC, we
          // provide a hint to the register allocator to use VCC, and then we
          // will run this pass again after RA and shrink it if it outputs to
          // VCC.
          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
          continue;
        }
        if (DstReg != AMDGPU::VCC)
          continue;
      }

      // We can shrink this instruction.
      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);

      MachineInstrBuilder Inst32 =
          BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));

      // dst
      Inst32.addOperand(MI.getOperand(0));

      Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));

      const MachineOperand *Src1 =
          TII->getNamedOperand(MI, AMDGPU::OpName::src1);
      if (Src1)
        Inst32.addOperand(*Src1);

      ++NumInstructionsShrunk;
      MI.eraseFromParent();

      foldImmediates(*Inst32, TII, MRI);
      DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
    }
  }
  return false;
}
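// Usage sketch (hypothetical driver code; the pipeline hooks shown here are
// assumptions, not part of this file): the target's pass configuration
// creates the pass through the factory above. Because virtual VOPC defs are
// only given a VCC allocation hint, the hint pays off only if the pass is
// also scheduled to run again after register allocation, e.g.:
//
//   // before register allocation
//   addPass(createSIShrinkInstructionsPass());
//   ...
//   // after register allocation: shrink VOPC ops that now write VCC
//   addPass(createSIShrinkInstructionsPass());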