LLVM API Documentation
00001 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 /// \file 00011 /// R600EmitClauseMarker pass emits CFAlu instruction in a conservative maneer. 00012 /// This pass is merging consecutive CFAlus where applicable. 00013 /// It needs to be called after IfCvt for best results. 00014 //===----------------------------------------------------------------------===// 00015 00016 #include "AMDGPU.h" 00017 #include "R600Defines.h" 00018 #include "R600InstrInfo.h" 00019 #include "R600MachineFunctionInfo.h" 00020 #include "R600RegisterInfo.h" 00021 #include "AMDGPUSubtarget.h" 00022 #include "llvm/CodeGen/MachineFunctionPass.h" 00023 #include "llvm/CodeGen/MachineInstrBuilder.h" 00024 #include "llvm/CodeGen/MachineRegisterInfo.h" 00025 #include "llvm/Support/Debug.h" 00026 #include "llvm/Support/raw_ostream.h" 00027 00028 using namespace llvm; 00029 00030 #define DEBUG_TYPE "r600mergeclause" 00031 00032 namespace { 00033 00034 static bool isCFAlu(const MachineInstr *MI) { 00035 switch (MI->getOpcode()) { 00036 case AMDGPU::CF_ALU: 00037 case AMDGPU::CF_ALU_PUSH_BEFORE: 00038 return true; 00039 default: 00040 return false; 00041 } 00042 } 00043 00044 class R600ClauseMergePass : public MachineFunctionPass { 00045 00046 private: 00047 static char ID; 00048 const R600InstrInfo *TII; 00049 00050 unsigned getCFAluSize(const MachineInstr *MI) const; 00051 bool isCFAluEnabled(const MachineInstr *MI) const; 00052 00053 /// IfCvt pass can generate "disabled" ALU clause marker that need to be 00054 /// removed and their content affected to the previous alu clause. 00055 /// This function parse instructions after CFAlu until it find a disabled 00056 /// CFAlu and merge the content, or an enabled CFAlu. 00057 void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const; 00058 00059 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if 00060 /// it is the case. 00061 bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu) 00062 const; 00063 00064 public: 00065 R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { } 00066 00067 bool runOnMachineFunction(MachineFunction &MF) override; 00068 00069 const char *getPassName() const override; 00070 }; 00071 00072 char R600ClauseMergePass::ID = 0; 00073 00074 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const { 00075 assert(isCFAlu(MI)); 00076 return MI->getOperand( 00077 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm(); 00078 } 00079 00080 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const { 00081 assert(isCFAlu(MI)); 00082 return MI->getOperand( 00083 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm(); 00084 } 00085 00086 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) 00087 const { 00088 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); 00089 MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end(); 00090 I++; 00091 do { 00092 while (I!= E && !isCFAlu(I)) 00093 I++; 00094 if (I == E) 00095 return; 00096 MachineInstr *MI = I++; 00097 if (isCFAluEnabled(MI)) 00098 break; 00099 CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI)); 00100 MI->eraseFromParent(); 00101 } while (I != E); 00102 } 00103 00104 bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu, 00105 const MachineInstr *LatrCFAlu) const { 00106 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu)); 00107 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); 00108 unsigned RootInstCount = getCFAluSize(RootCFAlu), 00109 LaterInstCount = getCFAluSize(LatrCFAlu); 00110 unsigned CumuledInsts = RootInstCount + LaterInstCount; 00111 if (CumuledInsts >= TII->getMaxAlusPerClause()) { 00112 DEBUG(dbgs() << "Excess inst counts\n"); 00113 return false; 00114 } 00115 if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) 00116 return false; 00117 // Is KCache Bank 0 compatible ? 00118 int Mode0Idx = 00119 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0); 00120 int KBank0Idx = 00121 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0); 00122 int KBank0LineIdx = 00123 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0); 00124 if (LatrCFAlu->getOperand(Mode0Idx).getImm() && 00125 RootCFAlu->getOperand(Mode0Idx).getImm() && 00126 (LatrCFAlu->getOperand(KBank0Idx).getImm() != 00127 RootCFAlu->getOperand(KBank0Idx).getImm() || 00128 LatrCFAlu->getOperand(KBank0LineIdx).getImm() != 00129 RootCFAlu->getOperand(KBank0LineIdx).getImm())) { 00130 DEBUG(dbgs() << "Wrong KC0\n"); 00131 return false; 00132 } 00133 // Is KCache Bank 1 compatible ? 00134 int Mode1Idx = 00135 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1); 00136 int KBank1Idx = 00137 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1); 00138 int KBank1LineIdx = 00139 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1); 00140 if (LatrCFAlu->getOperand(Mode1Idx).getImm() && 00141 RootCFAlu->getOperand(Mode1Idx).getImm() && 00142 (LatrCFAlu->getOperand(KBank1Idx).getImm() != 00143 RootCFAlu->getOperand(KBank1Idx).getImm() || 00144 LatrCFAlu->getOperand(KBank1LineIdx).getImm() != 00145 RootCFAlu->getOperand(KBank1LineIdx).getImm())) { 00146 DEBUG(dbgs() << "Wrong KC0\n"); 00147 return false; 00148 } 00149 if (LatrCFAlu->getOperand(Mode0Idx).getImm()) { 00150 RootCFAlu->getOperand(Mode0Idx).setImm( 00151 LatrCFAlu->getOperand(Mode0Idx).getImm()); 00152 RootCFAlu->getOperand(KBank0Idx).setImm( 00153 LatrCFAlu->getOperand(KBank0Idx).getImm()); 00154 RootCFAlu->getOperand(KBank0LineIdx).setImm( 00155 LatrCFAlu->getOperand(KBank0LineIdx).getImm()); 00156 } 00157 if (LatrCFAlu->getOperand(Mode1Idx).getImm()) { 00158 RootCFAlu->getOperand(Mode1Idx).setImm( 00159 LatrCFAlu->getOperand(Mode1Idx).getImm()); 00160 RootCFAlu->getOperand(KBank1Idx).setImm( 00161 LatrCFAlu->getOperand(KBank1Idx).getImm()); 00162 RootCFAlu->getOperand(KBank1LineIdx).setImm( 00163 LatrCFAlu->getOperand(KBank1LineIdx).getImm()); 00164 } 00165 RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts); 00166 RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode())); 00167 return true; 00168 } 00169 00170 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) { 00171 TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo()); 00172 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 00173 BB != BB_E; ++BB) { 00174 MachineBasicBlock &MBB = *BB; 00175 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 00176 MachineBasicBlock::iterator LatestCFAlu = E; 00177 while (I != E) { 00178 MachineInstr *MI = I++; 00179 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) || 00180 TII->mustBeLastInClause(MI->getOpcode())) 00181 LatestCFAlu = E; 00182 if (!isCFAlu(MI)) 00183 continue; 00184 cleanPotentialDisabledCFAlu(MI); 00185 00186 if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) { 00187 MI->eraseFromParent(); 00188 } else { 00189 assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled"); 00190 LatestCFAlu = MI; 00191 } 00192 } 00193 } 00194 return false; 00195 } 00196 00197 const char *R600ClauseMergePass::getPassName() const { 00198 return "R600 Merge Clause Markers Pass"; 00199 } 00200 00201 } // end anonymous namespace 00202 00203 00204 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) { 00205 return new R600ClauseMergePass(TM); 00206 }