LLVM API Documentation

R600ClauseMergePass.cpp
Go to the documentation of this file.
00001 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 /// \file
00011 /// R600EmitClauseMarker pass emits CFAlu instructions in a conservative manner.
00012 /// This pass is merging consecutive CFAlus where applicable.
00013 /// It needs to be called after IfCvt for best results.
00014 //===----------------------------------------------------------------------===//
00015 
00016 #include "AMDGPU.h"
00017 #include "R600Defines.h"
00018 #include "R600InstrInfo.h"
00019 #include "R600MachineFunctionInfo.h"
00020 #include "R600RegisterInfo.h"
00021 #include "AMDGPUSubtarget.h"
00022 #include "llvm/CodeGen/MachineFunctionPass.h"
00023 #include "llvm/CodeGen/MachineInstrBuilder.h"
00024 #include "llvm/CodeGen/MachineRegisterInfo.h"
00025 #include "llvm/Support/Debug.h"
00026 #include "llvm/Support/raw_ostream.h"
00027 
00028 using namespace llvm;
00029 
00030 #define DEBUG_TYPE "r600mergeclause"
00031 
00032 namespace {
00033 
00034 static bool isCFAlu(const MachineInstr *MI) {
00035   switch (MI->getOpcode()) {
00036   case AMDGPU::CF_ALU:
00037   case AMDGPU::CF_ALU_PUSH_BEFORE:
00038     return true;
00039   default:
00040     return false;
00041   }
00042 }
00043 
00044 class R600ClauseMergePass : public MachineFunctionPass {
00045 
00046 private:
00047   static char ID;
00048   const R600InstrInfo *TII;
00049 
00050   unsigned getCFAluSize(const MachineInstr *MI) const;
00051   bool isCFAluEnabled(const MachineInstr *MI) const;
00052 
00053   /// IfCvt pass can generate "disabled" ALU clause marker that need to be
00054   /// removed and their content affected to the previous alu clause.
00055   /// This function parse instructions after CFAlu until it find a disabled
00056   /// CFAlu and merge the content, or an enabled CFAlu.
00057   void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
00058 
00059   /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
00060   /// it is the case.
00061   bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
00062       const;
00063 
00064 public:
00065   R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
00066 
00067   bool runOnMachineFunction(MachineFunction &MF) override;
00068 
00069   const char *getPassName() const override;
00070 };
00071 
00072 char R600ClauseMergePass::ID = 0;
00073 
00074 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
00075   assert(isCFAlu(MI));
00076   return MI->getOperand(
00077       TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
00078 }
00079 
00080 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
00081   assert(isCFAlu(MI));
00082   return MI->getOperand(
00083       TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
00084 }
00085 
00086 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
00087     const {
00088   int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
00089   MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
00090   I++;
00091   do {
00092     while (I!= E && !isCFAlu(I))
00093       I++;
00094     if (I == E)
00095       return;
00096     MachineInstr *MI = I++;
00097     if (isCFAluEnabled(MI))
00098       break;
00099     CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
00100     MI->eraseFromParent();
00101   } while (I != E);
00102 }
00103 
00104 bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
00105                                           const MachineInstr *LatrCFAlu) const {
00106   assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
00107   int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
00108   unsigned RootInstCount = getCFAluSize(RootCFAlu),
00109       LaterInstCount = getCFAluSize(LatrCFAlu);
00110   unsigned CumuledInsts = RootInstCount + LaterInstCount;
00111   if (CumuledInsts >= TII->getMaxAlusPerClause()) {
00112     DEBUG(dbgs() << "Excess inst counts\n");
00113     return false;
00114   }
00115   if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
00116     return false;
00117   // Is KCache Bank 0 compatible ?
00118   int Mode0Idx =
00119       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
00120   int KBank0Idx =
00121       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
00122   int KBank0LineIdx =
00123       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
00124   if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
00125       RootCFAlu->getOperand(Mode0Idx).getImm() &&
00126       (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
00127        RootCFAlu->getOperand(KBank0Idx).getImm() ||
00128       LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
00129       RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
00130     DEBUG(dbgs() << "Wrong KC0\n");
00131     return false;
00132   }
00133   // Is KCache Bank 1 compatible ?
00134   int Mode1Idx =
00135       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
00136   int KBank1Idx =
00137       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
00138   int KBank1LineIdx =
00139       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
00140   if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
00141       RootCFAlu->getOperand(Mode1Idx).getImm() &&
00142       (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
00143       RootCFAlu->getOperand(KBank1Idx).getImm() ||
00144       LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
00145       RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
00146     DEBUG(dbgs() << "Wrong KC0\n");
00147     return false;
00148   }
00149   if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
00150     RootCFAlu->getOperand(Mode0Idx).setImm(
00151         LatrCFAlu->getOperand(Mode0Idx).getImm());
00152     RootCFAlu->getOperand(KBank0Idx).setImm(
00153         LatrCFAlu->getOperand(KBank0Idx).getImm());
00154     RootCFAlu->getOperand(KBank0LineIdx).setImm(
00155         LatrCFAlu->getOperand(KBank0LineIdx).getImm());
00156   }
00157   if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
00158     RootCFAlu->getOperand(Mode1Idx).setImm(
00159         LatrCFAlu->getOperand(Mode1Idx).getImm());
00160     RootCFAlu->getOperand(KBank1Idx).setImm(
00161         LatrCFAlu->getOperand(KBank1Idx).getImm());
00162     RootCFAlu->getOperand(KBank1LineIdx).setImm(
00163         LatrCFAlu->getOperand(KBank1LineIdx).getImm());
00164   }
00165   RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
00166   RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
00167   return true;
00168 }
00169 
00170 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
00171   TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
00172   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
00173                                                   BB != BB_E; ++BB) {
00174     MachineBasicBlock &MBB = *BB;
00175     MachineBasicBlock::iterator I = MBB.begin(),  E = MBB.end();
00176     MachineBasicBlock::iterator LatestCFAlu = E;
00177     while (I != E) {
00178       MachineInstr *MI = I++;
00179       if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
00180           TII->mustBeLastInClause(MI->getOpcode()))
00181         LatestCFAlu = E;
00182       if (!isCFAlu(MI))
00183         continue;
00184       cleanPotentialDisabledCFAlu(MI);
00185 
00186       if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
00187         MI->eraseFromParent();
00188       } else {
00189         assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
00190         LatestCFAlu = MI;
00191       }
00192     }
00193   }
00194   return false;
00195 }
00196 
00197 const char *R600ClauseMergePass::getPassName() const {
00198   return "R600 Merge Clause Markers Pass";
00199 }
00200 
00201 } // end anonymous namespace
00202 
00203 
00204 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
00205   return new R600ClauseMergePass(TM);
00206 }