LLVM API Documentation

MLxExpansionPass.cpp
Go to the documentation of this file.
00001 //===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
00011 // multiply and add / sub instructions) when special VMLx hazards are detected.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "ARM.h"
00016 #include "ARMBaseInstrInfo.h"
00017 #include "ARMSubtarget.h"
00018 #include "llvm/ADT/SmallPtrSet.h"
00019 #include "llvm/ADT/Statistic.h"
00020 #include "llvm/CodeGen/MachineFunctionPass.h"
00021 #include "llvm/CodeGen/MachineInstr.h"
00022 #include "llvm/CodeGen/MachineInstrBuilder.h"
00023 #include "llvm/CodeGen/MachineRegisterInfo.h"
00024 #include "llvm/Support/CommandLine.h"
00025 #include "llvm/Support/Debug.h"
00026 #include "llvm/Support/raw_ostream.h"
00027 #include "llvm/Target/TargetRegisterInfo.h"
00028 using namespace llvm;
00029 
00030 #define DEBUG_TYPE "mlx-expansion"
00031 
00032 static cl::opt<bool>
00033 ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
00034 static cl::opt<unsigned>
00035 ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
00036 
00037 STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
00038 
00039 namespace {
00040   struct MLxExpansion : public MachineFunctionPass {
00041     static char ID;
00042     MLxExpansion() : MachineFunctionPass(ID) {}
00043 
00044     bool runOnMachineFunction(MachineFunction &Fn) override;
00045 
00046     const char *getPassName() const override {
00047       return "ARM MLA / MLS expansion pass";
00048     }
00049 
00050   private:
00051     const ARMBaseInstrInfo *TII;
00052     const TargetRegisterInfo *TRI;
00053     MachineRegisterInfo *MRI;
00054 
00055     bool isLikeA9;
00056     bool isSwift;
00057     unsigned MIIdx;
00058     MachineInstr* LastMIs[4];
00059     SmallPtrSet<MachineInstr*, 4> IgnoreStall;
00060 
00061     void clearStack();
00062     void pushStack(MachineInstr *MI);
00063     MachineInstr *getAccDefMI(MachineInstr *MI) const;
00064     unsigned getDefReg(MachineInstr *MI) const;
00065     bool hasLoopHazard(MachineInstr *MI) const;
00066     bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
00067     bool FindMLxHazard(MachineInstr *MI);
00068     void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
00069                                 unsigned MulOpc, unsigned AddSubOpc,
00070                                 bool NegAcc, bool HasLane);
00071     bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
00072   };
00073   char MLxExpansion::ID = 0;
00074 }
00075 
00076 void MLxExpansion::clearStack() {
00077   std::fill(LastMIs, LastMIs + 4, nullptr);
00078   MIIdx = 0;
00079 }
00080 
00081 void MLxExpansion::pushStack(MachineInstr *MI) {
00082   LastMIs[MIIdx] = MI;
00083   if (++MIIdx == 4)
00084     MIIdx = 0;
00085 }
00086 
00087 MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
00088   // Look past COPY and INSERT_SUBREG instructions to find the
00089   // real definition MI. This is important for _sfp instructions.
00090   unsigned Reg = MI->getOperand(1).getReg();
00091   if (TargetRegisterInfo::isPhysicalRegister(Reg))
00092     return nullptr;
00093 
00094   MachineBasicBlock *MBB = MI->getParent();
00095   MachineInstr *DefMI = MRI->getVRegDef(Reg);
00096   while (true) {
00097     if (DefMI->getParent() != MBB)
00098       break;
00099     if (DefMI->isCopyLike()) {
00100       Reg = DefMI->getOperand(1).getReg();
00101       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
00102         DefMI = MRI->getVRegDef(Reg);
00103         continue;
00104       }
00105     } else if (DefMI->isInsertSubreg()) {
00106       Reg = DefMI->getOperand(2).getReg();
00107       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
00108         DefMI = MRI->getVRegDef(Reg);
00109         continue;
00110       }
00111     }
00112     break;
00113   }
00114   return DefMI;
00115 }
00116 
00117 unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
00118   unsigned Reg = MI->getOperand(0).getReg();
00119   if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
00120       !MRI->hasOneNonDBGUse(Reg))
00121     return Reg;
00122 
00123   MachineBasicBlock *MBB = MI->getParent();
00124   MachineInstr *UseMI = &*MRI->use_instr_nodbg_begin(Reg);
00125   if (UseMI->getParent() != MBB)
00126     return Reg;
00127 
00128   while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
00129     Reg = UseMI->getOperand(0).getReg();
00130     if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
00131         !MRI->hasOneNonDBGUse(Reg))
00132       return Reg;
00133     UseMI = &*MRI->use_instr_nodbg_begin(Reg);
00134     if (UseMI->getParent() != MBB)
00135       return Reg;
00136   }
00137 
00138   return Reg;
00139 }
00140 
00141 /// hasLoopHazard - Check whether an MLx instruction is chained to itself across
00142 /// a single-MBB loop.
00143 bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
00144   unsigned Reg = MI->getOperand(1).getReg();
00145   if (TargetRegisterInfo::isPhysicalRegister(Reg))
00146     return false;
00147 
00148   MachineBasicBlock *MBB = MI->getParent();
00149   MachineInstr *DefMI = MRI->getVRegDef(Reg);
00150   while (true) {
00151 outer_continue:
00152     if (DefMI->getParent() != MBB)
00153       break;
00154 
00155     if (DefMI->isPHI()) {
00156       for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
00157         if (DefMI->getOperand(i + 1).getMBB() == MBB) {
00158           unsigned SrcReg = DefMI->getOperand(i).getReg();
00159           if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
00160             DefMI = MRI->getVRegDef(SrcReg);
00161             goto outer_continue;
00162           }
00163         }
00164       }
00165     } else if (DefMI->isCopyLike()) {
00166       Reg = DefMI->getOperand(1).getReg();
00167       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
00168         DefMI = MRI->getVRegDef(Reg);
00169         continue;
00170       }
00171     } else if (DefMI->isInsertSubreg()) {
00172       Reg = DefMI->getOperand(2).getReg();
00173       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
00174         DefMI = MRI->getVRegDef(Reg);
00175         continue;
00176       }
00177     }
00178 
00179     break;
00180   }
00181 
00182   return DefMI == MI;
00183 }
00184 
00185 bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
00186   // FIXME: Detect integer instructions properly.
00187   const MCInstrDesc &MCID = MI->getDesc();
00188   unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
00189   if (MI->mayStore())
00190     return false;
00191   unsigned Opcode = MCID.getOpcode();
00192   if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
00193     return false;
00194   if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
00195     return MI->readsRegister(Reg, TRI);
00196   return false;
00197 }
00198 
00199 static bool isFpMulInstruction(unsigned Opcode) {
00200   switch (Opcode) {
00201   case ARM::VMULS:
00202   case ARM::VMULfd:
00203   case ARM::VMULfq:
00204   case ARM::VMULD:
00205   case ARM::VMULslfd:
00206   case ARM::VMULslfq:
00207     return true;
00208   default:
00209     return false;
00210   }
00211 }
00212 
00213 bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
00214   if (NumExpand >= ExpandLimit)
00215     return false;
00216 
00217   if (ForceExapnd)
00218     return true;
00219 
00220   MachineInstr *DefMI = getAccDefMI(MI);
00221   if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
00222     // r0 = vmla
00223     // r3 = vmla r0, r1, r2
00224     // takes 16 - 17 cycles
00225     //
00226     // r0 = vmla
00227     // r4 = vmul r1, r2
00228     // r3 = vadd r0, r4
00229     // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
00230     IgnoreStall.insert(DefMI);
00231     return true;
00232   }
00233 
00234   // On Swift, we mostly care about hazards from multiplication instructions
00235   // writing the accumulator and the pipelining of loop iterations by out-of-
00236   // order execution. 
00237   if (isSwift)
00238     return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
00239 
00240   if (IgnoreStall.count(MI))
00241     return false;
00242 
00243   // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
00244   // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
00245   // preserves the in-order retirement of the instructions.
00246   // Look at the next few instructions, if *most* of them can cause hazards,
00247   // then the scheduler can't *fix* this, we'd better break up the VMLA.
00248   unsigned Limit1 = isLikeA9 ? 1 : 4;
00249   unsigned Limit2 = isLikeA9 ? 1 : 4;
00250   for (unsigned i = 1; i <= 4; ++i) {
00251     int Idx = ((int)MIIdx - i + 4) % 4;
00252     MachineInstr *NextMI = LastMIs[Idx];
00253     if (!NextMI)
00254       continue;
00255 
00256     if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
00257       if (i <= Limit1)
00258         return true;
00259     }
00260 
00261     // Look for VMLx RAW hazard.
00262     if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
00263       return true;
00264   }
00265 
00266   return false;
00267 }
00268 
00269 /// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair
00270 /// of MUL + ADD / SUB instructions.
00271 void
00272 MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
00273                                      unsigned MulOpc, unsigned AddSubOpc,
00274                                      bool NegAcc, bool HasLane) {
00275   unsigned DstReg = MI->getOperand(0).getReg();
00276   bool DstDead = MI->getOperand(0).isDead();
00277   unsigned AccReg = MI->getOperand(1).getReg();
00278   unsigned Src1Reg = MI->getOperand(2).getReg();
00279   unsigned Src2Reg = MI->getOperand(3).getReg();
00280   bool Src1Kill = MI->getOperand(2).isKill();
00281   bool Src2Kill = MI->getOperand(3).isKill();
00282   unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
00283   unsigned NextOp = HasLane ? 5 : 4;
00284   ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
00285   unsigned PredReg = MI->getOperand(++NextOp).getReg();
00286 
00287   const MCInstrDesc &MCID1 = TII->get(MulOpc);
00288   const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
00289   const MachineFunction &MF = *MI->getParent()->getParent();
00290   unsigned TmpReg = MRI->createVirtualRegister(
00291                       TII->getRegClass(MCID1, 0, TRI, MF));
00292 
00293   MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
00294     .addReg(Src1Reg, getKillRegState(Src1Kill))
00295     .addReg(Src2Reg, getKillRegState(Src2Kill));
00296   if (HasLane)
00297     MIB.addImm(LaneImm);
00298   MIB.addImm(Pred).addReg(PredReg);
00299 
00300   MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
00301     .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
00302 
00303   if (NegAcc) {
00304     bool AccKill = MRI->hasOneNonDBGUse(AccReg);
00305     MIB.addReg(TmpReg, getKillRegState(true))
00306        .addReg(AccReg, getKillRegState(AccKill));
00307   } else {
00308     MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
00309   }
00310   MIB.addImm(Pred).addReg(PredReg);
00311 
00312   DEBUG({
00313       dbgs() << "Expanding: " << *MI;
00314       dbgs() << "  to:\n";
00315       MachineBasicBlock::iterator MII = MI;
00316       MII = std::prev(MII);
00317       MachineInstr &MI2 = *MII;
00318       MII = std::prev(MII);
00319       MachineInstr &MI1 = *MII;
00320       dbgs() << "    " << MI1;
00321       dbgs() << "    " << MI2;
00322    });
00323 
00324   MI->eraseFromParent();
00325   ++NumExpand;
00326 }
00327 
00328 bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
00329   bool Changed = false;
00330 
00331   clearStack();
00332   IgnoreStall.clear();
00333 
00334   unsigned Skip = 0;
00335   MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
00336   while (MII != E) {
00337     MachineInstr *MI = &*MII;
00338 
00339     if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy()) {
00340       ++MII;
00341       continue;
00342     }
00343 
00344     const MCInstrDesc &MCID = MI->getDesc();
00345     if (MI->isBarrier()) {
00346       clearStack();
00347       Skip = 0;
00348       ++MII;
00349       continue;
00350     }
00351 
00352     unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
00353     if (Domain == ARMII::DomainGeneral) {
00354       if (++Skip == 2)
00355         // Assume dual issues of non-VFP / NEON instructions.
00356         pushStack(nullptr);
00357     } else {
00358       Skip = 0;
00359 
00360       unsigned MulOpc, AddSubOpc;
00361       bool NegAcc, HasLane;
00362       if (!TII->isFpMLxInstruction(MCID.getOpcode(),
00363                                    MulOpc, AddSubOpc, NegAcc, HasLane) ||
00364           !FindMLxHazard(MI))
00365         pushStack(MI);
00366       else {
00367         ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
00368         E = MBB.rend(); // May have changed if MI was the 1st instruction.
00369         Changed = true;
00370         continue;
00371       }
00372     }
00373 
00374     ++MII;
00375   }
00376 
00377   return Changed;
00378 }
00379 
00380 bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
00381   TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
00382   TRI = Fn.getSubtarget().getRegisterInfo();
00383   MRI = &Fn.getRegInfo();
00384   const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
00385   isLikeA9 = STI->isLikeA9() || STI->isSwift();
00386   isSwift = STI->isSwift();
00387 
00388   bool Modified = false;
00389   for (MachineBasicBlock &MBB : Fn)
00390     Modified |= ExpandFPMLxInstructions(MBB);
00391 
00392   return Modified;
00393 }
00394 
00395 FunctionPass *llvm::createMLxExpansionPass() {
00396   return new MLxExpansion();
00397 }