// LLVM API Documentation
00001 //===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of 00011 // multiple and add / sub instructions) when special VMLx hazards are detected. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "ARM.h" 00016 #include "ARMBaseInstrInfo.h" 00017 #include "ARMSubtarget.h" 00018 #include "llvm/ADT/SmallPtrSet.h" 00019 #include "llvm/ADT/Statistic.h" 00020 #include "llvm/CodeGen/MachineFunctionPass.h" 00021 #include "llvm/CodeGen/MachineInstr.h" 00022 #include "llvm/CodeGen/MachineInstrBuilder.h" 00023 #include "llvm/CodeGen/MachineRegisterInfo.h" 00024 #include "llvm/Support/CommandLine.h" 00025 #include "llvm/Support/Debug.h" 00026 #include "llvm/Support/raw_ostream.h" 00027 #include "llvm/Target/TargetRegisterInfo.h" 00028 using namespace llvm; 00029 00030 #define DEBUG_TYPE "mlx-expansion" 00031 00032 static cl::opt<bool> 00033 ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden); 00034 static cl::opt<unsigned> 00035 ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden); 00036 00037 STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded"); 00038 00039 namespace { 00040 struct MLxExpansion : public MachineFunctionPass { 00041 static char ID; 00042 MLxExpansion() : MachineFunctionPass(ID) {} 00043 00044 bool runOnMachineFunction(MachineFunction &Fn) override; 00045 00046 const char *getPassName() const override { 00047 return "ARM MLA / MLS expansion pass"; 00048 } 00049 00050 private: 00051 const ARMBaseInstrInfo *TII; 00052 const TargetRegisterInfo *TRI; 00053 MachineRegisterInfo 
*MRI; 00054 00055 bool isLikeA9; 00056 bool isSwift; 00057 unsigned MIIdx; 00058 MachineInstr* LastMIs[4]; 00059 SmallPtrSet<MachineInstr*, 4> IgnoreStall; 00060 00061 void clearStack(); 00062 void pushStack(MachineInstr *MI); 00063 MachineInstr *getAccDefMI(MachineInstr *MI) const; 00064 unsigned getDefReg(MachineInstr *MI) const; 00065 bool hasLoopHazard(MachineInstr *MI) const; 00066 bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const; 00067 bool FindMLxHazard(MachineInstr *MI); 00068 void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, 00069 unsigned MulOpc, unsigned AddSubOpc, 00070 bool NegAcc, bool HasLane); 00071 bool ExpandFPMLxInstructions(MachineBasicBlock &MBB); 00072 }; 00073 char MLxExpansion::ID = 0; 00074 } 00075 00076 void MLxExpansion::clearStack() { 00077 std::fill(LastMIs, LastMIs + 4, nullptr); 00078 MIIdx = 0; 00079 } 00080 00081 void MLxExpansion::pushStack(MachineInstr *MI) { 00082 LastMIs[MIIdx] = MI; 00083 if (++MIIdx == 4) 00084 MIIdx = 0; 00085 } 00086 00087 MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { 00088 // Look past COPY and INSERT_SUBREG instructions to find the 00089 // real definition MI. This is important for _sfp instructions. 
00090 unsigned Reg = MI->getOperand(1).getReg(); 00091 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 00092 return nullptr; 00093 00094 MachineBasicBlock *MBB = MI->getParent(); 00095 MachineInstr *DefMI = MRI->getVRegDef(Reg); 00096 while (true) { 00097 if (DefMI->getParent() != MBB) 00098 break; 00099 if (DefMI->isCopyLike()) { 00100 Reg = DefMI->getOperand(1).getReg(); 00101 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 00102 DefMI = MRI->getVRegDef(Reg); 00103 continue; 00104 } 00105 } else if (DefMI->isInsertSubreg()) { 00106 Reg = DefMI->getOperand(2).getReg(); 00107 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 00108 DefMI = MRI->getVRegDef(Reg); 00109 continue; 00110 } 00111 } 00112 break; 00113 } 00114 return DefMI; 00115 } 00116 00117 unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { 00118 unsigned Reg = MI->getOperand(0).getReg(); 00119 if (TargetRegisterInfo::isPhysicalRegister(Reg) || 00120 !MRI->hasOneNonDBGUse(Reg)) 00121 return Reg; 00122 00123 MachineBasicBlock *MBB = MI->getParent(); 00124 MachineInstr *UseMI = &*MRI->use_instr_nodbg_begin(Reg); 00125 if (UseMI->getParent() != MBB) 00126 return Reg; 00127 00128 while (UseMI->isCopy() || UseMI->isInsertSubreg()) { 00129 Reg = UseMI->getOperand(0).getReg(); 00130 if (TargetRegisterInfo::isPhysicalRegister(Reg) || 00131 !MRI->hasOneNonDBGUse(Reg)) 00132 return Reg; 00133 UseMI = &*MRI->use_instr_nodbg_begin(Reg); 00134 if (UseMI->getParent() != MBB) 00135 return Reg; 00136 } 00137 00138 return Reg; 00139 } 00140 00141 /// hasLoopHazard - Check whether an MLx instruction is chained to itself across 00142 /// a single-MBB loop. 
00143 bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const { 00144 unsigned Reg = MI->getOperand(1).getReg(); 00145 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 00146 return false; 00147 00148 MachineBasicBlock *MBB = MI->getParent(); 00149 MachineInstr *DefMI = MRI->getVRegDef(Reg); 00150 while (true) { 00151 outer_continue: 00152 if (DefMI->getParent() != MBB) 00153 break; 00154 00155 if (DefMI->isPHI()) { 00156 for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { 00157 if (DefMI->getOperand(i + 1).getMBB() == MBB) { 00158 unsigned SrcReg = DefMI->getOperand(i).getReg(); 00159 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { 00160 DefMI = MRI->getVRegDef(SrcReg); 00161 goto outer_continue; 00162 } 00163 } 00164 } 00165 } else if (DefMI->isCopyLike()) { 00166 Reg = DefMI->getOperand(1).getReg(); 00167 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 00168 DefMI = MRI->getVRegDef(Reg); 00169 continue; 00170 } 00171 } else if (DefMI->isInsertSubreg()) { 00172 Reg = DefMI->getOperand(2).getReg(); 00173 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 00174 DefMI = MRI->getVRegDef(Reg); 00175 continue; 00176 } 00177 } 00178 00179 break; 00180 } 00181 00182 return DefMI == MI; 00183 } 00184 00185 bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { 00186 // FIXME: Detect integer instructions properly. 
00187 const MCInstrDesc &MCID = MI->getDesc(); 00188 unsigned Domain = MCID.TSFlags & ARMII::DomainMask; 00189 if (MI->mayStore()) 00190 return false; 00191 unsigned Opcode = MCID.getOpcode(); 00192 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 00193 return false; 00194 if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) 00195 return MI->readsRegister(Reg, TRI); 00196 return false; 00197 } 00198 00199 static bool isFpMulInstruction(unsigned Opcode) { 00200 switch (Opcode) { 00201 case ARM::VMULS: 00202 case ARM::VMULfd: 00203 case ARM::VMULfq: 00204 case ARM::VMULD: 00205 case ARM::VMULslfd: 00206 case ARM::VMULslfq: 00207 return true; 00208 default: 00209 return false; 00210 } 00211 } 00212 00213 bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { 00214 if (NumExpand >= ExpandLimit) 00215 return false; 00216 00217 if (ForceExapnd) 00218 return true; 00219 00220 MachineInstr *DefMI = getAccDefMI(MI); 00221 if (TII->isFpMLxInstruction(DefMI->getOpcode())) { 00222 // r0 = vmla 00223 // r3 = vmla r0, r1, r2 00224 // takes 16 - 17 cycles 00225 // 00226 // r0 = vmla 00227 // r4 = vmul r1, r2 00228 // r3 = vadd r0, r4 00229 // takes about 14 - 15 cycles even with vmul stalling for 4 cycles. 00230 IgnoreStall.insert(DefMI); 00231 return true; 00232 } 00233 00234 // On Swift, we mostly care about hazards from multiplication instructions 00235 // writing the accumulator and the pipelining of loop iterations by out-of- 00236 // order execution. 00237 if (isSwift) 00238 return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI); 00239 00240 if (IgnoreStall.count(MI)) 00241 return false; 00242 00243 // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the 00244 // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall 00245 // preserves the in-order retirement of the instructions. 
00246 // Look at the next few instructions, if *most* of them can cause hazards, 00247 // then the scheduler can't *fix* this, we'd better break up the VMLA. 00248 unsigned Limit1 = isLikeA9 ? 1 : 4; 00249 unsigned Limit2 = isLikeA9 ? 1 : 4; 00250 for (unsigned i = 1; i <= 4; ++i) { 00251 int Idx = ((int)MIIdx - i + 4) % 4; 00252 MachineInstr *NextMI = LastMIs[Idx]; 00253 if (!NextMI) 00254 continue; 00255 00256 if (TII->canCauseFpMLxStall(NextMI->getOpcode())) { 00257 if (i <= Limit1) 00258 return true; 00259 } 00260 00261 // Look for VMLx RAW hazard. 00262 if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI)) 00263 return true; 00264 } 00265 00266 return false; 00267 } 00268 00269 /// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair 00270 /// of MUL + ADD / SUB instructions. 00271 void 00272 MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, 00273 unsigned MulOpc, unsigned AddSubOpc, 00274 bool NegAcc, bool HasLane) { 00275 unsigned DstReg = MI->getOperand(0).getReg(); 00276 bool DstDead = MI->getOperand(0).isDead(); 00277 unsigned AccReg = MI->getOperand(1).getReg(); 00278 unsigned Src1Reg = MI->getOperand(2).getReg(); 00279 unsigned Src2Reg = MI->getOperand(3).getReg(); 00280 bool Src1Kill = MI->getOperand(2).isKill(); 00281 bool Src2Kill = MI->getOperand(3).isKill(); 00282 unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0; 00283 unsigned NextOp = HasLane ? 
5 : 4; 00284 ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm(); 00285 unsigned PredReg = MI->getOperand(++NextOp).getReg(); 00286 00287 const MCInstrDesc &MCID1 = TII->get(MulOpc); 00288 const MCInstrDesc &MCID2 = TII->get(AddSubOpc); 00289 const MachineFunction &MF = *MI->getParent()->getParent(); 00290 unsigned TmpReg = MRI->createVirtualRegister( 00291 TII->getRegClass(MCID1, 0, TRI, MF)); 00292 00293 MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg) 00294 .addReg(Src1Reg, getKillRegState(Src1Kill)) 00295 .addReg(Src2Reg, getKillRegState(Src2Kill)); 00296 if (HasLane) 00297 MIB.addImm(LaneImm); 00298 MIB.addImm(Pred).addReg(PredReg); 00299 00300 MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2) 00301 .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)); 00302 00303 if (NegAcc) { 00304 bool AccKill = MRI->hasOneNonDBGUse(AccReg); 00305 MIB.addReg(TmpReg, getKillRegState(true)) 00306 .addReg(AccReg, getKillRegState(AccKill)); 00307 } else { 00308 MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true)); 00309 } 00310 MIB.addImm(Pred).addReg(PredReg); 00311 00312 DEBUG({ 00313 dbgs() << "Expanding: " << *MI; 00314 dbgs() << " to:\n"; 00315 MachineBasicBlock::iterator MII = MI; 00316 MII = std::prev(MII); 00317 MachineInstr &MI2 = *MII; 00318 MII = std::prev(MII); 00319 MachineInstr &MI1 = *MII; 00320 dbgs() << " " << MI1; 00321 dbgs() << " " << MI2; 00322 }); 00323 00324 MI->eraseFromParent(); 00325 ++NumExpand; 00326 } 00327 00328 bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { 00329 bool Changed = false; 00330 00331 clearStack(); 00332 IgnoreStall.clear(); 00333 00334 unsigned Skip = 0; 00335 MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend(); 00336 while (MII != E) { 00337 MachineInstr *MI = &*MII; 00338 00339 if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy()) { 00340 ++MII; 00341 continue; 00342 } 00343 00344 const MCInstrDesc &MCID = MI->getDesc(); 
00345 if (MI->isBarrier()) { 00346 clearStack(); 00347 Skip = 0; 00348 ++MII; 00349 continue; 00350 } 00351 00352 unsigned Domain = MCID.TSFlags & ARMII::DomainMask; 00353 if (Domain == ARMII::DomainGeneral) { 00354 if (++Skip == 2) 00355 // Assume dual issues of non-VFP / NEON instructions. 00356 pushStack(nullptr); 00357 } else { 00358 Skip = 0; 00359 00360 unsigned MulOpc, AddSubOpc; 00361 bool NegAcc, HasLane; 00362 if (!TII->isFpMLxInstruction(MCID.getOpcode(), 00363 MulOpc, AddSubOpc, NegAcc, HasLane) || 00364 !FindMLxHazard(MI)) 00365 pushStack(MI); 00366 else { 00367 ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane); 00368 E = MBB.rend(); // May have changed if MI was the 1st instruction. 00369 Changed = true; 00370 continue; 00371 } 00372 } 00373 00374 ++MII; 00375 } 00376 00377 return Changed; 00378 } 00379 00380 bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { 00381 TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); 00382 TRI = Fn.getSubtarget().getRegisterInfo(); 00383 MRI = &Fn.getRegInfo(); 00384 const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); 00385 isLikeA9 = STI->isLikeA9() || STI->isSwift(); 00386 isSwift = STI->isSwift(); 00387 00388 bool Modified = false; 00389 for (MachineBasicBlock &MBB : Fn) 00390 Modified |= ExpandFPMLxInstructions(MBB); 00391 00392 return Modified; 00393 } 00394 00395 FunctionPass *llvm::createMLxExpansionPass() { 00396 return new MLxExpansion(); 00397 }