//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(this, &STI), Subtarget(STI) {}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI->getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);

  const MCInstrDesc &Desc = MI->getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    return 4;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    return 0;
  }

  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
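  // Note (explanatory, derived from the cases below): the Cond vector is
  // encoded as follows -- a plain Bcc pushes just its condition-code operand,
  // while the compare-and-branch forms push -1, the branch opcode, the tested
  // register, and (for TBZ/TBNZ) the bit number.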
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

// Branch analysis.
bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now if the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
    const SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).addReg(Cond[2].getReg());
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::InsertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);
    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I, DebugLoc DL,
                                    unsigned DstReg,
                                    const SmallVectorImpl<MachineOperand> &Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = 0;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = 1;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = 0;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = 1;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
      CC);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
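// Note (explanatory): on Cortex-A53/A57 only the forms listed below -- add/sub
// with an unshifted immediate, logical operations on an immediate, and logical
// operations on unshifted registers -- are treated as being as cheap as a
// move; other subtargets simply fall back to the generic isAsCheapAsAMove
// flag on the instruction.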
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
  if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53())
    return MI->isAsCheapAsAMove();

  switch (MI->getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI->getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool
AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
                                                  MachineInstr *MIb,
                                                  AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int OffsetA = 0, OffsetB = 0;
  int WidthA = 0, WidthB = 0;

  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
         "MIa must be a store or a load");
  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
         "MIb must be a store or a load");

  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = MI->getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: In order to convert CmpValue to 0 or 1
    CmpValue = (MI->getOperand(2).getImm() != 0);
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk
    // CmpValue is only used to compare with zero in OptimizeCompareInstr
    CmpValue = (AArch64_AM::decodeLogicalImmediate(
                    MI->getOperand(2).getImm(),
                    MI->getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0);
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr *Instr) {
  MachineBasicBlock *MBB = Instr->getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetMachine *TM = &MF->getTarget();
  const TargetInstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
  const TargetRegisterInfo *TRI = TM->getSubtargetImpl()->getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr->getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr->getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// convertFlagSettingOpcode - return opcode that does not
/// set flags when possible. The caller is responsible to do
/// the actual substitution and legality checking.
static unsigned convertFlagSettingOpcode(MachineInstr *MI) {
  unsigned NewOpc;
  switch (MI->getOpcode()) {
  default:
    return MI->getOpcode();
  case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break;
  case AArch64::ADDSWri: NewOpc = AArch64::ADDWri; break;
  case AArch64::ADDSWrs: NewOpc = AArch64::ADDWrs; break;
  case AArch64::ADDSWrx: NewOpc = AArch64::ADDWrx; break;
  case AArch64::ADDSXrr: NewOpc = AArch64::ADDXrr; break;
  case AArch64::ADDSXri: NewOpc = AArch64::ADDXri; break;
  case AArch64::ADDSXrs: NewOpc = AArch64::ADDXrs; break;
  case AArch64::ADDSXrx: NewOpc = AArch64::ADDXrx; break;
  case AArch64::SUBSWrr: NewOpc = AArch64::SUBWrr; break;
  case AArch64::SUBSWri: NewOpc = AArch64::SUBWri; break;
  case AArch64::SUBSWrs: NewOpc = AArch64::SUBWrs; break;
  case AArch64::SUBSWrx: NewOpc = AArch64::SUBWrx; break;
  case AArch64::SUBSXrr: NewOpc = AArch64::SUBXrr; break;
  case AArch64::SUBSXri: NewOpc = AArch64::SUBXri; break;
  case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break;
  case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break;
  }
  return NewOpc;
}

/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (Cmp_NZCV != -1) {
    unsigned Opc = CmpInstr->getOpcode();
    unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr->setDesc(MCID);
    CmpInstr->RemoveOperand(Cmp_NZCV);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare
  // function.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
    return false;

  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  // We iterate backward, starting from the instruction before CmpInstr and
  // stop when reaching the definition of the source register or done with the
  // basic block, to check whether NZCV is used or modified in between.
  MachineBasicBlock::iterator I = CmpInstr, E = MI,
                              B = CmpInstr->getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B)
    return false;

  // Check whether the definition of SrcReg is in the same basic block as
  // Compare. If not, we can't optimize away the Compare.
  if (MI->getParent() != CmpInstr->getParent())
    return false;

  // Check that NZCV isn't set between the comparison instruction and the one we
  // want to change.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  for (--I; I != E; --I) {
    const MachineInstr &Instr = *I;

    if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
        Instr.readsRegister(AArch64::NZCV, TRI))
      // This instruction modifies or uses NZCV after the one we want to
      // change. We can't do this transformation.
      return false;
    if (I == B)
      // The 'and' is below the comparison instruction.
      return false;
  }

  unsigned NewOpc = MI->getOpcode();
  switch (MI->getOpcode()) {
  default:
    return false;
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    break;
  case AArch64::ADDWrr: NewOpc = AArch64::ADDSWrr; break;
  case AArch64::ADDWri: NewOpc = AArch64::ADDSWri; break;
  case AArch64::ADDXrr: NewOpc = AArch64::ADDSXrr; break;
  case AArch64::ADDXri: NewOpc = AArch64::ADDSXri; break;
  case AArch64::ADCWr: NewOpc = AArch64::ADCSWr; break;
  case AArch64::ADCXr: NewOpc = AArch64::ADCSXr; break;
  case AArch64::SUBWrr: NewOpc = AArch64::SUBSWrr; break;
  case AArch64::SUBWri: NewOpc = AArch64::SUBSWri; break;
  case AArch64::SUBXrr: NewOpc = AArch64::SUBSXrr; break;
  case AArch64::SUBXri: NewOpc = AArch64::SUBSXri; break;
  case AArch64::SBCWr: NewOpc = AArch64::SBCSWr; break;
  case AArch64::SBCXr: NewOpc = AArch64::SBCSXr; break;
  case AArch64::ANDWri: NewOpc = AArch64::ANDSWri; break;
  case AArch64::ANDXri: NewOpc = AArch64::ANDSXri; break;
  }

  // Scan forward for the use of NZCV.
  // When checking against MI: if it's a condition code that requires
  // checking of the V bit, then this is not safe to do.
  // It is safe to remove CmpInstr if NZCV is redefined or killed.
  // If we are done with the basic block, we need to check whether NZCV is
  // live-out.
  bool IsSafe = false;
  for (MachineBasicBlock::iterator I = CmpInstr,
                                   E = CmpInstr->getParent()->end();
       !IsSafe && ++I != E;) {
    const MachineInstr &Instr = *I;
    for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
         ++IO) {
      const MachineOperand &MO = Instr.getOperand(IO);
      if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) {
        IsSafe = true;
        break;
      }
      if (!MO.isReg() || MO.getReg() != AArch64::NZCV)
        continue;
      if (MO.isDef()) {
        IsSafe = true;
        break;
      }

      // Decode the condition code.
      unsigned Opc = Instr.getOpcode();
      AArch64CC::CondCode CC;
      switch (Opc) {
      default:
        return false;
      case AArch64::Bcc:
        CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm();
        break;
      case AArch64::CSINVWr:
      case AArch64::CSINVXr:
      case AArch64::CSINCWr:
      case AArch64::CSINCXr:
      case AArch64::CSELWr:
      case AArch64::CSELXr:
      case AArch64::CSNEGWr:
      case AArch64::CSNEGXr:
      case AArch64::FCSELSrrr:
      case AArch64::FCSELDrrr:
        CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm();
        break;
      }

      // It is not safe to remove Compare instruction if Overflow(V) is used.
      switch (CC) {
      default:
        // NZCV can be used multiple times, we should continue.
        break;
      case AArch64CC::VS:
      case AArch64CC::VC:
      case AArch64CC::GE:
      case AArch64CC::LT:
      case AArch64CC::GT:
      case AArch64CC::LE:
        return false;
      }
    }
  }

  // If NZCV is not killed nor re-defined, we should check whether it is
  // live-out. If it is live-out, do not optimize.
  if (!IsSafe) {
    MachineBasicBlock *ParentBlock = CmpInstr->getParent();
    for (auto *MBB : ParentBlock->successors())
      if (MBB->isLiveIn(AArch64::NZCV))
        return false;
  }

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr->eraseFromParent();
  bool succeeded = UpdateOperandRegClass(MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

bool
AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  if (MI->getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Reg = MI->getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI->memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill).addImm(0)
        .addMemOperand(*MI->memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill).addImm(0)
        .addMemOperand(*MI->memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI->memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}

/// Return true if this instruction has a non-zero immediate.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::CRC32Brr:
  case AArch64::CRC32CBrr:
  case AArch64::CRC32CHrr:
  case AArch64::CRC32CWrr:
  case AArch64::CRC32CXrr:
  case AArch64::CRC32Hrr:
  case AArch64::CRC32Wrr:
  case AArch64::CRC32Xrr:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI->getOperand(3).isImm()) {
      unsigned val = MI->getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction has a non-zero immediate.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI->getOperand(3).isImm()) {
      unsigned val = MI->getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
      assert(MI->getDesc().getNumOperands() == 3 &&
             MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI->getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI->getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI->getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
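// (For example, "orr Xd, xzr, Xm" and "add Xd, Xn, #0" below are treated as
// pure register renames.)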
bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI->getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI->getOperand(1).getReg() == AArch64::XZR) {
      assert(MI->getDesc().getNumOperands() == 4 &&
             MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI->getOperand(2).getImm() == 0) {
      assert(MI->getDesc().getNumOperands() == 4 &&
             MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI->getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
      assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                               int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                              int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    unsigned Val = MI->getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  return false;
}

/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const {
  assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
         "Too many target MO flags");
  for (auto *MM : MI->memoperands()) {
    if (MM->getFlags() &
        (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) {
      return true;
    }
  }
  return false;
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
  if (MI->memoperands_empty())
    return;

  assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
         "Too many target MO flags");
  (*MI->memoperands_begin())
      ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit);
}

bool
AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
                                       unsigned &Offset,
                                       const TargetRegisterInfo *TRI) const {
  switch (LdSt->getOpcode()) {
  default:
    return false;
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
    if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
      return false;
    BaseReg = LdSt->getOperand(1).getReg();
    MachineFunction &MF = *LdSt->getParent()->getParent();
    unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize();
    Offset = LdSt->getOperand(2).getImm() * Width;
    return true;
  }
}

bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
    MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
    const TargetRegisterInfo *TRI) const {
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt->getNumOperands() != 3)
    return false;
  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
    return false;

  // Offset is calculated as the immediate operand multiplied by the scaling
  // factor. Unscaled instructions have scaling factor set to 1.
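  // For example, a scaled LDRXui with immediate 2 addresses [base, #16],
  // whereas the unscaled LDURXi form uses its signed byte offset directly.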
  int Scale = 0;
  switch (LdSt->getOpcode()) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    break;
  case AArch64::LDRXui:
  case AArch64::STRXui:
    Scale = Width = 8;
    break;
  case AArch64::LDRWui:
  case AArch64::STRWui:
    Scale = Width = 4;
    break;
  case AArch64::LDRBui:
  case AArch64::STRBui:
    Scale = Width = 1;
    break;
  case AArch64::LDRHui:
  case AArch64::STRHui:
    Scale = Width = 2;
    break;
  case AArch64::LDRSui:
  case AArch64::STRSui:
    Scale = Width = 4;
    break;
  case AArch64::LDRDui:
  case AArch64::STRDui:
    Scale = Width = 8;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    break;
  case AArch64::LDRBBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    break;
  case AArch64::LDRHHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    break;
  }

  BaseReg = LdSt->getOperand(1).getReg();
  Offset = LdSt->getOperand(2).getImm() * Scale;
  return true;
}

/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
                                          MachineInstr *SecondLdSt,
                                          unsigned NumLoads) const {
  // Only cluster up to a single pair.
  if (NumLoads > 1)
    return false;
  if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode())
    return false;
  // getLdStBaseRegImmOfs guarantees that operand 2 is an immediate.
  unsigned Ofs1 = FirstLdSt->getOperand(2).getImm();
  // Allow 6 bits of positive range.
  if (Ofs1 > 64)
    return false;
  // The caller should already have ordered First/SecondLdSt by offset.
  unsigned Ofs2 = SecondLdSt->getOperand(2).getImm();
  return Ofs1 + 1 == Ofs2;
}

bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
                                              MachineInstr *Second) const {
  // Cyclone can fuse CMN, CMP followed by Bcc.

  // FIXME: B0 can also fuse:
  // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ.
  if (Second->getOpcode() != AArch64::Bcc)
    return false;
  switch (First->getOpcode()) {
  default:
    return false;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::ANDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
  case AArch64::ANDSXri:
    return true;
  }
}

MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
                                                         int FrameIx,
                                                         uint64_t Offset,
                                                         const MDNode *MDPtr,
                                                         DebugLoc DL) const {
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
                                .addFrameIndex(FrameIx)
                                .addImm(0)
                                .addImm(Offset)
                                .addMetadata(MDPtr);
  return &*MIB;
}

static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here, that happens to be
  // easily obtainable with a mask.
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}

void AArch64InstrInfo::copyPhysRegTuple(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
    unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
    llvm::ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() &&
         "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}

void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  if (AArch64::GPR32spRegClass.contains(DestReg) &&
      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    const TargetRegisterInfo *TRI = &getRegisterInfo();

    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
      // If either operand is WSP, expand to ADD #0.
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers.
        // This may upset the register scavenger and machine verifier, so we
        // need to indicate that we are reading an undefined value from
        // SrcRegX, but a proper value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
            .addReg(SrcRegX, RegState::Undef)
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc))
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
      }
    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
            .addReg(AArch64::XZR)
            .addReg(SrcRegX, RegState::Undef)
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        // Otherwise, expand to ORR WZR.
        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
            .addReg(AArch64::WZR)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }
    }
    return;
  }

  if (AArch64::GPR64spRegClass.contains(DestReg) &&
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
      // If either operand is SP, expand to ADD #0.
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      // Otherwise, expand to ORR XZR.
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
          .addReg(AArch64::XZR)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copy a DDDD register quad by copying the individual sub-registers.
  if (AArch64::DDDDRegClass.contains(DestReg) &&
      AArch64::DDDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
                                        AArch64::dsub2, AArch64::dsub3 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DDD register triple by copying the individual sub-registers.
  if (AArch64::DDDRegClass.contains(DestReg) &&
      AArch64::DDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
                                        AArch64::dsub2 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DD register pair by copying the individual sub-registers.
  if (AArch64::DDRegClass.contains(DestReg) &&
      AArch64::DDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a QQQQ register quad by copying the individual sub-registers.
  if (AArch64::QQQQRegClass.contains(DestReg) &&
      AArch64::QQQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
                                        AArch64::qsub2, AArch64::qsub3 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQQ register triple by copying the individual sub-registers.
  if (AArch64::QQQRegClass.contains(DestReg) &&
      AArch64::QQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
                                        AArch64::qsub2 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }

  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
DestReg) 01688 .addReg(SrcReg) 01689 .addReg(SrcReg, getKillRegState(KillSrc)); 01690 } else { 01691 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 01692 .addReg(SrcReg, getKillRegState(KillSrc)); 01693 } 01694 return; 01695 } 01696 01697 if (AArch64::FPR16RegClass.contains(DestReg) && 01698 AArch64::FPR16RegClass.contains(SrcReg)) { 01699 if(Subtarget.hasNEON()) { 01700 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 01701 &AArch64::FPR128RegClass); 01702 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 01703 &AArch64::FPR128RegClass); 01704 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 01705 .addReg(SrcReg) 01706 .addReg(SrcReg, getKillRegState(KillSrc)); 01707 } else { 01708 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 01709 &AArch64::FPR32RegClass); 01710 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 01711 &AArch64::FPR32RegClass); 01712 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 01713 .addReg(SrcReg, getKillRegState(KillSrc)); 01714 } 01715 return; 01716 } 01717 01718 if (AArch64::FPR8RegClass.contains(DestReg) && 01719 AArch64::FPR8RegClass.contains(SrcReg)) { 01720 if(Subtarget.hasNEON()) { 01721 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 01722 &AArch64::FPR128RegClass); 01723 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 01724 &AArch64::FPR128RegClass); 01725 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 01726 .addReg(SrcReg) 01727 .addReg(SrcReg, getKillRegState(KillSrc)); 01728 } else { 01729 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 01730 &AArch64::FPR32RegClass); 01731 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 01732 &AArch64::FPR32RegClass); 01733 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 01734 .addReg(SrcReg, getKillRegState(KillSrc)); 01735 } 01736 return; 01737 } 01738 01739 // Copies between GPR64 and FPR64. 01740 if (AArch64::FPR64RegClass.contains(DestReg) && 01741 AArch64::GPR64RegClass.contains(SrcReg)) { 01742 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) 01743 .addReg(SrcReg, getKillRegState(KillSrc)); 01744 return; 01745 } 01746 if (AArch64::GPR64RegClass.contains(DestReg) && 01747 AArch64::FPR64RegClass.contains(SrcReg)) { 01748 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) 01749 .addReg(SrcReg, getKillRegState(KillSrc)); 01750 return; 01751 } 01752 // Copies between GPR32 and FPR32. 
01753 if (AArch64::FPR32RegClass.contains(DestReg) && 01754 AArch64::GPR32RegClass.contains(SrcReg)) { 01755 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) 01756 .addReg(SrcReg, getKillRegState(KillSrc)); 01757 return; 01758 } 01759 if (AArch64::GPR32RegClass.contains(DestReg) && 01760 AArch64::FPR32RegClass.contains(SrcReg)) { 01761 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) 01762 .addReg(SrcReg, getKillRegState(KillSrc)); 01763 return; 01764 } 01765 01766 if (DestReg == AArch64::NZCV) { 01767 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); 01768 BuildMI(MBB, I, DL, get(AArch64::MSR)) 01769 .addImm(AArch64SysReg::NZCV) 01770 .addReg(SrcReg, getKillRegState(KillSrc)) 01771 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); 01772 return; 01773 } 01774 01775 if (SrcReg == AArch64::NZCV) { 01776 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); 01777 BuildMI(MBB, I, DL, get(AArch64::MRS)) 01778 .addReg(DestReg) 01779 .addImm(AArch64SysReg::NZCV) 01780 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); 01781 return; 01782 } 01783 01784 llvm_unreachable("unimplemented reg-to-reg copy"); 01785 } 01786 01787 void AArch64InstrInfo::storeRegToStackSlot( 01788 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, 01789 bool isKill, int FI, const TargetRegisterClass *RC, 01790 const TargetRegisterInfo *TRI) const { 01791 DebugLoc DL; 01792 if (MBBI != MBB.end()) 01793 DL = MBBI->getDebugLoc(); 01794 MachineFunction &MF = *MBB.getParent(); 01795 MachineFrameInfo &MFI = *MF.getFrameInfo(); 01796 unsigned Align = MFI.getObjectAlignment(FI); 01797 01798 MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); 01799 MachineMemOperand *MMO = MF.getMachineMemOperand( 01800 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); 01801 unsigned Opc = 0; 01802 bool Offset = true; 01803 switch (RC->getSize()) { 01804 case 1: 01805 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 01806 Opc = AArch64::STRBui; 01807 break; 01808 case 2: 01809 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 01810 Opc = AArch64::STRHui; 01811 break; 01812 case 4: 01813 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 01814 Opc = AArch64::STRWui; 01815 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 01816 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); 01817 else 01818 assert(SrcReg != AArch64::WSP); 01819 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 01820 Opc = AArch64::STRSui; 01821 break; 01822 case 8: 01823 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 01824 Opc = AArch64::STRXui; 01825 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 01826 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 01827 else 01828 assert(SrcReg != AArch64::SP); 01829 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) 01830 Opc = AArch64::STRDui; 01831 break; 01832 case 16: 01833 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 01834 Opc = AArch64::STRQui; 01835 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 01836 assert(Subtarget.hasNEON() && 01837 "Unexpected register store without NEON"); 01838 Opc = AArch64::ST1Twov1d, Offset = false; 01839 } 01840 break; 01841 case 24: 01842 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 01843 assert(Subtarget.hasNEON() && 01844 "Unexpected register store without NEON"); 01845 Opc = AArch64::ST1Threev1d, Offset = false; 01846 } 01847 break; 01848 case 32: 01849 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 01850 
assert(Subtarget.hasNEON() && 01851 "Unexpected register store without NEON"); 01852 Opc = AArch64::ST1Fourv1d, Offset = false; 01853 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 01854 assert(Subtarget.hasNEON() && 01855 "Unexpected register store without NEON"); 01856 Opc = AArch64::ST1Twov2d, Offset = false; 01857 } 01858 break; 01859 case 48: 01860 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 01861 assert(Subtarget.hasNEON() && 01862 "Unexpected register store without NEON"); 01863 Opc = AArch64::ST1Threev2d, Offset = false; 01864 } 01865 break; 01866 case 64: 01867 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 01868 assert(Subtarget.hasNEON() && 01869 "Unexpected register store without NEON"); 01870 Opc = AArch64::ST1Fourv2d, Offset = false; 01871 } 01872 break; 01873 } 01874 assert(Opc && "Unknown register class"); 01875 01876 const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) 01877 .addReg(SrcReg, getKillRegState(isKill)) 01878 .addFrameIndex(FI); 01879 01880 if (Offset) 01881 MI.addImm(0); 01882 MI.addMemOperand(MMO); 01883 } 01884 01885 void AArch64InstrInfo::loadRegFromStackSlot( 01886 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, 01887 int FI, const TargetRegisterClass *RC, 01888 const TargetRegisterInfo *TRI) const { 01889 DebugLoc DL; 01890 if (MBBI != MBB.end()) 01891 DL = MBBI->getDebugLoc(); 01892 MachineFunction &MF = *MBB.getParent(); 01893 MachineFrameInfo &MFI = *MF.getFrameInfo(); 01894 unsigned Align = MFI.getObjectAlignment(FI); 01895 MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); 01896 MachineMemOperand *MMO = MF.getMachineMemOperand( 01897 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); 01898 01899 unsigned Opc = 0; 01900 bool Offset = true; 01901 switch (RC->getSize()) { 01902 case 1: 01903 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 01904 Opc = AArch64::LDRBui; 01905 break; 01906 case 2: 01907 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 01908 Opc = AArch64::LDRHui; 01909 break; 01910 case 4: 01911 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 01912 Opc = AArch64::LDRWui; 01913 if (TargetRegisterInfo::isVirtualRegister(DestReg)) 01914 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); 01915 else 01916 assert(DestReg != AArch64::WSP); 01917 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 01918 Opc = AArch64::LDRSui; 01919 break; 01920 case 8: 01921 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 01922 Opc = AArch64::LDRXui; 01923 if (TargetRegisterInfo::isVirtualRegister(DestReg)) 01924 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); 01925 else 01926 assert(DestReg != AArch64::SP); 01927 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) 01928 Opc = AArch64::LDRDui; 01929 break; 01930 case 16: 01931 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 01932 Opc = AArch64::LDRQui; 01933 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 01934 assert(Subtarget.hasNEON() && 01935 "Unexpected register load without NEON"); 01936 Opc = AArch64::LD1Twov1d, Offset = false; 01937 } 01938 break; 01939 case 24: 01940 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 01941 assert(Subtarget.hasNEON() && 01942 "Unexpected register load without NEON"); 01943 Opc = AArch64::LD1Threev1d, Offset = false; 01944 } 01945 break; 01946 case 32: 01947 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 01948 assert(Subtarget.hasNEON() && 01949 "Unexpected register load without NEON"); 01950 Opc = AArch64::LD1Fourv1d, Offset = false; 01951 } else if 
(AArch64::QQRegClass.hasSubClassEq(RC)) { 01952 assert(Subtarget.hasNEON() && 01953 "Unexpected register load without NEON"); 01954 Opc = AArch64::LD1Twov2d, Offset = false; 01955 } 01956 break; 01957 case 48: 01958 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 01959 assert(Subtarget.hasNEON() && 01960 "Unexpected register load without NEON"); 01961 Opc = AArch64::LD1Threev2d, Offset = false; 01962 } 01963 break; 01964 case 64: 01965 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 01966 assert(Subtarget.hasNEON() && 01967 "Unexpected register load without NEON"); 01968 Opc = AArch64::LD1Fourv2d, Offset = false; 01969 } 01970 break; 01971 } 01972 assert(Opc && "Unknown register class"); 01973 01974 const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) 01975 .addReg(DestReg, getDefRegState(true)) 01976 .addFrameIndex(FI); 01977 if (Offset) 01978 MI.addImm(0); 01979 MI.addMemOperand(MMO); 01980 } 01981 01982 void llvm::emitFrameOffset(MachineBasicBlock &MBB, 01983 MachineBasicBlock::iterator MBBI, DebugLoc DL, 01984 unsigned DestReg, unsigned SrcReg, int Offset, 01985 const TargetInstrInfo *TII, 01986 MachineInstr::MIFlag Flag, bool SetNZCV) { 01987 if (DestReg == SrcReg && Offset == 0) 01988 return; 01989 01990 bool isSub = Offset < 0; 01991 if (isSub) 01992 Offset = -Offset; 01993 01994 // FIXME: If the offset won't fit in 24-bits, compute the offset into a 01995 // scratch register. If DestReg is a virtual register, use it as the 01996 // scratch register; otherwise, create a new virtual register (to be 01997 // replaced by the scavenger at the end of PEI). That case can be optimized 01998 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch 01999 // register can be loaded with offset%8 and the add/sub can use an extending 02000 // instruction with LSL#3. 02001 // Currently the function handles any offsets but generates a poor sequence 02002 // of code. 02003 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); 02004 02005 unsigned Opc; 02006 if (SetNZCV) 02007 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri; 02008 else 02009 Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri; 02010 const unsigned MaxEncoding = 0xfff; 02011 const unsigned ShiftSize = 12; 02012 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; 02013 while (((unsigned)Offset) >= (1 << ShiftSize)) { 02014 unsigned ThisVal; 02015 if (((unsigned)Offset) > MaxEncodableValue) { 02016 ThisVal = MaxEncodableValue; 02017 } else { 02018 ThisVal = Offset & MaxEncodableValue; 02019 } 02020 assert((ThisVal >> ShiftSize) <= MaxEncoding && 02021 "Encoding cannot handle value that big"); 02022 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 02023 .addReg(SrcReg) 02024 .addImm(ThisVal >> ShiftSize) 02025 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize)) 02026 .setMIFlag(Flag); 02027 02028 SrcReg = DestReg; 02029 Offset -= ThisVal; 02030 if (Offset == 0) 02031 return; 02032 } 02033 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 02034 .addReg(SrcReg) 02035 .addImm(Offset) 02036 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 02037 .setMIFlag(Flag); 02038 } 02039 02040 MachineInstr * 02041 AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, 02042 const SmallVectorImpl<unsigned> &Ops, 02043 int FrameIndex) const { 02044 // This is a bit of a hack. 
Consider this instruction: 02045 // 02046 // %vreg0<def> = COPY %SP; GPR64all:%vreg0 02047 // 02048 // We explicitly chose GPR64all for the virtual register so such a copy might 02049 // be eliminated by RegisterCoalescer. However, that may not be possible, and 02050 // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all 02051 // register class, TargetInstrInfo::foldMemoryOperand() is going to try. 02052 // 02053 // To prevent that, we are going to constrain the %vreg0 register class here. 02054 // 02055 // <rdar://problem/11522048> 02056 // 02057 if (MI->isCopy()) { 02058 unsigned DstReg = MI->getOperand(0).getReg(); 02059 unsigned SrcReg = MI->getOperand(1).getReg(); 02060 if (SrcReg == AArch64::SP && 02061 TargetRegisterInfo::isVirtualRegister(DstReg)) { 02062 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); 02063 return nullptr; 02064 } 02065 if (DstReg == AArch64::SP && 02066 TargetRegisterInfo::isVirtualRegister(SrcReg)) { 02067 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 02068 return nullptr; 02069 } 02070 } 02071 02072 // Cannot fold. 02073 return nullptr; 02074 } 02075 02076 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, 02077 bool *OutUseUnscaledOp, 02078 unsigned *OutUnscaledOp, 02079 int *EmittableOffset) { 02080 int Scale = 1; 02081 bool IsSigned = false; 02082 // The ImmIdx should be changed case by case if it is not 2. 02083 unsigned ImmIdx = 2; 02084 unsigned UnscaledOp = 0; 02085 // Set output values in case of early exit. 02086 if (EmittableOffset) 02087 *EmittableOffset = 0; 02088 if (OutUseUnscaledOp) 02089 *OutUseUnscaledOp = false; 02090 if (OutUnscaledOp) 02091 *OutUnscaledOp = 0; 02092 switch (MI.getOpcode()) { 02093 default: 02094 llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex"); 02095 // Vector spills/fills can't take an immediate offset. 
02096 case AArch64::LD1Twov2d: 02097 case AArch64::LD1Threev2d: 02098 case AArch64::LD1Fourv2d: 02099 case AArch64::LD1Twov1d: 02100 case AArch64::LD1Threev1d: 02101 case AArch64::LD1Fourv1d: 02102 case AArch64::ST1Twov2d: 02103 case AArch64::ST1Threev2d: 02104 case AArch64::ST1Fourv2d: 02105 case AArch64::ST1Twov1d: 02106 case AArch64::ST1Threev1d: 02107 case AArch64::ST1Fourv1d: 02108 return AArch64FrameOffsetCannotUpdate; 02109 case AArch64::PRFMui: 02110 Scale = 8; 02111 UnscaledOp = AArch64::PRFUMi; 02112 break; 02113 case AArch64::LDRXui: 02114 Scale = 8; 02115 UnscaledOp = AArch64::LDURXi; 02116 break; 02117 case AArch64::LDRWui: 02118 Scale = 4; 02119 UnscaledOp = AArch64::LDURWi; 02120 break; 02121 case AArch64::LDRBui: 02122 Scale = 1; 02123 UnscaledOp = AArch64::LDURBi; 02124 break; 02125 case AArch64::LDRHui: 02126 Scale = 2; 02127 UnscaledOp = AArch64::LDURHi; 02128 break; 02129 case AArch64::LDRSui: 02130 Scale = 4; 02131 UnscaledOp = AArch64::LDURSi; 02132 break; 02133 case AArch64::LDRDui: 02134 Scale = 8; 02135 UnscaledOp = AArch64::LDURDi; 02136 break; 02137 case AArch64::LDRQui: 02138 Scale = 16; 02139 UnscaledOp = AArch64::LDURQi; 02140 break; 02141 case AArch64::LDRBBui: 02142 Scale = 1; 02143 UnscaledOp = AArch64::LDURBBi; 02144 break; 02145 case AArch64::LDRHHui: 02146 Scale = 2; 02147 UnscaledOp = AArch64::LDURHHi; 02148 break; 02149 case AArch64::LDRSBXui: 02150 Scale = 1; 02151 UnscaledOp = AArch64::LDURSBXi; 02152 break; 02153 case AArch64::LDRSBWui: 02154 Scale = 1; 02155 UnscaledOp = AArch64::LDURSBWi; 02156 break; 02157 case AArch64::LDRSHXui: 02158 Scale = 2; 02159 UnscaledOp = AArch64::LDURSHXi; 02160 break; 02161 case AArch64::LDRSHWui: 02162 Scale = 2; 02163 UnscaledOp = AArch64::LDURSHWi; 02164 break; 02165 case AArch64::LDRSWui: 02166 Scale = 4; 02167 UnscaledOp = AArch64::LDURSWi; 02168 break; 02169 02170 case AArch64::STRXui: 02171 Scale = 8; 02172 UnscaledOp = AArch64::STURXi; 02173 break; 02174 case AArch64::STRWui: 02175 Scale = 4; 02176 UnscaledOp = AArch64::STURWi; 02177 break; 02178 case AArch64::STRBui: 02179 Scale = 1; 02180 UnscaledOp = AArch64::STURBi; 02181 break; 02182 case AArch64::STRHui: 02183 Scale = 2; 02184 UnscaledOp = AArch64::STURHi; 02185 break; 02186 case AArch64::STRSui: 02187 Scale = 4; 02188 UnscaledOp = AArch64::STURSi; 02189 break; 02190 case AArch64::STRDui: 02191 Scale = 8; 02192 UnscaledOp = AArch64::STURDi; 02193 break; 02194 case AArch64::STRQui: 02195 Scale = 16; 02196 UnscaledOp = AArch64::STURQi; 02197 break; 02198 case AArch64::STRBBui: 02199 Scale = 1; 02200 UnscaledOp = AArch64::STURBBi; 02201 break; 02202 case AArch64::STRHHui: 02203 Scale = 2; 02204 UnscaledOp = AArch64::STURHHi; 02205 break; 02206 02207 case AArch64::LDPXi: 02208 case AArch64::LDPDi: 02209 case AArch64::STPXi: 02210 case AArch64::STPDi: 02211 IsSigned = true; 02212 Scale = 8; 02213 break; 02214 case AArch64::LDPQi: 02215 case AArch64::STPQi: 02216 IsSigned = true; 02217 Scale = 16; 02218 break; 02219 case AArch64::LDPWi: 02220 case AArch64::LDPSi: 02221 case AArch64::STPWi: 02222 case AArch64::STPSi: 02223 IsSigned = true; 02224 Scale = 4; 02225 break; 02226 02227 case AArch64::LDURXi: 02228 case AArch64::LDURWi: 02229 case AArch64::LDURBi: 02230 case AArch64::LDURHi: 02231 case AArch64::LDURSi: 02232 case AArch64::LDURDi: 02233 case AArch64::LDURQi: 02234 case AArch64::LDURHHi: 02235 case AArch64::LDURBBi: 02236 case AArch64::LDURSBXi: 02237 case AArch64::LDURSBWi: 02238 case AArch64::LDURSHXi: 02239 case AArch64::LDURSHWi: 02240 case 
AArch64::LDURSWi: 02241 case AArch64::STURXi: 02242 case AArch64::STURWi: 02243 case AArch64::STURBi: 02244 case AArch64::STURHi: 02245 case AArch64::STURSi: 02246 case AArch64::STURDi: 02247 case AArch64::STURQi: 02248 case AArch64::STURBBi: 02249 case AArch64::STURHHi: 02250 Scale = 1; 02251 break; 02252 } 02253 02254 Offset += MI.getOperand(ImmIdx).getImm() * Scale; 02255 02256 bool useUnscaledOp = false; 02257 // If the offset doesn't match the scale, we rewrite the instruction to 02258 // use the unscaled instruction instead. Likewise, if we have a negative 02259 // offset (and have an unscaled op to use). 02260 if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0)) 02261 useUnscaledOp = true; 02262 02263 // Use an unscaled addressing mode if the instruction has a negative offset 02264 // (or if the instruction is already using an unscaled addressing mode). 02265 unsigned MaskBits; 02266 if (IsSigned) { 02267 // ldp/stp instructions. 02268 MaskBits = 7; 02269 Offset /= Scale; 02270 } else if (UnscaledOp == 0 || useUnscaledOp) { 02271 MaskBits = 9; 02272 IsSigned = true; 02273 Scale = 1; 02274 } else { 02275 MaskBits = 12; 02276 IsSigned = false; 02277 Offset /= Scale; 02278 } 02279 02280 // Attempt to fold address computation. 02281 int MaxOff = (1 << (MaskBits - IsSigned)) - 1; 02282 int MinOff = (IsSigned ? (-MaxOff - 1) : 0); 02283 if (Offset >= MinOff && Offset <= MaxOff) { 02284 if (EmittableOffset) 02285 *EmittableOffset = Offset; 02286 Offset = 0; 02287 } else { 02288 int NewOff = Offset < 0 ? MinOff : MaxOff; 02289 if (EmittableOffset) 02290 *EmittableOffset = NewOff; 02291 Offset = (Offset - NewOff) * Scale; 02292 } 02293 if (OutUseUnscaledOp) 02294 *OutUseUnscaledOp = useUnscaledOp; 02295 if (OutUnscaledOp) 02296 *OutUnscaledOp = UnscaledOp; 02297 return AArch64FrameOffsetCanUpdate | 02298 (Offset == 0 ? AArch64FrameOffsetIsLegal : 0); 02299 } 02300 02301 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 02302 unsigned FrameReg, int &Offset, 02303 const AArch64InstrInfo *TII) { 02304 unsigned Opcode = MI.getOpcode(); 02305 unsigned ImmIdx = FrameRegIdx + 1; 02306 02307 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { 02308 Offset += MI.getOperand(ImmIdx).getImm(); 02309 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), 02310 MI.getOperand(0).getReg(), FrameReg, Offset, TII, 02311 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); 02312 MI.eraseFromParent(); 02313 Offset = 0; 02314 return true; 02315 } 02316 02317 int NewOffset; 02318 unsigned UnscaledOp; 02319 bool UseUnscaledOp; 02320 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, 02321 &UnscaledOp, &NewOffset); 02322 if (Status & AArch64FrameOffsetCanUpdate) { 02323 if (Status & AArch64FrameOffsetIsLegal) 02324 // Replace the FrameIndex with FrameReg. 
02325       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
02326     if (UseUnscaledOp)
02327       MI.setDesc(TII->get(UnscaledOp));
02328 
02329     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
02330     return Offset == 0;
02331   }
02332 
02333   return false;
02334 }
02335 
02336 void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
02337   NopInst.setOpcode(AArch64::HINT);
02338   NopInst.addOperand(MCOperand::CreateImm(0));
02339 }
02340 /// useMachineCombiner - return true when a target supports MachineCombiner
02341 bool AArch64InstrInfo::useMachineCombiner() const {
02342   // AArch64 supports the combiner
02343   return true;
02344 }
02345 //
02346 // True when Opc sets flag
02347 static bool isCombineInstrSettingFlag(unsigned Opc) {
02348   switch (Opc) {
02349   case AArch64::ADDSWrr:
02350   case AArch64::ADDSWri:
02351   case AArch64::ADDSXrr:
02352   case AArch64::ADDSXri:
02353   case AArch64::SUBSWrr:
02354   case AArch64::SUBSXrr:
02355   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
02356   case AArch64::SUBSWri:
02357   case AArch64::SUBSXri:
02358     return true;
02359   default:
02360     break;
02361   }
02362   return false;
02363 }
02364 //
02365 // 32b Opcodes that can be combined with a MUL
02366 static bool isCombineInstrCandidate32(unsigned Opc) {
02367   switch (Opc) {
02368   case AArch64::ADDWrr:
02369   case AArch64::ADDWri:
02370   case AArch64::SUBWrr:
02371   case AArch64::ADDSWrr:
02372   case AArch64::ADDSWri:
02373   case AArch64::SUBSWrr:
02374   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
02375   case AArch64::SUBWri:
02376   case AArch64::SUBSWri:
02377     return true;
02378   default:
02379     break;
02380   }
02381   return false;
02382 }
02383 //
02384 // 64b Opcodes that can be combined with a MUL
02385 static bool isCombineInstrCandidate64(unsigned Opc) {
02386   switch (Opc) {
02387   case AArch64::ADDXrr:
02388   case AArch64::ADDXri:
02389   case AArch64::SUBXrr:
02390   case AArch64::ADDSXrr:
02391   case AArch64::ADDSXri:
02392   case AArch64::SUBSXrr:
02393   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
02394   case AArch64::SUBXri:
02395   case AArch64::SUBSXri:
02396     return true;
02397   default:
02398     break;
02399   }
02400   return false;
02401 }
02402 //
02403 // Opcodes that can be combined with a MUL
02404 static bool isCombineInstrCandidate(unsigned Opc) {
02405   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
02406 }
02407 
02408 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
02409                               unsigned MulOpc, unsigned ZeroReg) {
02410   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
02411   MachineInstr *MI = nullptr;
02412   // We need a virtual register definition.
02413   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
02414     MI = MRI.getUniqueVRegDef(MO.getReg());
02415   // And it needs to be in the trace (otherwise, it won't have a depth).
02416   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != MulOpc)
02417     return false;
02418 
02419   assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
02420          MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
02421          MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
02422 
02423   // The third input reg must be zero.
02424   if (MI->getOperand(3).getReg() != ZeroReg)
02425     return false;
02426 
02427   // Must only be used by the user we combine with.
02428 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 02429 return false; 02430 02431 return true; 02432 } 02433 02434 /// hasPattern - return true when there is potentially a faster code sequence 02435 /// for an instruction chain ending in \p Root. All potential patterns are 02436 /// listed 02437 /// in the \p Pattern vector. Pattern should be sorted in priority order since 02438 /// the pattern evaluator stops checking as soon as it finds a faster sequence. 02439 02440 bool AArch64InstrInfo::hasPattern( 02441 MachineInstr &Root, 02442 SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const { 02443 unsigned Opc = Root.getOpcode(); 02444 MachineBasicBlock &MBB = *Root.getParent(); 02445 bool Found = false; 02446 02447 if (!isCombineInstrCandidate(Opc)) 02448 return 0; 02449 if (isCombineInstrSettingFlag(Opc)) { 02450 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true); 02451 // When NZCV is live bail out. 02452 if (Cmp_NZCV == -1) 02453 return 0; 02454 unsigned NewOpc = convertFlagSettingOpcode(&Root); 02455 // When opcode can't change bail out. 02456 // CHECKME: do we miss any cases for opcode conversion? 02457 if (NewOpc == Opc) 02458 return 0; 02459 Opc = NewOpc; 02460 } 02461 02462 switch (Opc) { 02463 default: 02464 break; 02465 case AArch64::ADDWrr: 02466 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 02467 "ADDWrr does not have register operands"); 02468 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 02469 AArch64::WZR)) { 02470 Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP1); 02471 Found = true; 02472 } 02473 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, 02474 AArch64::WZR)) { 02475 Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP2); 02476 Found = true; 02477 } 02478 break; 02479 case AArch64::ADDXrr: 02480 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 02481 AArch64::XZR)) { 02482 Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP1); 02483 Found = true; 02484 } 02485 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, 02486 AArch64::XZR)) { 02487 Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP2); 02488 Found = true; 02489 } 02490 break; 02491 case AArch64::SUBWrr: 02492 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 02493 AArch64::WZR)) { 02494 Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP1); 02495 Found = true; 02496 } 02497 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, 02498 AArch64::WZR)) { 02499 Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP2); 02500 Found = true; 02501 } 02502 break; 02503 case AArch64::SUBXrr: 02504 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 02505 AArch64::XZR)) { 02506 Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP1); 02507 Found = true; 02508 } 02509 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, 02510 AArch64::XZR)) { 02511 Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP2); 02512 Found = true; 02513 } 02514 break; 02515 case AArch64::ADDWri: 02516 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 02517 AArch64::WZR)) { 02518 Pattern.push_back(MachineCombinerPattern::MC_MULADDWI_OP1); 02519 Found = true; 02520 } 02521 break; 02522 case AArch64::ADDXri: 02523 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 02524 AArch64::XZR)) { 02525 Pattern.push_back(MachineCombinerPattern::MC_MULADDXI_OP1); 02526 Found = true; 02527 } 02528 break; 02529 case 
AArch64::SUBWri:
02530     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
02531                           AArch64::WZR)) {
02532       Pattern.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
02533       Found = true;
02534     }
02535     break;
02536   case AArch64::SUBXri:
02537     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
02538                           AArch64::XZR)) {
02539       Pattern.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
02540       Found = true;
02541     }
02542     break;
02543   }
02544   return Found;
02545 }
02546 
02547 /// genMadd - Generate madd instruction and combine mul and add.
02548 /// Example:
02549 ///  MUL I=A,B,0
02550 ///  ADD R,I,C
02551 ///  ==> MADD R,A,B,C
02552 /// \param Root is the ADD instruction
02553 /// \param [out] InsInstrs is a vector of machine instructions and will
02554 /// contain the generated madd instruction
02555 /// \param IdxMulOpd is index of operand in Root that is the result of
02556 /// the MUL. In the example above IdxMulOpd is 1.
02557 /// \param MaddOpc the opcode of the madd instruction
02558 static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
02559                              const TargetInstrInfo *TII, MachineInstr &Root,
02560                              SmallVectorImpl<MachineInstr *> &InsInstrs,
02561                              unsigned IdxMulOpd, unsigned MaddOpc,
02562                              const TargetRegisterClass *RC) {
02563   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
02564 
02565   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
02566   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
02567   unsigned ResultReg = Root.getOperand(0).getReg();
02568   unsigned SrcReg0 = MUL->getOperand(1).getReg();
02569   bool Src0IsKill = MUL->getOperand(1).isKill();
02570   unsigned SrcReg1 = MUL->getOperand(2).getReg();
02571   bool Src1IsKill = MUL->getOperand(2).isKill();
02572   unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
02573   bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
02574 
02575   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
02576     MRI.constrainRegClass(ResultReg, RC);
02577   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
02578     MRI.constrainRegClass(SrcReg0, RC);
02579   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
02580     MRI.constrainRegClass(SrcReg1, RC);
02581   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
02582     MRI.constrainRegClass(SrcReg2, RC);
02583 
02584   MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
02585                                     ResultReg)
02586                                 .addReg(SrcReg0, getKillRegState(Src0IsKill))
02587                                 .addReg(SrcReg1, getKillRegState(Src1IsKill))
02588                                 .addReg(SrcReg2, getKillRegState(Src2IsKill));
02589   // Insert the MADD
02590   InsInstrs.push_back(MIB);
02591   return MUL;
02592 }
02593 
02594 /// genMaddR - Generate madd instruction and combine mul and add using
02595 /// an extra virtual register.
02596 /// Example - an ADD intermediate needs to be stored in a register:
02597 ///  MUL I=A,B,0
02598 ///  ADD R,I,Imm
02599 ///  ==> ORR V, ZR, Imm
02600 ///  ==> MADD R,A,B,V
02601 /// \param Root is the ADD instruction
02602 /// \param [out] InsInstrs is a vector of machine instructions and will
02603 /// contain the generated madd instruction
02604 /// \param IdxMulOpd is index of operand in Root that is the result of
02605 /// the MUL. In the example above IdxMulOpd is 1.
02606 /// \param MaddOpc the opcode of the madd instruction
02607 /// \param VR is a virtual register that holds the value of an ADD operand
02608 /// (V in the example above).
02609 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, 02610 const TargetInstrInfo *TII, MachineInstr &Root, 02611 SmallVectorImpl<MachineInstr *> &InsInstrs, 02612 unsigned IdxMulOpd, unsigned MaddOpc, 02613 unsigned VR, const TargetRegisterClass *RC) { 02614 assert(IdxMulOpd == 1 || IdxMulOpd == 2); 02615 02616 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); 02617 unsigned ResultReg = Root.getOperand(0).getReg(); 02618 unsigned SrcReg0 = MUL->getOperand(1).getReg(); 02619 bool Src0IsKill = MUL->getOperand(1).isKill(); 02620 unsigned SrcReg1 = MUL->getOperand(2).getReg(); 02621 bool Src1IsKill = MUL->getOperand(2).isKill(); 02622 02623 if (TargetRegisterInfo::isVirtualRegister(ResultReg)) 02624 MRI.constrainRegClass(ResultReg, RC); 02625 if (TargetRegisterInfo::isVirtualRegister(SrcReg0)) 02626 MRI.constrainRegClass(SrcReg0, RC); 02627 if (TargetRegisterInfo::isVirtualRegister(SrcReg1)) 02628 MRI.constrainRegClass(SrcReg1, RC); 02629 if (TargetRegisterInfo::isVirtualRegister(VR)) 02630 MRI.constrainRegClass(VR, RC); 02631 02632 MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), 02633 ResultReg) 02634 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 02635 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 02636 .addReg(VR); 02637 // Insert the MADD 02638 InsInstrs.push_back(MIB); 02639 return MUL; 02640 } 02641 02642 /// genAlternativeCodeSequence - when hasPattern() finds a pattern 02643 /// this function generates the instructions that could replace the 02644 /// original code sequence 02645 void AArch64InstrInfo::genAlternativeCodeSequence( 02646 MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern, 02647 SmallVectorImpl<MachineInstr *> &InsInstrs, 02648 SmallVectorImpl<MachineInstr *> &DelInstrs, 02649 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { 02650 MachineBasicBlock &MBB = *Root.getParent(); 02651 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 02652 MachineFunction &MF = *MBB.getParent(); 02653 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 02654 02655 MachineInstr *MUL; 02656 const TargetRegisterClass *RC; 02657 unsigned Opc; 02658 switch (Pattern) { 02659 default: 02660 // signal error. 
02661 break; 02662 case MachineCombinerPattern::MC_MULADDW_OP1: 02663 case MachineCombinerPattern::MC_MULADDX_OP1: 02664 // MUL I=A,B,0 02665 // ADD R,I,C 02666 // ==> MADD R,A,B,C 02667 // --- Create(MADD); 02668 if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) { 02669 Opc = AArch64::MADDWrrr; 02670 RC = &AArch64::GPR32RegClass; 02671 } else { 02672 Opc = AArch64::MADDXrrr; 02673 RC = &AArch64::GPR64RegClass; 02674 } 02675 MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 02676 break; 02677 case MachineCombinerPattern::MC_MULADDW_OP2: 02678 case MachineCombinerPattern::MC_MULADDX_OP2: 02679 // MUL I=A,B,0 02680 // ADD R,C,I 02681 // ==> MADD R,A,B,C 02682 // --- Create(MADD); 02683 if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) { 02684 Opc = AArch64::MADDWrrr; 02685 RC = &AArch64::GPR32RegClass; 02686 } else { 02687 Opc = AArch64::MADDXrrr; 02688 RC = &AArch64::GPR64RegClass; 02689 } 02690 MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 02691 break; 02692 case MachineCombinerPattern::MC_MULADDWI_OP1: 02693 case MachineCombinerPattern::MC_MULADDXI_OP1: { 02694 // MUL I=A,B,0 02695 // ADD R,I,Imm 02696 // ==> ORR V, ZR, Imm 02697 // ==> MADD R,A,B,V 02698 // --- Create(MADD); 02699 const TargetRegisterClass *OrrRC; 02700 unsigned BitSize, OrrOpc, ZeroReg; 02701 if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) { 02702 OrrOpc = AArch64::ORRWri; 02703 OrrRC = &AArch64::GPR32spRegClass; 02704 BitSize = 32; 02705 ZeroReg = AArch64::WZR; 02706 Opc = AArch64::MADDWrrr; 02707 RC = &AArch64::GPR32RegClass; 02708 } else { 02709 OrrOpc = AArch64::ORRXri; 02710 OrrRC = &AArch64::GPR64spRegClass; 02711 BitSize = 64; 02712 ZeroReg = AArch64::XZR; 02713 Opc = AArch64::MADDXrrr; 02714 RC = &AArch64::GPR64RegClass; 02715 } 02716 unsigned NewVR = MRI.createVirtualRegister(OrrRC); 02717 uint64_t Imm = Root.getOperand(2).getImm(); 02718 02719 if (Root.getOperand(3).isImm()) { 02720 unsigned Val = Root.getOperand(3).getImm(); 02721 Imm = Imm << Val; 02722 } 02723 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); 02724 uint64_t Encoding; 02725 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { 02726 MachineInstrBuilder MIB1 = 02727 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 02728 .addReg(ZeroReg) 02729 .addImm(Encoding); 02730 InsInstrs.push_back(MIB1); 02731 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 02732 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 02733 } 02734 break; 02735 } 02736 case MachineCombinerPattern::MC_MULSUBW_OP1: 02737 case MachineCombinerPattern::MC_MULSUBX_OP1: { 02738 // MUL I=A,B,0 02739 // SUB R,I, C 02740 // ==> SUB V, 0, C 02741 // ==> MADD R,A,B,V // = -C + A*B 02742 // --- Create(MADD); 02743 const TargetRegisterClass *SubRC; 02744 unsigned SubOpc, ZeroReg; 02745 if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) { 02746 SubOpc = AArch64::SUBWrr; 02747 SubRC = &AArch64::GPR32spRegClass; 02748 ZeroReg = AArch64::WZR; 02749 Opc = AArch64::MADDWrrr; 02750 RC = &AArch64::GPR32RegClass; 02751 } else { 02752 SubOpc = AArch64::SUBXrr; 02753 SubRC = &AArch64::GPR64spRegClass; 02754 ZeroReg = AArch64::XZR; 02755 Opc = AArch64::MADDXrrr; 02756 RC = &AArch64::GPR64RegClass; 02757 } 02758 unsigned NewVR = MRI.createVirtualRegister(SubRC); 02759 // SUB NewVR, 0, C 02760 MachineInstrBuilder MIB1 = 02761 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) 02762 .addReg(ZeroReg) 02763 .addOperand(Root.getOperand(2)); 02764 InsInstrs.push_back(MIB1); 02765 
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 02766 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 02767 break; 02768 } 02769 case MachineCombinerPattern::MC_MULSUBW_OP2: 02770 case MachineCombinerPattern::MC_MULSUBX_OP2: 02771 // MUL I=A,B,0 02772 // SUB R,C,I 02773 // ==> MSUB R,A,B,C (computes C - A*B) 02774 // --- Create(MSUB); 02775 if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) { 02776 Opc = AArch64::MSUBWrrr; 02777 RC = &AArch64::GPR32RegClass; 02778 } else { 02779 Opc = AArch64::MSUBXrrr; 02780 RC = &AArch64::GPR64RegClass; 02781 } 02782 MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 02783 break; 02784 case MachineCombinerPattern::MC_MULSUBWI_OP1: 02785 case MachineCombinerPattern::MC_MULSUBXI_OP1: { 02786 // MUL I=A,B,0 02787 // SUB R,I, Imm 02788 // ==> ORR V, ZR, -Imm 02789 // ==> MADD R,A,B,V // = -Imm + A*B 02790 // --- Create(MADD); 02791 const TargetRegisterClass *OrrRC; 02792 unsigned BitSize, OrrOpc, ZeroReg; 02793 if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) { 02794 OrrOpc = AArch64::ORRWri; 02795 OrrRC = &AArch64::GPR32spRegClass; 02796 BitSize = 32; 02797 ZeroReg = AArch64::WZR; 02798 Opc = AArch64::MADDWrrr; 02799 RC = &AArch64::GPR32RegClass; 02800 } else { 02801 OrrOpc = AArch64::ORRXri; 02802 OrrRC = &AArch64::GPR64RegClass; 02803 BitSize = 64; 02804 ZeroReg = AArch64::XZR; 02805 Opc = AArch64::MADDXrrr; 02806 RC = &AArch64::GPR64RegClass; 02807 } 02808 unsigned NewVR = MRI.createVirtualRegister(OrrRC); 02809 int Imm = Root.getOperand(2).getImm(); 02810 if (Root.getOperand(3).isImm()) { 02811 unsigned Val = Root.getOperand(3).getImm(); 02812 Imm = Imm << Val; 02813 } 02814 uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize); 02815 uint64_t Encoding; 02816 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { 02817 MachineInstrBuilder MIB1 = 02818 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 02819 .addReg(ZeroReg) 02820 .addImm(Encoding); 02821 InsInstrs.push_back(MIB1); 02822 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 02823 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 02824 } 02825 break; 02826 } 02827 } // end switch (Pattern) 02828 // Record MUL and ADD/SUB for deletion 02829 DelInstrs.push_back(MUL); 02830 DelInstrs.push_back(&Root); 02831 02832 return; 02833 }
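
A schematic before/after for the simplest pattern handled above, MC_MULADDW_OP1, may help when reading hasPattern() and genAlternativeCodeSequence(). This is a simplified sketch, not actual compiler output; the virtual register numbers are made up and the notation is loose MIR-style pseudocode. In this backend a 32-bit MUL is already selected as MADDWrrr with WZR as the addend, which is exactly the shape canCombineWithMUL() looks for:

//   %3 = MADDWrrr %1, %2, %wzr   ; the "MUL" (addend is the zero register)
//   %5 = ADDWrr   %3, %4         ; %3 has a single non-debug use
//
// genAlternativeCodeSequence() emits the fused instruction into InsInstrs and
// records the original MUL and ADD in DelInstrs for deletion:
//
//   %5 = MADDWrrr %1, %2, %4     ; the ADD operand folded in as the addend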