LLVM API Documentation
00001 //===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines the AArch64-specific support for the FastISel class. Some 00011 // of the target-specific code is generated by tablegen in the file 00012 // AArch64GenFastISel.inc, which is #included here. 00013 // 00014 //===----------------------------------------------------------------------===// 00015 00016 #include "AArch64.h" 00017 #include "AArch64Subtarget.h" 00018 #include "AArch64TargetMachine.h" 00019 #include "MCTargetDesc/AArch64AddressingModes.h" 00020 #include "llvm/Analysis/BranchProbabilityInfo.h" 00021 #include "llvm/CodeGen/CallingConvLower.h" 00022 #include "llvm/CodeGen/FastISel.h" 00023 #include "llvm/CodeGen/FunctionLoweringInfo.h" 00024 #include "llvm/CodeGen/MachineConstantPool.h" 00025 #include "llvm/CodeGen/MachineFrameInfo.h" 00026 #include "llvm/CodeGen/MachineInstrBuilder.h" 00027 #include "llvm/CodeGen/MachineRegisterInfo.h" 00028 #include "llvm/IR/CallingConv.h" 00029 #include "llvm/IR/DataLayout.h" 00030 #include "llvm/IR/DerivedTypes.h" 00031 #include "llvm/IR/Function.h" 00032 #include "llvm/IR/GetElementPtrTypeIterator.h" 00033 #include "llvm/IR/GlobalAlias.h" 00034 #include "llvm/IR/GlobalVariable.h" 00035 #include "llvm/IR/Instructions.h" 00036 #include "llvm/IR/IntrinsicInst.h" 00037 #include "llvm/IR/Operator.h" 00038 #include "llvm/Support/CommandLine.h" 00039 using namespace llvm; 00040 00041 namespace { 00042 00043 class AArch64FastISel final : public FastISel { 00044 class Address { 00045 public: 00046 typedef enum { 00047 RegBase, 00048 FrameIndexBase 00049 } BaseKind; 00050 00051 private: 00052 BaseKind Kind; 00053 AArch64_AM::ShiftExtendType ExtType; 00054 union { 00055 unsigned Reg; 00056 int FI; 00057 } Base; 00058 unsigned OffsetReg; 00059 unsigned Shift; 00060 int64_t Offset; 00061 const GlobalValue *GV; 00062 00063 public: 00064 Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend), 00065 OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; } 00066 void setKind(BaseKind K) { Kind = K; } 00067 BaseKind getKind() const { return Kind; } 00068 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; } 00069 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; } 00070 bool isRegBase() const { return Kind == RegBase; } 00071 bool isFIBase() const { return Kind == FrameIndexBase; } 00072 void setReg(unsigned Reg) { 00073 assert(isRegBase() && "Invalid base register access!"); 00074 Base.Reg = Reg; 00075 } 00076 unsigned getReg() const { 00077 assert(isRegBase() && "Invalid base register access!"); 00078 return Base.Reg; 00079 } 00080 void setOffsetReg(unsigned Reg) { 00081 assert(isRegBase() && "Invalid offset register access!"); 00082 OffsetReg = Reg; 00083 } 00084 unsigned getOffsetReg() const { 00085 assert(isRegBase() && "Invalid offset register access!"); 00086 return OffsetReg; 00087 } 00088 void setFI(unsigned FI) { 00089 assert(isFIBase() && "Invalid base frame index access!"); 00090 Base.FI = FI; 00091 } 00092 unsigned getFI() const { 00093 assert(isFIBase() && "Invalid base frame index access!"); 00094 return Base.FI; 00095 } 00096 void setOffset(int64_t O) { Offset = O; } 00097 int64_t getOffset() { return 
Offset; } 00098 void setShift(unsigned S) { Shift = S; } 00099 unsigned getShift() { return Shift; } 00100 00101 void setGlobalValue(const GlobalValue *G) { GV = G; } 00102 const GlobalValue *getGlobalValue() { return GV; } 00103 }; 00104 00105 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 00106 /// make the right decision when generating code for different targets. 00107 const AArch64Subtarget *Subtarget; 00108 LLVMContext *Context; 00109 00110 bool fastLowerArguments() override; 00111 bool fastLowerCall(CallLoweringInfo &CLI) override; 00112 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; 00113 00114 private: 00115 // Selection routines. 00116 bool selectAddSub(const Instruction *I); 00117 bool selectLogicalOp(const Instruction *I); 00118 bool selectLoad(const Instruction *I); 00119 bool selectStore(const Instruction *I); 00120 bool selectBranch(const Instruction *I); 00121 bool selectIndirectBr(const Instruction *I); 00122 bool selectCmp(const Instruction *I); 00123 bool selectSelect(const Instruction *I); 00124 bool selectFPExt(const Instruction *I); 00125 bool selectFPTrunc(const Instruction *I); 00126 bool selectFPToInt(const Instruction *I, bool Signed); 00127 bool selectIntToFP(const Instruction *I, bool Signed); 00128 bool selectRem(const Instruction *I, unsigned ISDOpcode); 00129 bool selectRet(const Instruction *I); 00130 bool selectTrunc(const Instruction *I); 00131 bool selectIntExt(const Instruction *I); 00132 bool selectMul(const Instruction *I); 00133 bool selectShift(const Instruction *I); 00134 bool selectBitCast(const Instruction *I); 00135 bool selectFRem(const Instruction *I); 00136 bool selectSDiv(const Instruction *I); 00137 00138 // Utility helper routines. 00139 bool isTypeLegal(Type *Ty, MVT &VT); 00140 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false); 00141 bool isValueAvailable(const Value *V) const; 00142 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); 00143 bool computeCallAddress(const Value *V, Address &Addr); 00144 bool simplifyAddress(Address &Addr, MVT VT); 00145 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, 00146 unsigned Flags, unsigned ScaleFactor, 00147 MachineMemOperand *MMO); 00148 bool isMemCpySmall(uint64_t Len, unsigned Alignment); 00149 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, 00150 unsigned Alignment); 00151 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, 00152 const Value *Cond); 00153 00154 // Emit helper routines. 
00155 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 00156 const Value *RHS, bool SetFlags = false, 00157 bool WantResult = true, bool IsZExt = false); 00158 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 00159 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, 00160 bool SetFlags = false, bool WantResult = true); 00161 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 00162 bool LHSIsKill, uint64_t Imm, bool SetFlags = false, 00163 bool WantResult = true); 00164 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 00165 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, 00166 AArch64_AM::ShiftExtendType ShiftType, 00167 uint64_t ShiftImm, bool SetFlags = false, 00168 bool WantResult = true); 00169 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 00170 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, 00171 AArch64_AM::ShiftExtendType ExtType, 00172 uint64_t ShiftImm, bool SetFlags = false, 00173 bool WantResult = true); 00174 00175 // Emit functions. 00176 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); 00177 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); 00178 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); 00179 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); 00180 bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr, 00181 MachineMemOperand *MMO = nullptr); 00182 bool emitStore(MVT VT, unsigned SrcReg, Address Addr, 00183 MachineMemOperand *MMO = nullptr); 00184 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); 00185 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); 00186 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 00187 bool SetFlags = false, bool WantResult = true, 00188 bool IsZExt = false); 00189 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 00190 bool SetFlags = false, bool WantResult = true, 00191 bool IsZExt = false); 00192 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 00193 unsigned RHSReg, bool RHSIsKill, bool WantResult = true); 00194 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 00195 unsigned RHSReg, bool RHSIsKill, 00196 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, 00197 bool WantResult = true); 00198 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, 00199 const Value *RHS); 00200 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, 00201 bool LHSIsKill, uint64_t Imm); 00202 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, 00203 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, 00204 uint64_t ShiftImm); 00205 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); 00206 unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 00207 unsigned Op1, bool Op1IsKill); 00208 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 00209 unsigned Op1, bool Op1IsKill); 00210 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 00211 unsigned Op1, bool Op1IsKill); 00212 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 00213 unsigned Op1Reg, bool Op1IsKill); 00214 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, 00215 uint64_t Imm, bool IsZExt = true); 00216 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 00217 unsigned Op1Reg, bool Op1IsKill); 00218 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, 00219 uint64_t Imm, bool 
IsZExt = true); 00220 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 00221 unsigned Op1Reg, bool Op1IsKill); 00222 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, 00223 uint64_t Imm, bool IsZExt = false); 00224 00225 unsigned materializeInt(const ConstantInt *CI, MVT VT); 00226 unsigned materializeFP(const ConstantFP *CFP, MVT VT); 00227 unsigned materializeGV(const GlobalValue *GV); 00228 00229 // Call handling routines. 00230 private: 00231 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; 00232 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs, 00233 unsigned &NumBytes); 00234 bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes); 00235 00236 public: 00237 // Backend specific FastISel code. 00238 unsigned fastMaterializeAlloca(const AllocaInst *AI) override; 00239 unsigned fastMaterializeConstant(const Constant *C) override; 00240 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; 00241 00242 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, 00243 const TargetLibraryInfo *LibInfo) 00244 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { 00245 Subtarget = &TM.getSubtarget<AArch64Subtarget>(); 00246 Context = &FuncInfo.Fn->getContext(); 00247 } 00248 00249 bool fastSelectInstruction(const Instruction *I) override; 00250 00251 #include "AArch64GenFastISel.inc" 00252 }; 00253 00254 } // end anonymous namespace 00255 00256 #include "AArch64GenCallingConv.inc" 00257 00258 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { 00259 if (CC == CallingConv::WebKit_JS) 00260 return CC_AArch64_WebKit_JS; 00261 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; 00262 } 00263 00264 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { 00265 assert(TLI.getValueType(AI->getType(), true) == MVT::i64 && 00266 "Alloca should always return a pointer."); 00267 00268 // Don't handle dynamic allocas. 00269 if (!FuncInfo.StaticAllocaMap.count(AI)) 00270 return 0; 00271 00272 DenseMap<const AllocaInst *, int>::iterator SI = 00273 FuncInfo.StaticAllocaMap.find(AI); 00274 00275 if (SI != FuncInfo.StaticAllocaMap.end()) { 00276 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); 00277 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 00278 ResultReg) 00279 .addFrameIndex(SI->second) 00280 .addImm(0) 00281 .addImm(0); 00282 return ResultReg; 00283 } 00284 00285 return 0; 00286 } 00287 00288 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { 00289 if (VT > MVT::i64) 00290 return 0; 00291 00292 if (!CI->isZero()) 00293 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); 00294 00295 // Create a copy from the zero register to materialize a "0" value. 00296 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass 00297 : &AArch64::GPR32RegClass; 00298 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 00299 unsigned ResultReg = createResultReg(RC); 00300 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), 00301 ResultReg).addReg(ZeroReg, getKillRegState(true)); 00302 return ResultReg; 00303 } 00304 00305 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { 00306 // Positive zero (+0.0) has to be materialized with a fmov from the zero 00307 // register, because the immediate version of fmov cannot encode zero. 
00308 if (CFP->isNullValue()) 00309 return fastMaterializeFloatZero(CFP); 00310 00311 if (VT != MVT::f32 && VT != MVT::f64) 00312 return 0; 00313 00314 const APFloat Val = CFP->getValueAPF(); 00315 bool Is64Bit = (VT == MVT::f64); 00316 // This checks to see if we can use FMOV instructions to materialize 00317 // a constant, otherwise we have to materialize via the constant pool. 00318 if (TLI.isFPImmLegal(Val, VT)) { 00319 int Imm = 00320 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); 00321 assert((Imm != -1) && "Cannot encode floating-point constant."); 00322 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; 00323 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); 00324 } 00325 00326 // Materialize via constant pool. MachineConstantPool wants an explicit 00327 // alignment. 00328 unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); 00329 if (Align == 0) 00330 Align = DL.getTypeAllocSize(CFP->getType()); 00331 00332 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); 00333 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 00334 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 00335 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE); 00336 00337 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; 00338 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 00339 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 00340 .addReg(ADRPReg) 00341 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 00342 return ResultReg; 00343 } 00344 00345 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { 00346 // We can't handle thread-local variables quickly yet. 00347 if (GV->isThreadLocal()) 00348 return 0; 00349 00350 // MachO still uses GOT for large code-model accesses, but ELF requires 00351 // movz/movk sequences, which FastISel doesn't handle yet. 00352 if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO()) 00353 return 0; 00354 00355 unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); 00356 00357 EVT DestEVT = TLI.getValueType(GV->getType(), true); 00358 if (!DestEVT.isSimple()) 00359 return 0; 00360 00361 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 00362 unsigned ResultReg; 00363 00364 if (OpFlags & AArch64II::MO_GOT) { 00365 // ADRP + LDRX 00366 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 00367 ADRPReg) 00368 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE); 00369 00370 ResultReg = createResultReg(&AArch64::GPR64RegClass); 00371 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), 00372 ResultReg) 00373 .addReg(ADRPReg) 00374 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | 00375 AArch64II::MO_NC); 00376 } else if (OpFlags & AArch64II::MO_CONSTPOOL) { 00377 // We can't handle addresses loaded from a constant pool quickly yet. 
00378 return 0; 00379 } else { 00380 // ADRP + ADDX 00381 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 00382 ADRPReg) 00383 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE); 00384 00385 ResultReg = createResultReg(&AArch64::GPR64spRegClass); 00386 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 00387 ResultReg) 00388 .addReg(ADRPReg) 00389 .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC) 00390 .addImm(0); 00391 } 00392 return ResultReg; 00393 } 00394 00395 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { 00396 EVT CEVT = TLI.getValueType(C->getType(), true); 00397 00398 // Only handle simple types. 00399 if (!CEVT.isSimple()) 00400 return 0; 00401 MVT VT = CEVT.getSimpleVT(); 00402 00403 if (const auto *CI = dyn_cast<ConstantInt>(C)) 00404 return materializeInt(CI, VT); 00405 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 00406 return materializeFP(CFP, VT); 00407 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 00408 return materializeGV(GV); 00409 00410 return 0; 00411 } 00412 00413 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { 00414 assert(CFP->isNullValue() && 00415 "Floating-point constant is not a positive zero."); 00416 MVT VT; 00417 if (!isTypeLegal(CFP->getType(), VT)) 00418 return 0; 00419 00420 if (VT != MVT::f32 && VT != MVT::f64) 00421 return 0; 00422 00423 bool Is64Bit = (VT == MVT::f64); 00424 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 00425 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; 00426 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true); 00427 } 00428 00429 /// \brief Check if the multiply is by a power-of-2 constant. 00430 static bool isMulPowOf2(const Value *I) { 00431 if (const auto *MI = dyn_cast<MulOperator>(I)) { 00432 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0))) 00433 if (C->getValue().isPowerOf2()) 00434 return true; 00435 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1))) 00436 if (C->getValue().isPowerOf2()) 00437 return true; 00438 } 00439 return false; 00440 } 00441 00442 // Computes the address to get to an object. 00443 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) 00444 { 00445 const User *U = nullptr; 00446 unsigned Opcode = Instruction::UserOp1; 00447 if (const Instruction *I = dyn_cast<Instruction>(Obj)) { 00448 // Don't walk into other basic blocks unless the object is an alloca from 00449 // another block, otherwise it may not have a virtual register assigned. 00450 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || 00451 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 00452 Opcode = I->getOpcode(); 00453 U = I; 00454 } 00455 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 00456 Opcode = C->getOpcode(); 00457 U = C; 00458 } 00459 00460 if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) 00461 if (Ty->getAddressSpace() > 255) 00462 // Fast instruction selection doesn't support the special 00463 // address spaces. 00464 return false; 00465 00466 switch (Opcode) { 00467 default: 00468 break; 00469 case Instruction::BitCast: { 00470 // Look through bitcasts. 00471 return computeAddress(U->getOperand(0), Addr, Ty); 00472 } 00473 case Instruction::IntToPtr: { 00474 // Look past no-op inttoptrs. 
00475 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) 00476 return computeAddress(U->getOperand(0), Addr, Ty); 00477 break; 00478 } 00479 case Instruction::PtrToInt: { 00480 // Look past no-op ptrtoints. 00481 if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) 00482 return computeAddress(U->getOperand(0), Addr, Ty); 00483 break; 00484 } 00485 case Instruction::GetElementPtr: { 00486 Address SavedAddr = Addr; 00487 uint64_t TmpOffset = Addr.getOffset(); 00488 00489 // Iterate through the GEP folding the constants into offsets where 00490 // we can. 00491 gep_type_iterator GTI = gep_type_begin(U); 00492 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; 00493 ++i, ++GTI) { 00494 const Value *Op = *i; 00495 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 00496 const StructLayout *SL = DL.getStructLayout(STy); 00497 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); 00498 TmpOffset += SL->getElementOffset(Idx); 00499 } else { 00500 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); 00501 for (;;) { 00502 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 00503 // Constant-offset addressing. 00504 TmpOffset += CI->getSExtValue() * S; 00505 break; 00506 } 00507 if (canFoldAddIntoGEP(U, Op)) { 00508 // A compatible add with a constant operand. Fold the constant. 00509 ConstantInt *CI = 00510 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); 00511 TmpOffset += CI->getSExtValue() * S; 00512 // Iterate on the other operand. 00513 Op = cast<AddOperator>(Op)->getOperand(0); 00514 continue; 00515 } 00516 // Unsupported 00517 goto unsupported_gep; 00518 } 00519 } 00520 } 00521 00522 // Try to grab the base operand now. 00523 Addr.setOffset(TmpOffset); 00524 if (computeAddress(U->getOperand(0), Addr, Ty)) 00525 return true; 00526 00527 // We failed, restore everything and try the other options. 00528 Addr = SavedAddr; 00529 00530 unsupported_gep: 00531 break; 00532 } 00533 case Instruction::Alloca: { 00534 const AllocaInst *AI = cast<AllocaInst>(Obj); 00535 DenseMap<const AllocaInst *, int>::iterator SI = 00536 FuncInfo.StaticAllocaMap.find(AI); 00537 if (SI != FuncInfo.StaticAllocaMap.end()) { 00538 Addr.setKind(Address::FrameIndexBase); 00539 Addr.setFI(SI->second); 00540 return true; 00541 } 00542 break; 00543 } 00544 case Instruction::Add: { 00545 // Adds of constants are common and easy enough. 
00546 const Value *LHS = U->getOperand(0); 00547 const Value *RHS = U->getOperand(1); 00548 00549 if (isa<ConstantInt>(LHS)) 00550 std::swap(LHS, RHS); 00551 00552 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 00553 Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue()); 00554 return computeAddress(LHS, Addr, Ty); 00555 } 00556 00557 Address Backup = Addr; 00558 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty)) 00559 return true; 00560 Addr = Backup; 00561 00562 break; 00563 } 00564 case Instruction::Shl: 00565 if (Addr.getOffsetReg()) 00566 break; 00567 00568 if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { 00569 unsigned Val = CI->getZExtValue(); 00570 if (Val < 1 || Val > 3) 00571 break; 00572 00573 uint64_t NumBytes = 0; 00574 if (Ty && Ty->isSized()) { 00575 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 00576 NumBytes = NumBits / 8; 00577 if (!isPowerOf2_64(NumBits)) 00578 NumBytes = 0; 00579 } 00580 00581 if (NumBytes != (1ULL << Val)) 00582 break; 00583 00584 Addr.setShift(Val); 00585 Addr.setExtendType(AArch64_AM::LSL); 00586 00587 if (const auto *I = dyn_cast<Instruction>(U->getOperand(0))) 00588 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) 00589 U = I; 00590 00591 if (const auto *ZE = dyn_cast<ZExtInst>(U)) 00592 if (ZE->getOperand(0)->getType()->isIntegerTy(32)) 00593 Addr.setExtendType(AArch64_AM::UXTW); 00594 00595 if (const auto *SE = dyn_cast<SExtInst>(U)) 00596 if (SE->getOperand(0)->getType()->isIntegerTy(32)) 00597 Addr.setExtendType(AArch64_AM::SXTW); 00598 00599 if (const auto *AI = dyn_cast<BinaryOperator>(U)) 00600 if (AI->getOpcode() == Instruction::And) { 00601 const Value *LHS = AI->getOperand(0); 00602 const Value *RHS = AI->getOperand(1); 00603 00604 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 00605 if (C->getValue() == 0xffffffff) 00606 std::swap(LHS, RHS); 00607 00608 if (const auto *C = cast<ConstantInt>(RHS)) 00609 if (C->getValue() == 0xffffffff) { 00610 Addr.setExtendType(AArch64_AM::UXTW); 00611 unsigned Reg = getRegForValue(LHS); 00612 if (!Reg) 00613 return false; 00614 bool RegIsKill = hasTrivialKill(LHS); 00615 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, 00616 AArch64::sub_32); 00617 Addr.setOffsetReg(Reg); 00618 return true; 00619 } 00620 } 00621 00622 unsigned Reg = getRegForValue(U->getOperand(0)); 00623 if (!Reg) 00624 return false; 00625 Addr.setOffsetReg(Reg); 00626 return true; 00627 } 00628 break; 00629 case Instruction::Mul: { 00630 if (Addr.getOffsetReg()) 00631 break; 00632 00633 if (!isMulPowOf2(U)) 00634 break; 00635 00636 const Value *LHS = U->getOperand(0); 00637 const Value *RHS = U->getOperand(1); 00638 00639 // Canonicalize power-of-2 value to the RHS. 
00640 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 00641 if (C->getValue().isPowerOf2()) 00642 std::swap(LHS, RHS); 00643 00644 assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt."); 00645 const auto *C = cast<ConstantInt>(RHS); 00646 unsigned Val = C->getValue().logBase2(); 00647 if (Val < 1 || Val > 3) 00648 break; 00649 00650 uint64_t NumBytes = 0; 00651 if (Ty && Ty->isSized()) { 00652 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 00653 NumBytes = NumBits / 8; 00654 if (!isPowerOf2_64(NumBits)) 00655 NumBytes = 0; 00656 } 00657 00658 if (NumBytes != (1ULL << Val)) 00659 break; 00660 00661 Addr.setShift(Val); 00662 Addr.setExtendType(AArch64_AM::LSL); 00663 00664 if (const auto *I = dyn_cast<Instruction>(LHS)) 00665 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) 00666 U = I; 00667 00668 if (const auto *ZE = dyn_cast<ZExtInst>(U)) 00669 if (ZE->getOperand(0)->getType()->isIntegerTy(32)) { 00670 Addr.setExtendType(AArch64_AM::UXTW); 00671 LHS = U->getOperand(0); 00672 } 00673 00674 if (const auto *SE = dyn_cast<SExtInst>(U)) 00675 if (SE->getOperand(0)->getType()->isIntegerTy(32)) { 00676 Addr.setExtendType(AArch64_AM::SXTW); 00677 LHS = U->getOperand(0); 00678 } 00679 00680 unsigned Reg = getRegForValue(LHS); 00681 if (!Reg) 00682 return false; 00683 Addr.setOffsetReg(Reg); 00684 return true; 00685 } 00686 case Instruction::And: { 00687 if (Addr.getOffsetReg()) 00688 break; 00689 00690 if (DL.getTypeSizeInBits(Ty) != 8) 00691 break; 00692 00693 const Value *LHS = U->getOperand(0); 00694 const Value *RHS = U->getOperand(1); 00695 00696 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 00697 if (C->getValue() == 0xffffffff) 00698 std::swap(LHS, RHS); 00699 00700 if (const auto *C = cast<ConstantInt>(RHS)) 00701 if (C->getValue() == 0xffffffff) { 00702 Addr.setShift(0); 00703 Addr.setExtendType(AArch64_AM::LSL); 00704 Addr.setExtendType(AArch64_AM::UXTW); 00705 00706 unsigned Reg = getRegForValue(LHS); 00707 if (!Reg) 00708 return false; 00709 bool RegIsKill = hasTrivialKill(LHS); 00710 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, 00711 AArch64::sub_32); 00712 Addr.setOffsetReg(Reg); 00713 return true; 00714 } 00715 break; 00716 } 00717 } // end switch 00718 00719 if (Addr.getReg()) { 00720 if (!Addr.getOffsetReg()) { 00721 unsigned Reg = getRegForValue(Obj); 00722 if (!Reg) 00723 return false; 00724 Addr.setOffsetReg(Reg); 00725 return true; 00726 } 00727 return false; 00728 } 00729 00730 unsigned Reg = getRegForValue(Obj); 00731 if (!Reg) 00732 return false; 00733 Addr.setReg(Reg); 00734 return true; 00735 } 00736 00737 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { 00738 const User *U = nullptr; 00739 unsigned Opcode = Instruction::UserOp1; 00740 bool InMBB = true; 00741 00742 if (const auto *I = dyn_cast<Instruction>(V)) { 00743 Opcode = I->getOpcode(); 00744 U = I; 00745 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); 00746 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { 00747 Opcode = C->getOpcode(); 00748 U = C; 00749 } 00750 00751 switch (Opcode) { 00752 default: break; 00753 case Instruction::BitCast: 00754 // Look past bitcasts if its operand is in the same BB. 00755 if (InMBB) 00756 return computeCallAddress(U->getOperand(0), Addr); 00757 break; 00758 case Instruction::IntToPtr: 00759 // Look past no-op inttoptrs if its operand is in the same BB. 
00760 if (InMBB && 00761 TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) 00762 return computeCallAddress(U->getOperand(0), Addr); 00763 break; 00764 case Instruction::PtrToInt: 00765 // Look past no-op ptrtoints if its operand is in the same BB. 00766 if (InMBB && 00767 TLI.getValueType(U->getType()) == TLI.getPointerTy()) 00768 return computeCallAddress(U->getOperand(0), Addr); 00769 break; 00770 } 00771 00772 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 00773 Addr.setGlobalValue(GV); 00774 return true; 00775 } 00776 00777 // If all else fails, try to materialize the value in a register. 00778 if (!Addr.getGlobalValue()) { 00779 Addr.setReg(getRegForValue(V)); 00780 return Addr.getReg() != 0; 00781 } 00782 00783 return false; 00784 } 00785 00786 00787 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { 00788 EVT evt = TLI.getValueType(Ty, true); 00789 00790 // Only handle simple types. 00791 if (evt == MVT::Other || !evt.isSimple()) 00792 return false; 00793 VT = evt.getSimpleVT(); 00794 00795 // This is a legal type, but it's not something we handle in fast-isel. 00796 if (VT == MVT::f128) 00797 return false; 00798 00799 // Handle all other legal types, i.e. a register that will directly hold this 00800 // value. 00801 return TLI.isTypeLegal(VT); 00802 } 00803 00804 /// \brief Determine if the value type is supported by FastISel. 00805 /// 00806 /// FastISel for AArch64 can handle more value types than are legal. This adds 00807 /// simple value type such as i1, i8, and i16. 00808 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) { 00809 if (Ty->isVectorTy() && !IsVectorAllowed) 00810 return false; 00811 00812 if (isTypeLegal(Ty, VT)) 00813 return true; 00814 00815 // If this is a type than can be sign or zero-extended to a basic operation 00816 // go ahead and accept it now. 00817 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) 00818 return true; 00819 00820 return false; 00821 } 00822 00823 bool AArch64FastISel::isValueAvailable(const Value *V) const { 00824 if (!isa<Instruction>(V)) 00825 return true; 00826 00827 const auto *I = cast<Instruction>(V); 00828 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) 00829 return true; 00830 00831 return false; 00832 } 00833 00834 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { 00835 unsigned ScaleFactor; 00836 switch (VT.SimpleTy) { 00837 default: return false; 00838 case MVT::i1: // fall-through 00839 case MVT::i8: ScaleFactor = 1; break; 00840 case MVT::i16: ScaleFactor = 2; break; 00841 case MVT::i32: // fall-through 00842 case MVT::f32: ScaleFactor = 4; break; 00843 case MVT::i64: // fall-through 00844 case MVT::f64: ScaleFactor = 8; break; 00845 } 00846 00847 bool ImmediateOffsetNeedsLowering = false; 00848 bool RegisterOffsetNeedsLowering = false; 00849 int64_t Offset = Addr.getOffset(); 00850 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset)) 00851 ImmediateOffsetNeedsLowering = true; 00852 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) && 00853 !isUInt<12>(Offset / ScaleFactor)) 00854 ImmediateOffsetNeedsLowering = true; 00855 00856 // Cannot encode an offset register and an immediate offset in the same 00857 // instruction. Fold the immediate offset into the load/store instruction and 00858 // emit an additonal add to take care of the offset register. 
00859 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() && 00860 Addr.getOffsetReg()) 00861 RegisterOffsetNeedsLowering = true; 00862 00863 // Cannot encode zero register as base. 00864 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg()) 00865 RegisterOffsetNeedsLowering = true; 00866 00867 // If this is a stack pointer and the offset needs to be simplified then put 00868 // the alloca address into a register, set the base type back to register and 00869 // continue. This should almost never happen. 00870 if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) { 00871 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); 00872 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 00873 ResultReg) 00874 .addFrameIndex(Addr.getFI()) 00875 .addImm(0) 00876 .addImm(0); 00877 Addr.setKind(Address::RegBase); 00878 Addr.setReg(ResultReg); 00879 } 00880 00881 if (RegisterOffsetNeedsLowering) { 00882 unsigned ResultReg = 0; 00883 if (Addr.getReg()) { 00884 if (Addr.getExtendType() == AArch64_AM::SXTW || 00885 Addr.getExtendType() == AArch64_AM::UXTW ) 00886 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 00887 /*TODO:IsKill=*/false, Addr.getOffsetReg(), 00888 /*TODO:IsKill=*/false, Addr.getExtendType(), 00889 Addr.getShift()); 00890 else 00891 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 00892 /*TODO:IsKill=*/false, Addr.getOffsetReg(), 00893 /*TODO:IsKill=*/false, AArch64_AM::LSL, 00894 Addr.getShift()); 00895 } else { 00896 if (Addr.getExtendType() == AArch64_AM::UXTW) 00897 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 00898 /*Op0IsKill=*/false, Addr.getShift(), 00899 /*IsZExt=*/true); 00900 else if (Addr.getExtendType() == AArch64_AM::SXTW) 00901 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 00902 /*Op0IsKill=*/false, Addr.getShift(), 00903 /*IsZExt=*/false); 00904 else 00905 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(), 00906 /*Op0IsKill=*/false, Addr.getShift()); 00907 } 00908 if (!ResultReg) 00909 return false; 00910 00911 Addr.setReg(ResultReg); 00912 Addr.setOffsetReg(0); 00913 Addr.setShift(0); 00914 Addr.setExtendType(AArch64_AM::InvalidShiftExtend); 00915 } 00916 00917 // Since the offset is too large for the load/store instruction get the 00918 // reg+offset into a register. 00919 if (ImmediateOffsetNeedsLowering) { 00920 unsigned ResultReg; 00921 if (Addr.getReg()) { 00922 // Try to fold the immediate into the add instruction. 00923 if (Offset < 0) 00924 ResultReg = emitAddSub_ri(/*UseAdd=*/false, MVT::i64, Addr.getReg(), 00925 /*IsKill=*/false, -Offset); 00926 else 00927 ResultReg = emitAddSub_ri(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 00928 /*IsKill=*/false, Offset); 00929 if (!ResultReg) { 00930 unsigned ImmReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); 00931 ResultReg = emitAddSub_rr(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 00932 /*IsKill=*/false, ImmReg, /*IsKill=*/true); 00933 } 00934 } else 00935 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); 00936 00937 if (!ResultReg) 00938 return false; 00939 Addr.setReg(ResultReg); 00940 Addr.setOffset(0); 00941 } 00942 return true; 00943 } 00944 00945 void AArch64FastISel::addLoadStoreOperands(Address &Addr, 00946 const MachineInstrBuilder &MIB, 00947 unsigned Flags, 00948 unsigned ScaleFactor, 00949 MachineMemOperand *MMO) { 00950 int64_t Offset = Addr.getOffset() / ScaleFactor; 00951 // Frame base works a bit differently. 
Handle it separately. 00952 if (Addr.isFIBase()) { 00953 int FI = Addr.getFI(); 00954 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size 00955 // and alignment should be based on the VT. 00956 MMO = FuncInfo.MF->getMachineMemOperand( 00957 MachinePointerInfo::getFixedStack(FI, Offset), Flags, 00958 MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); 00959 // Now add the rest of the operands. 00960 MIB.addFrameIndex(FI).addImm(Offset); 00961 } else { 00962 assert(Addr.isRegBase() && "Unexpected address kind."); 00963 const MCInstrDesc &II = MIB->getDesc(); 00964 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0; 00965 Addr.setReg( 00966 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx)); 00967 Addr.setOffsetReg( 00968 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1)); 00969 if (Addr.getOffsetReg()) { 00970 assert(Addr.getOffset() == 0 && "Unexpected offset"); 00971 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || 00972 Addr.getExtendType() == AArch64_AM::SXTX; 00973 MIB.addReg(Addr.getReg()); 00974 MIB.addReg(Addr.getOffsetReg()); 00975 MIB.addImm(IsSigned); 00976 MIB.addImm(Addr.getShift() != 0); 00977 } else { 00978 MIB.addReg(Addr.getReg()); 00979 MIB.addImm(Offset); 00980 } 00981 } 00982 00983 if (MMO) 00984 MIB.addMemOperand(MMO); 00985 } 00986 00987 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 00988 const Value *RHS, bool SetFlags, 00989 bool WantResult, bool IsZExt) { 00990 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; 00991 bool NeedExtend = false; 00992 switch (RetVT.SimpleTy) { 00993 default: 00994 return 0; 00995 case MVT::i1: 00996 NeedExtend = true; 00997 break; 00998 case MVT::i8: 00999 NeedExtend = true; 01000 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; 01001 break; 01002 case MVT::i16: 01003 NeedExtend = true; 01004 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; 01005 break; 01006 case MVT::i32: // fall-through 01007 case MVT::i64: 01008 break; 01009 } 01010 MVT SrcVT = RetVT; 01011 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32); 01012 01013 // Canonicalize immediates to the RHS first. 01014 if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)) 01015 std::swap(LHS, RHS); 01016 01017 // Canonicalize mul by power of 2 to the RHS. 01018 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 01019 if (isMulPowOf2(LHS)) 01020 std::swap(LHS, RHS); 01021 01022 // Canonicalize shift immediate to the RHS. 01023 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 01024 if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) 01025 if (isa<ConstantInt>(SI->getOperand(1))) 01026 if (SI->getOpcode() == Instruction::Shl || 01027 SI->getOpcode() == Instruction::LShr || 01028 SI->getOpcode() == Instruction::AShr ) 01029 std::swap(LHS, RHS); 01030 01031 unsigned LHSReg = getRegForValue(LHS); 01032 if (!LHSReg) 01033 return 0; 01034 bool LHSIsKill = hasTrivialKill(LHS); 01035 01036 if (NeedExtend) 01037 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt); 01038 01039 unsigned ResultReg = 0; 01040 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 01041 uint64_t Imm = IsZExt ? 
C->getZExtValue() : C->getSExtValue(); 01042 if (C->isNegative()) 01043 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm, 01044 SetFlags, WantResult); 01045 else 01046 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags, 01047 WantResult); 01048 } 01049 if (ResultReg) 01050 return ResultReg; 01051 01052 // Only extend the RHS within the instruction if there is a valid extend type. 01053 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && 01054 isValueAvailable(RHS)) { 01055 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) 01056 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) 01057 if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { 01058 unsigned RHSReg = getRegForValue(SI->getOperand(0)); 01059 if (!RHSReg) 01060 return 0; 01061 bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); 01062 return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, 01063 RHSIsKill, ExtendType, C->getZExtValue(), 01064 SetFlags, WantResult); 01065 } 01066 unsigned RHSReg = getRegForValue(RHS); 01067 if (!RHSReg) 01068 return 0; 01069 bool RHSIsKill = hasTrivialKill(RHS); 01070 return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 01071 ExtendType, 0, SetFlags, WantResult); 01072 } 01073 01074 // Check if the mul can be folded into the instruction. 01075 if (RHS->hasOneUse() && isValueAvailable(RHS)) 01076 if (isMulPowOf2(RHS)) { 01077 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 01078 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 01079 01080 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 01081 if (C->getValue().isPowerOf2()) 01082 std::swap(MulLHS, MulRHS); 01083 01084 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 01085 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 01086 unsigned RHSReg = getRegForValue(MulLHS); 01087 if (!RHSReg) 01088 return 0; 01089 bool RHSIsKill = hasTrivialKill(MulLHS); 01090 return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 01091 AArch64_AM::LSL, ShiftVal, SetFlags, WantResult); 01092 } 01093 01094 // Check if the shift can be folded into the instruction. 
01095 if (RHS->hasOneUse() && isValueAvailable(RHS)) 01096 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { 01097 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 01098 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; 01099 switch (SI->getOpcode()) { 01100 default: break; 01101 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; 01102 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; 01103 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; 01104 } 01105 uint64_t ShiftVal = C->getZExtValue(); 01106 if (ShiftType != AArch64_AM::InvalidShiftExtend) { 01107 unsigned RHSReg = getRegForValue(SI->getOperand(0)); 01108 if (!RHSReg) 01109 return 0; 01110 bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); 01111 return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, 01112 RHSIsKill, ShiftType, ShiftVal, SetFlags, 01113 WantResult); 01114 } 01115 } 01116 } 01117 01118 unsigned RHSReg = getRegForValue(RHS); 01119 if (!RHSReg) 01120 return 0; 01121 bool RHSIsKill = hasTrivialKill(RHS); 01122 01123 if (NeedExtend) 01124 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt); 01125 01126 return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 01127 SetFlags, WantResult); 01128 } 01129 01130 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 01131 bool LHSIsKill, unsigned RHSReg, 01132 bool RHSIsKill, bool SetFlags, 01133 bool WantResult) { 01134 assert(LHSReg && RHSReg && "Invalid register number."); 01135 01136 if (RetVT != MVT::i32 && RetVT != MVT::i64) 01137 return 0; 01138 01139 static const unsigned OpcTable[2][2][2] = { 01140 { { AArch64::SUBWrr, AArch64::SUBXrr }, 01141 { AArch64::ADDWrr, AArch64::ADDXrr } }, 01142 { { AArch64::SUBSWrr, AArch64::SUBSXrr }, 01143 { AArch64::ADDSWrr, AArch64::ADDSXrr } } 01144 }; 01145 bool Is64Bit = RetVT == MVT::i64; 01146 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 01147 const TargetRegisterClass *RC = 01148 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 01149 unsigned ResultReg; 01150 if (WantResult) 01151 ResultReg = createResultReg(RC); 01152 else 01153 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 01154 01155 const MCInstrDesc &II = TII.get(Opc); 01156 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 01157 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 01158 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 01159 .addReg(LHSReg, getKillRegState(LHSIsKill)) 01160 .addReg(RHSReg, getKillRegState(RHSIsKill)); 01161 return ResultReg; 01162 } 01163 01164 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 01165 bool LHSIsKill, uint64_t Imm, 01166 bool SetFlags, bool WantResult) { 01167 assert(LHSReg && "Invalid register number."); 01168 01169 if (RetVT != MVT::i32 && RetVT != MVT::i64) 01170 return 0; 01171 01172 unsigned ShiftImm; 01173 if (isUInt<12>(Imm)) 01174 ShiftImm = 0; 01175 else if ((Imm & 0xfff000) == Imm) { 01176 ShiftImm = 12; 01177 Imm >>= 12; 01178 } else 01179 return 0; 01180 01181 static const unsigned OpcTable[2][2][2] = { 01182 { { AArch64::SUBWri, AArch64::SUBXri }, 01183 { AArch64::ADDWri, AArch64::ADDXri } }, 01184 { { AArch64::SUBSWri, AArch64::SUBSXri }, 01185 { AArch64::ADDSWri, AArch64::ADDSXri } } 01186 }; 01187 bool Is64Bit = RetVT == MVT::i64; 01188 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 01189 const TargetRegisterClass *RC; 01190 if (SetFlags) 01191 RC = Is64Bit ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 01192 else 01193 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 01194 unsigned ResultReg; 01195 if (WantResult) 01196 ResultReg = createResultReg(RC); 01197 else 01198 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 01199 01200 const MCInstrDesc &II = TII.get(Opc); 01201 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 01202 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 01203 .addReg(LHSReg, getKillRegState(LHSIsKill)) 01204 .addImm(Imm) 01205 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm)); 01206 return ResultReg; 01207 } 01208 01209 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 01210 bool LHSIsKill, unsigned RHSReg, 01211 bool RHSIsKill, 01212 AArch64_AM::ShiftExtendType ShiftType, 01213 uint64_t ShiftImm, bool SetFlags, 01214 bool WantResult) { 01215 assert(LHSReg && RHSReg && "Invalid register number."); 01216 01217 if (RetVT != MVT::i32 && RetVT != MVT::i64) 01218 return 0; 01219 01220 static const unsigned OpcTable[2][2][2] = { 01221 { { AArch64::SUBWrs, AArch64::SUBXrs }, 01222 { AArch64::ADDWrs, AArch64::ADDXrs } }, 01223 { { AArch64::SUBSWrs, AArch64::SUBSXrs }, 01224 { AArch64::ADDSWrs, AArch64::ADDSXrs } } 01225 }; 01226 bool Is64Bit = RetVT == MVT::i64; 01227 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 01228 const TargetRegisterClass *RC = 01229 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 01230 unsigned ResultReg; 01231 if (WantResult) 01232 ResultReg = createResultReg(RC); 01233 else 01234 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 01235 01236 const MCInstrDesc &II = TII.get(Opc); 01237 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 01238 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 01239 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 01240 .addReg(LHSReg, getKillRegState(LHSIsKill)) 01241 .addReg(RHSReg, getKillRegState(RHSIsKill)) 01242 .addImm(getShifterImm(ShiftType, ShiftImm)); 01243 return ResultReg; 01244 } 01245 01246 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 01247 bool LHSIsKill, unsigned RHSReg, 01248 bool RHSIsKill, 01249 AArch64_AM::ShiftExtendType ExtType, 01250 uint64_t ShiftImm, bool SetFlags, 01251 bool WantResult) { 01252 assert(LHSReg && RHSReg && "Invalid register number."); 01253 01254 if (RetVT != MVT::i32 && RetVT != MVT::i64) 01255 return 0; 01256 01257 static const unsigned OpcTable[2][2][2] = { 01258 { { AArch64::SUBWrx, AArch64::SUBXrx }, 01259 { AArch64::ADDWrx, AArch64::ADDXrx } }, 01260 { { AArch64::SUBSWrx, AArch64::SUBSXrx }, 01261 { AArch64::ADDSWrx, AArch64::ADDSXrx } } 01262 }; 01263 bool Is64Bit = RetVT == MVT::i64; 01264 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 01265 const TargetRegisterClass *RC = nullptr; 01266 if (SetFlags) 01267 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 01268 else 01269 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 01270 unsigned ResultReg; 01271 if (WantResult) 01272 ResultReg = createResultReg(RC); 01273 else 01274 ResultReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR; 01275 01276 const MCInstrDesc &II = TII.get(Opc); 01277 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 01278 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 01279 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 01280 .addReg(LHSReg, getKillRegState(LHSIsKill)) 01281 .addReg(RHSReg, getKillRegState(RHSIsKill)) 01282 .addImm(getArithExtendImm(ExtType, ShiftImm)); 01283 return ResultReg; 01284 } 01285 01286 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { 01287 Type *Ty = LHS->getType(); 01288 EVT EVT = TLI.getValueType(Ty, true); 01289 if (!EVT.isSimple()) 01290 return false; 01291 MVT VT = EVT.getSimpleVT(); 01292 01293 switch (VT.SimpleTy) { 01294 default: 01295 return false; 01296 case MVT::i1: 01297 case MVT::i8: 01298 case MVT::i16: 01299 case MVT::i32: 01300 case MVT::i64: 01301 return emitICmp(VT, LHS, RHS, IsZExt); 01302 case MVT::f32: 01303 case MVT::f64: 01304 return emitFCmp(VT, LHS, RHS); 01305 } 01306 } 01307 01308 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, 01309 bool IsZExt) { 01310 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, 01311 IsZExt) != 0; 01312 } 01313 01314 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 01315 uint64_t Imm) { 01316 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm, 01317 /*SetFlags=*/true, /*WantResult=*/false) != 0; 01318 } 01319 01320 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { 01321 if (RetVT != MVT::f32 && RetVT != MVT::f64) 01322 return false; 01323 01324 // Check to see if the 2nd operand is a constant that we can encode directly 01325 // in the compare. 01326 bool UseImm = false; 01327 if (const auto *CFP = dyn_cast<ConstantFP>(RHS)) 01328 if (CFP->isZero() && !CFP->isNegative()) 01329 UseImm = true; 01330 01331 unsigned LHSReg = getRegForValue(LHS); 01332 if (!LHSReg) 01333 return false; 01334 bool LHSIsKill = hasTrivialKill(LHS); 01335 01336 if (UseImm) { 01337 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri; 01338 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 01339 .addReg(LHSReg, getKillRegState(LHSIsKill)); 01340 return true; 01341 } 01342 01343 unsigned RHSReg = getRegForValue(RHS); 01344 if (!RHSReg) 01345 return false; 01346 bool RHSIsKill = hasTrivialKill(RHS); 01347 01348 unsigned Opc = (RetVT == MVT::f64) ? 
AArch64::FCMPDrr : AArch64::FCMPSrr; 01349 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 01350 .addReg(LHSReg, getKillRegState(LHSIsKill)) 01351 .addReg(RHSReg, getKillRegState(RHSIsKill)); 01352 return true; 01353 } 01354 01355 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 01356 bool SetFlags, bool WantResult, bool IsZExt) { 01357 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, 01358 IsZExt); 01359 } 01360 01361 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 01362 bool SetFlags, bool WantResult, bool IsZExt) { 01363 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, 01364 IsZExt); 01365 } 01366 01367 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, 01368 bool LHSIsKill, unsigned RHSReg, 01369 bool RHSIsKill, bool WantResult) { 01370 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, 01371 RHSIsKill, /*SetFlags=*/true, WantResult); 01372 } 01373 01374 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, 01375 bool LHSIsKill, unsigned RHSReg, 01376 bool RHSIsKill, 01377 AArch64_AM::ShiftExtendType ShiftType, 01378 uint64_t ShiftImm, bool WantResult) { 01379 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, 01380 RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true, 01381 WantResult); 01382 } 01383 01384 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, 01385 const Value *LHS, const Value *RHS) { 01386 // Canonicalize immediates to the RHS first. 01387 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)) 01388 std::swap(LHS, RHS); 01389 01390 // Canonicalize mul by power-of-2 to the RHS. 01391 if (LHS->hasOneUse() && isValueAvailable(LHS)) 01392 if (isMulPowOf2(LHS)) 01393 std::swap(LHS, RHS); 01394 01395 // Canonicalize shift immediate to the RHS. 01396 if (LHS->hasOneUse() && isValueAvailable(LHS)) 01397 if (const auto *SI = dyn_cast<ShlOperator>(LHS)) 01398 if (isa<ConstantInt>(SI->getOperand(1))) 01399 std::swap(LHS, RHS); 01400 01401 unsigned LHSReg = getRegForValue(LHS); 01402 if (!LHSReg) 01403 return 0; 01404 bool LHSIsKill = hasTrivialKill(LHS); 01405 01406 unsigned ResultReg = 0; 01407 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 01408 uint64_t Imm = C->getZExtValue(); 01409 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm); 01410 } 01411 if (ResultReg) 01412 return ResultReg; 01413 01414 // Check if the mul can be folded into the instruction. 01415 if (RHS->hasOneUse() && isValueAvailable(RHS)) 01416 if (isMulPowOf2(RHS)) { 01417 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 01418 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 01419 01420 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 01421 if (C->getValue().isPowerOf2()) 01422 std::swap(MulLHS, MulRHS); 01423 01424 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 01425 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 01426 01427 unsigned RHSReg = getRegForValue(MulLHS); 01428 if (!RHSReg) 01429 return 0; 01430 bool RHSIsKill = hasTrivialKill(MulLHS); 01431 return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, 01432 RHSIsKill, ShiftVal); 01433 } 01434 01435 // Check if the shift can be folded into the instruction. 
01436 if (RHS->hasOneUse() && isValueAvailable(RHS)) 01437 if (const auto *SI = dyn_cast<ShlOperator>(RHS)) 01438 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 01439 uint64_t ShiftVal = C->getZExtValue(); 01440 unsigned RHSReg = getRegForValue(SI->getOperand(0)); 01441 if (!RHSReg) 01442 return 0; 01443 bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); 01444 return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, 01445 RHSIsKill, ShiftVal); 01446 } 01447 01448 unsigned RHSReg = getRegForValue(RHS); 01449 if (!RHSReg) 01450 return 0; 01451 bool RHSIsKill = hasTrivialKill(RHS); 01452 01453 MVT VT = std::max(MVT::i32, RetVT.SimpleTy); 01454 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 01455 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 01456 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 01457 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 01458 } 01459 return ResultReg; 01460 } 01461 01462 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, 01463 unsigned LHSReg, bool LHSIsKill, 01464 uint64_t Imm) { 01465 assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) && 01466 "ISD nodes are not consecutive!"); 01467 static const unsigned OpcTable[3][2] = { 01468 { AArch64::ANDWri, AArch64::ANDXri }, 01469 { AArch64::ORRWri, AArch64::ORRXri }, 01470 { AArch64::EORWri, AArch64::EORXri } 01471 }; 01472 const TargetRegisterClass *RC; 01473 unsigned Opc; 01474 unsigned RegSize; 01475 switch (RetVT.SimpleTy) { 01476 default: 01477 return 0; 01478 case MVT::i1: 01479 case MVT::i8: 01480 case MVT::i16: 01481 case MVT::i32: { 01482 unsigned Idx = ISDOpc - ISD::AND; 01483 Opc = OpcTable[Idx][0]; 01484 RC = &AArch64::GPR32spRegClass; 01485 RegSize = 32; 01486 break; 01487 } 01488 case MVT::i64: 01489 Opc = OpcTable[ISDOpc - ISD::AND][1]; 01490 RC = &AArch64::GPR64spRegClass; 01491 RegSize = 64; 01492 break; 01493 } 01494 01495 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize)) 01496 return 0; 01497 01498 unsigned ResultReg = 01499 fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill, 01500 AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); 01501 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { 01502 uint64_t Mask = (RetVT == MVT::i8) ? 
0xff : 0xffff; 01503 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 01504 } 01505 return ResultReg; 01506 } 01507 01508 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, 01509 unsigned LHSReg, bool LHSIsKill, 01510 unsigned RHSReg, bool RHSIsKill, 01511 uint64_t ShiftImm) { 01512 assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) && 01513 "ISD nodes are not consecutive!"); 01514 static const unsigned OpcTable[3][2] = { 01515 { AArch64::ANDWrs, AArch64::ANDXrs }, 01516 { AArch64::ORRWrs, AArch64::ORRXrs }, 01517 { AArch64::EORWrs, AArch64::EORXrs } 01518 }; 01519 const TargetRegisterClass *RC; 01520 unsigned Opc; 01521 switch (RetVT.SimpleTy) { 01522 default: 01523 return 0; 01524 case MVT::i1: 01525 case MVT::i8: 01526 case MVT::i16: 01527 case MVT::i32: 01528 Opc = OpcTable[ISDOpc - ISD::AND][0]; 01529 RC = &AArch64::GPR32RegClass; 01530 break; 01531 case MVT::i64: 01532 Opc = OpcTable[ISDOpc - ISD::AND][1]; 01533 RC = &AArch64::GPR64RegClass; 01534 break; 01535 } 01536 unsigned ResultReg = 01537 fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 01538 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 01539 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 01540 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 01541 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 01542 } 01543 return ResultReg; 01544 } 01545 01546 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 01547 uint64_t Imm) { 01548 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); 01549 } 01550 01551 bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr, 01552 MachineMemOperand *MMO) { 01553 // Simplify this down to something we can handle. 01554 if (!simplifyAddress(Addr, VT)) 01555 return false; 01556 01557 unsigned ScaleFactor; 01558 switch (VT.SimpleTy) { 01559 default: llvm_unreachable("Unexpected value type."); 01560 case MVT::i1: // fall-through 01561 case MVT::i8: ScaleFactor = 1; break; 01562 case MVT::i16: ScaleFactor = 2; break; 01563 case MVT::i32: // fall-through 01564 case MVT::f32: ScaleFactor = 4; break; 01565 case MVT::i64: // fall-through 01566 case MVT::f64: ScaleFactor = 8; break; 01567 } 01568 01569 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 01570 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 01571 bool UseScaled = true; 01572 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 01573 UseScaled = false; 01574 ScaleFactor = 1; 01575 } 01576 01577 static const unsigned OpcTable[4][6] = { 01578 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi, 01579 AArch64::LDURSi, AArch64::LDURDi }, 01580 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui, 01581 AArch64::LDRSui, AArch64::LDRDui }, 01582 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX, 01583 AArch64::LDRSroX, AArch64::LDRDroX }, 01584 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW, 01585 AArch64::LDRSroW, AArch64::LDRDroW } 01586 }; 01587 01588 unsigned Opc; 01589 const TargetRegisterClass *RC; 01590 bool VTIsi1 = false; 01591 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 01592 Addr.getOffsetReg(); 01593 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 
1 : 0; 01594 if (Addr.getExtendType() == AArch64_AM::UXTW || 01595 Addr.getExtendType() == AArch64_AM::SXTW) 01596 Idx++; 01597 01598 switch (VT.SimpleTy) { 01599 default: llvm_unreachable("Unexpected value type."); 01600 case MVT::i1: VTIsi1 = true; // Intentional fall-through. 01601 case MVT::i8: Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break; 01602 case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break; 01603 case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break; 01604 case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break; 01605 case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break; 01606 case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break; 01607 } 01608 01609 // Create the base instruction, then add the operands. 01610 ResultReg = createResultReg(RC); 01611 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01612 TII.get(Opc), ResultReg); 01613 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 01614 01615 // Loading an i1 requires special handling. 01616 if (VTIsi1) { 01617 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); 01618 assert(ANDReg && "Unexpected AND instruction emission failure."); 01619 ResultReg = ANDReg; 01620 } 01621 return true; 01622 } 01623 01624 bool AArch64FastISel::selectAddSub(const Instruction *I) { 01625 MVT VT; 01626 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 01627 return false; 01628 01629 if (VT.isVector()) 01630 return selectOperator(I, I->getOpcode()); 01631 01632 unsigned ResultReg; 01633 switch (I->getOpcode()) { 01634 default: 01635 llvm_unreachable("Unexpected instruction."); 01636 case Instruction::Add: 01637 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 01638 break; 01639 case Instruction::Sub: 01640 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 01641 break; 01642 } 01643 if (!ResultReg) 01644 return false; 01645 01646 updateValueMap(I, ResultReg); 01647 return true; 01648 } 01649 01650 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 01651 MVT VT; 01652 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 01653 return false; 01654 01655 if (VT.isVector()) 01656 return selectOperator(I, I->getOpcode()); 01657 01658 unsigned ResultReg; 01659 switch (I->getOpcode()) { 01660 default: 01661 llvm_unreachable("Unexpected instruction."); 01662 case Instruction::And: 01663 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 01664 break; 01665 case Instruction::Or: 01666 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 01667 break; 01668 case Instruction::Xor: 01669 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 01670 break; 01671 } 01672 if (!ResultReg) 01673 return false; 01674 01675 updateValueMap(I, ResultReg); 01676 return true; 01677 } 01678 01679 bool AArch64FastISel::selectLoad(const Instruction *I) { 01680 MVT VT; 01681 // Verify we have a legal type before going any further. Currently, we handle 01682 // simple types that will directly fit in a register (i32/f32/i64/f64) or 01683 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 01684 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 01685 cast<LoadInst>(I)->isAtomic()) 01686 return false; 01687 01688 // See if we can handle this address. 
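// (Added explanatory note, not part of the original source: computeAddress
//  below tries to put the pointer into one of the forms the load/store
//  emitters above can match, roughly
//    [Xn, #imm]           scaled, unsigned 12-bit offset  -> e.g. LDRWui
//    [Xn, #simm9]         unscaled, signed 9-bit offset   -> e.g. LDURWi
//    [Xn, Xm{, LSL #s}]   64-bit register offset          -> e.g. LDRWroX
//    [Xn, Wm, {S,U}XTW]   32-bit register offset          -> e.g. LDRWroW
//  For example, an i32 load at base+8 can use the scaled form, while base-4
//  falls back to the unscaled LDUR form because the offset is negative.)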
01689 Address Addr; 01690 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 01691 return false; 01692 01693 unsigned ResultReg; 01694 if (!emitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I))) 01695 return false; 01696 01697 updateValueMap(I, ResultReg); 01698 return true; 01699 } 01700 01701 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 01702 MachineMemOperand *MMO) { 01703 // Simplify this down to something we can handle. 01704 if (!simplifyAddress(Addr, VT)) 01705 return false; 01706 01707 unsigned ScaleFactor; 01708 switch (VT.SimpleTy) { 01709 default: llvm_unreachable("Unexpected value type."); 01710 case MVT::i1: // fall-through 01711 case MVT::i8: ScaleFactor = 1; break; 01712 case MVT::i16: ScaleFactor = 2; break; 01713 case MVT::i32: // fall-through 01714 case MVT::f32: ScaleFactor = 4; break; 01715 case MVT::i64: // fall-through 01716 case MVT::f64: ScaleFactor = 8; break; 01717 } 01718 01719 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 01720 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 01721 bool UseScaled = true; 01722 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 01723 UseScaled = false; 01724 ScaleFactor = 1; 01725 } 01726 01727 01728 static const unsigned OpcTable[4][6] = { 01729 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 01730 AArch64::STURSi, AArch64::STURDi }, 01731 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 01732 AArch64::STRSui, AArch64::STRDui }, 01733 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 01734 AArch64::STRSroX, AArch64::STRDroX }, 01735 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 01736 AArch64::STRSroW, AArch64::STRDroW } 01737 01738 }; 01739 01740 unsigned Opc; 01741 bool VTIsi1 = false; 01742 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 01743 Addr.getOffsetReg(); 01744 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 01745 if (Addr.getExtendType() == AArch64_AM::UXTW || 01746 Addr.getExtendType() == AArch64_AM::SXTW) 01747 Idx++; 01748 01749 switch (VT.SimpleTy) { 01750 default: llvm_unreachable("Unexpected value type."); 01751 case MVT::i1: VTIsi1 = true; 01752 case MVT::i8: Opc = OpcTable[Idx][0]; break; 01753 case MVT::i16: Opc = OpcTable[Idx][1]; break; 01754 case MVT::i32: Opc = OpcTable[Idx][2]; break; 01755 case MVT::i64: Opc = OpcTable[Idx][3]; break; 01756 case MVT::f32: Opc = OpcTable[Idx][4]; break; 01757 case MVT::f64: Opc = OpcTable[Idx][5]; break; 01758 } 01759 01760 // Storing an i1 requires special handling. 01761 if (VTIsi1 && SrcReg != AArch64::WZR) { 01762 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 01763 assert(ANDReg && "Unexpected AND instruction emission failure."); 01764 SrcReg = ANDReg; 01765 } 01766 // Create the base instruction, then add the operands. 01767 const MCInstrDesc &II = TII.get(Opc); 01768 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 01769 MachineInstrBuilder MIB = 01770 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); 01771 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 01772 01773 return true; 01774 } 01775 01776 bool AArch64FastISel::selectStore(const Instruction *I) { 01777 MVT VT; 01778 const Value *Op0 = I->getOperand(0); 01779 // Verify we have a legal type before going any further. 
Currently, we handle 01780 // simple types that will directly fit in a register (i32/f32/i64/f64) or 01781 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 01782 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) || 01783 cast<StoreInst>(I)->isAtomic()) 01784 return false; 01785 01786 // Get the value to be stored into a register. Use the zero register directly 01787 // when possible to avoid an unnecessary copy and a wasted register. 01788 unsigned SrcReg = 0; 01789 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 01790 if (CI->isZero()) 01791 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 01792 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 01793 if (CF->isZero() && !CF->isNegative()) { 01794 VT = MVT::getIntegerVT(VT.getSizeInBits()); 01795 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 01796 } 01797 } 01798 01799 if (!SrcReg) 01800 SrcReg = getRegForValue(Op0); 01801 01802 if (!SrcReg) 01803 return false; 01804 01805 // See if we can handle this address. 01806 Address Addr; 01807 if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType())) 01808 return false; 01809 01810 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 01811 return false; 01812 return true; 01813 } 01814 01815 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 01816 switch (Pred) { 01817 case CmpInst::FCMP_ONE: 01818 case CmpInst::FCMP_UEQ: 01819 default: 01820 // AL is our "false" for now. The other two need more compares. 01821 return AArch64CC::AL; 01822 case CmpInst::ICMP_EQ: 01823 case CmpInst::FCMP_OEQ: 01824 return AArch64CC::EQ; 01825 case CmpInst::ICMP_SGT: 01826 case CmpInst::FCMP_OGT: 01827 return AArch64CC::GT; 01828 case CmpInst::ICMP_SGE: 01829 case CmpInst::FCMP_OGE: 01830 return AArch64CC::GE; 01831 case CmpInst::ICMP_UGT: 01832 case CmpInst::FCMP_UGT: 01833 return AArch64CC::HI; 01834 case CmpInst::FCMP_OLT: 01835 return AArch64CC::MI; 01836 case CmpInst::ICMP_ULE: 01837 case CmpInst::FCMP_OLE: 01838 return AArch64CC::LS; 01839 case CmpInst::FCMP_ORD: 01840 return AArch64CC::VC; 01841 case CmpInst::FCMP_UNO: 01842 return AArch64CC::VS; 01843 case CmpInst::FCMP_UGE: 01844 return AArch64CC::PL; 01845 case CmpInst::ICMP_SLT: 01846 case CmpInst::FCMP_ULT: 01847 return AArch64CC::LT; 01848 case CmpInst::ICMP_SLE: 01849 case CmpInst::FCMP_ULE: 01850 return AArch64CC::LE; 01851 case CmpInst::FCMP_UNE: 01852 case CmpInst::ICMP_NE: 01853 return AArch64CC::NE; 01854 case CmpInst::ICMP_UGE: 01855 return AArch64CC::HS; 01856 case CmpInst::ICMP_ULT: 01857 return AArch64CC::LO; 01858 } 01859 } 01860 01861 /// \brief Check if the comparison against zero and the following branch can be 01862 /// folded into a single instruction (CBZ or CBNZ). 
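// (Added illustrative note, not part of the original source: the caller below
//  combines this check with branch selection so that, for example,
//    %c = icmp eq i64 %x, 0  +  br i1 %c, ...   ->  CBZ  Xn, <target>
//    %c = icmp ne i32 %x, 0  +  br i1 %c, ...   ->  CBNZ Wn, <target>
//  and, when the compared value is an 'and' with a power-of-two mask,
//    %a = and i64 %x, 16
//    %c = icmp ne i64 %a, 0  +  br i1 %c, ...   ->  TBNZ Xn, #4, <target>.)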
01863 static bool canFoldZeroCheckIntoBranch(const CmpInst *CI) { 01864 CmpInst::Predicate Predicate = CI->getPredicate(); 01865 if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE)) 01866 return false; 01867 01868 Type *Ty = CI->getOperand(0)->getType(); 01869 if (!Ty->isIntegerTy()) 01870 return false; 01871 01872 unsigned BW = cast<IntegerType>(Ty)->getBitWidth(); 01873 if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64) 01874 return false; 01875 01876 if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(0))) 01877 if (C->isNullValue()) 01878 return true; 01879 01880 if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(1))) 01881 if (C->isNullValue()) 01882 return true; 01883 01884 return false; 01885 } 01886 01887 bool AArch64FastISel::selectBranch(const Instruction *I) { 01888 const BranchInst *BI = cast<BranchInst>(I); 01889 if (BI->isUnconditional()) { 01890 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 01891 fastEmitBranch(MSucc, BI->getDebugLoc()); 01892 return true; 01893 } 01894 01895 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 01896 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 01897 01898 AArch64CC::CondCode CC = AArch64CC::NE; 01899 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 01900 if (CI->hasOneUse() && isValueAvailable(CI)) { 01901 // Try to optimize or fold the cmp. 01902 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 01903 switch (Predicate) { 01904 default: 01905 break; 01906 case CmpInst::FCMP_FALSE: 01907 fastEmitBranch(FBB, DbgLoc); 01908 return true; 01909 case CmpInst::FCMP_TRUE: 01910 fastEmitBranch(TBB, DbgLoc); 01911 return true; 01912 } 01913 01914 // Try to take advantage of fallthrough opportunities. 01915 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 01916 std::swap(TBB, FBB); 01917 Predicate = CmpInst::getInversePredicate(Predicate); 01918 } 01919 01920 // Try to optimize comparisons against zero. 01921 if (canFoldZeroCheckIntoBranch(CI)) { 01922 const Value *LHS = CI->getOperand(0); 01923 const Value *RHS = CI->getOperand(1); 01924 01925 // Canonicalize zero values to the RHS. 01926 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 01927 if (C->isNullValue()) 01928 std::swap(LHS, RHS); 01929 01930 int TestBit = -1; 01931 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 01932 if (AI->getOpcode() == Instruction::And) { 01933 const Value *AndLHS = AI->getOperand(0); 01934 const Value *AndRHS = AI->getOperand(1); 01935 01936 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 01937 if (C->getValue().isPowerOf2()) 01938 std::swap(AndLHS, AndRHS); 01939 01940 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 01941 if (C->getValue().isPowerOf2()) { 01942 TestBit = C->getValue().logBase2(); 01943 LHS = AndLHS; 01944 } 01945 } 01946 01947 static const unsigned OpcTable[2][2][2] = { 01948 { {AArch64::CBZW, AArch64::CBZX }, 01949 {AArch64::CBNZW, AArch64::CBNZX} }, 01950 { {AArch64::TBZW, AArch64::TBZX }, 01951 {AArch64::TBNZW, AArch64::TBNZX} } 01952 }; 01953 bool IsBitTest = TestBit != -1; 01954 bool IsCmpNE = Predicate == CmpInst::ICMP_NE; 01955 bool Is64Bit = LHS->getType()->isIntegerTy(64); 01956 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 01957 01958 unsigned SrcReg = getRegForValue(LHS); 01959 if (!SrcReg) 01960 return false; 01961 bool SrcIsKill = hasTrivialKill(LHS); 01962 01963 // Emit the combined compare and branch instruction. 
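// (Added note for readers, not in the original source: the opcode was chosen
//  from OpcTable[IsBitTest][IsCmpNE][Is64Bit] above, so e.g. a 64-bit
//  "icmp ne" that tests a single bit selects AArch64::TBNZX, while a plain
//  32-bit "icmp eq" against zero selects AArch64::CBZW.)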
01964 MachineInstrBuilder MIB = 01965 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 01966 .addReg(SrcReg, getKillRegState(SrcIsKill)); 01967 if (IsBitTest) 01968 MIB.addImm(TestBit); 01969 MIB.addMBB(TBB); 01970 01971 // Obtain the branch weight and add the TrueBB to the successor list. 01972 uint32_t BranchWeight = 0; 01973 if (FuncInfo.BPI) 01974 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 01975 TBB->getBasicBlock()); 01976 FuncInfo.MBB->addSuccessor(TBB, BranchWeight); 01977 01978 fastEmitBranch(FBB, DbgLoc); 01979 return true; 01980 } 01981 01982 // Emit the cmp. 01983 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 01984 return false; 01985 01986 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 01987 // instruction. 01988 CC = getCompareCC(Predicate); 01989 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 01990 switch (Predicate) { 01991 default: 01992 break; 01993 case CmpInst::FCMP_UEQ: 01994 ExtraCC = AArch64CC::EQ; 01995 CC = AArch64CC::VS; 01996 break; 01997 case CmpInst::FCMP_ONE: 01998 ExtraCC = AArch64CC::MI; 01999 CC = AArch64CC::GT; 02000 break; 02001 } 02002 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 02003 02004 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 02005 if (ExtraCC != AArch64CC::AL) { 02006 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 02007 .addImm(ExtraCC) 02008 .addMBB(TBB); 02009 } 02010 02011 // Emit the branch. 02012 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 02013 .addImm(CC) 02014 .addMBB(TBB); 02015 02016 // Obtain the branch weight and add the TrueBB to the successor list. 02017 uint32_t BranchWeight = 0; 02018 if (FuncInfo.BPI) 02019 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 02020 TBB->getBasicBlock()); 02021 FuncInfo.MBB->addSuccessor(TBB, BranchWeight); 02022 02023 fastEmitBranch(FBB, DbgLoc); 02024 return true; 02025 } 02026 } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) { 02027 MVT SrcVT; 02028 if (TI->hasOneUse() && isValueAvailable(TI) && 02029 isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) { 02030 unsigned CondReg = getRegForValue(TI->getOperand(0)); 02031 if (!CondReg) 02032 return false; 02033 bool CondIsKill = hasTrivialKill(TI->getOperand(0)); 02034 02035 // Issue an extract_subreg to get the lower 32-bits. 02036 if (SrcVT == MVT::i64) { 02037 CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill, 02038 AArch64::sub_32); 02039 CondIsKill = true; 02040 } 02041 02042 unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1); 02043 assert(ANDReg && "Unexpected AND instruction emission failure."); 02044 emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); 02045 02046 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 02047 std::swap(TBB, FBB); 02048 CC = AArch64CC::EQ; 02049 } 02050 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 02051 .addImm(CC) 02052 .addMBB(TBB); 02053 02054 // Obtain the branch weight and add the TrueBB to the successor list. 02055 uint32_t BranchWeight = 0; 02056 if (FuncInfo.BPI) 02057 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 02058 TBB->getBasicBlock()); 02059 FuncInfo.MBB->addSuccessor(TBB, BranchWeight); 02060 02061 fastEmitBranch(FBB, DbgLoc); 02062 return true; 02063 } 02064 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 02065 uint64_t Imm = CI->getZExtValue(); 02066 MachineBasicBlock *Target = (Imm == 0) ? 
FBB : TBB; 02067 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) 02068 .addMBB(Target); 02069 02070 // Obtain the branch weight and add the target to the successor list. 02071 uint32_t BranchWeight = 0; 02072 if (FuncInfo.BPI) 02073 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 02074 Target->getBasicBlock()); 02075 FuncInfo.MBB->addSuccessor(Target, BranchWeight); 02076 return true; 02077 } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 02078 // Fake-request the condition; otherwise the intrinsic might be completely 02079 // optimized away. 02080 unsigned CondReg = getRegForValue(BI->getCondition()); 02081 if (!CondReg) 02082 return false; 02083 02084 // Emit the branch. 02085 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 02086 .addImm(CC) 02087 .addMBB(TBB); 02088 02089 // Obtain the branch weight and add the TrueBB to the successor list. 02090 uint32_t BranchWeight = 0; 02091 if (FuncInfo.BPI) 02092 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 02093 TBB->getBasicBlock()); 02094 FuncInfo.MBB->addSuccessor(TBB, BranchWeight); 02095 02096 fastEmitBranch(FBB, DbgLoc); 02097 return true; 02098 } 02099 02100 unsigned CondReg = getRegForValue(BI->getCondition()); 02101 if (CondReg == 0) 02102 return false; 02103 bool CondRegIsKill = hasTrivialKill(BI->getCondition()); 02104 02105 // We've been divorced from our compare! Our block was split, and 02106 // now our compare lives in a predecessor block. We mustn't 02107 // re-compare here, as the children of the compare aren't guaranteed 02108 // live across the block boundary (we *could* check for this). 02109 // Regardless, the compare has been done in the predecessor block, 02110 // and it left a value for us in a virtual register. Ergo, we test 02111 // the one-bit value left in the virtual register. 02112 emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0); 02113 02114 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 02115 std::swap(TBB, FBB); 02116 CC = AArch64CC::EQ; 02117 } 02118 02119 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 02120 .addImm(CC) 02121 .addMBB(TBB); 02122 02123 // Obtain the branch weight and add the TrueBB to the successor list. 02124 uint32_t BranchWeight = 0; 02125 if (FuncInfo.BPI) 02126 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 02127 TBB->getBasicBlock()); 02128 FuncInfo.MBB->addSuccessor(TBB, BranchWeight); 02129 02130 fastEmitBranch(FBB, DbgLoc); 02131 return true; 02132 } 02133 02134 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 02135 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 02136 unsigned AddrReg = getRegForValue(BI->getOperand(0)); 02137 if (AddrReg == 0) 02138 return false; 02139 02140 // Emit the indirect branch. 02141 const MCInstrDesc &II = TII.get(AArch64::BR); 02142 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 02143 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); 02144 02145 // Make sure the CFG is up-to-date. 02146 for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i) 02147 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]); 02148 02149 return true; 02150 } 02151 02152 bool AArch64FastISel::selectCmp(const Instruction *I) { 02153 const CmpInst *CI = cast<CmpInst>(I); 02154 02155 // Try to optimize or fold the cmp.
02156 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 02157 unsigned ResultReg = 0; 02158 switch (Predicate) { 02159 default: 02160 break; 02161 case CmpInst::FCMP_FALSE: 02162 ResultReg = createResultReg(&AArch64::GPR32RegClass); 02163 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02164 TII.get(TargetOpcode::COPY), ResultReg) 02165 .addReg(AArch64::WZR, getKillRegState(true)); 02166 break; 02167 case CmpInst::FCMP_TRUE: 02168 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 02169 break; 02170 } 02171 02172 if (ResultReg) { 02173 updateValueMap(I, ResultReg); 02174 return true; 02175 } 02176 02177 // Emit the cmp. 02178 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 02179 return false; 02180 02181 ResultReg = createResultReg(&AArch64::GPR32RegClass); 02182 02183 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 02184 // condition codes are inverted, because they are used by CSINC. 02185 static unsigned CondCodeTable[2][2] = { 02186 { AArch64CC::NE, AArch64CC::VC }, 02187 { AArch64CC::PL, AArch64CC::LE } 02188 }; 02189 unsigned *CondCodes = nullptr; 02190 switch (Predicate) { 02191 default: 02192 break; 02193 case CmpInst::FCMP_UEQ: 02194 CondCodes = &CondCodeTable[0][0]; 02195 break; 02196 case CmpInst::FCMP_ONE: 02197 CondCodes = &CondCodeTable[1][0]; 02198 break; 02199 } 02200 02201 if (CondCodes) { 02202 unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 02203 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 02204 TmpReg1) 02205 .addReg(AArch64::WZR, getKillRegState(true)) 02206 .addReg(AArch64::WZR, getKillRegState(true)) 02207 .addImm(CondCodes[0]); 02208 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 02209 ResultReg) 02210 .addReg(TmpReg1, getKillRegState(true)) 02211 .addReg(AArch64::WZR, getKillRegState(true)) 02212 .addImm(CondCodes[1]); 02213 02214 updateValueMap(I, ResultReg); 02215 return true; 02216 } 02217 02218 // Now set a register based on the comparison. 
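// (Added explanatory note, not part of the original source: CSINC Wd, Wn, Wm,
//  cond yields Wn when cond holds and Wm+1 otherwise.  With both sources tied
//  to WZR and the *inverted* condition passed below, the result is 1 exactly
//  when the original condition CC is true, e.g. for an i32 "icmp slt":
//    CMP   Wx, Wy
//    CSINC Wd, WZR, WZR, GE    ; Wd = (GE) ? 0 : 1, i.e. (Wx < Wy).)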
02219 AArch64CC::CondCode CC = getCompareCC(Predicate); 02220 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 02221 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 02222 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 02223 ResultReg) 02224 .addReg(AArch64::WZR, getKillRegState(true)) 02225 .addReg(AArch64::WZR, getKillRegState(true)) 02226 .addImm(invertedCC); 02227 02228 updateValueMap(I, ResultReg); 02229 return true; 02230 } 02231 02232 bool AArch64FastISel::selectSelect(const Instruction *I) { 02233 const SelectInst *SI = cast<SelectInst>(I); 02234 02235 EVT DestEVT = TLI.getValueType(SI->getType(), true); 02236 if (!DestEVT.isSimple()) 02237 return false; 02238 02239 MVT DestVT = DestEVT.getSimpleVT(); 02240 if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 && 02241 DestVT != MVT::f64) 02242 return false; 02243 02244 unsigned SelectOpc; 02245 const TargetRegisterClass *RC = nullptr; 02246 switch (DestVT.SimpleTy) { 02247 default: return false; 02248 case MVT::i32: 02249 SelectOpc = AArch64::CSELWr; RC = &AArch64::GPR32RegClass; break; 02250 case MVT::i64: 02251 SelectOpc = AArch64::CSELXr; RC = &AArch64::GPR64RegClass; break; 02252 case MVT::f32: 02253 SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break; 02254 case MVT::f64: 02255 SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break; 02256 } 02257 02258 const Value *Cond = SI->getCondition(); 02259 bool NeedTest = true; 02260 AArch64CC::CondCode CC = AArch64CC::NE; 02261 if (foldXALUIntrinsic(CC, I, Cond)) 02262 NeedTest = false; 02263 02264 unsigned CondReg = getRegForValue(Cond); 02265 if (!CondReg) 02266 return false; 02267 bool CondIsKill = hasTrivialKill(Cond); 02268 02269 if (NeedTest) { 02270 unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1); 02271 assert(ANDReg && "Unexpected AND instruction emission failure."); 02272 emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); 02273 } 02274 02275 unsigned TrueReg = getRegForValue(SI->getTrueValue()); 02276 bool TrueIsKill = hasTrivialKill(SI->getTrueValue()); 02277 02278 unsigned FalseReg = getRegForValue(SI->getFalseValue()); 02279 bool FalseIsKill = hasTrivialKill(SI->getFalseValue()); 02280 02281 if (!TrueReg || !FalseReg) 02282 return false; 02283 02284 unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill, 02285 FalseReg, FalseIsKill, CC); 02286 updateValueMap(I, ResultReg); 02287 return true; 02288 } 02289 02290 bool AArch64FastISel::selectFPExt(const Instruction *I) { 02291 Value *V = I->getOperand(0); 02292 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 02293 return false; 02294 02295 unsigned Op = getRegForValue(V); 02296 if (Op == 0) 02297 return false; 02298 02299 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); 02300 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), 02301 ResultReg).addReg(Op); 02302 updateValueMap(I, ResultReg); 02303 return true; 02304 } 02305 02306 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 02307 Value *V = I->getOperand(0); 02308 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 02309 return false; 02310 02311 unsigned Op = getRegForValue(V); 02312 if (Op == 0) 02313 return false; 02314 02315 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); 02316 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), 02317 ResultReg).addReg(Op); 02318 updateValueMap(I, ResultReg); 02319 return true; 02320 } 02321 
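// (Added note, not in the original source: selectFPToInt below simply picks
//  one of eight FCVTZ* opcodes based on signedness, f32/f64 source, and
//  i32/i64 destination; for example fptosi double -> i32 uses
//  AArch64::FCVTZSUWDr and fptoui float -> i64 uses AArch64::FCVTZUUXSr.
//  f128 sources and vector types are rejected and left to SelectionDAG.)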
02322 // FPToUI and FPToSI 02323 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 02324 MVT DestVT; 02325 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 02326 return false; 02327 02328 unsigned SrcReg = getRegForValue(I->getOperand(0)); 02329 if (SrcReg == 0) 02330 return false; 02331 02332 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); 02333 if (SrcVT == MVT::f128) 02334 return false; 02335 02336 unsigned Opc; 02337 if (SrcVT == MVT::f64) { 02338 if (Signed) 02339 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 02340 else 02341 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 02342 } else { 02343 if (Signed) 02344 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 02345 else 02346 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 02347 } 02348 unsigned ResultReg = createResultReg( 02349 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 02350 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 02351 .addReg(SrcReg); 02352 updateValueMap(I, ResultReg); 02353 return true; 02354 } 02355 02356 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 02357 MVT DestVT; 02358 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 02359 return false; 02360 assert ((DestVT == MVT::f32 || DestVT == MVT::f64) && 02361 "Unexpected value type."); 02362 02363 unsigned SrcReg = getRegForValue(I->getOperand(0)); 02364 if (!SrcReg) 02365 return false; 02366 bool SrcIsKill = hasTrivialKill(I->getOperand(0)); 02367 02368 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); 02369 02370 // Handle sign-extension. 02371 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 02372 SrcReg = 02373 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 02374 if (!SrcReg) 02375 return false; 02376 SrcIsKill = true; 02377 } 02378 02379 unsigned Opc; 02380 if (SrcVT == MVT::i64) { 02381 if (Signed) 02382 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 02383 else 02384 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 02385 } else { 02386 if (Signed) 02387 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 02388 else 02389 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 02390 } 02391 02392 unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg, 02393 SrcIsKill); 02394 updateValueMap(I, ResultReg); 02395 return true; 02396 } 02397 02398 bool AArch64FastISel::fastLowerArguments() { 02399 if (!FuncInfo.CanLowerReturn) 02400 return false; 02401 02402 const Function *F = FuncInfo.Fn; 02403 if (F->isVarArg()) 02404 return false; 02405 02406 CallingConv::ID CC = F->getCallingConv(); 02407 if (CC != CallingConv::C) 02408 return false; 02409 02410 // Only handle simple cases of up to 8 GPR and FPR each. 02411 unsigned GPRCnt = 0; 02412 unsigned FPRCnt = 0; 02413 unsigned Idx = 0; 02414 for (auto const &Arg : F->args()) { 02415 // The first argument is at index 1. 
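// (Added note, not in the original source: in this version of the attribute
//  API, index 0 refers to the return value, which is why the parameter index
//  is pre-incremented below before querying hasAttribute.)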
02416 ++Idx; 02417 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || 02418 F->getAttributes().hasAttribute(Idx, Attribute::InReg) || 02419 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || 02420 F->getAttributes().hasAttribute(Idx, Attribute::Nest)) 02421 return false; 02422 02423 Type *ArgTy = Arg.getType(); 02424 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 02425 return false; 02426 02427 EVT ArgVT = TLI.getValueType(ArgTy); 02428 if (!ArgVT.isSimple()) 02429 return false; 02430 02431 MVT VT = ArgVT.getSimpleVT().SimpleTy; 02432 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 02433 return false; 02434 02435 if (VT.isVector() && 02436 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 02437 return false; 02438 02439 if (VT >= MVT::i1 && VT <= MVT::i64) 02440 ++GPRCnt; 02441 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 02442 VT.is128BitVector()) 02443 ++FPRCnt; 02444 else 02445 return false; 02446 02447 if (GPRCnt > 8 || FPRCnt > 8) 02448 return false; 02449 } 02450 02451 static const MCPhysReg Registers[6][8] = { 02452 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 02453 AArch64::W5, AArch64::W6, AArch64::W7 }, 02454 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 02455 AArch64::X5, AArch64::X6, AArch64::X7 }, 02456 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 02457 AArch64::H5, AArch64::H6, AArch64::H7 }, 02458 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 02459 AArch64::S5, AArch64::S6, AArch64::S7 }, 02460 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 02461 AArch64::D5, AArch64::D6, AArch64::D7 }, 02462 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 02463 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 02464 }; 02465 02466 unsigned GPRIdx = 0; 02467 unsigned FPRIdx = 0; 02468 for (auto const &Arg : F->args()) { 02469 MVT VT = TLI.getSimpleValueType(Arg.getType()); 02470 unsigned SrcReg; 02471 const TargetRegisterClass *RC; 02472 if (VT >= MVT::i1 && VT <= MVT::i32) { 02473 SrcReg = Registers[0][GPRIdx++]; 02474 RC = &AArch64::GPR32RegClass; 02475 VT = MVT::i32; 02476 } else if (VT == MVT::i64) { 02477 SrcReg = Registers[1][GPRIdx++]; 02478 RC = &AArch64::GPR64RegClass; 02479 } else if (VT == MVT::f16) { 02480 SrcReg = Registers[2][FPRIdx++]; 02481 RC = &AArch64::FPR16RegClass; 02482 } else if (VT == MVT::f32) { 02483 SrcReg = Registers[3][FPRIdx++]; 02484 RC = &AArch64::FPR32RegClass; 02485 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 02486 SrcReg = Registers[4][FPRIdx++]; 02487 RC = &AArch64::FPR64RegClass; 02488 } else if (VT.is128BitVector()) { 02489 SrcReg = Registers[5][FPRIdx++]; 02490 RC = &AArch64::FPR128RegClass; 02491 } else 02492 llvm_unreachable("Unexpected value type."); 02493 02494 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 02495 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 02496 // Without this, EmitLiveInCopies may eliminate the livein if its only 02497 // use is a bitcast (which isn't turned into an instruction). 
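// (Added illustrative note, not part of the original source: with the tables
//  above, a prototype such as
//    double f(int a, long b, float c);
//  binds a -> W0 (GPR32), b -> X1 (GPR64), c -> S0 (FPR32); the GPR and FPR
//  counters advance independently, mirroring AAPCS64 register assignment for
//  these simple cases.)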
02498 unsigned ResultReg = createResultReg(RC); 02499 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02500 TII.get(TargetOpcode::COPY), ResultReg) 02501 .addReg(DstReg, getKillRegState(true)); 02502 updateValueMap(&Arg, ResultReg); 02503 } 02504 return true; 02505 } 02506 02507 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 02508 SmallVectorImpl<MVT> &OutVTs, 02509 unsigned &NumBytes) { 02510 CallingConv::ID CC = CLI.CallConv; 02511 SmallVector<CCValAssign, 16> ArgLocs; 02512 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 02513 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 02514 02515 // Get a count of how many bytes are to be pushed on the stack. 02516 NumBytes = CCInfo.getNextStackOffset(); 02517 02518 // Issue CALLSEQ_START 02519 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 02520 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 02521 .addImm(NumBytes); 02522 02523 // Process the args. 02524 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 02525 CCValAssign &VA = ArgLocs[i]; 02526 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 02527 MVT ArgVT = OutVTs[VA.getValNo()]; 02528 02529 unsigned ArgReg = getRegForValue(ArgVal); 02530 if (!ArgReg) 02531 return false; 02532 02533 // Handle arg promotion: SExt, ZExt, AExt. 02534 switch (VA.getLocInfo()) { 02535 case CCValAssign::Full: 02536 break; 02537 case CCValAssign::SExt: { 02538 MVT DestVT = VA.getLocVT(); 02539 MVT SrcVT = ArgVT; 02540 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 02541 if (!ArgReg) 02542 return false; 02543 break; 02544 } 02545 case CCValAssign::AExt: 02546 // Intentional fall-through. 02547 case CCValAssign::ZExt: { 02548 MVT DestVT = VA.getLocVT(); 02549 MVT SrcVT = ArgVT; 02550 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 02551 if (!ArgReg) 02552 return false; 02553 break; 02554 } 02555 default: 02556 llvm_unreachable("Unknown arg promotion!"); 02557 } 02558 02559 // Now copy/store arg to correct locations. 02560 if (VA.isRegLoc() && !VA.needsCustom()) { 02561 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02562 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 02563 CLI.OutRegs.push_back(VA.getLocReg()); 02564 } else if (VA.needsCustom()) { 02565 // FIXME: Handle custom args. 02566 return false; 02567 } else { 02568 assert(VA.isMemLoc() && "Assuming store on stack."); 02569 02570 // Don't emit stores for undef values. 02571 if (isa<UndefValue>(ArgVal)) 02572 continue; 02573 02574 // Need to store on the stack. 
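// (Added note, not part of the original source: on big-endian targets a
//  sub-8-byte argument is stored at the high-addressed end of its 8-byte
//  stack slot (BEAlign = 8 - size), e.g. +4 for an i32 and +7 for an i8, so
//  the offset lines up with where the callee-side lowering expects the
//  value.)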
02575 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 02576 02577 unsigned BEAlign = 0; 02578 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 02579 BEAlign = 8 - ArgSize; 02580 02581 Address Addr; 02582 Addr.setKind(Address::RegBase); 02583 Addr.setReg(AArch64::SP); 02584 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 02585 02586 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); 02587 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 02588 MachinePointerInfo::getStack(Addr.getOffset()), 02589 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 02590 02591 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 02592 return false; 02593 } 02594 } 02595 return true; 02596 } 02597 02598 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, 02599 unsigned NumBytes) { 02600 CallingConv::ID CC = CLI.CallConv; 02601 02602 // Issue CALLSEQ_END 02603 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 02604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 02605 .addImm(NumBytes).addImm(0); 02606 02607 // Now the return value. 02608 if (RetVT != MVT::isVoid) { 02609 SmallVector<CCValAssign, 16> RVLocs; 02610 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 02611 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); 02612 02613 // Only handle a single return value. 02614 if (RVLocs.size() != 1) 02615 return false; 02616 02617 // Copy all of the result registers out of their specified physreg. 02618 MVT CopyVT = RVLocs[0].getValVT(); 02619 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); 02620 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02621 TII.get(TargetOpcode::COPY), ResultReg) 02622 .addReg(RVLocs[0].getLocReg()); 02623 CLI.InRegs.push_back(RVLocs[0].getLocReg()); 02624 02625 CLI.ResultReg = ResultReg; 02626 CLI.NumResultRegs = 1; 02627 } 02628 02629 return true; 02630 } 02631 02632 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 02633 CallingConv::ID CC = CLI.CallConv; 02634 bool IsTailCall = CLI.IsTailCall; 02635 bool IsVarArg = CLI.IsVarArg; 02636 const Value *Callee = CLI.Callee; 02637 const char *SymName = CLI.SymName; 02638 02639 if (!Callee && !SymName) 02640 return false; 02641 02642 // Allow SelectionDAG isel to handle tail calls. 02643 if (IsTailCall) 02644 return false; 02645 02646 CodeModel::Model CM = TM.getCodeModel(); 02647 // Only support the small and large code model. 02648 if (CM != CodeModel::Small && CM != CodeModel::Large) 02649 return false; 02650 02651 // FIXME: Add large code model support for ELF. 02652 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 02653 return false; 02654 02655 // Let SDISel handle vararg functions. 02656 if (IsVarArg) 02657 return false; 02658 02659 // FIXME: Only handle *simple* calls for now. 02660 MVT RetVT; 02661 if (CLI.RetTy->isVoidTy()) 02662 RetVT = MVT::isVoid; 02663 else if (!isTypeLegal(CLI.RetTy, RetVT)) 02664 return false; 02665 02666 for (auto Flag : CLI.OutFlags) 02667 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal()) 02668 return false; 02669 02670 // Set up the argument vectors. 02671 SmallVector<MVT, 16> OutVTs; 02672 OutVTs.reserve(CLI.OutVals.size()); 02673 02674 for (auto *Val : CLI.OutVals) { 02675 MVT VT; 02676 if (!isTypeLegal(Val->getType(), VT) && 02677 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 02678 return false; 02679 02680 // We don't handle vector parameters yet. 
02681 if (VT.isVector() || VT.getSizeInBits() > 64) 02682 return false; 02683 02684 OutVTs.push_back(VT); 02685 } 02686 02687 Address Addr; 02688 if (Callee && !computeCallAddress(Callee, Addr)) 02689 return false; 02690 02691 // Handle the arguments now that we've gotten them. 02692 unsigned NumBytes; 02693 if (!processCallArgs(CLI, OutVTs, NumBytes)) 02694 return false; 02695 02696 // Issue the call. 02697 MachineInstrBuilder MIB; 02698 if (CM == CodeModel::Small) { 02699 const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL); 02700 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); 02701 if (SymName) 02702 MIB.addExternalSymbol(SymName, 0); 02703 else if (Addr.getGlobalValue()) 02704 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 02705 else if (Addr.getReg()) { 02706 unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 02707 MIB.addReg(Reg); 02708 } else 02709 return false; 02710 } else { 02711 unsigned CallReg = 0; 02712 if (SymName) { 02713 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 02714 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 02715 ADRPReg) 02716 .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE); 02717 02718 CallReg = createResultReg(&AArch64::GPR64RegClass); 02719 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), 02720 CallReg) 02721 .addReg(ADRPReg) 02722 .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | 02723 AArch64II::MO_NC); 02724 } else if (Addr.getGlobalValue()) 02725 CallReg = materializeGV(Addr.getGlobalValue()); 02726 else if (Addr.getReg()) 02727 CallReg = Addr.getReg(); 02728 02729 if (!CallReg) 02730 return false; 02731 02732 const MCInstrDesc &II = TII.get(AArch64::BLR); 02733 CallReg = constrainOperandRegClass(II, CallReg, 0); 02734 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); 02735 } 02736 02737 // Add implicit physical register uses to the call. 02738 for (auto Reg : CLI.OutRegs) 02739 MIB.addReg(Reg, RegState::Implicit); 02740 02741 // Add a register mask with the call-preserved registers. 02742 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 02743 MIB.addRegMask(TRI.getCallPreservedMask(CC)); 02744 02745 CLI.Call = MIB; 02746 02747 // Finish off the call including any return values. 02748 return finishCall(CLI, RetVT, NumBytes); 02749 } 02750 02751 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { 02752 if (Alignment) 02753 return Len / Alignment <= 4; 02754 else 02755 return Len < 32; 02756 } 02757 02758 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 02759 uint64_t Len, unsigned Alignment) { 02760 // Make sure we don't bloat code by inlining very large memcpy's. 02761 if (!isMemCpySmall(Len, Alignment)) 02762 return false; 02763 02764 int64_t UnscaledOffset = 0; 02765 Address OrigDest = Dest; 02766 Address OrigSrc = Src; 02767 02768 while (Len) { 02769 MVT VT; 02770 if (!Alignment || Alignment >= 8) { 02771 if (Len >= 8) 02772 VT = MVT::i64; 02773 else if (Len >= 4) 02774 VT = MVT::i32; 02775 else if (Len >= 2) 02776 VT = MVT::i16; 02777 else { 02778 VT = MVT::i8; 02779 } 02780 } else { 02781 // Bound based on alignment. 
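// (Added illustrative note, not in the original source: a 12-byte memcpy with
//  4-byte alignment becomes three i32 load/store pairs, and a 16-byte copy
//  with 8-byte or unknown alignment becomes two i64 pairs; isMemCpySmall
//  above only allows the inline expansion when Len/Alignment <= 4, or
//  Len < 32 bytes when the alignment is unknown.)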
02782 if (Len >= 4 && Alignment == 4) 02783 VT = MVT::i32; 02784 else if (Len >= 2 && Alignment == 2) 02785 VT = MVT::i16; 02786 else { 02787 VT = MVT::i8; 02788 } 02789 } 02790 02791 bool RV; 02792 unsigned ResultReg; 02793 RV = emitLoad(VT, ResultReg, Src); 02794 if (!RV) 02795 return false; 02796 02797 RV = emitStore(VT, ResultReg, Dest); 02798 if (!RV) 02799 return false; 02800 02801 int64_t Size = VT.getSizeInBits() / 8; 02802 Len -= Size; 02803 UnscaledOffset += Size; 02804 02805 // We need to recompute the unscaled offset for each iteration. 02806 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 02807 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 02808 } 02809 02810 return true; 02811 } 02812 02813 /// \brief Check if it is possible to fold the condition from the XALU intrinsic 02814 /// into the user. The condition code will only be updated on success. 02815 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 02816 const Instruction *I, 02817 const Value *Cond) { 02818 if (!isa<ExtractValueInst>(Cond)) 02819 return false; 02820 02821 const auto *EV = cast<ExtractValueInst>(Cond); 02822 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 02823 return false; 02824 02825 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 02826 MVT RetVT; 02827 const Function *Callee = II->getCalledFunction(); 02828 Type *RetTy = 02829 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 02830 if (!isTypeLegal(RetTy, RetVT)) 02831 return false; 02832 02833 if (RetVT != MVT::i32 && RetVT != MVT::i64) 02834 return false; 02835 02836 const Value *LHS = II->getArgOperand(0); 02837 const Value *RHS = II->getArgOperand(1); 02838 02839 // Canonicalize immediate to the RHS. 02840 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 02841 isCommutativeIntrinsic(II)) 02842 std::swap(LHS, RHS); 02843 02844 // Simplify multiplies. 02845 unsigned IID = II->getIntrinsicID(); 02846 switch (IID) { 02847 default: 02848 break; 02849 case Intrinsic::smul_with_overflow: 02850 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 02851 if (C->getValue() == 2) 02852 IID = Intrinsic::sadd_with_overflow; 02853 break; 02854 case Intrinsic::umul_with_overflow: 02855 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 02856 if (C->getValue() == 2) 02857 IID = Intrinsic::uadd_with_overflow; 02858 break; 02859 } 02860 02861 AArch64CC::CondCode TmpCC; 02862 switch (IID) { 02863 default: 02864 return false; 02865 case Intrinsic::sadd_with_overflow: 02866 case Intrinsic::ssub_with_overflow: 02867 TmpCC = AArch64CC::VS; 02868 break; 02869 case Intrinsic::uadd_with_overflow: 02870 TmpCC = AArch64CC::HS; 02871 break; 02872 case Intrinsic::usub_with_overflow: 02873 TmpCC = AArch64CC::LO; 02874 break; 02875 case Intrinsic::smul_with_overflow: 02876 case Intrinsic::umul_with_overflow: 02877 TmpCC = AArch64CC::NE; 02878 break; 02879 } 02880 02881 // Check if both instructions are in the same basic block. 02882 if (!isValueAvailable(II)) 02883 return false; 02884 02885 // Make sure nothing is in the way 02886 BasicBlock::const_iterator Start = I; 02887 BasicBlock::const_iterator End = II; 02888 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 02889 // We only expect extractvalue instructions between the intrinsic and the 02890 // instruction to be selected. 02891 if (!isa<ExtractValueInst>(Itr)) 02892 return false; 02893 02894 // Check that the extractvalue operand comes from the intrinsic. 
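// (Added context, not part of the original source: the shape being matched is
//    %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//    %ov  = extractvalue { i32, i1 } %res, 1
//    br i1 %ov, ...
//  Only other extractvalues of the same result may sit between the intrinsic
//  and its user; anything else could clobber NZCV before the flag-consuming
//  instruction is emitted.)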
02895 const auto *EVI = cast<ExtractValueInst>(Itr); 02896 if (EVI->getAggregateOperand() != II) 02897 return false; 02898 } 02899 02900 CC = TmpCC; 02901 return true; 02902 } 02903 02904 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 02905 // FIXME: Handle more intrinsics. 02906 switch (II->getIntrinsicID()) { 02907 default: return false; 02908 case Intrinsic::frameaddress: { 02909 MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); 02910 MFI->setFrameAddressIsTaken(true); 02911 02912 const AArch64RegisterInfo *RegInfo = 02913 static_cast<const AArch64RegisterInfo *>( 02914 TM.getSubtargetImpl()->getRegisterInfo()); 02915 unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 02916 unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 02917 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02918 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 02919 // Recursively load frame address 02920 // ldr x0, [fp] 02921 // ldr x0, [x0] 02922 // ldr x0, [x0] 02923 // ... 02924 unsigned DestReg; 02925 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 02926 while (Depth--) { 02927 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 02928 SrcReg, /*IsKill=*/true, 0); 02929 assert(DestReg && "Unexpected LDR instruction emission failure."); 02930 SrcReg = DestReg; 02931 } 02932 02933 updateValueMap(II, SrcReg); 02934 return true; 02935 } 02936 case Intrinsic::memcpy: 02937 case Intrinsic::memmove: { 02938 const auto *MTI = cast<MemTransferInst>(II); 02939 // Don't handle volatile. 02940 if (MTI->isVolatile()) 02941 return false; 02942 02943 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 02944 // we would emit dead code because we don't currently handle memmoves. 02945 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 02946 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 02947 // Small memcpy's are common enough that we want to do them without a call 02948 // if possible. 02949 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 02950 unsigned Alignment = MTI->getAlignment(); 02951 if (isMemCpySmall(Len, Alignment)) { 02952 Address Dest, Src; 02953 if (!computeAddress(MTI->getRawDest(), Dest) || 02954 !computeAddress(MTI->getRawSource(), Src)) 02955 return false; 02956 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 02957 return true; 02958 } 02959 } 02960 02961 if (!MTI->getLength()->getType()->isIntegerTy(64)) 02962 return false; 02963 02964 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 02965 // Fast instruction selection doesn't support the special 02966 // address spaces. 02967 return false; 02968 02969 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 02970 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); 02971 } 02972 case Intrinsic::memset: { 02973 const MemSetInst *MSI = cast<MemSetInst>(II); 02974 // Don't handle volatile. 02975 if (MSI->isVolatile()) 02976 return false; 02977 02978 if (!MSI->getLength()->getType()->isIntegerTy(64)) 02979 return false; 02980 02981 if (MSI->getDestAddressSpace() > 255) 02982 // Fast instruction selection doesn't support the special 02983 // address spaces. 
02984 return false; 02985 02986 return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); 02987 } 02988 case Intrinsic::sin: 02989 case Intrinsic::cos: 02990 case Intrinsic::pow: { 02991 MVT RetVT; 02992 if (!isTypeLegal(II->getType(), RetVT)) 02993 return false; 02994 02995 if (RetVT != MVT::f32 && RetVT != MVT::f64) 02996 return false; 02997 02998 static const RTLIB::Libcall LibCallTable[3][2] = { 02999 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 03000 { RTLIB::COS_F32, RTLIB::COS_F64 }, 03001 { RTLIB::POW_F32, RTLIB::POW_F64 } 03002 }; 03003 RTLIB::Libcall LC; 03004 bool Is64Bit = RetVT == MVT::f64; 03005 switch (II->getIntrinsicID()) { 03006 default: 03007 llvm_unreachable("Unexpected intrinsic."); 03008 case Intrinsic::sin: 03009 LC = LibCallTable[0][Is64Bit]; 03010 break; 03011 case Intrinsic::cos: 03012 LC = LibCallTable[1][Is64Bit]; 03013 break; 03014 case Intrinsic::pow: 03015 LC = LibCallTable[2][Is64Bit]; 03016 break; 03017 } 03018 03019 ArgListTy Args; 03020 Args.reserve(II->getNumArgOperands()); 03021 03022 // Populate the argument list. 03023 for (auto &Arg : II->arg_operands()) { 03024 ArgListEntry Entry; 03025 Entry.Val = Arg; 03026 Entry.Ty = Arg->getType(); 03027 Args.push_back(Entry); 03028 } 03029 03030 CallLoweringInfo CLI; 03031 CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(), 03032 TLI.getLibcallName(LC), std::move(Args)); 03033 if (!lowerCallTo(CLI)) 03034 return false; 03035 updateValueMap(II, CLI.ResultReg); 03036 return true; 03037 } 03038 case Intrinsic::trap: { 03039 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 03040 .addImm(1); 03041 return true; 03042 } 03043 case Intrinsic::sqrt: { 03044 Type *RetTy = II->getCalledFunction()->getReturnType(); 03045 03046 MVT VT; 03047 if (!isTypeLegal(RetTy, VT)) 03048 return false; 03049 03050 unsigned Op0Reg = getRegForValue(II->getOperand(0)); 03051 if (!Op0Reg) 03052 return false; 03053 bool Op0IsKill = hasTrivialKill(II->getOperand(0)); 03054 03055 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill); 03056 if (!ResultReg) 03057 return false; 03058 03059 updateValueMap(II, ResultReg); 03060 return true; 03061 } 03062 case Intrinsic::sadd_with_overflow: 03063 case Intrinsic::uadd_with_overflow: 03064 case Intrinsic::ssub_with_overflow: 03065 case Intrinsic::usub_with_overflow: 03066 case Intrinsic::smul_with_overflow: 03067 case Intrinsic::umul_with_overflow: { 03068 // This implements the basic lowering of the xalu with overflow intrinsics. 03069 const Function *Callee = II->getCalledFunction(); 03070 auto *Ty = cast<StructType>(Callee->getReturnType()); 03071 Type *RetTy = Ty->getTypeAtIndex(0U); 03072 03073 MVT VT; 03074 if (!isTypeLegal(RetTy, VT)) 03075 return false; 03076 03077 if (VT != MVT::i32 && VT != MVT::i64) 03078 return false; 03079 03080 const Value *LHS = II->getArgOperand(0); 03081 const Value *RHS = II->getArgOperand(1); 03082 // Canonicalize immediate to the RHS. 03083 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 03084 isCommutativeIntrinsic(II)) 03085 std::swap(LHS, RHS); 03086 03087 // Simplify multiplies. 
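// (Added note, not part of the original source: an overflowing multiply by 2
//  is equivalent to adding the value to itself, so the code below rewrites
//  {s,u}mul.with.overflow(x, 2) as {s,u}add.with.overflow(x, x); the ADDS can
//  then set the overflow/carry flag directly instead of using the longer
//  multiply-and-check-high-half sequence emitted for general multiplies.)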
03088 unsigned IID = II->getIntrinsicID(); 03089 switch (IID) { 03090 default: 03091 break; 03092 case Intrinsic::smul_with_overflow: 03093 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 03094 if (C->getValue() == 2) { 03095 IID = Intrinsic::sadd_with_overflow; 03096 RHS = LHS; 03097 } 03098 break; 03099 case Intrinsic::umul_with_overflow: 03100 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 03101 if (C->getValue() == 2) { 03102 IID = Intrinsic::uadd_with_overflow; 03103 RHS = LHS; 03104 } 03105 break; 03106 } 03107 03108 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 03109 AArch64CC::CondCode CC = AArch64CC::Invalid; 03110 switch (IID) { 03111 default: llvm_unreachable("Unexpected intrinsic!"); 03112 case Intrinsic::sadd_with_overflow: 03113 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 03114 CC = AArch64CC::VS; 03115 break; 03116 case Intrinsic::uadd_with_overflow: 03117 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 03118 CC = AArch64CC::HS; 03119 break; 03120 case Intrinsic::ssub_with_overflow: 03121 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 03122 CC = AArch64CC::VS; 03123 break; 03124 case Intrinsic::usub_with_overflow: 03125 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 03126 CC = AArch64CC::LO; 03127 break; 03128 case Intrinsic::smul_with_overflow: { 03129 CC = AArch64CC::NE; 03130 unsigned LHSReg = getRegForValue(LHS); 03131 if (!LHSReg) 03132 return false; 03133 bool LHSIsKill = hasTrivialKill(LHS); 03134 03135 unsigned RHSReg = getRegForValue(RHS); 03136 if (!RHSReg) 03137 return false; 03138 bool RHSIsKill = hasTrivialKill(RHS); 03139 03140 if (VT == MVT::i32) { 03141 MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 03142 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, 03143 /*IsKill=*/false, 32); 03144 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 03145 AArch64::sub_32); 03146 ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, 03147 AArch64::sub_32); 03148 emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 03149 AArch64_AM::ASR, 31, /*WantResult=*/false); 03150 } else { 03151 assert(VT == MVT::i64 && "Unexpected value type."); 03152 MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 03153 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, 03154 RHSReg, RHSIsKill); 03155 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 03156 AArch64_AM::ASR, 63, /*WantResult=*/false); 03157 } 03158 break; 03159 } 03160 case Intrinsic::umul_with_overflow: { 03161 CC = AArch64CC::NE; 03162 unsigned LHSReg = getRegForValue(LHS); 03163 if (!LHSReg) 03164 return false; 03165 bool LHSIsKill = hasTrivialKill(LHS); 03166 03167 unsigned RHSReg = getRegForValue(RHS); 03168 if (!RHSReg) 03169 return false; 03170 bool RHSIsKill = hasTrivialKill(RHS); 03171 03172 if (VT == MVT::i32) { 03173 MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 03174 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg, 03175 /*IsKill=*/false, AArch64_AM::LSR, 32, 03176 /*WantResult=*/false); 03177 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 03178 AArch64::sub_32); 03179 } else { 03180 assert(VT == MVT::i64 && "Unexpected value type."); 03181 MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 03182 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, 03183 RHSReg, RHSIsKill); 03184 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, 03185 /*IsKill=*/false, 
/*WantResult=*/false); 03186 } 03187 break; 03188 } 03189 } 03190 03191 if (MulReg) { 03192 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 03193 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03194 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 03195 } 03196 03197 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 03198 AArch64::WZR, /*IsKill=*/true, AArch64::WZR, 03199 /*IsKill=*/true, getInvertedCondCode(CC)); 03200 assert((ResultReg1 + 1) == ResultReg2 && 03201 "Nonconsecutive result registers."); 03202 updateValueMap(II, ResultReg1, 2); 03203 return true; 03204 } 03205 } 03206 return false; 03207 } 03208 03209 bool AArch64FastISel::selectRet(const Instruction *I) { 03210 const ReturnInst *Ret = cast<ReturnInst>(I); 03211 const Function &F = *I->getParent()->getParent(); 03212 03213 if (!FuncInfo.CanLowerReturn) 03214 return false; 03215 03216 if (F.isVarArg()) 03217 return false; 03218 03219 // Build a list of return value registers. 03220 SmallVector<unsigned, 4> RetRegs; 03221 03222 if (Ret->getNumOperands() > 0) { 03223 CallingConv::ID CC = F.getCallingConv(); 03224 SmallVector<ISD::OutputArg, 4> Outs; 03225 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); 03226 03227 // Analyze operands of the call, assigning locations to each operand. 03228 SmallVector<CCValAssign, 16> ValLocs; 03229 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 03230 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS 03231 : RetCC_AArch64_AAPCS; 03232 CCInfo.AnalyzeReturn(Outs, RetCC); 03233 03234 // Only handle a single return value for now. 03235 if (ValLocs.size() != 1) 03236 return false; 03237 03238 CCValAssign &VA = ValLocs[0]; 03239 const Value *RV = Ret->getOperand(0); 03240 03241 // Don't bother handling odd stuff for now. 03242 if ((VA.getLocInfo() != CCValAssign::Full) && 03243 (VA.getLocInfo() != CCValAssign::BCvt)) 03244 return false; 03245 03246 // Only handle register returns for now. 03247 if (!VA.isRegLoc()) 03248 return false; 03249 03250 unsigned Reg = getRegForValue(RV); 03251 if (Reg == 0) 03252 return false; 03253 03254 unsigned SrcReg = Reg + VA.getValNo(); 03255 unsigned DestReg = VA.getLocReg(); 03256 // Avoid a cross-class copy. This is very unlikely. 03257 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 03258 return false; 03259 03260 EVT RVEVT = TLI.getValueType(RV->getType()); 03261 if (!RVEVT.isSimple()) 03262 return false; 03263 03264 // Vectors (of > 1 lane) in big endian need tricky handling. 03265 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 && 03266 !Subtarget->isLittleEndian()) 03267 return false; 03268 03269 MVT RVVT = RVEVT.getSimpleVT(); 03270 if (RVVT == MVT::f128) 03271 return false; 03272 03273 MVT DestVT = VA.getValVT(); 03274 // Special handling for extended integers. 03275 if (RVVT != DestVT) { 03276 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 03277 return false; 03278 03279 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 03280 return false; 03281 03282 bool IsZExt = Outs[0].Flags.isZExt(); 03283 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 03284 if (SrcReg == 0) 03285 return false; 03286 } 03287 03288 // Make the copy. 03289 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03290 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); 03291 03292 // Add register to return instruction. 
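// (Added note, not in the original source: RET_ReallyLR is a pseudo that
//  later expands to a plain "ret" through LR; listing the return-value
//  register as an implicit use below keeps the preceding COPY into the
//  physical register from being treated as dead.)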
03293 RetRegs.push_back(VA.getLocReg()); 03294 } 03295 03296 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03297 TII.get(AArch64::RET_ReallyLR)); 03298 for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) 03299 MIB.addReg(RetRegs[i], RegState::Implicit); 03300 return true; 03301 } 03302 03303 bool AArch64FastISel::selectTrunc(const Instruction *I) { 03304 Type *DestTy = I->getType(); 03305 Value *Op = I->getOperand(0); 03306 Type *SrcTy = Op->getType(); 03307 03308 EVT SrcEVT = TLI.getValueType(SrcTy, true); 03309 EVT DestEVT = TLI.getValueType(DestTy, true); 03310 if (!SrcEVT.isSimple()) 03311 return false; 03312 if (!DestEVT.isSimple()) 03313 return false; 03314 03315 MVT SrcVT = SrcEVT.getSimpleVT(); 03316 MVT DestVT = DestEVT.getSimpleVT(); 03317 03318 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && 03319 SrcVT != MVT::i8) 03320 return false; 03321 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && 03322 DestVT != MVT::i1) 03323 return false; 03324 03325 unsigned SrcReg = getRegForValue(Op); 03326 if (!SrcReg) 03327 return false; 03328 bool SrcIsKill = hasTrivialKill(Op); 03329 03330 // If we're truncating from i64 to a smaller non-legal type then generate an 03331 // AND. Otherwise, we know the high bits are undefined and a truncate only 03332 // generate a COPY. We cannot mark the source register also as result 03333 // register, because this can incorrectly transfer the kill flag onto the 03334 // source register. 03335 unsigned ResultReg; 03336 if (SrcVT == MVT::i64) { 03337 uint64_t Mask = 0; 03338 switch (DestVT.SimpleTy) { 03339 default: 03340 // Trunc i64 to i32 is handled by the target-independent fast-isel. 03341 return false; 03342 case MVT::i1: 03343 Mask = 0x1; 03344 break; 03345 case MVT::i8: 03346 Mask = 0xff; 03347 break; 03348 case MVT::i16: 03349 Mask = 0xffff; 03350 break; 03351 } 03352 // Issue an extract_subreg to get the lower 32-bits. 03353 unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, 03354 AArch64::sub_32); 03355 // Create the AND instruction which performs the actual truncation. 03356 ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask); 03357 assert(ResultReg && "Unexpected AND instruction emission failure."); 03358 } else { 03359 ResultReg = createResultReg(&AArch64::GPR32RegClass); 03360 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03361 TII.get(TargetOpcode::COPY), ResultReg) 03362 .addReg(SrcReg, getKillRegState(SrcIsKill)); 03363 } 03364 03365 updateValueMap(I, ResultReg); 03366 return true; 03367 } 03368 03369 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { 03370 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || 03371 DestVT == MVT::i64) && 03372 "Unexpected value type."); 03373 // Handle i8 and i16 as i32. 03374 if (DestVT == MVT::i8 || DestVT == MVT::i16) 03375 DestVT = MVT::i32; 03376 03377 if (IsZExt) { 03378 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 03379 assert(ResultReg && "Unexpected AND instruction emission failure."); 03380 if (DestVT == MVT::i64) { 03381 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 03382 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 
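      // Illustrative shape of the emitted pair (virtual register names are
      // placeholders):
      //   %Wd = ANDWri %Ws, #1
      //   %Xd = SUBREG_TO_REG 0, %Wd, sub_32
      // The 0 immediate records that the bits outside sub_32 are already
      // zero, so no further zero-extension is required.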
03383 unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 03384 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03385 TII.get(AArch64::SUBREG_TO_REG), Reg64) 03386 .addImm(0) 03387 .addReg(ResultReg) 03388 .addImm(AArch64::sub_32); 03389 ResultReg = Reg64; 03390 } 03391 return ResultReg; 03392 } else { 03393 if (DestVT == MVT::i64) { 03394 // FIXME: We're SExt i1 to i64. 03395 return 0; 03396 } 03397 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 03398 /*TODO:IsKill=*/false, 0, 0); 03399 } 03400 } 03401 03402 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 03403 unsigned Op1, bool Op1IsKill) { 03404 unsigned Opc, ZReg; 03405 switch (RetVT.SimpleTy) { 03406 default: return 0; 03407 case MVT::i8: 03408 case MVT::i16: 03409 case MVT::i32: 03410 RetVT = MVT::i32; 03411 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 03412 case MVT::i64: 03413 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 03414 } 03415 03416 const TargetRegisterClass *RC = 03417 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 03418 return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill, 03419 /*IsKill=*/ZReg, true); 03420 } 03421 03422 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 03423 unsigned Op1, bool Op1IsKill) { 03424 if (RetVT != MVT::i64) 03425 return 0; 03426 03427 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 03428 Op0, Op0IsKill, Op1, Op1IsKill, 03429 AArch64::XZR, /*IsKill=*/true); 03430 } 03431 03432 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 03433 unsigned Op1, bool Op1IsKill) { 03434 if (RetVT != MVT::i64) 03435 return 0; 03436 03437 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 03438 Op0, Op0IsKill, Op1, Op1IsKill, 03439 AArch64::XZR, /*IsKill=*/true); 03440 } 03441 03442 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 03443 unsigned Op1Reg, bool Op1IsKill) { 03444 unsigned Opc = 0; 03445 bool NeedTrunc = false; 03446 uint64_t Mask = 0; 03447 switch (RetVT.SimpleTy) { 03448 default: return 0; 03449 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 03450 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 03451 case MVT::i32: Opc = AArch64::LSLVWr; break; 03452 case MVT::i64: Opc = AArch64::LSLVXr; break; 03453 } 03454 03455 const TargetRegisterClass *RC = 03456 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 03457 if (NeedTrunc) { 03458 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 03459 Op1IsKill = true; 03460 } 03461 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 03462 Op1IsKill); 03463 if (NeedTrunc) 03464 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 03465 return ResultReg; 03466 } 03467 03468 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 03469 bool Op0IsKill, uint64_t Shift, 03470 bool IsZext) { 03471 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 03472 "Unexpected source/return type pair."); 03473 assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 || 03474 SrcVT == MVT::i64) && "Unexpected source value type."); 03475 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 03476 RetVT == MVT::i64) && "Unexpected return value type."); 03477 03478 bool Is64Bit = (RetVT == MVT::i64); 03479 unsigned RegSize = Is64Bit ? 
64 : 32; 03480 unsigned DstBits = RetVT.getSizeInBits(); 03481 unsigned SrcBits = SrcVT.getSizeInBits(); 03482 03483 // Don't deal with undefined shifts. 03484 if (Shift >= DstBits) 03485 return 0; 03486 03487 // For immediate shifts we can fold the zero-/sign-extension into the shift. 03488 // {S|U}BFM Wd, Wn, #r, #s 03489 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 03490 03491 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 03492 // %2 = shl i16 %1, 4 03493 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 03494 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 03495 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 03496 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 03497 03498 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 03499 // %2 = shl i16 %1, 8 03500 // Wd<32+7-24,32-24> = Wn<7:0> 03501 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 03502 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 03503 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 03504 03505 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 03506 // %2 = shl i16 %1, 12 03507 // Wd<32+3-20,32-20> = Wn<3:0> 03508 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 03509 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 03510 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 03511 03512 unsigned ImmR = RegSize - Shift; 03513 // Limit the width to the length of the source type. 03514 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 03515 static const unsigned OpcTable[2][2] = { 03516 {AArch64::SBFMWri, AArch64::SBFMXri}, 03517 {AArch64::UBFMWri, AArch64::UBFMXri} 03518 }; 03519 unsigned Opc = OpcTable[IsZext][Is64Bit]; 03520 const TargetRegisterClass *RC = 03521 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 03522 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 03523 unsigned TmpReg = MRI.createVirtualRegister(RC); 03524 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03525 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 03526 .addImm(0) 03527 .addReg(Op0, getKillRegState(Op0IsKill)) 03528 .addImm(AArch64::sub_32); 03529 Op0 = TmpReg; 03530 Op0IsKill = true; 03531 } 03532 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 03533 } 03534 03535 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 03536 unsigned Op1Reg, bool Op1IsKill) { 03537 unsigned Opc = 0; 03538 bool NeedTrunc = false; 03539 uint64_t Mask = 0; 03540 switch (RetVT.SimpleTy) { 03541 default: return 0; 03542 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 03543 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 03544 case MVT::i32: Opc = AArch64::LSRVWr; break; 03545 case MVT::i64: Opc = AArch64::LSRVXr; break; 03546 } 03547 03548 const TargetRegisterClass *RC = 03549 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 03550 if (NeedTrunc) { 03551 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask); 03552 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 03553 Op0IsKill = Op1IsKill = true; 03554 } 03555 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 03556 Op1IsKill); 03557 if (NeedTrunc) 03558 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 03559 return ResultReg; 03560 } 03561 03562 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 03563 bool Op0IsKill, uint64_t Shift, 03564 bool IsZExt) { 03565 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 03566 "Unexpected source/return type pair."); 03567 assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 || 03568 SrcVT == MVT::i64) && "Unexpected source value type."); 03569 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 03570 RetVT == MVT::i64) && "Unexpected return value type."); 03571 03572 bool Is64Bit = (RetVT == MVT::i64); 03573 unsigned RegSize = Is64Bit ? 64 : 32; 03574 unsigned DstBits = RetVT.getSizeInBits(); 03575 unsigned SrcBits = SrcVT.getSizeInBits(); 03576 03577 // Don't deal with undefined shifts. 03578 if (Shift >= DstBits) 03579 return 0; 03580 03581 // For immediate shifts we can fold the zero-/sign-extension into the shift. 03582 // {S|U}BFM Wd, Wn, #r, #s 03583 // Wd<s-r:0> = Wn<s:r> when r <= s 03584 03585 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 03586 // %2 = lshr i16 %1, 4 03587 // Wd<7-4:0> = Wn<7:4> 03588 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 03589 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 03590 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 03591 03592 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 03593 // %2 = lshr i16 %1, 8 03594 // Wd<7-7,0> = Wn<7:7> 03595 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 03596 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 03597 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 03598 03599 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 03600 // %2 = lshr i16 %1, 12 03601 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 03602 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 03603 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 03604 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 03605 03606 if (Shift >= SrcBits && IsZExt) 03607 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 03608 03609 // It is not possible to fold a sign-extend into the LShr instruction. In this 03610 // case emit a sign-extend. 03611 if (!IsZExt) { 03612 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 03613 if (!Op0) 03614 return 0; 03615 Op0IsKill = true; 03616 SrcVT = RetVT; 03617 SrcBits = SrcVT.getSizeInBits(); 03618 IsZExt = true; 03619 } 03620 03621 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 03622 unsigned ImmS = SrcBits - 1; 03623 static const unsigned OpcTable[2][2] = { 03624 {AArch64::SBFMWri, AArch64::SBFMXri}, 03625 {AArch64::UBFMWri, AArch64::UBFMXri} 03626 }; 03627 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 03628 const TargetRegisterClass *RC = 03629 Is64Bit ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 03630 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 03631 unsigned TmpReg = MRI.createVirtualRegister(RC); 03632 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03633 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 03634 .addImm(0) 03635 .addReg(Op0, getKillRegState(Op0IsKill)) 03636 .addImm(AArch64::sub_32); 03637 Op0 = TmpReg; 03638 Op0IsKill = true; 03639 } 03640 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 03641 } 03642 03643 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 03644 unsigned Op1Reg, bool Op1IsKill) { 03645 unsigned Opc = 0; 03646 bool NeedTrunc = false; 03647 uint64_t Mask = 0; 03648 switch (RetVT.SimpleTy) { 03649 default: return 0; 03650 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 03651 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 03652 case MVT::i32: Opc = AArch64::ASRVWr; break; 03653 case MVT::i64: Opc = AArch64::ASRVXr; break; 03654 } 03655 03656 const TargetRegisterClass *RC = 03657 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 03658 if (NeedTrunc) { 03659 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false); 03660 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 03661 Op0IsKill = Op1IsKill = true; 03662 } 03663 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 03664 Op1IsKill); 03665 if (NeedTrunc) 03666 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 03667 return ResultReg; 03668 } 03669 03670 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 03671 bool Op0IsKill, uint64_t Shift, 03672 bool IsZExt) { 03673 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 03674 "Unexpected source/return type pair."); 03675 assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 || 03676 SrcVT == MVT::i64) && "Unexpected source value type."); 03677 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 03678 RetVT == MVT::i64) && "Unexpected return value type."); 03679 03680 bool Is64Bit = (RetVT == MVT::i64); 03681 unsigned RegSize = Is64Bit ? 64 : 32; 03682 unsigned DstBits = RetVT.getSizeInBits(); 03683 unsigned SrcBits = SrcVT.getSizeInBits(); 03684 03685 // Don't deal with undefined shifts. 03686 if (Shift >= DstBits) 03687 return 0; 03688 03689 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
03690 // {S|U}BFM Wd, Wn, #r, #s 03691 // Wd<s-r:0> = Wn<s:r> when r <= s 03692 03693 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 03694 // %2 = ashr i16 %1, 4 03695 // Wd<7-4:0> = Wn<7:4> 03696 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 03697 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 03698 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 03699 03700 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 03701 // %2 = ashr i16 %1, 8 03702 // Wd<7-7,0> = Wn<7:7> 03703 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 03704 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 03705 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 03706 03707 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 03708 // %2 = ashr i16 %1, 12 03709 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 03710 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 03711 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 03712 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 03713 03714 if (Shift >= SrcBits && IsZExt) 03715 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 03716 03717 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 03718 unsigned ImmS = SrcBits - 1; 03719 static const unsigned OpcTable[2][2] = { 03720 {AArch64::SBFMWri, AArch64::SBFMXri}, 03721 {AArch64::UBFMWri, AArch64::UBFMXri} 03722 }; 03723 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 03724 const TargetRegisterClass *RC = 03725 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 03726 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 03727 unsigned TmpReg = MRI.createVirtualRegister(RC); 03728 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03729 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 03730 .addImm(0) 03731 .addReg(Op0, getKillRegState(Op0IsKill)) 03732 .addImm(AArch64::sub_32); 03733 Op0 = TmpReg; 03734 Op0IsKill = true; 03735 } 03736 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 03737 } 03738 03739 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 03740 bool IsZExt) { 03741 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 03742 03743 // FastISel does not have plumbing to deal with extensions where the SrcVT or 03744 // DestVT are odd things, so test to make sure that they are both types we can 03745 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 03746 // bail out to SelectionDAG. 03747 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 03748 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 03749 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 03750 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 03751 return 0; 03752 03753 unsigned Opc; 03754 unsigned Imm = 0; 03755 03756 switch (SrcVT.SimpleTy) { 03757 default: 03758 return 0; 03759 case MVT::i1: 03760 return emiti1Ext(SrcReg, DestVT, IsZExt); 03761 case MVT::i8: 03762 if (DestVT == MVT::i64) 03763 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 03764 else 03765 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 03766 Imm = 7; 03767 break; 03768 case MVT::i16: 03769 if (DestVT == MVT::i64) 03770 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 03771 else 03772 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 03773 Imm = 15; 03774 break; 03775 case MVT::i32: 03776 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 03777 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 03778 Imm = 31; 03779 break; 03780 } 03781 03782 // Handle i8 and i16 as i32. 
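  // (AArch64 has no 8- or 16-bit registers, so sub-word extends are always
  // performed in a full W/X register.)  For illustration, the bitfield move
  // emitted at the end of this function is the canonical extend, e.g.
  //   zext i8 %c to i32   ->  UBFMWri Wd, Wn, #0, #7    (UXTB)
  //   sext i16 %s to i64  ->  SBFMXri Xd, Xn, #0, #15   (SXTH)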
03783 if (DestVT == MVT::i8 || DestVT == MVT::i16) 03784 DestVT = MVT::i32; 03785 else if (DestVT == MVT::i64) { 03786 unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 03787 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03788 TII.get(AArch64::SUBREG_TO_REG), Src64) 03789 .addImm(0) 03790 .addReg(SrcReg) 03791 .addImm(AArch64::sub_32); 03792 SrcReg = Src64; 03793 } 03794 03795 const TargetRegisterClass *RC = 03796 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 03797 return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm); 03798 } 03799 03800 bool AArch64FastISel::selectIntExt(const Instruction *I) { 03801 // On ARM, in general, integer casts don't involve legal types; this code 03802 // handles promotable integers. The high bits for a type smaller than 03803 // the register size are assumed to be undefined. 03804 Type *DestTy = I->getType(); 03805 Value *Src = I->getOperand(0); 03806 Type *SrcTy = Src->getType(); 03807 03808 unsigned SrcReg = getRegForValue(Src); 03809 if (!SrcReg) 03810 return false; 03811 03812 EVT SrcEVT = TLI.getValueType(SrcTy, true); 03813 EVT DestEVT = TLI.getValueType(DestTy, true); 03814 if (!SrcEVT.isSimple()) 03815 return false; 03816 if (!DestEVT.isSimple()) 03817 return false; 03818 03819 MVT SrcVT = SrcEVT.getSimpleVT(); 03820 MVT DestVT = DestEVT.getSimpleVT(); 03821 unsigned ResultReg = 0; 03822 03823 bool IsZExt = isa<ZExtInst>(I); 03824 // Check if it is an argument and if it is already zero/sign-extended. 03825 if (const auto *Arg = dyn_cast<Argument>(Src)) { 03826 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { 03827 if (DestVT == MVT::i64) { 03828 ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); 03829 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03830 TII.get(AArch64::SUBREG_TO_REG), ResultReg) 03831 .addImm(0) 03832 .addReg(SrcReg) 03833 .addImm(AArch64::sub_32); 03834 } else 03835 ResultReg = SrcReg; 03836 } 03837 } 03838 03839 if (!ResultReg) 03840 ResultReg = emitIntExt(SrcVT, SrcReg, DestVT, IsZExt); 03841 03842 if (!ResultReg) 03843 return false; 03844 03845 updateValueMap(I, ResultReg); 03846 return true; 03847 } 03848 03849 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { 03850 EVT DestEVT = TLI.getValueType(I->getType(), true); 03851 if (!DestEVT.isSimple()) 03852 return false; 03853 03854 MVT DestVT = DestEVT.getSimpleVT(); 03855 if (DestVT != MVT::i64 && DestVT != MVT::i32) 03856 return false; 03857 03858 unsigned DivOpc; 03859 bool Is64bit = (DestVT == MVT::i64); 03860 switch (ISDOpcode) { 03861 default: 03862 return false; 03863 case ISD::SREM: 03864 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr; 03865 break; 03866 case ISD::UREM: 03867 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; 03868 break; 03869 } 03870 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; 03871 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 03872 if (!Src0Reg) 03873 return false; 03874 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 03875 03876 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 03877 if (!Src1Reg) 03878 return false; 03879 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 03880 03881 const TargetRegisterClass *RC = 03882 (DestVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 03883 unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false, 03884 Src1Reg, /*IsKill=*/false); 03885 assert(QuotReg && "Unexpected DIV instruction emission failure."); 03886 // The remainder is computed as numerator - (quotient * denominator) using the 03887 // MSUB instruction. 03888 unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true, 03889 Src1Reg, Src1IsKill, Src0Reg, 03890 Src0IsKill); 03891 updateValueMap(I, ResultReg); 03892 return true; 03893 } 03894 03895 bool AArch64FastISel::selectMul(const Instruction *I) { 03896 MVT VT; 03897 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 03898 return false; 03899 03900 if (VT.isVector()) 03901 return selectBinaryOp(I, ISD::MUL); 03902 03903 const Value *Src0 = I->getOperand(0); 03904 const Value *Src1 = I->getOperand(1); 03905 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 03906 if (C->getValue().isPowerOf2()) 03907 std::swap(Src0, Src1); 03908 03909 // Try to simplify to a shift instruction. 03910 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 03911 if (C->getValue().isPowerOf2()) { 03912 uint64_t ShiftVal = C->getValue().logBase2(); 03913 MVT SrcVT = VT; 03914 bool IsZExt = true; 03915 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 03916 MVT VT; 03917 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 03918 SrcVT = VT; 03919 IsZExt = true; 03920 Src0 = ZExt->getOperand(0); 03921 } 03922 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 03923 MVT VT; 03924 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 03925 SrcVT = VT; 03926 IsZExt = false; 03927 Src0 = SExt->getOperand(0); 03928 } 03929 } 03930 03931 unsigned Src0Reg = getRegForValue(Src0); 03932 if (!Src0Reg) 03933 return false; 03934 bool Src0IsKill = hasTrivialKill(Src0); 03935 03936 unsigned ResultReg = 03937 emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt); 03938 03939 if (ResultReg) { 03940 updateValueMap(I, ResultReg); 03941 return true; 03942 } 03943 } 03944 03945 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 03946 if (!Src0Reg) 03947 return false; 03948 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 03949 03950 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 03951 if (!Src1Reg) 03952 return false; 03953 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 03954 03955 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); 03956 03957 if (!ResultReg) 03958 return false; 03959 03960 updateValueMap(I, ResultReg); 03961 return true; 03962 } 03963 03964 bool AArch64FastISel::selectShift(const Instruction *I) { 03965 MVT RetVT; 03966 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 03967 return false; 03968 03969 if (RetVT.isVector()) 03970 return selectOperator(I, I->getOpcode()); 03971 03972 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 03973 unsigned ResultReg = 0; 03974 uint64_t ShiftVal = C->getZExtValue(); 03975 MVT SrcVT = RetVT; 03976 bool IsZExt = (I->getOpcode() == Instruction::AShr) ? 
false : true; 03977 const Value *Op0 = I->getOperand(0); 03978 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 03979 MVT TmpVT; 03980 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 03981 SrcVT = TmpVT; 03982 IsZExt = true; 03983 Op0 = ZExt->getOperand(0); 03984 } 03985 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 03986 MVT TmpVT; 03987 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 03988 SrcVT = TmpVT; 03989 IsZExt = false; 03990 Op0 = SExt->getOperand(0); 03991 } 03992 } 03993 03994 unsigned Op0Reg = getRegForValue(Op0); 03995 if (!Op0Reg) 03996 return false; 03997 bool Op0IsKill = hasTrivialKill(Op0); 03998 03999 switch (I->getOpcode()) { 04000 default: llvm_unreachable("Unexpected instruction."); 04001 case Instruction::Shl: 04002 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 04003 break; 04004 case Instruction::AShr: 04005 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 04006 break; 04007 case Instruction::LShr: 04008 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 04009 break; 04010 } 04011 if (!ResultReg) 04012 return false; 04013 04014 updateValueMap(I, ResultReg); 04015 return true; 04016 } 04017 04018 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 04019 if (!Op0Reg) 04020 return false; 04021 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 04022 04023 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 04024 if (!Op1Reg) 04025 return false; 04026 bool Op1IsKill = hasTrivialKill(I->getOperand(1)); 04027 04028 unsigned ResultReg = 0; 04029 switch (I->getOpcode()) { 04030 default: llvm_unreachable("Unexpected instruction."); 04031 case Instruction::Shl: 04032 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 04033 break; 04034 case Instruction::AShr: 04035 ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 04036 break; 04037 case Instruction::LShr: 04038 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 04039 break; 04040 } 04041 04042 if (!ResultReg) 04043 return false; 04044 04045 updateValueMap(I, ResultReg); 04046 return true; 04047 } 04048 04049 bool AArch64FastISel::selectBitCast(const Instruction *I) { 04050 MVT RetVT, SrcVT; 04051 04052 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 04053 return false; 04054 if (!isTypeLegal(I->getType(), RetVT)) 04055 return false; 04056 04057 unsigned Opc; 04058 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 04059 Opc = AArch64::FMOVWSr; 04060 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 04061 Opc = AArch64::FMOVXDr; 04062 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 04063 Opc = AArch64::FMOVSWr; 04064 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 04065 Opc = AArch64::FMOVDXr; 04066 else 04067 return false; 04068 04069 const TargetRegisterClass *RC = nullptr; 04070 switch (RetVT.SimpleTy) { 04071 default: llvm_unreachable("Unexpected value type."); 04072 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 04073 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 04074 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 04075 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 04076 } 04077 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 04078 if (!Op0Reg) 04079 return false; 04080 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 04081 unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill); 04082 04083 if (!ResultReg) 04084 return false; 04085 04086 updateValueMap(I, ResultReg); 04087 return true; 
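  // For illustration: 'bitcast float %f to i32' selects FMOVSWr (FMOV Wd, Sn)
  // and 'bitcast i64 %x to double' selects FMOVXDr (FMOV Dd, Xn), so these
  // register-class crossings never go through memory.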
04088 } 04089 04090 bool AArch64FastISel::selectFRem(const Instruction *I) { 04091 MVT RetVT; 04092 if (!isTypeLegal(I->getType(), RetVT)) 04093 return false; 04094 04095 RTLIB::Libcall LC; 04096 switch (RetVT.SimpleTy) { 04097 default: 04098 return false; 04099 case MVT::f32: 04100 LC = RTLIB::REM_F32; 04101 break; 04102 case MVT::f64: 04103 LC = RTLIB::REM_F64; 04104 break; 04105 } 04106 04107 ArgListTy Args; 04108 Args.reserve(I->getNumOperands()); 04109 04110 // Populate the argument list. 04111 for (auto &Arg : I->operands()) { 04112 ArgListEntry Entry; 04113 Entry.Val = Arg; 04114 Entry.Ty = Arg->getType(); 04115 Args.push_back(Entry); 04116 } 04117 04118 CallLoweringInfo CLI; 04119 CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(), 04120 TLI.getLibcallName(LC), std::move(Args)); 04121 if (!lowerCallTo(CLI)) 04122 return false; 04123 updateValueMap(I, CLI.ResultReg); 04124 return true; 04125 } 04126 04127 bool AArch64FastISel::selectSDiv(const Instruction *I) { 04128 MVT VT; 04129 if (!isTypeLegal(I->getType(), VT)) 04130 return false; 04131 04132 if (!isa<ConstantInt>(I->getOperand(1))) 04133 return selectBinaryOp(I, ISD::SDIV); 04134 04135 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 04136 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 04137 !(C.isPowerOf2() || (-C).isPowerOf2())) 04138 return selectBinaryOp(I, ISD::SDIV); 04139 04140 unsigned Lg2 = C.countTrailingZeros(); 04141 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 04142 if (!Src0Reg) 04143 return false; 04144 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 04145 04146 if (cast<BinaryOperator>(I)->isExact()) { 04147 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2); 04148 if (!ResultReg) 04149 return false; 04150 updateValueMap(I, ResultReg); 04151 return true; 04152 } 04153 04154 unsigned Pow2MinusOne = (1 << Lg2) - 1; 04155 unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg, 04156 /*IsKill=*/false, Pow2MinusOne); 04157 if (!AddReg) 04158 return false; 04159 04160 // (Src0 < 0) ? Pow2 - 1 : 0; 04161 if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0)) 04162 return false; 04163 04164 unsigned SelectOpc; 04165 const TargetRegisterClass *RC; 04166 if (VT == MVT::i64) { 04167 SelectOpc = AArch64::CSELXr; 04168 RC = &AArch64::GPR64RegClass; 04169 } else { 04170 SelectOpc = AArch64::CSELWr; 04171 RC = &AArch64::GPR32RegClass; 04172 } 04173 unsigned SelectReg = 04174 fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg, 04175 Src0IsKill, AArch64CC::LT); 04176 if (!SelectReg) 04177 return false; 04178 04179 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 04180 // negate the result. 04181 unsigned ZeroReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR; 04182 unsigned ResultReg; 04183 if (C.isNegative()) 04184 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true, 04185 SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2); 04186 else 04187 ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2); 04188 04189 if (!ResultReg) 04190 return false; 04191 04192 updateValueMap(I, ResultReg); 04193 return true; 04194 } 04195 04196 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 04197 switch (I->getOpcode()) { 04198 default: 04199 break; 04200 case Instruction::Add: 04201 case Instruction::Sub: 04202 return selectAddSub(I); 04203 case Instruction::Mul: 04204 return selectMul(I); 04205 case Instruction::SDiv: 04206 return selectSDiv(I); 04207 case Instruction::SRem: 04208 if (!selectBinaryOp(I, ISD::SREM)) 04209 return selectRem(I, ISD::SREM); 04210 return true; 04211 case Instruction::URem: 04212 if (!selectBinaryOp(I, ISD::UREM)) 04213 return selectRem(I, ISD::UREM); 04214 return true; 04215 case Instruction::Shl: 04216 case Instruction::LShr: 04217 case Instruction::AShr: 04218 return selectShift(I); 04219 case Instruction::And: 04220 case Instruction::Or: 04221 case Instruction::Xor: 04222 return selectLogicalOp(I); 04223 case Instruction::Br: 04224 return selectBranch(I); 04225 case Instruction::IndirectBr: 04226 return selectIndirectBr(I); 04227 case Instruction::BitCast: 04228 if (!FastISel::selectBitCast(I)) 04229 return selectBitCast(I); 04230 return true; 04231 case Instruction::FPToSI: 04232 if (!selectCast(I, ISD::FP_TO_SINT)) 04233 return selectFPToInt(I, /*Signed=*/true); 04234 return true; 04235 case Instruction::FPToUI: 04236 return selectFPToInt(I, /*Signed=*/false); 04237 case Instruction::ZExt: 04238 if (!selectCast(I, ISD::ZERO_EXTEND)) 04239 return selectIntExt(I); 04240 return true; 04241 case Instruction::SExt: 04242 if (!selectCast(I, ISD::SIGN_EXTEND)) 04243 return selectIntExt(I); 04244 return true; 04245 case Instruction::Trunc: 04246 if (!selectCast(I, ISD::TRUNCATE)) 04247 return selectTrunc(I); 04248 return true; 04249 case Instruction::FPExt: 04250 return selectFPExt(I); 04251 case Instruction::FPTrunc: 04252 return selectFPTrunc(I); 04253 case Instruction::SIToFP: 04254 if (!selectCast(I, ISD::SINT_TO_FP)) 04255 return selectIntToFP(I, /*Signed=*/true); 04256 return true; 04257 case Instruction::UIToFP: 04258 return selectIntToFP(I, /*Signed=*/false); 04259 case Instruction::Load: 04260 return selectLoad(I); 04261 case Instruction::Store: 04262 return selectStore(I); 04263 case Instruction::FCmp: 04264 case Instruction::ICmp: 04265 return selectCmp(I); 04266 case Instruction::Select: 04267 return selectSelect(I); 04268 case Instruction::Ret: 04269 return selectRet(I); 04270 case Instruction::FRem: 04271 return selectFRem(I); 04272 } 04273 04274 // fall-back to target-independent instruction selection. 04275 return selectOperator(I, I->getOpcode()); 04276 // Silence warnings. 04277 (void)&CC_AArch64_DarwinPCS_VarArg; 04278 } 04279 04280 namespace llvm { 04281 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, 04282 const TargetLibraryInfo *LibInfo) { 04283 return new AArch64FastISel(FuncInfo, LibInfo); 04284 } 04285 }
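A minimal sketch (not part of this file) of how the factory above is typically reached: the target's TargetLowering subclass overrides createFastISel and forwards to AArch64::createFastISel. The sketch only assumes the signature used in this file; the real override lives with the rest of AArch64TargetLowering (AArch64ISelLowering.cpp).

// Sketch, for orientation only.
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                      const TargetLibraryInfo *LibInfo) const {
  return AArch64::createFastISel(FuncInfo, LibInfo);
}

SelectionDAGISel invokes this hook when fast instruction selection is enabled (for example at -O0) and falls back to the normal DAG path whenever fastSelectInstruction above returns false.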