AArch64FastISel.cpp
//===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    typedef enum {
      RegBase,
      FrameIndexBase
    } BaseKind;

  private:
    BaseKind Kind;
    AArch64_AM::ShiftExtendType ExtType;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg;
    unsigned Shift;
    int64_t Offset;
    const GlobalValue *GV;

  public:
    Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
      OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }
    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }
    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }
    void setOffsetReg(unsigned Reg) {
      assert(isRegBase() && "Invalid offset register access!");
      OffsetReg = Reg;
    }
    unsigned getOffsetReg() const {
      assert(isRegBase() && "Invalid offset register access!");
      return OffsetReg;
    }
    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }
    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() const { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() const { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() const { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            unsigned Flags, unsigned ScaleFactor,
                            MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);

  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         bool SetFlags = false, bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
                MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                      unsigned Op1, bool Op1IsKill);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

#include "AArch64GenCallingConv.inc"

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}

unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}
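
// Illustrative sketch (register and frame offset are assumed, not taken from
// real output): for a static alloca such as
//   %buf = alloca i32, align 4
// the ADDXri above becomes something like
//   add x0, sp, #12
// once the frame index is resolved to a concrete SP offset.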

unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}
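
// Illustrative sketch (register numbers assumed): materializing the constant
// 0 copies from the zero register, e.g. "mov w0, wzr" for i32 or
// "mov x0, xzr" for i64; non-zero constants go through fastEmit_i and the
// tablegen'ed move-immediate patterns instead.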

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm =
        Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
    assert((Imm != -1) && "Cannot encode floating-point constant.");
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // Materialize via constant pool.  MachineConstantPool wants an explicit
  // alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0)
    Align = DL.getTypeAllocSize(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}
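
// Illustrative sketch (labels and registers assumed): an FMOV-encodable
// constant such as 1.0 becomes "fmov d0, #1.00000000", while a value like
// 0.1 falls back to the constant pool:
//   adrp x8, .LCPI0_0
//   ldr  d0, [x8, :lo12:.LCPI0_0]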

unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
    return 0;

  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
      .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
            ResultReg)
      .addReg(ADRPReg)
      .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                        AArch64II::MO_NC);
  } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
    // We can't handle addresses loaded from a constant pool quickly yet.
    return 0;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
      .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
      .addReg(ADRPReg)
      .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
      .addImm(0);
  }
  return ResultReg;
}
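
// Illustrative sketch (ELF syntax, symbol name "g" assumed): the GOT-indirect
// path is "adrp x8, :got:g" followed by "ldr x8, [x8, :got_lo12:g]", whereas
// the direct path is "adrp x8, g" followed by "add x8, x8, :lo12:g".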

unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}

unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
}
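
// Illustrative sketch: +0.0 is materialized by moving the integer zero
// register into an FP register, e.g. "fmov s0, wzr" for f32 and
// "fmov d0, xzr" for f64 (destination register numbers assumed).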

/// \brief Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}
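
// For example, "mul i64 %x, 8" satisfies this predicate; callers below use
// that fact to fold the multiply into an addressing mode or shifted operand
// as "lsl #3" instead of emitting a real multiply.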

// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast: {
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);
  }
  case Instruction::IntToPtr: {
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;
  }
  case Instruction::PtrToInt: {
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;
  }
  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    gep_type_iterator GTI = gep_type_begin(U);
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
         ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Shl:
    if (Addr.getOffsetReg())
      break;

    if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      unsigned Val = CI->getZExtValue();
      if (Val < 1 || Val > 3)
        break;

      uint64_t NumBytes = 0;
      if (Ty && Ty->isSized()) {
        uint64_t NumBits = DL.getTypeSizeInBits(Ty);
        NumBytes = NumBits / 8;
        if (!isPowerOf2_64(NumBits))
          NumBytes = 0;
      }

      if (NumBytes != (1ULL << Val))
        break;

      Addr.setShift(Val);
      Addr.setExtendType(AArch64_AM::LSL);

      if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
        if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
          U = I;

      if (const auto *ZE = dyn_cast<ZExtInst>(U))
        if (ZE->getOperand(0)->getType()->isIntegerTy(32))
          Addr.setExtendType(AArch64_AM::UXTW);

      if (const auto *SE = dyn_cast<SExtInst>(U))
        if (SE->getOperand(0)->getType()->isIntegerTy(32))
          Addr.setExtendType(AArch64_AM::SXTW);

      if (const auto *AI = dyn_cast<BinaryOperator>(U))
        if (AI->getOpcode() == Instruction::And) {
          const Value *LHS = AI->getOperand(0);
          const Value *RHS = AI->getOperand(1);

          if (const auto *C = dyn_cast<ConstantInt>(LHS))
            if (C->getValue() == 0xffffffff)
              std::swap(LHS, RHS);

          // Use dyn_cast here: RHS need not be a ConstantInt, and cast<>
          // would assert on a mismatch instead of returning null.
          if (const auto *C = dyn_cast<ConstantInt>(RHS))
            if (C->getValue() == 0xffffffff) {
              Addr.setExtendType(AArch64_AM::UXTW);
              unsigned Reg = getRegForValue(LHS);
              if (!Reg)
                return false;
              bool RegIsKill = hasTrivialKill(LHS);
              Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                               AArch64::sub_32);
              Addr.setOffsetReg(Reg);
              return true;
            }
        }

      unsigned Reg = getRegForValue(U->getOperand(0));
      if (!Reg)
        return false;
      Addr.setOffsetReg(Reg);
      return true;
    }
    break;
  case Instruction::Mul: {
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    if (const auto *I = dyn_cast<Instruction>(LHS))
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
        U = I;

    if (const auto *ZE = dyn_cast<ZExtInst>(U))
      if (ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        LHS = U->getOperand(0);
      }

    if (const auto *SE = dyn_cast<SExtInst>(U))
      if (SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        LHS = U->getOperand(0);
      }

    unsigned Reg = getRegForValue(LHS);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    // Ty may be null (it defaults to nullptr), so guard the query.
    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    // As above, dyn_cast rather than cast: RHS may not be a constant.
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        unsigned Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        bool RegIsKill = hasTrivialKill(LHS);
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                         AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  } // end switch

  if (Addr.getReg()) {
    if (!Addr.getOffsetReg()) {
      unsigned Reg = getRegForValue(Obj);
      if (!Reg)
        return false;
      Addr.setOffsetReg(Reg);
      return true;
    }
    return false;
  }

  unsigned Reg = getRegForValue(Obj);
  if (!Reg)
    return false;
  Addr.setReg(Reg);
  return true;
}
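
// Illustrative sketch (old-style IR syntax, offset assumed): for
//   %p = getelementptr inbounds i32* %a, i64 3
//   %v = load i32* %p
// the GEP case above folds the constant index into Addr as offset 12 over
// the base register of %a, so the load can later become "ldr w0, [x0, #12]".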

bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// \brief Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
    return true;

  return false;
}

bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  unsigned ScaleFactor;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::i1:  // fall-through
  case MVT::i8:  ScaleFactor = 1; break;
  case MVT::i16: ScaleFactor = 2; break;
  case MVT::i32: // fall-through
  case MVT::f32: ScaleFactor = 4; break;
  case MVT::i64: // fall-through
  case MVT::f64: ScaleFactor = 8; break;
  }

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
      Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
      .addFrameIndex(Addr.getFI())
      .addImm(0)
      .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg()) {
      // Try to fold the immediate into the add instruction.
      if (Offset < 0)
        ResultReg = emitAddSub_ri(/*UseAdd=*/false, MVT::i64, Addr.getReg(),
                                  /*IsKill=*/false, -Offset);
      else
        ResultReg = emitAddSub_ri(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*IsKill=*/false, Offset);
      if (!ResultReg) {
        unsigned ImmReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
        ResultReg = emitAddSub_rr(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*IsKill=*/false, ImmReg, /*IsKill=*/true);
      }
    } else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}
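
// Illustrative sketch (values assumed): an i64 load with offset 32768 fails
// both encodings above (it is not a signed 9-bit offset, and 32768/8 = 4096
// does not fit in 12 bits), so the offset is materialized and folded into an
// ADD, leaving a plain register-based address for the load itself.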

void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           unsigned Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(FI, Offset), Flags,
      MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(
      constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
      constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else {
      MIB.addReg(Addr.getReg());
      MIB.addImm(Offset);
    }
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}
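
// Illustrative sketch (registers assumed): for a register-offset address the
// four operands added above (base, offset register, sign-extend flag, shift
// flag) correspond to an access like "ldr w0, [x1, w2, sxtw #2]"; for the
// immediate form they collapse to base plus scaled offset, "ldr w0, [x1, #8]".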

unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32:  // fall-through
  case MVT::i64:
    break;
  }
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl  ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr)
          std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
                                WantResult);
  }
  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend
  // type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
        }
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                           AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
    }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ShiftType, ShiftVal, SetFlags,
                               WantResult);
        }
      }
    }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}

unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, uint64_t Imm,
                                        bool SetFlags, bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  }  },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}
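
// Illustrative sketch (registers assumed): the immediate must be a 12-bit
// value, optionally shifted left by 12, e.g. "add x0, x1, #42" or
// "add x0, x1, #1, lsl #12" for 4096; anything else returns 0 and the caller
// materializes the constant into a register first.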
01208 
01209 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
01210                                         bool LHSIsKill, unsigned RHSReg,
01211                                         bool RHSIsKill,
01212                                         AArch64_AM::ShiftExtendType ShiftType,
01213                                         uint64_t ShiftImm, bool SetFlags,
01214                                         bool WantResult) {
01215   assert(LHSReg && RHSReg && "Invalid register number.");
01216 
01217   if (RetVT != MVT::i32 && RetVT != MVT::i64)
01218     return 0;
01219 
01220   static const unsigned OpcTable[2][2][2] = {
01221     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
01222       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
01223     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
01224       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
01225   };
01226   bool Is64Bit = RetVT == MVT::i64;
01227   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
01228   const TargetRegisterClass *RC =
01229       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
01230   unsigned ResultReg;
01231   if (WantResult)
01232     ResultReg = createResultReg(RC);
01233   else
01234     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
01235 
01236   const MCInstrDesc &II = TII.get(Opc);
01237   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
01238   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
01239   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
01240       .addReg(LHSReg, getKillRegState(LHSIsKill))
01241       .addReg(RHSReg, getKillRegState(RHSIsKill))
01242       .addImm(getShifterImm(ShiftType, ShiftImm));
01243   return ResultReg;
01244 }
01245 
01246 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
01247                                         bool LHSIsKill, unsigned RHSReg,
01248                                         bool RHSIsKill,
01249                                         AArch64_AM::ShiftExtendType ExtType,
01250                                         uint64_t ShiftImm, bool SetFlags,
01251                                         bool WantResult) {
01252   assert(LHSReg && RHSReg && "Invalid register number.");
01253 
01254   if (RetVT != MVT::i32 && RetVT != MVT::i64)
01255     return 0;
01256 
01257   static const unsigned OpcTable[2][2][2] = {
01258     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
01259       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
01260     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
01261       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
01262   };
01263   bool Is64Bit = RetVT == MVT::i64;
01264   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
01265   const TargetRegisterClass *RC = nullptr;
01266   if (SetFlags)
01267     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
01268   else
01269     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
01270   unsigned ResultReg;
01271   if (WantResult)
01272     ResultReg = createResultReg(RC);
01273   else
01274     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
01275 
01276   const MCInstrDesc &II = TII.get(Opc);
01277   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
01278   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
01279   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
01280       .addReg(LHSReg, getKillRegState(LHSIsKill))
01281       .addReg(RHSReg, getKillRegState(RHSIsKill))
01282       .addImm(getArithExtendImm(ExtType, ShiftImm));
01283   return ResultReg;
01284 }
01285 
01286 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
01287   Type *Ty = LHS->getType();
01288   EVT EVT = TLI.getValueType(Ty, true);
01289   if (!EVT.isSimple())
01290     return false;
01291   MVT VT = EVT.getSimpleVT();
01292 
01293   switch (VT.SimpleTy) {
01294   default:
01295     return false;
01296   case MVT::i1:
01297   case MVT::i8:
01298   case MVT::i16:
01299   case MVT::i32:
01300   case MVT::i64:
01301     return emitICmp(VT, LHS, RHS, IsZExt);
01302   case MVT::f32:
01303   case MVT::f64:
01304     return emitFCmp(VT, LHS, RHS);
01305   }
01306 }
01307 
01308 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
01309                                bool IsZExt) {
01310   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
01311                  IsZExt) != 0;
01312 }
01313 
01314 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
01315                                   uint64_t Imm) {
01316   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
01317                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
01318 }
01319 
01320 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
01321   if (RetVT != MVT::f32 && RetVT != MVT::f64)
01322     return false;
01323 
01324   // Check to see if the 2nd operand is a constant that we can encode directly
01325   // in the compare.
01326   bool UseImm = false;
01327   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
01328     if (CFP->isZero() && !CFP->isNegative())
01329       UseImm = true;
01330 
01331   unsigned LHSReg = getRegForValue(LHS);
01332   if (!LHSReg)
01333     return false;
01334   bool LHSIsKill = hasTrivialKill(LHS);
01335 
01336   if (UseImm) {
01337     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
01338     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
01339         .addReg(LHSReg, getKillRegState(LHSIsKill));
01340     return true;
01341   }
01342 
01343   unsigned RHSReg = getRegForValue(RHS);
01344   if (!RHSReg)
01345     return false;
01346   bool RHSIsKill = hasTrivialKill(RHS);
01347 
01348   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
01349   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
01350       .addReg(LHSReg, getKillRegState(LHSIsKill))
01351       .addReg(RHSReg, getKillRegState(RHSIsKill));
01352   return true;
01353 }
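
      // For example (illustrative registers):
      //   fcmp d0, #0.0            ; FCMPDri, when the RHS folds to +0.0
      //   fcmp s0, s1              ; FCMPSrr, the general case
      // Note that only positive zero is folded here; a -0.0 RHS still goes
      // through the register path.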
01354 
01355 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
01356                                   bool SetFlags, bool WantResult, bool IsZExt) {
01357   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
01358                     IsZExt);
01359 }
01360 
01361 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
01362                                   bool SetFlags, bool WantResult, bool IsZExt) {
01363   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
01364                     IsZExt);
01365 }
01366 
01367 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
01368                                       bool LHSIsKill, unsigned RHSReg,
01369                                       bool RHSIsKill, bool WantResult) {
01370   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
01371                        RHSIsKill, /*SetFlags=*/true, WantResult);
01372 }
01373 
01374 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
01375                                       bool LHSIsKill, unsigned RHSReg,
01376                                       bool RHSIsKill,
01377                                       AArch64_AM::ShiftExtendType ShiftType,
01378                                       uint64_t ShiftImm, bool WantResult) {
01379   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
01380                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
01381                        WantResult);
01382 }
01383 
01384 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
01385                                         const Value *LHS, const Value *RHS) {
01386   // Canonicalize immediates to the RHS first.
01387   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
01388     std::swap(LHS, RHS);
01389 
01390   // Canonicalize mul by power-of-2 to the RHS.
01391   if (LHS->hasOneUse() && isValueAvailable(LHS))
01392     if (isMulPowOf2(LHS))
01393       std::swap(LHS, RHS);
01394 
01395   // Canonicalize shift immediate to the RHS.
01396   if (LHS->hasOneUse() && isValueAvailable(LHS))
01397     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
01398       if (isa<ConstantInt>(SI->getOperand(1)))
01399         std::swap(LHS, RHS);
01400 
01401   unsigned LHSReg = getRegForValue(LHS);
01402   if (!LHSReg)
01403     return 0;
01404   bool LHSIsKill = hasTrivialKill(LHS);
01405 
01406   unsigned ResultReg = 0;
01407   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
01408     uint64_t Imm = C->getZExtValue();
01409     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
01410   }
01411   if (ResultReg)
01412     return ResultReg;
01413 
01414   // Check if the mul can be folded into the instruction.
01415   if (RHS->hasOneUse() && isValueAvailable(RHS))
01416     if (isMulPowOf2(RHS)) {
01417       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
01418       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
01419 
01420       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
01421         if (C->getValue().isPowerOf2())
01422           std::swap(MulLHS, MulRHS);
01423 
01424       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
01425       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
01426 
01427       unsigned RHSReg = getRegForValue(MulLHS);
01428       if (!RHSReg)
01429         return 0;
01430       bool RHSIsKill = hasTrivialKill(MulLHS);
01431       return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
01432                               RHSIsKill, ShiftVal);
01433     }
01434 
01435   // Check if the shift can be folded into the instruction.
01436   if (RHS->hasOneUse() && isValueAvailable(RHS))
01437     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
01438       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
01439         uint64_t ShiftVal = C->getZExtValue();
01440         unsigned RHSReg = getRegForValue(SI->getOperand(0));
01441         if (!RHSReg)
01442           return 0;
01443         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
01444         return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
01445                                 RHSIsKill, ShiftVal);
01446       }
01447 
01448   unsigned RHSReg = getRegForValue(RHS);
01449   if (!RHSReg)
01450     return 0;
01451   bool RHSIsKill = hasTrivialKill(RHS);
01452 
01453   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
01454   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
01455   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
01456     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
01457     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
01458   }
01459   return ResultReg;
01460 }
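
      // Folding sketch (illustrative registers): a one-use shift, or a mul by
      // a power of two, on the RHS becomes the shifted-register form, e.g.
      //   %s = shl i64 %b, 3       ; or equivalently: mul i64 %b, 8
      //   %r = and i64 %a, %s
      // lowers to the single instruction
      //   and x0, x1, x2, lsl #3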
01461 
01462 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
01463                                            unsigned LHSReg, bool LHSIsKill,
01464                                            uint64_t Imm) {
01465   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
01466          "ISD nodes are not consecutive!");
01467   static const unsigned OpcTable[3][2] = {
01468     { AArch64::ANDWri, AArch64::ANDXri },
01469     { AArch64::ORRWri, AArch64::ORRXri },
01470     { AArch64::EORWri, AArch64::EORXri }
01471   };
01472   const TargetRegisterClass *RC;
01473   unsigned Opc;
01474   unsigned RegSize;
01475   switch (RetVT.SimpleTy) {
01476   default:
01477     return 0;
01478   case MVT::i1:
01479   case MVT::i8:
01480   case MVT::i16:
01481   case MVT::i32: {
01482     unsigned Idx = ISDOpc - ISD::AND;
01483     Opc = OpcTable[Idx][0];
01484     RC = &AArch64::GPR32spRegClass;
01485     RegSize = 32;
01486     break;
01487   }
01488   case MVT::i64:
01489     Opc = OpcTable[ISDOpc - ISD::AND][1];
01490     RC = &AArch64::GPR64spRegClass;
01491     RegSize = 64;
01492     break;
01493   }
01494 
01495   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
01496     return 0;
01497 
01498   unsigned ResultReg =
01499       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
01500                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
01501   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
01502     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
01503     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
01504   }
01505   return ResultReg;
01506 }
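
      // Encodability note: AArch64 logical immediates are rotated, replicated
      // bit patterns; e.g. 0xff and 0x00ff00ff00ff00ff are encodable, while 0
      // and all-ones are not. When isLogicalImmediate rejects the value, this
      // returns 0 and the caller falls back to materializing the constant in
      // a register.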
01507 
01508 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
01509                                            unsigned LHSReg, bool LHSIsKill,
01510                                            unsigned RHSReg, bool RHSIsKill,
01511                                            uint64_t ShiftImm) {
01512   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
01513          "ISD nodes are not consecutive!");
01514   static const unsigned OpcTable[3][2] = {
01515     { AArch64::ANDWrs, AArch64::ANDXrs },
01516     { AArch64::ORRWrs, AArch64::ORRXrs },
01517     { AArch64::EORWrs, AArch64::EORXrs }
01518   };
01519   const TargetRegisterClass *RC;
01520   unsigned Opc;
01521   switch (RetVT.SimpleTy) {
01522   default:
01523     return 0;
01524   case MVT::i1:
01525   case MVT::i8:
01526   case MVT::i16:
01527   case MVT::i32:
01528     Opc = OpcTable[ISDOpc - ISD::AND][0];
01529     RC = &AArch64::GPR32RegClass;
01530     break;
01531   case MVT::i64:
01532     Opc = OpcTable[ISDOpc - ISD::AND][1];
01533     RC = &AArch64::GPR64RegClass;
01534     break;
01535   }
01536   unsigned ResultReg =
01537       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
01538                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
01539   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
01540     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
01541     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
01542   }
01543   return ResultReg;
01544 }
01545 
01546 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
01547                                      uint64_t Imm) {
01548   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
01549 }
01550 
01551 bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
01552                                MachineMemOperand *MMO) {
01553   // Simplify this down to something we can handle.
01554   if (!simplifyAddress(Addr, VT))
01555     return false;
01556 
01557   unsigned ScaleFactor;
01558   switch (VT.SimpleTy) {
01559   default: llvm_unreachable("Unexpected value type.");
01560   case MVT::i1:  // fall-through
01561   case MVT::i8:  ScaleFactor = 1; break;
01562   case MVT::i16: ScaleFactor = 2; break;
01563   case MVT::i32: // fall-through
01564   case MVT::f32: ScaleFactor = 4; break;
01565   case MVT::i64: // fall-through
01566   case MVT::f64: ScaleFactor = 8; break;
01567   }
01568 
01569   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
01570   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
01571   bool UseScaled = true;
01572   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
01573     UseScaled = false;
01574     ScaleFactor = 1;
01575   }
01576 
01577   static const unsigned OpcTable[4][6] = {
01578     { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,  AArch64::LDURXi,
01579       AArch64::LDURSi,   AArch64::LDURDi },
01580     { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,  AArch64::LDRXui,
01581       AArch64::LDRSui,   AArch64::LDRDui },
01582     { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
01583       AArch64::LDRSroX,  AArch64::LDRDroX },
01584     { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
01585       AArch64::LDRSroW,  AArch64::LDRDroW }
01586   };
01587 
01588   unsigned Opc;
01589   const TargetRegisterClass *RC;
01590   bool VTIsi1 = false;
01591   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
01592                       Addr.getOffsetReg();
01593   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
01594   if (Addr.getExtendType() == AArch64_AM::UXTW ||
01595       Addr.getExtendType() == AArch64_AM::SXTW)
01596     Idx++;
01597 
01598   switch (VT.SimpleTy) {
01599   default: llvm_unreachable("Unexpected value type.");
01600   case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
01601   case MVT::i8:  Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
01602   case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
01603   case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
01604   case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
01605   case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
01606   case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
01607   }
01608 
01609   // Create the base instruction, then add the operands.
01610   ResultReg = createResultReg(RC);
01611   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
01612                                     TII.get(Opc), ResultReg);
01613   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
01614 
01615   // Loading an i1 requires special handling.
01616   if (VTIsi1) {
01617     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
01618     assert(ANDReg && "Unexpected AND instruction emission failure.");
01619     ResultReg = ANDReg;
01620   }
01621   return true;
01622 }
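
      // Addressing sketch (illustrative registers and offsets), here for a
      // 32-bit load with ScaleFactor 4:
      //   ldr  w0, [x1, #4092]     ; LDRWui, scaled unsigned 12-bit immediate
      //   ldur w0, [x1, #-8]       ; LDURWi, unscaled signed 9-bit immediate
      //   ldr  w0, [x1, x2]        ; LDRWroX, 64-bit register offset
      //   ldr  w0, [x1, w2, sxtw]  ; LDRWroW, sign-extended 32-bit offset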
01623 
01624 bool AArch64FastISel::selectAddSub(const Instruction *I) {
01625   MVT VT;
01626   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
01627     return false;
01628 
01629   if (VT.isVector())
01630     return selectOperator(I, I->getOpcode());
01631 
01632   unsigned ResultReg;
01633   switch (I->getOpcode()) {
01634   default:
01635     llvm_unreachable("Unexpected instruction.");
01636   case Instruction::Add:
01637     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
01638     break;
01639   case Instruction::Sub:
01640     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
01641     break;
01642   }
01643   if (!ResultReg)
01644     return false;
01645 
01646   updateValueMap(I, ResultReg);
01647   return true;
01648 }
01649 
01650 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
01651   MVT VT;
01652   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
01653     return false;
01654 
01655   if (VT.isVector())
01656     return selectOperator(I, I->getOpcode());
01657 
01658   unsigned ResultReg;
01659   switch (I->getOpcode()) {
01660   default:
01661     llvm_unreachable("Unexpected instruction.");
01662   case Instruction::And:
01663     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
01664     break;
01665   case Instruction::Or:
01666     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
01667     break;
01668   case Instruction::Xor:
01669     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
01670     break;
01671   }
01672   if (!ResultReg)
01673     return false;
01674 
01675   updateValueMap(I, ResultReg);
01676   return true;
01677 }
01678 
01679 bool AArch64FastISel::selectLoad(const Instruction *I) {
01680   MVT VT;
01681   // Verify we have a legal type before going any further.  Currently, we handle
01682   // simple types that will directly fit in a register (i32/f32/i64/f64) or
01683   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
01684   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
01685       cast<LoadInst>(I)->isAtomic())
01686     return false;
01687 
01688   // See if we can handle this address.
01689   Address Addr;
01690   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
01691     return false;
01692 
01693   unsigned ResultReg;
01694   if (!emitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
01695     return false;
01696 
01697   updateValueMap(I, ResultReg);
01698   return true;
01699 }
01700 
01701 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
01702                                 MachineMemOperand *MMO) {
01703   // Simplify this down to something we can handle.
01704   if (!simplifyAddress(Addr, VT))
01705     return false;
01706 
01707   unsigned ScaleFactor;
01708   switch (VT.SimpleTy) {
01709   default: llvm_unreachable("Unexpected value type.");
01710   case MVT::i1:  // fall-through
01711   case MVT::i8:  ScaleFactor = 1; break;
01712   case MVT::i16: ScaleFactor = 2; break;
01713   case MVT::i32: // fall-through
01714   case MVT::f32: ScaleFactor = 4; break;
01715   case MVT::i64: // fall-through
01716   case MVT::f64: ScaleFactor = 8; break;
01717   }
01718 
01719   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
01720   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
01721   bool UseScaled = true;
01722   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
01723     UseScaled = false;
01724     ScaleFactor = 1;
01725   }
01726 
01728   static const unsigned OpcTable[4][6] = {
01729     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
01730       AArch64::STURSi,   AArch64::STURDi },
01731     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
01732       AArch64::STRSui,   AArch64::STRDui },
01733     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
01734       AArch64::STRSroX,  AArch64::STRDroX },
01735     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
01736       AArch64::STRSroW,  AArch64::STRDroW }
01738   };
01739 
01740   unsigned Opc;
01741   bool VTIsi1 = false;
01742   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
01743                       Addr.getOffsetReg();
01744   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
01745   if (Addr.getExtendType() == AArch64_AM::UXTW ||
01746       Addr.getExtendType() == AArch64_AM::SXTW)
01747     Idx++;
01748 
01749   switch (VT.SimpleTy) {
01750   default: llvm_unreachable("Unexpected value type.");
01751   case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
01752   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
01753   case MVT::i16: Opc = OpcTable[Idx][1]; break;
01754   case MVT::i32: Opc = OpcTable[Idx][2]; break;
01755   case MVT::i64: Opc = OpcTable[Idx][3]; break;
01756   case MVT::f32: Opc = OpcTable[Idx][4]; break;
01757   case MVT::f64: Opc = OpcTable[Idx][5]; break;
01758   }
01759 
01760   // Storing an i1 requires special handling.
01761   if (VTIsi1 && SrcReg != AArch64::WZR) {
01762     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
01763     assert(ANDReg && "Unexpected AND instruction emission failure.");
01764     SrcReg = ANDReg;
01765   }
01766   // Create the base instruction, then add the operands.
01767   const MCInstrDesc &II = TII.get(Opc);
01768   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
01769   MachineInstrBuilder MIB =
01770       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
01771   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
01772 
01773   return true;
01774 }
01775 
01776 bool AArch64FastISel::selectStore(const Instruction *I) {
01777   MVT VT;
01778   const Value *Op0 = I->getOperand(0);
01779   // Verify we have a legal type before going any further.  Currently, we handle
01780   // simple types that will directly fit in a register (i32/f32/i64/f64) or
01781   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
01782   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
01783       cast<StoreInst>(I)->isAtomic())
01784     return false;
01785 
01786   // Get the value to be stored into a register. Use the zero register directly
01787   // when possible to avoid an unnecessary copy and a wasted register.
01788   unsigned SrcReg = 0;
01789   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
01790     if (CI->isZero())
01791       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
01792   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
01793     if (CF->isZero() && !CF->isNegative()) {
01794       VT = MVT::getIntegerVT(VT.getSizeInBits());
01795       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
01796     }
01797   }
01798 
01799   if (!SrcReg)
01800     SrcReg = getRegForValue(Op0);
01801 
01802   if (!SrcReg)
01803     return false;
01804 
01805   // See if we can handle this address.
01806   Address Addr;
01807   if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
01808     return false;
01809 
01810   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
01811     return false;
01812   return true;
01813 }
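
      // For example (illustrative): storing a constant zero needs no
      // materialization at all:
      //   store i32 0, i32* %p     -->  str wzr, [x0]
      //   store i64 0, i64* %p     -->  str xzr, [x0]
      // Storing +0.0 reuses the same trick by retyping the value as an
      // integer of equal width.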
01814 
01815 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
01816   switch (Pred) {
01817   case CmpInst::FCMP_ONE:
01818   case CmpInst::FCMP_UEQ:
01819   default:
01820     // AL is our "false" for now. The other two need more compares.
01821     return AArch64CC::AL;
01822   case CmpInst::ICMP_EQ:
01823   case CmpInst::FCMP_OEQ:
01824     return AArch64CC::EQ;
01825   case CmpInst::ICMP_SGT:
01826   case CmpInst::FCMP_OGT:
01827     return AArch64CC::GT;
01828   case CmpInst::ICMP_SGE:
01829   case CmpInst::FCMP_OGE:
01830     return AArch64CC::GE;
01831   case CmpInst::ICMP_UGT:
01832   case CmpInst::FCMP_UGT:
01833     return AArch64CC::HI;
01834   case CmpInst::FCMP_OLT:
01835     return AArch64CC::MI;
01836   case CmpInst::ICMP_ULE:
01837   case CmpInst::FCMP_OLE:
01838     return AArch64CC::LS;
01839   case CmpInst::FCMP_ORD:
01840     return AArch64CC::VC;
01841   case CmpInst::FCMP_UNO:
01842     return AArch64CC::VS;
01843   case CmpInst::FCMP_UGE:
01844     return AArch64CC::PL;
01845   case CmpInst::ICMP_SLT:
01846   case CmpInst::FCMP_ULT:
01847     return AArch64CC::LT;
01848   case CmpInst::ICMP_SLE:
01849   case CmpInst::FCMP_ULE:
01850     return AArch64CC::LE;
01851   case CmpInst::FCMP_UNE:
01852   case CmpInst::ICMP_NE:
01853     return AArch64CC::NE;
01854   case CmpInst::ICMP_UGE:
01855     return AArch64CC::HS;
01856   case CmpInst::ICMP_ULT:
01857     return AArch64CC::LO;
01858   }
01859 }
01860 
01861 /// \brief Check if the comparison against zero and the following branch can be
01862 /// folded into a single instruction (CBZ/CBNZ, or TBZ/TBNZ for bit tests).
01863 static bool canFoldZeroCheckIntoBranch(const CmpInst *CI) {
01864   CmpInst::Predicate Predicate = CI->getPredicate();
01865   if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE))
01866     return false;
01867 
01868   Type *Ty = CI->getOperand(0)->getType();
01869   if (!Ty->isIntegerTy())
01870     return false;
01871 
01872   unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
01873   if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
01874     return false;
01875 
01876   if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(0)))
01877     if (C->isNullValue())
01878       return true;
01879 
01880   if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(1)))
01881     if (C->isNullValue())
01882       return true;
01883 
01884   return false;
01885 }
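
      // Folding sketch (illustrative): selectBranch uses this to collapse
      //   %c = icmp ne i64 %x, 0 ; br i1 %c, ...   -->  cbnz x0, <target>
      // and, for a power-of-two mask,
      //   %a = and i32 %x, 4
      //   %c = icmp ne i32 %a, 0 ; br i1 %c, ...   -->  tbnz w0, #2, <target>
      // into a single compare-and-branch instruction.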
01886 
01887 bool AArch64FastISel::selectBranch(const Instruction *I) {
01888   const BranchInst *BI = cast<BranchInst>(I);
01889   if (BI->isUnconditional()) {
01890     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
01891     fastEmitBranch(MSucc, BI->getDebugLoc());
01892     return true;
01893   }
01894 
01895   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
01896   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
01897 
01898   AArch64CC::CondCode CC = AArch64CC::NE;
01899   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
01900     if (CI->hasOneUse() && isValueAvailable(CI)) {
01901       // Try to optimize or fold the cmp.
01902       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
01903       switch (Predicate) {
01904       default:
01905         break;
01906       case CmpInst::FCMP_FALSE:
01907         fastEmitBranch(FBB, DbgLoc);
01908         return true;
01909       case CmpInst::FCMP_TRUE:
01910         fastEmitBranch(TBB, DbgLoc);
01911         return true;
01912       }
01913 
01914       // Try to take advantage of fallthrough opportunities.
01915       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
01916         std::swap(TBB, FBB);
01917         Predicate = CmpInst::getInversePredicate(Predicate);
01918       }
01919 
01920       // Try to optimize comparisons against zero.
01921       if (canFoldZeroCheckIntoBranch(CI)) {
01922         const Value *LHS = CI->getOperand(0);
01923         const Value *RHS = CI->getOperand(1);
01924 
01925         // Canonicalize zero values to the RHS.
01926         if (const auto *C = dyn_cast<ConstantInt>(LHS))
01927           if (C->isNullValue())
01928             std::swap(LHS, RHS);
01929 
01930         int TestBit = -1;
01931         if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
01932           if (AI->getOpcode() == Instruction::And) {
01933             const Value *AndLHS = AI->getOperand(0);
01934             const Value *AndRHS = AI->getOperand(1);
01935 
01936             if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
01937               if (C->getValue().isPowerOf2())
01938                 std::swap(AndLHS, AndRHS);
01939 
01940             if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
01941               if (C->getValue().isPowerOf2()) {
01942                 TestBit = C->getValue().logBase2();
01943                 LHS = AndLHS;
01944               }
01945           }
01946 
01947         static const unsigned OpcTable[2][2][2] = {
01948           { {AArch64::CBZW,  AArch64::CBZX },
01949             {AArch64::CBNZW, AArch64::CBNZX} },
01950           { {AArch64::TBZW,  AArch64::TBZX },
01951             {AArch64::TBNZW, AArch64::TBNZX} }
01952         };
01953         bool IsBitTest = TestBit != -1;
01954         bool IsCmpNE = Predicate == CmpInst::ICMP_NE;
01955         bool Is64Bit = LHS->getType()->isIntegerTy(64);
01956         unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
01957 
01958         unsigned SrcReg = getRegForValue(LHS);
01959         if (!SrcReg)
01960           return false;
01961         bool SrcIsKill = hasTrivialKill(LHS);
01962 
01963         // Emit the combined compare and branch instruction.
01964         MachineInstrBuilder MIB =
01965             BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
01966                 .addReg(SrcReg, getKillRegState(SrcIsKill));
01967         if (IsBitTest)
01968           MIB.addImm(TestBit);
01969         MIB.addMBB(TBB);
01970 
01971         // Obtain the branch weight and add the TrueBB to the successor list.
01972         uint32_t BranchWeight = 0;
01973         if (FuncInfo.BPI)
01974           BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
01975                                                      TBB->getBasicBlock());
01976         FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
01977 
01978         fastEmitBranch(FBB, DbgLoc);
01979         return true;
01980       }
01981 
01982       // Emit the cmp.
01983       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
01984         return false;
01985 
01986       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
01987       // instruction.
01988       CC = getCompareCC(Predicate);
01989       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
01990       switch (Predicate) {
01991       default:
01992         break;
01993       case CmpInst::FCMP_UEQ:
01994         ExtraCC = AArch64CC::EQ;
01995         CC = AArch64CC::VS;
01996         break;
01997       case CmpInst::FCMP_ONE:
01998         ExtraCC = AArch64CC::MI;
01999         CC = AArch64CC::GT;
02000         break;
02001       }
02002       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
02003 
02004       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
02005       if (ExtraCC != AArch64CC::AL) {
02006         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
02007             .addImm(ExtraCC)
02008             .addMBB(TBB);
02009       }
02010 
02011       // Emit the branch.
02012       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
02013           .addImm(CC)
02014           .addMBB(TBB);
02015 
02016       // Obtain the branch weight and add the TrueBB to the successor list.
02017       uint32_t BranchWeight = 0;
02018       if (FuncInfo.BPI)
02019         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
02020                                                   TBB->getBasicBlock());
02021       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
02022 
02023       fastEmitBranch(FBB, DbgLoc);
02024       return true;
02025     }
02026   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
02027     MVT SrcVT;
02028     if (TI->hasOneUse() && isValueAvailable(TI) &&
02029         isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
02030       unsigned CondReg = getRegForValue(TI->getOperand(0));
02031       if (!CondReg)
02032         return false;
02033       bool CondIsKill = hasTrivialKill(TI->getOperand(0));
02034 
02035       // Issue an extract_subreg to get the lower 32 bits.
02036       if (SrcVT == MVT::i64) {
02037         CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
02038                                              AArch64::sub_32);
02039         CondIsKill = true;
02040       }
02041 
02042       unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
02043       assert(ANDReg && "Unexpected AND instruction emission failure.");
02044       emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
02045 
02046       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
02047         std::swap(TBB, FBB);
02048         CC = AArch64CC::EQ;
02049       }
02050       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
02051           .addImm(CC)
02052           .addMBB(TBB);
02053 
02054       // Obtain the branch weight and add the TrueBB to the successor list.
02055       uint32_t BranchWeight = 0;
02056       if (FuncInfo.BPI)
02057         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
02058                                                   TBB->getBasicBlock());
02059       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
02060 
02061       fastEmitBranch(FBB, DbgLoc);
02062       return true;
02063     }
02064   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
02065     uint64_t Imm = CI->getZExtValue();
02066     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
02067     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
02068         .addMBB(Target);
02069 
02070     // Obtain the branch weight and add the target to the successor list.
02071     uint32_t BranchWeight = 0;
02072     if (FuncInfo.BPI)
02073       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
02074                                                  Target->getBasicBlock());
02075     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
02076     return true;
02077   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
02078     // Fake-request the condition; otherwise the intrinsic might be completely
02079     // optimized away.
02080     unsigned CondReg = getRegForValue(BI->getCondition());
02081     if (!CondReg)
02082       return false;
02083 
02084     // Emit the branch.
02085     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
02086       .addImm(CC)
02087       .addMBB(TBB);
02088 
02089     // Obtain the branch weight and add the TrueBB to the successor list.
02090     uint32_t BranchWeight = 0;
02091     if (FuncInfo.BPI)
02092       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
02093                                                  TBB->getBasicBlock());
02094     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
02095 
02096     fastEmitBranch(FBB, DbgLoc);
02097     return true;
02098   }
02099 
02100   unsigned CondReg = getRegForValue(BI->getCondition());
02101   if (CondReg == 0)
02102     return false;
02103   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
02104 
02105   // We've been divorced from our compare!  Our block was split, and
02106   // now our compare lives in a predecessor block.  We mustn't
02107   // re-compare here, as the children of the compare aren't guaranteed
02108   // live across the block boundary (we *could* check for this).
02109   // Regardless, the compare has been done in the predecessor block,
02110   // and it left a value for us in a virtual register.  Ergo, we test
02111   // the one-bit value left in the virtual register.
02112   emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
02113 
02114   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
02115     std::swap(TBB, FBB);
02116     CC = AArch64CC::EQ;
02117   }
02118 
02119   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
02120       .addImm(CC)
02121       .addMBB(TBB);
02122 
02123   // Obtain the branch weight and add the TrueBB to the successor list.
02124   uint32_t BranchWeight = 0;
02125   if (FuncInfo.BPI)
02126     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
02127                                                TBB->getBasicBlock());
02128   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
02129 
02130   fastEmitBranch(FBB, DbgLoc);
02131   return true;
02132 }
02133 
02134 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
02135   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
02136   unsigned AddrReg = getRegForValue(BI->getOperand(0));
02137   if (AddrReg == 0)
02138     return false;
02139 
02140   // Emit the indirect branch.
02141   const MCInstrDesc &II = TII.get(AArch64::BR);
02142   AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
02143   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
02144 
02145   // Make sure the CFG is up-to-date.
02146   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
02147     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
02148 
02149   return true;
02150 }
02151 
02152 bool AArch64FastISel::selectCmp(const Instruction *I) {
02153   const CmpInst *CI = cast<CmpInst>(I);
02154 
02155   // Try to optimize or fold the cmp.
02156   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
02157   unsigned ResultReg = 0;
02158   switch (Predicate) {
02159   default:
02160     break;
02161   case CmpInst::FCMP_FALSE:
02162     ResultReg = createResultReg(&AArch64::GPR32RegClass);
02163     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02164             TII.get(TargetOpcode::COPY), ResultReg)
02165         .addReg(AArch64::WZR, getKillRegState(true));
02166     break;
02167   case CmpInst::FCMP_TRUE:
02168     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
02169     break;
02170   }
02171 
02172   if (ResultReg) {
02173     updateValueMap(I, ResultReg);
02174     return true;
02175   }
02176 
02177   // Emit the cmp.
02178   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
02179     return false;
02180 
02181   ResultReg = createResultReg(&AArch64::GPR32RegClass);
02182 
02183   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
02184   // condition codes are inverted, because they are used by CSINC.
02185   static const unsigned CondCodeTable[2][2] = {
02186     { AArch64CC::NE, AArch64CC::VC },
02187     { AArch64CC::PL, AArch64CC::LE }
02188   };
02189   const unsigned *CondCodes = nullptr;
02190   switch (Predicate) {
02191   default:
02192     break;
02193   case CmpInst::FCMP_UEQ:
02194     CondCodes = &CondCodeTable[0][0];
02195     break;
02196   case CmpInst::FCMP_ONE:
02197     CondCodes = &CondCodeTable[1][0];
02198     break;
02199   }
02200 
02201   if (CondCodes) {
02202     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
02203     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
02204             TmpReg1)
02205         .addReg(AArch64::WZR, getKillRegState(true))
02206         .addReg(AArch64::WZR, getKillRegState(true))
02207         .addImm(CondCodes[0]);
02208     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
02209             ResultReg)
02210         .addReg(TmpReg1, getKillRegState(true))
02211         .addReg(AArch64::WZR, getKillRegState(true))
02212         .addImm(CondCodes[1]);
02213 
02214     updateValueMap(I, ResultReg);
02215     return true;
02216   }
02217 
02218   // Now set a register based on the comparison.
02219   AArch64CC::CondCode CC = getCompareCC(Predicate);
02220   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
02221   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
02222   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
02223           ResultReg)
02224       .addReg(AArch64::WZR, getKillRegState(true))
02225       .addReg(AArch64::WZR, getKillRegState(true))
02226       .addImm(invertedCC);
02227 
02228   updateValueMap(I, ResultReg);
02229   return true;
02230 }
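
      // Materialization sketch (illustrative registers): CSINC with both
      // sources WZR and the inverted condition is the canonical "cset", e.g.
      // for icmp slt:
      //   cmp   w0, w1
      //   csinc w2, wzr, wzr, ge   ; w2 = (w0 < w1) ? 1 : 0, i.e. cset w2, lt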
02231 
02232 bool AArch64FastISel::selectSelect(const Instruction *I) {
02233   const SelectInst *SI = cast<SelectInst>(I);
02234 
02235   EVT DestEVT = TLI.getValueType(SI->getType(), true);
02236   if (!DestEVT.isSimple())
02237     return false;
02238 
02239   MVT DestVT = DestEVT.getSimpleVT();
02240   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
02241       DestVT != MVT::f64)
02242     return false;
02243 
02244   unsigned SelectOpc;
02245   const TargetRegisterClass *RC = nullptr;
02246   switch (DestVT.SimpleTy) {
02247   default: return false;
02248   case MVT::i32:
02249     SelectOpc = AArch64::CSELWr;    RC = &AArch64::GPR32RegClass; break;
02250   case MVT::i64:
02251     SelectOpc = AArch64::CSELXr;    RC = &AArch64::GPR64RegClass; break;
02252   case MVT::f32:
02253     SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
02254   case MVT::f64:
02255     SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
02256   }
02257 
02258   const Value *Cond = SI->getCondition();
02259   bool NeedTest = true;
02260   AArch64CC::CondCode CC = AArch64CC::NE;
02261   if (foldXALUIntrinsic(CC, I, Cond))
02262     NeedTest = false;
02263 
02264   unsigned CondReg = getRegForValue(Cond);
02265   if (!CondReg)
02266     return false;
02267   bool CondIsKill = hasTrivialKill(Cond);
02268 
02269   if (NeedTest) {
02270     unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
02271     assert(ANDReg && "Unexpected AND instruction emission failure.");
02272     emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
02273   }
02274 
02275   unsigned TrueReg = getRegForValue(SI->getTrueValue());
02276   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
02277 
02278   unsigned FalseReg = getRegForValue(SI->getFalseValue());
02279   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
02280 
02281   if (!TrueReg || !FalseReg)
02282     return false;
02283 
02284   unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
02285                                         FalseReg, FalseIsKill, CC);
02286   updateValueMap(I, ResultReg);
02287   return true;
02288 }
02289 
02290 bool AArch64FastISel::selectFPExt(const Instruction *I) {
02291   Value *V = I->getOperand(0);
02292   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
02293     return false;
02294 
02295   unsigned Op = getRegForValue(V);
02296   if (Op == 0)
02297     return false;
02298 
02299   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
02300   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
02301           ResultReg).addReg(Op);
02302   updateValueMap(I, ResultReg);
02303   return true;
02304 }
02305 
02306 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
02307   Value *V = I->getOperand(0);
02308   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
02309     return false;
02310 
02311   unsigned Op = getRegForValue(V);
02312   if (Op == 0)
02313     return false;
02314 
02315   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
02316   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
02317           ResultReg).addReg(Op);
02318   updateValueMap(I, ResultReg);
02319   return true;
02320 }
02321 
02322 // FPToUI and FPToSI
02323 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
02324   MVT DestVT;
02325   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
02326     return false;
02327 
02328   unsigned SrcReg = getRegForValue(I->getOperand(0));
02329   if (SrcReg == 0)
02330     return false;
02331 
02332   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
02333   if (SrcVT == MVT::f128)
02334     return false;
02335 
02336   unsigned Opc;
02337   if (SrcVT == MVT::f64) {
02338     if (Signed)
02339       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
02340     else
02341       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
02342   } else {
02343     if (Signed)
02344       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
02345     else
02346       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
02347   }
02348   unsigned ResultReg = createResultReg(
02349       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
02350   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
02351       .addReg(SrcReg);
02352   updateValueMap(I, ResultReg);
02353   return true;
02354 }
02355 
02356 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
02357   MVT DestVT;
02358   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
02359     return false;
02360   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
02361          "Unexpected value type.");
02362 
02363   unsigned SrcReg = getRegForValue(I->getOperand(0));
02364   if (!SrcReg)
02365     return false;
02366   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
02367 
02368   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
02369 
02370   // Handle sign-extension.
02371   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
02372     SrcReg =
02373         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
02374     if (!SrcReg)
02375       return false;
02376     SrcIsKill = true;
02377   }
02378 
02379   unsigned Opc;
02380   if (SrcVT == MVT::i64) {
02381     if (Signed)
02382       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
02383     else
02384       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
02385   } else {
02386     if (Signed)
02387       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
02388     else
02389       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
02390   }
02391 
02392   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
02393                                       SrcIsKill);
02394   updateValueMap(I, ResultReg);
02395   return true;
02396 }
02397 
02398 bool AArch64FastISel::fastLowerArguments() {
02399   if (!FuncInfo.CanLowerReturn)
02400     return false;
02401 
02402   const Function *F = FuncInfo.Fn;
02403   if (F->isVarArg())
02404     return false;
02405 
02406   CallingConv::ID CC = F->getCallingConv();
02407   if (CC != CallingConv::C)
02408     return false;
02409 
02410   // Only handle simple cases of up to 8 GPR and FPR each.
02411   unsigned GPRCnt = 0;
02412   unsigned FPRCnt = 0;
02413   unsigned Idx = 0;
02414   for (auto const &Arg : F->args()) {
02415     // The first argument is at index 1.
02416     ++Idx;
02417     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
02418         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
02419         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
02420         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
02421       return false;
02422 
02423     Type *ArgTy = Arg.getType();
02424     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
02425       return false;
02426 
02427     EVT ArgVT = TLI.getValueType(ArgTy);
02428     if (!ArgVT.isSimple())
02429       return false;
02430 
02431     MVT VT = ArgVT.getSimpleVT().SimpleTy;
02432     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
02433       return false;
02434 
02435     if (VT.isVector() &&
02436         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
02437       return false;
02438 
02439     if (VT >= MVT::i1 && VT <= MVT::i64)
02440       ++GPRCnt;
02441     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
02442              VT.is128BitVector())
02443       ++FPRCnt;
02444     else
02445       return false;
02446 
02447     if (GPRCnt > 8 || FPRCnt > 8)
02448       return false;
02449   }
02450 
02451   static const MCPhysReg Registers[6][8] = {
02452     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
02453       AArch64::W5, AArch64::W6, AArch64::W7 },
02454     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
02455       AArch64::X5, AArch64::X6, AArch64::X7 },
02456     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
02457       AArch64::H5, AArch64::H6, AArch64::H7 },
02458     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
02459       AArch64::S5, AArch64::S6, AArch64::S7 },
02460     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
02461       AArch64::D5, AArch64::D6, AArch64::D7 },
02462     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
02463       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
02464   };
02465 
02466   unsigned GPRIdx = 0;
02467   unsigned FPRIdx = 0;
02468   for (auto const &Arg : F->args()) {
02469     MVT VT = TLI.getSimpleValueType(Arg.getType());
02470     unsigned SrcReg;
02471     const TargetRegisterClass *RC;
02472     if (VT >= MVT::i1 && VT <= MVT::i32) {
02473       SrcReg = Registers[0][GPRIdx++];
02474       RC = &AArch64::GPR32RegClass;
02475       VT = MVT::i32;
02476     } else if (VT == MVT::i64) {
02477       SrcReg = Registers[1][GPRIdx++];
02478       RC = &AArch64::GPR64RegClass;
02479     } else if (VT == MVT::f16) {
02480       SrcReg = Registers[2][FPRIdx++];
02481       RC = &AArch64::FPR16RegClass;
02482     } else if (VT == MVT::f32) {
02483       SrcReg = Registers[3][FPRIdx++];
02484       RC = &AArch64::FPR32RegClass;
02485     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
02486       SrcReg = Registers[4][FPRIdx++];
02487       RC = &AArch64::FPR64RegClass;
02488     } else if (VT.is128BitVector()) {
02489       SrcReg = Registers[5][FPRIdx++];
02490       RC = &AArch64::FPR128RegClass;
02491     } else
02492       llvm_unreachable("Unexpected value type.");
02493 
02494     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
02495     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
02496     // Without this, EmitLiveInCopies may eliminate the livein if its only
02497     // use is a bitcast (which isn't turned into an instruction).
02498     unsigned ResultReg = createResultReg(RC);
02499     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02500             TII.get(TargetOpcode::COPY), ResultReg)
02501         .addReg(DstReg, getKillRegState(true));
02502     updateValueMap(&Arg, ResultReg);
02503   }
02504   return true;
02505 }
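
      // Assignment sketch (illustrative signature): for
      //   define i32 @f(i32 %a, double %d, i64 %b)
      // %a arrives in W0, %d in D0, and %b in X1; GPR and FPR argument
      // registers are counted independently, and overflowing either class
      // past eight arguments bails out to SelectionDAG.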
02506 
02507 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
02508                                       SmallVectorImpl<MVT> &OutVTs,
02509                                       unsigned &NumBytes) {
02510   CallingConv::ID CC = CLI.CallConv;
02511   SmallVector<CCValAssign, 16> ArgLocs;
02512   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
02513   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
02514 
02515   // Get a count of how many bytes are to be pushed on the stack.
02516   NumBytes = CCInfo.getNextStackOffset();
02517 
02518   // Issue CALLSEQ_START
02519   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
02520   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
02521     .addImm(NumBytes);
02522 
02523   // Process the args.
02524   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02525     CCValAssign &VA = ArgLocs[i];
02526     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
02527     MVT ArgVT = OutVTs[VA.getValNo()];
02528 
02529     unsigned ArgReg = getRegForValue(ArgVal);
02530     if (!ArgReg)
02531       return false;
02532 
02533     // Handle arg promotion: SExt, ZExt, AExt.
02534     switch (VA.getLocInfo()) {
02535     case CCValAssign::Full:
02536       break;
02537     case CCValAssign::SExt: {
02538       MVT DestVT = VA.getLocVT();
02539       MVT SrcVT = ArgVT;
02540       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
02541       if (!ArgReg)
02542         return false;
02543       break;
02544     }
02545     case CCValAssign::AExt:
02546     // Intentional fall-through.
02547     case CCValAssign::ZExt: {
02548       MVT DestVT = VA.getLocVT();
02549       MVT SrcVT = ArgVT;
02550       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
02551       if (!ArgReg)
02552         return false;
02553       break;
02554     }
02555     default:
02556       llvm_unreachable("Unknown arg promotion!");
02557     }
02558 
02559     // Now copy/store arg to correct locations.
02560     if (VA.isRegLoc() && !VA.needsCustom()) {
02561       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02562               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
02563       CLI.OutRegs.push_back(VA.getLocReg());
02564     } else if (VA.needsCustom()) {
02565       // FIXME: Handle custom args.
02566       return false;
02567     } else {
02568       assert(VA.isMemLoc() && "Assuming store on stack.");
02569 
02570       // Don't emit stores for undef values.
02571       if (isa<UndefValue>(ArgVal))
02572         continue;
02573 
02574       // Need to store on the stack.
02575       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
02576 
02577       unsigned BEAlign = 0;
02578       if (ArgSize < 8 && !Subtarget->isLittleEndian())
02579         BEAlign = 8 - ArgSize;
02580 
02581       Address Addr;
02582       Addr.setKind(Address::RegBase);
02583       Addr.setReg(AArch64::SP);
02584       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
02585 
02586       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
02587       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
02588         MachinePointerInfo::getStack(Addr.getOffset()),
02589         MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
02590 
02591       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
02592         return false;
02593     }
02594   }
02595   return true;
02596 }
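
      // Stack-slot sketch (illustrative, assuming the usual 8-byte slot): on
      // a big-endian target a 1-byte argument is stored at offset + 7
      // (BEAlign = 8 - ArgSize), i.e. at the high-address end of the slot
      // where the callee expects to load it.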
02597 
02598 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
02599                                  unsigned NumBytes) {
02600   CallingConv::ID CC = CLI.CallConv;
02601 
02602   // Issue CALLSEQ_END
02603   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
02604   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
02605     .addImm(NumBytes).addImm(0);
02606 
02607   // Now the return value.
02608   if (RetVT != MVT::isVoid) {
02609     SmallVector<CCValAssign, 16> RVLocs;
02610     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
02611     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
02612 
02613     // Only handle a single return value.
02614     if (RVLocs.size() != 1)
02615       return false;
02616 
02617     // Copy all of the result registers out of their specified physreg.
02618     MVT CopyVT = RVLocs[0].getValVT();
02619     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
02620     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02621             TII.get(TargetOpcode::COPY), ResultReg)
02622         .addReg(RVLocs[0].getLocReg());
02623     CLI.InRegs.push_back(RVLocs[0].getLocReg());
02624 
02625     CLI.ResultReg = ResultReg;
02626     CLI.NumResultRegs = 1;
02627   }
02628 
02629   return true;
02630 }
02631 
02632 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
02633   CallingConv::ID CC  = CLI.CallConv;
02634   bool IsTailCall     = CLI.IsTailCall;
02635   bool IsVarArg       = CLI.IsVarArg;
02636   const Value *Callee = CLI.Callee;
02637   const char *SymName = CLI.SymName;
02638 
02639   if (!Callee && !SymName)
02640     return false;
02641 
02642   // Allow SelectionDAG isel to handle tail calls.
02643   if (IsTailCall)
02644     return false;
02645 
02646   CodeModel::Model CM = TM.getCodeModel();
02647   // Only support the small and large code model.
02648   if (CM != CodeModel::Small && CM != CodeModel::Large)
02649     return false;
02650 
02651   // FIXME: Add large code model support for ELF.
02652   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
02653     return false;
02654 
02655   // Let SDISel handle vararg functions.
02656   if (IsVarArg)
02657     return false;
02658 
02659   // FIXME: Only handle *simple* calls for now.
02660   MVT RetVT;
02661   if (CLI.RetTy->isVoidTy())
02662     RetVT = MVT::isVoid;
02663   else if (!isTypeLegal(CLI.RetTy, RetVT))
02664     return false;
02665 
02666   for (auto Flag : CLI.OutFlags)
02667     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
02668       return false;
02669 
02670   // Set up the argument vectors.
02671   SmallVector<MVT, 16> OutVTs;
02672   OutVTs.reserve(CLI.OutVals.size());
02673 
02674   for (auto *Val : CLI.OutVals) {
02675     MVT VT;
02676     if (!isTypeLegal(Val->getType(), VT) &&
02677         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
02678       return false;
02679 
02680     // We don't handle vector parameters yet.
02681     if (VT.isVector() || VT.getSizeInBits() > 64)
02682       return false;
02683 
02684     OutVTs.push_back(VT);
02685   }
02686 
02687   Address Addr;
02688   if (Callee && !computeCallAddress(Callee, Addr))
02689     return false;
02690 
02691   // Handle the arguments now that we've gotten them.
02692   unsigned NumBytes;
02693   if (!processCallArgs(CLI, OutVTs, NumBytes))
02694     return false;
02695 
02696   // Issue the call.
02697   MachineInstrBuilder MIB;
02698   if (CM == CodeModel::Small) {
02699     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
02700     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
02701     if (SymName)
02702       MIB.addExternalSymbol(SymName, 0);
02703     else if (Addr.getGlobalValue())
02704       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
02705     else if (Addr.getReg()) {
02706       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
02707       MIB.addReg(Reg);
02708     } else
02709       return false;
02710   } else {
02711     unsigned CallReg = 0;
02712     if (SymName) {
02713       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
02714       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
02715               ADRPReg)
02716         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
02717 
02718       CallReg = createResultReg(&AArch64::GPR64RegClass);
02719       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
02720               CallReg)
02721         .addReg(ADRPReg)
02722         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
02723                            AArch64II::MO_NC);
02724     } else if (Addr.getGlobalValue())
02725       CallReg = materializeGV(Addr.getGlobalValue());
02726     else if (Addr.getReg())
02727       CallReg = Addr.getReg();
02728 
02729     if (!CallReg)
02730       return false;
02731 
02732     const MCInstrDesc &II = TII.get(AArch64::BLR);
02733     CallReg = constrainOperandRegClass(II, CallReg, 0);
02734     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
02735   }
02736 
02737   // Add implicit physical register uses to the call.
02738   for (auto Reg : CLI.OutRegs)
02739     MIB.addReg(Reg, RegState::Implicit);
02740 
02741   // Add a register mask with the call-preserved registers.
02742   // Proper defs for return values will be added by setPhysRegsDeadExcept().
02743   MIB.addRegMask(TRI.getCallPreservedMask(CC));
02744 
02745   CLI.Call = MIB;
02746 
02747   // Finish off the call including any return values.
02748   return finishCall(CLI, RetVT, NumBytes);
02749 }
02750 
02751 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
02752   if (Alignment)
02753     return Len / Alignment <= 4;
02754   else
02755     return Len < 32;
02756 }
02757 
02758 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
02759                                          uint64_t Len, unsigned Alignment) {
02760   // Make sure we don't bloat code by inlining very large memcpy calls.
02761   if (!isMemCpySmall(Len, Alignment))
02762     return false;
02763 
02764   int64_t UnscaledOffset = 0;
02765   Address OrigDest = Dest;
02766   Address OrigSrc = Src;
02767 
02768   while (Len) {
02769     MVT VT;
02770     if (!Alignment || Alignment >= 8) {
02771       if (Len >= 8)
02772         VT = MVT::i64;
02773       else if (Len >= 4)
02774         VT = MVT::i32;
02775       else if (Len >= 2)
02776         VT = MVT::i16;
02777       else {
02778         VT = MVT::i8;
02779       }
02780     } else {
02781       // Bound based on alignment.
02782       if (Len >= 4 && Alignment == 4)
02783         VT = MVT::i32;
02784       else if (Len >= 2 && Alignment == 2)
02785         VT = MVT::i16;
02786       else {
02787         VT = MVT::i8;
02788       }
02789     }
02790 
02791     bool RV;
02792     unsigned ResultReg;
02793     RV = emitLoad(VT, ResultReg, Src);
02794     if (!RV)
02795       return false;
02796 
02797     RV = emitStore(VT, ResultReg, Dest);
02798     if (!RV)
02799       return false;
02800 
02801     int64_t Size = VT.getSizeInBits() / 8;
02802     Len -= Size;
02803     UnscaledOffset += Size;
02804 
02805     // We need to recompute the unscaled offset for each iteration.
02806     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
02807     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
02808   }
02809 
02810   return true;
02811 }
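
// For example (illustrative; register numbers assumed), a 12-byte copy with
// 4-byte alignment unrolls into three i32 load/store pairs:
//   ldr w8, [x1]          ; bytes 0..3
//   str w8, [x0]
//   ldr w8, [x1, #4]      ; bytes 4..7
//   str w8, [x0, #4]
//   ldr w8, [x1, #8]      ; bytes 8..11
//   str w8, [x0, #8]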
02812 
02813 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
02814 /// into the user. The condition code will only be updated on success.
02815 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
02816                                         const Instruction *I,
02817                                         const Value *Cond) {
02818   if (!isa<ExtractValueInst>(Cond))
02819     return false;
02820 
02821   const auto *EV = cast<ExtractValueInst>(Cond);
02822   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
02823     return false;
02824 
02825   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
02826   MVT RetVT;
02827   const Function *Callee = II->getCalledFunction();
02828   Type *RetTy =
02829       cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
02830   if (!isTypeLegal(RetTy, RetVT))
02831     return false;
02832 
02833   if (RetVT != MVT::i32 && RetVT != MVT::i64)
02834     return false;
02835 
02836   const Value *LHS = II->getArgOperand(0);
02837   const Value *RHS = II->getArgOperand(1);
02838 
02839   // Canonicalize immediate to the RHS.
02840   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
02841       isCommutativeIntrinsic(II))
02842     std::swap(LHS, RHS);
02843 
02844   // Simplify multiplies.
02845   unsigned IID = II->getIntrinsicID();
02846   switch (IID) {
02847   default:
02848     break;
02849   case Intrinsic::smul_with_overflow:
02850     if (const auto *C = dyn_cast<ConstantInt>(RHS))
02851       if (C->getValue() == 2)
02852         IID = Intrinsic::sadd_with_overflow;
02853     break;
02854   case Intrinsic::umul_with_overflow:
02855     if (const auto *C = dyn_cast<ConstantInt>(RHS))
02856       if (C->getValue() == 2)
02857         IID = Intrinsic::uadd_with_overflow;
02858     break;
02859   }
02860 
02861   AArch64CC::CondCode TmpCC;
02862   switch (IID) {
02863   default:
02864     return false;
02865   case Intrinsic::sadd_with_overflow:
02866   case Intrinsic::ssub_with_overflow:
02867     TmpCC = AArch64CC::VS;
02868     break;
02869   case Intrinsic::uadd_with_overflow:
02870     TmpCC = AArch64CC::HS;
02871     break;
02872   case Intrinsic::usub_with_overflow:
02873     TmpCC = AArch64CC::LO;
02874     break;
02875   case Intrinsic::smul_with_overflow:
02876   case Intrinsic::umul_with_overflow:
02877     TmpCC = AArch64CC::NE;
02878     break;
02879   }
02880 
02881   // Check if both instructions are in the same basic block.
02882   if (!isValueAvailable(II))
02883     return false;
02884 
02885   // Make sure nothing is in the way.
02886   BasicBlock::const_iterator Start = I;
02887   BasicBlock::const_iterator End = II;
02888   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
02889     // We only expect extractvalue instructions between the intrinsic and the
02890     // instruction to be selected.
02891     if (!isa<ExtractValueInst>(Itr))
02892       return false;
02893 
02894     // Check that the extractvalue operand comes from the intrinsic.
02895     const auto *EVI = cast<ExtractValueInst>(Itr);
02896     if (EVI->getAggregateOperand() != II)
02897       return false;
02898   }
02899 
02900   CC = TmpCC;
02901   return true;
02902 }
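
// A typical pattern this fold applies to (illustrative IR):
//   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %obit = extractvalue { i32, i1 } %res, 1
//   br i1 %obit, label %overflow, label %cont
// The branch can then test the VS condition produced by the flag-setting ADD
// that lowers the intrinsic, instead of materializing %obit first.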
02903 
02904 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
02905   // FIXME: Handle more intrinsics.
02906   switch (II->getIntrinsicID()) {
02907   default: return false;
02908   case Intrinsic::frameaddress: {
02909     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
02910     MFI->setFrameAddressIsTaken(true);
02911 
02912     const AArch64RegisterInfo *RegInfo =
02913         static_cast<const AArch64RegisterInfo *>(
02914             TM.getSubtargetImpl()->getRegisterInfo());
02915     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
02916     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
02917     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02918             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
02919     // Recursively load the frame address:
02920     // ldr x0, [fp]
02921     // ldr x0, [x0]
02922     // ldr x0, [x0]
02923     // ...
02924     unsigned DestReg;
02925     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
02926     while (Depth--) {
02927       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
02928                                 SrcReg, /*IsKill=*/true, 0);
02929       assert(DestReg && "Unexpected LDR instruction emission failure.");
02930       SrcReg = DestReg;
02931     }
02932 
02933     updateValueMap(II, SrcReg);
02934     return true;
02935   }
02936   case Intrinsic::memcpy:
02937   case Intrinsic::memmove: {
02938     const auto *MTI = cast<MemTransferInst>(II);
02939     // Don't handle volatile.
02940     if (MTI->isVolatile())
02941       return false;
02942 
02943     // Disable inlining for memmove before calls to computeAddress.  Otherwise,
02944     // we would emit dead code because we don't currently handle memmoves.
02945     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
02946     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
02947       // Small memcpys are common enough that we want to do them without a call
02948       // if possible.
02949       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
02950       unsigned Alignment = MTI->getAlignment();
02951       if (isMemCpySmall(Len, Alignment)) {
02952         Address Dest, Src;
02953         if (!computeAddress(MTI->getRawDest(), Dest) ||
02954             !computeAddress(MTI->getRawSource(), Src))
02955           return false;
02956         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
02957           return true;
02958       }
02959     }
02960 
02961     if (!MTI->getLength()->getType()->isIntegerTy(64))
02962       return false;
02963 
02964     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
02965       // Fast instruction selection doesn't support the special
02966       // address spaces.
02967       return false;
02968 
02969     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
02970     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
02971   }
02972   case Intrinsic::memset: {
02973     const MemSetInst *MSI = cast<MemSetInst>(II);
02974     // Don't handle volatile.
02975     if (MSI->isVolatile())
02976       return false;
02977 
02978     if (!MSI->getLength()->getType()->isIntegerTy(64))
02979       return false;
02980 
02981     if (MSI->getDestAddressSpace() > 255)
02982       // Fast instruction selection doesn't support the special
02983       // address spaces.
02984       return false;
02985 
02986     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
02987   }
02988   case Intrinsic::sin:
02989   case Intrinsic::cos:
02990   case Intrinsic::pow: {
02991     MVT RetVT;
02992     if (!isTypeLegal(II->getType(), RetVT))
02993       return false;
02994 
02995     if (RetVT != MVT::f32 && RetVT != MVT::f64)
02996       return false;
02997 
02998     static const RTLIB::Libcall LibCallTable[3][2] = {
02999       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
03000       { RTLIB::COS_F32, RTLIB::COS_F64 },
03001       { RTLIB::POW_F32, RTLIB::POW_F64 }
03002     };
03003     RTLIB::Libcall LC;
03004     bool Is64Bit = RetVT == MVT::f64;
03005     switch (II->getIntrinsicID()) {
03006     default:
03007       llvm_unreachable("Unexpected intrinsic.");
03008     case Intrinsic::sin:
03009       LC = LibCallTable[0][Is64Bit];
03010       break;
03011     case Intrinsic::cos:
03012       LC = LibCallTable[1][Is64Bit];
03013       break;
03014     case Intrinsic::pow:
03015       LC = LibCallTable[2][Is64Bit];
03016       break;
03017     }
03018 
03019     ArgListTy Args;
03020     Args.reserve(II->getNumArgOperands());
03021 
03022     // Populate the argument list.
03023     for (auto &Arg : II->arg_operands()) {
03024       ArgListEntry Entry;
03025       Entry.Val = Arg;
03026       Entry.Ty = Arg->getType();
03027       Args.push_back(Entry);
03028     }
03029 
03030     CallLoweringInfo CLI;
03031     CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
03032                   TLI.getLibcallName(LC), std::move(Args));
03033     if (!lowerCallTo(CLI))
03034       return false;
03035     updateValueMap(II, CLI.ResultReg);
03036     return true;
03037   }
03038   case Intrinsic::trap: {
03039     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
03040         .addImm(1);
03041     return true;
03042   }
03043   case Intrinsic::sqrt: {
03044     Type *RetTy = II->getCalledFunction()->getReturnType();
03045 
03046     MVT VT;
03047     if (!isTypeLegal(RetTy, VT))
03048       return false;
03049 
03050     unsigned Op0Reg = getRegForValue(II->getOperand(0));
03051     if (!Op0Reg)
03052       return false;
03053     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
03054 
03055     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
03056     if (!ResultReg)
03057       return false;
03058 
03059     updateValueMap(II, ResultReg);
03060     return true;
03061   }
03062   case Intrinsic::sadd_with_overflow:
03063   case Intrinsic::uadd_with_overflow:
03064   case Intrinsic::ssub_with_overflow:
03065   case Intrinsic::usub_with_overflow:
03066   case Intrinsic::smul_with_overflow:
03067   case Intrinsic::umul_with_overflow: {
03068     // This implements the basic lowering of the xalu with overflow intrinsics.
03069     const Function *Callee = II->getCalledFunction();
03070     auto *Ty = cast<StructType>(Callee->getReturnType());
03071     Type *RetTy = Ty->getTypeAtIndex(0U);
03072 
03073     MVT VT;
03074     if (!isTypeLegal(RetTy, VT))
03075       return false;
03076 
03077     if (VT != MVT::i32 && VT != MVT::i64)
03078       return false;
03079 
03080     const Value *LHS = II->getArgOperand(0);
03081     const Value *RHS = II->getArgOperand(1);
03082     // Canonicalize immediate to the RHS.
03083     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
03084         isCommutativeIntrinsic(II))
03085       std::swap(LHS, RHS);
03086 
03087     // Simplify multiplies.
03088     unsigned IID = II->getIntrinsicID();
03089     switch (IID) {
03090     default:
03091       break;
03092     case Intrinsic::smul_with_overflow:
03093       if (const auto *C = dyn_cast<ConstantInt>(RHS))
03094         if (C->getValue() == 2) {
03095           IID = Intrinsic::sadd_with_overflow;
03096           RHS = LHS;
03097         }
03098       break;
03099     case Intrinsic::umul_with_overflow:
03100       if (const auto *C = dyn_cast<ConstantInt>(RHS))
03101         if (C->getValue() == 2) {
03102           IID = Intrinsic::uadd_with_overflow;
03103           RHS = LHS;
03104         }
03105       break;
03106     }
03107 
03108     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
03109     AArch64CC::CondCode CC = AArch64CC::Invalid;
03110     switch (IID) {
03111     default: llvm_unreachable("Unexpected intrinsic!");
03112     case Intrinsic::sadd_with_overflow:
03113       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
03114       CC = AArch64CC::VS;
03115       break;
03116     case Intrinsic::uadd_with_overflow:
03117       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
03118       CC = AArch64CC::HS;
03119       break;
03120     case Intrinsic::ssub_with_overflow:
03121       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
03122       CC = AArch64CC::VS;
03123       break;
03124     case Intrinsic::usub_with_overflow:
03125       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
03126       CC = AArch64CC::LO;
03127       break;
03128     case Intrinsic::smul_with_overflow: {
03129       CC = AArch64CC::NE;
03130       unsigned LHSReg = getRegForValue(LHS);
03131       if (!LHSReg)
03132         return false;
03133       bool LHSIsKill = hasTrivialKill(LHS);
03134 
03135       unsigned RHSReg = getRegForValue(RHS);
03136       if (!RHSReg)
03137         return false;
03138       bool RHSIsKill = hasTrivialKill(RHS);
03139 
03140       if (VT == MVT::i32) {
03141         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
03142         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
03143                                        /*IsKill=*/false, 32);
03144         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
03145                                             AArch64::sub_32);
03146         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
03147                                               AArch64::sub_32);
03148         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
03149                     AArch64_AM::ASR, 31, /*WantResult=*/false);
03150       } else {
03151         assert(VT == MVT::i64 && "Unexpected value type.");
03152         MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
03153         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
03154                                         RHSReg, RHSIsKill);
03155         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
03156                     AArch64_AM::ASR, 63, /*WantResult=*/false);
03157       }
03158       break;
03159     }
03160     case Intrinsic::umul_with_overflow: {
03161       CC = AArch64CC::NE;
03162       unsigned LHSReg = getRegForValue(LHS);
03163       if (!LHSReg)
03164         return false;
03165       bool LHSIsKill = hasTrivialKill(LHS);
03166 
03167       unsigned RHSReg = getRegForValue(RHS);
03168       if (!RHSReg)
03169         return false;
03170       bool RHSIsKill = hasTrivialKill(RHS);
03171 
03172       if (VT == MVT::i32) {
03173         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
03174         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
03175                     /*IsKill=*/false, AArch64_AM::LSR, 32,
03176                     /*WantResult=*/false);
03177         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
03178                                             AArch64::sub_32);
03179       } else {
03180         assert(VT == MVT::i64 && "Unexpected value type.");
03181         MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
03182         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
03183                                         RHSReg, RHSIsKill);
03184         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
03185                     /*IsKill=*/false, /*WantResult=*/false);
03186       }
03187       break;
03188     }
03189     }
03190 
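    // Illustrative result of the 32-bit smul.with.overflow path above
    // (register numbers assumed):
    //   smull x8, w0, w1        ; 64-bit product
    //   lsr   x9, x8, #32       ; high half
    //   cmp   w9, w8, asr #31   ; high half must equal the sign bits
    //   cset  w10, ne           ; overflow bit, via the CSINC below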
03191     if (MulReg) {
03192       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
03193       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03194               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
03195     }
03196 
03197     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
03198                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
03199                                   /*IsKill=*/true, getInvertedCondCode(CC));
03200     assert((ResultReg1 + 1) == ResultReg2 &&
03201            "Nonconsecutive result registers.");
03202     updateValueMap(II, ResultReg1, 2);
03203     return true;
03204   }
03205   }
03206   return false;
03207 }
03208 
03209 bool AArch64FastISel::selectRet(const Instruction *I) {
03210   const ReturnInst *Ret = cast<ReturnInst>(I);
03211   const Function &F = *I->getParent()->getParent();
03212 
03213   if (!FuncInfo.CanLowerReturn)
03214     return false;
03215 
03216   if (F.isVarArg())
03217     return false;
03218 
03219   // Build a list of return value registers.
03220   SmallVector<unsigned, 4> RetRegs;
03221 
03222   if (Ret->getNumOperands() > 0) {
03223     CallingConv::ID CC = F.getCallingConv();
03224     SmallVector<ISD::OutputArg, 4> Outs;
03225     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
03226 
03227     // Analyze operands of the call, assigning locations to each operand.
03228     SmallVector<CCValAssign, 16> ValLocs;
03229     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
03230     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
03231                                                      : RetCC_AArch64_AAPCS;
03232     CCInfo.AnalyzeReturn(Outs, RetCC);
03233 
03234     // Only handle a single return value for now.
03235     if (ValLocs.size() != 1)
03236       return false;
03237 
03238     CCValAssign &VA = ValLocs[0];
03239     const Value *RV = Ret->getOperand(0);
03240 
03241     // Don't bother handling odd stuff for now.
03242     if ((VA.getLocInfo() != CCValAssign::Full) &&
03243         (VA.getLocInfo() != CCValAssign::BCvt))
03244       return false;
03245 
03246     // Only handle register returns for now.
03247     if (!VA.isRegLoc())
03248       return false;
03249 
03250     unsigned Reg = getRegForValue(RV);
03251     if (Reg == 0)
03252       return false;
03253 
03254     unsigned SrcReg = Reg + VA.getValNo();
03255     unsigned DestReg = VA.getLocReg();
03256     // Avoid a cross-class copy. This is very unlikely.
03257     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
03258       return false;
03259 
03260     EVT RVEVT = TLI.getValueType(RV->getType());
03261     if (!RVEVT.isSimple())
03262       return false;
03263 
03264     // Vectors (of > 1 lane) in big endian need tricky handling.
03265     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
03266         !Subtarget->isLittleEndian())
03267       return false;
03268 
03269     MVT RVVT = RVEVT.getSimpleVT();
03270     if (RVVT == MVT::f128)
03271       return false;
03272 
03273     MVT DestVT = VA.getValVT();
03274     // Special handling for extended integers.
03275     if (RVVT != DestVT) {
03276       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
03277         return false;
03278 
03279       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
03280         return false;
03281 
03282       bool IsZExt = Outs[0].Flags.isZExt();
03283       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
03284       if (SrcReg == 0)
03285         return false;
03286     }
03287 
03288     // Make the copy.
03289     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03290             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
03291 
03292     // Add register to return instruction.
03293     RetRegs.push_back(VA.getLocReg());
03294   }
03295 
03296   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03297                                     TII.get(AArch64::RET_ReallyLR));
03298   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
03299     MIB.addReg(RetRegs[i], RegState::Implicit);
03300   return true;
03301 }
03302 
03303 bool AArch64FastISel::selectTrunc(const Instruction *I) {
03304   Type *DestTy = I->getType();
03305   Value *Op = I->getOperand(0);
03306   Type *SrcTy = Op->getType();
03307 
03308   EVT SrcEVT = TLI.getValueType(SrcTy, true);
03309   EVT DestEVT = TLI.getValueType(DestTy, true);
03310   if (!SrcEVT.isSimple())
03311     return false;
03312   if (!DestEVT.isSimple())
03313     return false;
03314 
03315   MVT SrcVT = SrcEVT.getSimpleVT();
03316   MVT DestVT = DestEVT.getSimpleVT();
03317 
03318   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
03319       SrcVT != MVT::i8)
03320     return false;
03321   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
03322       DestVT != MVT::i1)
03323     return false;
03324 
03325   unsigned SrcReg = getRegForValue(Op);
03326   if (!SrcReg)
03327     return false;
03328   bool SrcIsKill = hasTrivialKill(Op);
03329 
03330   // If we're truncating from i64 to a smaller non-legal type, then generate an
03331   // AND. Otherwise, we know the high bits are undefined and a truncate only
03332   // generates a COPY. We cannot also mark the source register as the result
03333   // register, because this can incorrectly transfer the kill flag onto the
03334   // source register.
03335   unsigned ResultReg;
03336   if (SrcVT == MVT::i64) {
03337     uint64_t Mask = 0;
03338     switch (DestVT.SimpleTy) {
03339     default:
03340       // Trunc i64 to i32 is handled by the target-independent fast-isel.
03341       return false;
03342     case MVT::i1:
03343       Mask = 0x1;
03344       break;
03345     case MVT::i8:
03346       Mask = 0xff;
03347       break;
03348     case MVT::i16:
03349       Mask = 0xffff;
03350       break;
03351     }
03352     // Issue an extract_subreg to get the lower 32 bits.
03353     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
03354                                                 AArch64::sub_32);
03355     // Create the AND instruction which performs the actual truncation.
03356     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
03357     assert(ResultReg && "Unexpected AND instruction emission failure.");
03358   } else {
03359     ResultReg = createResultReg(&AArch64::GPR32RegClass);
03360     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03361             TII.get(TargetOpcode::COPY), ResultReg)
03362         .addReg(SrcReg, getKillRegState(SrcIsKill));
03363   }
03364 
03365   updateValueMap(I, ResultReg);
03366   return true;
03367 }
03368 
03369 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
03370   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
03371           DestVT == MVT::i64) &&
03372          "Unexpected value type.");
03373   // Handle i8 and i16 as i32.
03374   if (DestVT == MVT::i8 || DestVT == MVT::i16)
03375     DestVT = MVT::i32;
03376 
03377   if (IsZExt) {
03378     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
03379     assert(ResultReg && "Unexpected AND instruction emission failure.");
03380     if (DestVT == MVT::i64) {
03381       // We're zero-extending i1 to i64.  The ANDWri Wd, Ws, #1 implicitly
03382       // clears the upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
03383       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
03384       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03385               TII.get(AArch64::SUBREG_TO_REG), Reg64)
03386           .addImm(0)
03387           .addReg(ResultReg)
03388           .addImm(AArch64::sub_32);
03389       ResultReg = Reg64;
03390     }
03391     return ResultReg;
03392   } else {
03393     if (DestVT == MVT::i64) {
03394       // FIXME: Sign-extending i1 to i64 is not handled yet; bail out.
03395       return 0;
03396     }
03397     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
03398                             /*TODO:IsKill=*/false, 0, 0);
03399   }
03400 }
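
// Illustrative expansions of the helper above (register numbers assumed):
//   zext i1 -> i32:  and  w0, w0, #0x1
//   zext i1 -> i64:  and  w0, w0, #0x1, then SUBREG_TO_REG into x0
//   sext i1 -> i32:  sbfm w0, w0, #0, #0   (i.e. sbfx w0, w0, #0, #1)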
03401 
03402 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
03403                                       unsigned Op1, bool Op1IsKill) {
03404   unsigned Opc, ZReg;
03405   switch (RetVT.SimpleTy) {
03406   default: return 0;
03407   case MVT::i8:
03408   case MVT::i16:
03409   case MVT::i32:
03410     RetVT = MVT::i32;
03411     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
03412   case MVT::i64:
03413     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
03414   }
03415 
03416   const TargetRegisterClass *RC =
03417       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
03418   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
03419                           ZReg, /*IsKill=*/true);
03420 }
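
// A plain multiply is emitted as a multiply-add against the zero register,
// e.g. (illustrative): madd w0, w1, w2, wzr   ; w0 = w1 * w2 + 0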
03421 
03422 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
03423                                         unsigned Op1, bool Op1IsKill) {
03424   if (RetVT != MVT::i64)
03425     return 0;
03426 
03427   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
03428                           Op0, Op0IsKill, Op1, Op1IsKill,
03429                           AArch64::XZR, /*IsKill=*/true);
03430 }
03431 
03432 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
03433                                         unsigned Op1, bool Op1IsKill) {
03434   if (RetVT != MVT::i64)
03435     return 0;
03436 
03437   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
03438                           Op0, Op0IsKill, Op1, Op1IsKill,
03439                           AArch64::XZR, /*IsKill=*/true);
03440 }
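
// Both widening helpers above use the multiply-add-long forms with a zero
// accumulator, e.g. (illustrative):
//   smaddl x0, w1, w2, xzr   ; x0 = sext(w1) * sext(w2)
//   umaddl x0, w1, w2, xzr   ; x0 = zext(w1) * zext(w2)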
03441 
03442 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
03443                                      unsigned Op1Reg, bool Op1IsKill) {
03444   unsigned Opc = 0;
03445   bool NeedTrunc = false;
03446   uint64_t Mask = 0;
03447   switch (RetVT.SimpleTy) {
03448   default: return 0;
03449   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
03450   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
03451   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
03452   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
03453   }
03454 
03455   const TargetRegisterClass *RC =
03456       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
03457   if (NeedTrunc) {
03458     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
03459     Op1IsKill = true;
03460   }
03461   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
03462                                        Op1IsKill);
03463   if (NeedTrunc)
03464     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
03465   return ResultReg;
03466 }
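
// For the narrow i8/i16 cases the shift amount and the result are re-masked,
// e.g. an i8 variable shift becomes (illustrative):
//   and  w1, w1, #0xff    ; truncate the shift amount
//   lslv w0, w0, w1
//   and  w0, w0, #0xff    ; truncate the result back to i8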
03467 
03468 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
03469                                      bool Op0IsKill, uint64_t Shift,
03470                                      bool IsZExt) {
03471   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
03472          "Unexpected source/return type pair.");
03473   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
03474           SrcVT == MVT::i64) && "Unexpected source value type.");
03475   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
03476           RetVT == MVT::i64) && "Unexpected return value type.");
03477 
03478   bool Is64Bit = (RetVT == MVT::i64);
03479   unsigned RegSize = Is64Bit ? 64 : 32;
03480   unsigned DstBits = RetVT.getSizeInBits();
03481   unsigned SrcBits = SrcVT.getSizeInBits();
03482 
03483   // Don't deal with undefined shifts.
03484   if (Shift >= DstBits)
03485     return 0;
03486 
03487   // For immediate shifts we can fold the zero-/sign-extension into the shift.
03488   // {S|U}BFM Wd, Wn, #r, #s
03489   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
03490 
03491   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
03492   // %2 = shl i16 %1, 4
03493   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
03494   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
03495   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
03496   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
03497 
03498   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
03499   // %2 = shl i16 %1, 8
03500   // Wd<32+7-24,32-24> = Wn<7:0>
03501   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
03502   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
03503   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
03504 
03505   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
03506   // %2 = shl i16 %1, 12
03507   // Wd<32+3-20,32-20> = Wn<3:0>
03508   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
03509   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
03510   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
03511 
03512   unsigned ImmR = RegSize - Shift;
03513   // Limit the width to the length of the source type.
03514   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
03515   static const unsigned OpcTable[2][2] = {
03516     {AArch64::SBFMWri, AArch64::SBFMXri},
03517     {AArch64::UBFMWri, AArch64::UBFMXri}
03518   };
03519   unsigned Opc = OpcTable[IsZExt][Is64Bit];
03520   const TargetRegisterClass *RC =
03521       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
03522   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
03523     unsigned TmpReg = MRI.createVirtualRegister(RC);
03524     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03525             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
03526         .addImm(0)
03527         .addReg(Op0, getKillRegState(Op0IsKill))
03528         .addImm(AArch64::sub_32);
03529     Op0 = TmpReg;
03530     Op0IsKill = true;
03531   }
03532   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
03533 }
03534 
03535 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
03536                                      unsigned Op1Reg, bool Op1IsKill) {
03537   unsigned Opc = 0;
03538   bool NeedTrunc = false;
03539   uint64_t Mask = 0;
03540   switch (RetVT.SimpleTy) {
03541   default: return 0;
03542   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
03543   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
03544   case MVT::i32: Opc = AArch64::LSRVWr; break;
03545   case MVT::i64: Opc = AArch64::LSRVXr; break;
03546   }
03547 
03548   const TargetRegisterClass *RC =
03549       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
03550   if (NeedTrunc) {
03551     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
03552     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
03553     Op0IsKill = Op1IsKill = true;
03554   }
03555   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
03556                                        Op1IsKill);
03557   if (NeedTrunc)
03558     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
03559   return ResultReg;
03560 }
03561 
03562 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
03563                                      bool Op0IsKill, uint64_t Shift,
03564                                      bool IsZExt) {
03565   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
03566          "Unexpected source/return type pair.");
03567   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
03568           SrcVT == MVT::i64) && "Unexpected source value type.");
03569   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
03570           RetVT == MVT::i64) && "Unexpected return value type.");
03571 
03572   bool Is64Bit = (RetVT == MVT::i64);
03573   unsigned RegSize = Is64Bit ? 64 : 32;
03574   unsigned DstBits = RetVT.getSizeInBits();
03575   unsigned SrcBits = SrcVT.getSizeInBits();
03576 
03577   // Don't deal with undefined shifts.
03578   if (Shift >= DstBits)
03579     return 0;
03580 
03581   // For immediate shifts we can fold the zero-/sign-extension into the shift.
03582   // {S|U}BFM Wd, Wn, #r, #s
03583   // Wd<s-r:0> = Wn<s:r> when r <= s
03584 
03585   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
03586   // %2 = lshr i16 %1, 4
03587   // Wd<7-4:0> = Wn<7:4>
03588   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
03589   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
03590   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
03591 
03592   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
03593   // %2 = lshr i16 %1, 8
03594   // Wd<7-7,0> = Wn<7:7>
03595   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
03596   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
03597   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
03598 
03599   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
03600   // %2 = lshr i16 %1, 12
03601   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
03602   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
03603   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
03604   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
03605 
03606   if (Shift >= SrcBits && IsZExt)
03607     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
03608 
03609   // It is not possible to fold a sign-extend into the LShr instruction, so in
03610   // this case emit the sign-extend explicitly first.
03611   if (!IsZExt) {
03612     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
03613     if (!Op0)
03614       return 0;
03615     Op0IsKill = true;
03616     SrcVT = RetVT;
03617     SrcBits = SrcVT.getSizeInBits();
03618     IsZExt = true;
03619   }
03620 
03621   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
03622   unsigned ImmS = SrcBits - 1;
03623   static const unsigned OpcTable[2][2] = {
03624     {AArch64::SBFMWri, AArch64::SBFMXri},
03625     {AArch64::UBFMWri, AArch64::UBFMXri}
03626   };
03627   unsigned Opc = OpcTable[IsZExt][Is64Bit];
03628   const TargetRegisterClass *RC =
03629       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
03630   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
03631     unsigned TmpReg = MRI.createVirtualRegister(RC);
03632     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03633             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
03634         .addImm(0)
03635         .addReg(Op0, getKillRegState(Op0IsKill))
03636         .addImm(AArch64::sub_32);
03637     Op0 = TmpReg;
03638     Op0IsKill = true;
03639   }
03640   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
03641 }
03642 
03643 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
03644                                      unsigned Op1Reg, bool Op1IsKill) {
03645   unsigned Opc = 0;
03646   bool NeedTrunc = false;
03647   uint64_t Mask = 0;
03648   switch (RetVT.SimpleTy) {
03649   default: return 0;
03650   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
03651   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
03652   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
03653   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
03654   }
03655 
03656   const TargetRegisterClass *RC =
03657       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
03658   if (NeedTrunc) {
03659     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
03660     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
03661     Op0IsKill = Op1IsKill = true;
03662   }
03663   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
03664                                        Op1IsKill);
03665   if (NeedTrunc)
03666     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
03667   return ResultReg;
03668 }
03669 
03670 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
03671                                      bool Op0IsKill, uint64_t Shift,
03672                                      bool IsZExt) {
03673   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
03674          "Unexpected source/return type pair.");
03675   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
03676           SrcVT == MVT::i64) && "Unexpected source value type.");
03677   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
03678           RetVT == MVT::i64) && "Unexpected return value type.");
03679 
03680   bool Is64Bit = (RetVT == MVT::i64);
03681   unsigned RegSize = Is64Bit ? 64 : 32;
03682   unsigned DstBits = RetVT.getSizeInBits();
03683   unsigned SrcBits = SrcVT.getSizeInBits();
03684 
03685   // Don't deal with undefined shifts.
03686   if (Shift >= DstBits)
03687     return 0;
03688 
03689   // For immediate shifts we can fold the zero-/sign-extension into the shift.
03690   // {S|U}BFM Wd, Wn, #r, #s
03691   // Wd<s-r:0> = Wn<s:r> when r <= s
03692 
03693   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
03694   // %2 = ashr i16 %1, 4
03695   // Wd<7-4:0> = Wn<7:4>
03696   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
03697   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
03698   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
03699 
03700   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
03701   // %2 = ashr i16 %1, 8
03702   // Wd<7-7,0> = Wn<7:7>
03703   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
03704   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
03705   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
03706 
03707   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
03708   // %2 = ashr i16 %1, 12
03709   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
03710   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
03711   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
03712   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
03713 
03714   if (Shift >= SrcBits && IsZExt)
03715     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
03716 
03717   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
03718   unsigned ImmS = SrcBits - 1;
03719   static const unsigned OpcTable[2][2] = {
03720     {AArch64::SBFMWri, AArch64::SBFMXri},
03721     {AArch64::UBFMWri, AArch64::UBFMXri}
03722   };
03723   unsigned Opc = OpcTable[IsZExt][Is64Bit];
03724   const TargetRegisterClass *RC =
03725       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
03726   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
03727     unsigned TmpReg = MRI.createVirtualRegister(RC);
03728     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03729             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
03730         .addImm(0)
03731         .addReg(Op0, getKillRegState(Op0IsKill))
03732         .addImm(AArch64::sub_32);
03733     Op0 = TmpReg;
03734     Op0IsKill = true;
03735   }
03736   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
03737 }
03738 
03739 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
03740                                      bool IsZExt) {
03741   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
03742 
03743   // FastISel does not have plumbing to deal with extensions where the SrcVT or
03744   // DestVT are odd things, so test to make sure that they are both types we can
03745   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
03746   // bail out to SelectionDAG.
03747   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
03748        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
03749       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
03750        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
03751     return 0;
03752 
03753   unsigned Opc;
03754   unsigned Imm = 0;
03755 
03756   switch (SrcVT.SimpleTy) {
03757   default:
03758     return 0;
03759   case MVT::i1:
03760     return emiti1Ext(SrcReg, DestVT, IsZExt);
03761   case MVT::i8:
03762     if (DestVT == MVT::i64)
03763       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
03764     else
03765       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
03766     Imm = 7;
03767     break;
03768   case MVT::i16:
03769     if (DestVT == MVT::i64)
03770       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
03771     else
03772       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
03773     Imm = 15;
03774     break;
03775   case MVT::i32:
03776     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
03777     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
03778     Imm = 31;
03779     break;
03780   }
03781 
03782   // Handle i8 and i16 as i32.
03783   if (DestVT == MVT::i8 || DestVT == MVT::i16)
03784     DestVT = MVT::i32;
03785   else if (DestVT == MVT::i64) {
03786     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
03787     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03788             TII.get(AArch64::SUBREG_TO_REG), Src64)
03789         .addImm(0)
03790         .addReg(SrcReg)
03791         .addImm(AArch64::sub_32);
03792     SrcReg = Src64;
03793   }
03794 
03795   const TargetRegisterClass *RC =
03796       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
03797   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
03798 }
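
// With ImmR = 0 the bitfield moves above act as plain extends, e.g.
// (illustrative):
//   zext i8  -> i32:  ubfm w0, w0, #0, #7    (uxtb w0, w0)
//   sext i16 -> i64:  sbfm x0, x0, #0, #15   (sxth x0, w0)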
03799 
03800 bool AArch64FastISel::selectIntExt(const Instruction *I) {
03801   // On AArch64, in general, integer casts don't involve legal types; this code
03802   // handles promotable integers.  The high bits for a type smaller than
03803   // the register size are assumed to be undefined.
03804   Type *DestTy = I->getType();
03805   Value *Src = I->getOperand(0);
03806   Type *SrcTy = Src->getType();
03807 
03808   unsigned SrcReg = getRegForValue(Src);
03809   if (!SrcReg)
03810     return false;
03811 
03812   EVT SrcEVT = TLI.getValueType(SrcTy, true);
03813   EVT DestEVT = TLI.getValueType(DestTy, true);
03814   if (!SrcEVT.isSimple())
03815     return false;
03816   if (!DestEVT.isSimple())
03817     return false;
03818 
03819   MVT SrcVT = SrcEVT.getSimpleVT();
03820   MVT DestVT = DestEVT.getSimpleVT();
03821   unsigned ResultReg = 0;
03822 
03823   bool IsZExt = isa<ZExtInst>(I);
03824   // Check if it is an argument and if it is already zero/sign-extended.
03825   if (const auto *Arg = dyn_cast<Argument>(Src)) {
03826     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
03827       if (DestVT == MVT::i64) {
03828         ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
03829         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03830                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
03831           .addImm(0)
03832           .addReg(SrcReg)
03833           .addImm(AArch64::sub_32);
03834       } else
03835         ResultReg = SrcReg;
03836     }
03837   }
03838 
03839   if (!ResultReg)
03840     ResultReg = emitIntExt(SrcVT, SrcReg, DestVT, IsZExt);
03841 
03842   if (!ResultReg)
03843     return false;
03844 
03845   updateValueMap(I, ResultReg);
03846   return true;
03847 }
03848 
03849 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
03850   EVT DestEVT = TLI.getValueType(I->getType(), true);
03851   if (!DestEVT.isSimple())
03852     return false;
03853 
03854   MVT DestVT = DestEVT.getSimpleVT();
03855   if (DestVT != MVT::i64 && DestVT != MVT::i32)
03856     return false;
03857 
03858   unsigned DivOpc;
03859   bool Is64bit = (DestVT == MVT::i64);
03860   switch (ISDOpcode) {
03861   default:
03862     return false;
03863   case ISD::SREM:
03864     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
03865     break;
03866   case ISD::UREM:
03867     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
03868     break;
03869   }
03870   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
03871   unsigned Src0Reg = getRegForValue(I->getOperand(0));
03872   if (!Src0Reg)
03873     return false;
03874   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
03875 
03876   unsigned Src1Reg = getRegForValue(I->getOperand(1));
03877   if (!Src1Reg)
03878     return false;
03879   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
03880 
03881   const TargetRegisterClass *RC =
03882       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
03883   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
03884                                      Src1Reg, /*IsKill=*/false);
03885   assert(QuotReg && "Unexpected DIV instruction emission failure.");
03886   // The remainder is computed as numerator - (quotient * denominator) using the
03887   // MSUB instruction.
03888   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
03889                                         Src1Reg, Src1IsKill, Src0Reg,
03890                                         Src0IsKill);
03891   updateValueMap(I, ResultReg);
03892   return true;
03893 }
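
// The combined sequence for, e.g., a 32-bit srem is (illustrative):
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0    ; w0 = w0 - (w8 * w1)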
03894 
03895 bool AArch64FastISel::selectMul(const Instruction *I) {
03896   MVT VT;
03897   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
03898     return false;
03899 
03900   if (VT.isVector())
03901     return selectBinaryOp(I, ISD::MUL);
03902 
03903   const Value *Src0 = I->getOperand(0);
03904   const Value *Src1 = I->getOperand(1);
03905   if (const auto *C = dyn_cast<ConstantInt>(Src0))
03906     if (C->getValue().isPowerOf2())
03907       std::swap(Src0, Src1);
03908 
03909   // Try to simplify to a shift instruction.
03910   if (const auto *C = dyn_cast<ConstantInt>(Src1))
03911     if (C->getValue().isPowerOf2()) {
03912       uint64_t ShiftVal = C->getValue().logBase2();
03913       MVT SrcVT = VT;
03914       bool IsZExt = true;
03915       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
03916         MVT TmpVT;
03917         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
03918           SrcVT = TmpVT;
03919           IsZExt = true;
03920           Src0 = ZExt->getOperand(0);
03921         }
03922       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
03923         MVT TmpVT;
03924         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
03925           SrcVT = TmpVT;
03926           IsZExt = false;
03927           Src0 = SExt->getOperand(0);
03928         }
03929       }
03930 
03931       unsigned Src0Reg = getRegForValue(Src0);
03932       if (!Src0Reg)
03933         return false;
03934       bool Src0IsKill = hasTrivialKill(Src0);
03935 
03936       unsigned ResultReg =
03937           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
03938 
03939       if (ResultReg) {
03940         updateValueMap(I, ResultReg);
03941         return true;
03942       }
03943     }
03944 
03945   unsigned Src0Reg = getRegForValue(I->getOperand(0));
03946   if (!Src0Reg)
03947     return false;
03948   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
03949 
03950   unsigned Src1Reg = getRegForValue(I->getOperand(1));
03951   if (!Src1Reg)
03952     return false;
03953   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
03954 
03955   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
03956 
03957   if (!ResultReg)
03958     return false;
03959 
03960   updateValueMap(I, ResultReg);
03961   return true;
03962 }
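
// For example (illustrative), 'mul i32 %x, 8' where %x is a zero-extended i8
// folds the extension into the shift via emitLSL_ri:
//   ubfiz w0, w0, #3, #8   ; (zext i8) << 3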
03963 
03964 bool AArch64FastISel::selectShift(const Instruction *I) {
03965   MVT RetVT;
03966   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
03967     return false;
03968 
03969   if (RetVT.isVector())
03970     return selectOperator(I, I->getOpcode());
03971 
03972   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
03973     unsigned ResultReg = 0;
03974     uint64_t ShiftVal = C->getZExtValue();
03975     MVT SrcVT = RetVT;
03976     bool IsZExt = (I->getOpcode() != Instruction::AShr);
03977     const Value *Op0 = I->getOperand(0);
03978     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
03979       MVT TmpVT;
03980       if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
03981         SrcVT = TmpVT;
03982         IsZExt = true;
03983         Op0 = ZExt->getOperand(0);
03984       }
03985     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
03986       MVT TmpVT;
03987       if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
03988         SrcVT = TmpVT;
03989         IsZExt = false;
03990         Op0 = SExt->getOperand(0);
03991       }
03992     }
03993 
03994     unsigned Op0Reg = getRegForValue(Op0);
03995     if (!Op0Reg)
03996       return false;
03997     bool Op0IsKill = hasTrivialKill(Op0);
03998 
03999     switch (I->getOpcode()) {
04000     default: llvm_unreachable("Unexpected instruction.");
04001     case Instruction::Shl:
04002       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
04003       break;
04004     case Instruction::AShr:
04005       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
04006       break;
04007     case Instruction::LShr:
04008       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
04009       break;
04010     }
04011     if (!ResultReg)
04012       return false;
04013 
04014     updateValueMap(I, ResultReg);
04015     return true;
04016   }
04017 
04018   unsigned Op0Reg = getRegForValue(I->getOperand(0));
04019   if (!Op0Reg)
04020     return false;
04021   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
04022 
04023   unsigned Op1Reg = getRegForValue(I->getOperand(1));
04024   if (!Op1Reg)
04025     return false;
04026   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
04027 
04028   unsigned ResultReg = 0;
04029   switch (I->getOpcode()) {
04030   default: llvm_unreachable("Unexpected instruction.");
04031   case Instruction::Shl:
04032     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
04033     break;
04034   case Instruction::AShr:
04035     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
04036     break;
04037   case Instruction::LShr:
04038     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
04039     break;
04040   }
04041 
04042   if (!ResultReg)
04043     return false;
04044 
04045   updateValueMap(I, ResultReg);
04046   return true;
04047 }
04048 
04049 bool AArch64FastISel::selectBitCast(const Instruction *I) {
04050   MVT RetVT, SrcVT;
04051 
04052   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
04053     return false;
04054   if (!isTypeLegal(I->getType(), RetVT))
04055     return false;
04056 
04057   unsigned Opc;
04058   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
04059     Opc = AArch64::FMOVWSr;
04060   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
04061     Opc = AArch64::FMOVXDr;
04062   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
04063     Opc = AArch64::FMOVSWr;
04064   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
04065     Opc = AArch64::FMOVDXr;
04066   else
04067     return false;
04068 
04069   const TargetRegisterClass *RC = nullptr;
04070   switch (RetVT.SimpleTy) {
04071   default: llvm_unreachable("Unexpected value type.");
04072   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
04073   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
04074   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
04075   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
04076   }
04077   unsigned Op0Reg = getRegForValue(I->getOperand(0));
04078   if (!Op0Reg)
04079     return false;
04080   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
04081   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
04082 
04083   if (!ResultReg)
04084     return false;
04085 
04086   updateValueMap(I, ResultReg);
04087   return true;
04088 }
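
// Each legal pair maps to a single register move, e.g. (illustrative):
//   bitcast i32 -> f32:  fmov s0, w0
//   bitcast f64 -> i64:  fmov x0, d0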
04089 
04090 bool AArch64FastISel::selectFRem(const Instruction *I) {
04091   MVT RetVT;
04092   if (!isTypeLegal(I->getType(), RetVT))
04093     return false;
04094 
04095   RTLIB::Libcall LC;
04096   switch (RetVT.SimpleTy) {
04097   default:
04098     return false;
04099   case MVT::f32:
04100     LC = RTLIB::REM_F32;
04101     break;
04102   case MVT::f64:
04103     LC = RTLIB::REM_F64;
04104     break;
04105   }
04106 
04107   ArgListTy Args;
04108   Args.reserve(I->getNumOperands());
04109 
04110   // Populate the argument list.
04111   for (auto &Arg : I->operands()) {
04112     ArgListEntry Entry;
04113     Entry.Val = Arg;
04114     Entry.Ty = Arg->getType();
04115     Args.push_back(Entry);
04116   }
04117 
04118   CallLoweringInfo CLI;
04119   CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
04120                 TLI.getLibcallName(LC), std::move(Args));
04121   if (!lowerCallTo(CLI))
04122     return false;
04123   updateValueMap(I, CLI.ResultReg);
04124   return true;
04125 }
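
// Assuming the standard libcall names, 'frem float' becomes a call to fmodf
// and 'frem double' a call to fmod, e.g. (illustrative):
//   %r = frem float %a, %b   ==>   bl fmodf   ; result returned in s0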
04126 
04127 bool AArch64FastISel::selectSDiv(const Instruction *I) {
04128   MVT VT;
04129   if (!isTypeLegal(I->getType(), VT))
04130     return false;
04131 
04132   if (!isa<ConstantInt>(I->getOperand(1)))
04133     return selectBinaryOp(I, ISD::SDIV);
04134 
04135   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
04136   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
04137       !(C.isPowerOf2() || (-C).isPowerOf2()))
04138     return selectBinaryOp(I, ISD::SDIV);
04139 
04140   unsigned Lg2 = C.countTrailingZeros();
04141   unsigned Src0Reg = getRegForValue(I->getOperand(0));
04142   if (!Src0Reg)
04143     return false;
04144   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
04145 
04146   if (cast<BinaryOperator>(I)->isExact()) {
04147     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
04148     if (!ResultReg)
04149       return false;
04150     updateValueMap(I, ResultReg);
04151     return true;
04152   }
04153 
04154   uint64_t Pow2MinusOne = (UINT64_C(1) << Lg2) - 1;
04155   unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
04156                                   /*IsKill=*/false, Pow2MinusOne);
04157   if (!AddReg)
04158     return false;
04159 
04160   // (Src0 < 0) ? Pow2 - 1 : 0;
04161   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
04162     return false;
04163 
04164   unsigned SelectOpc;
04165   const TargetRegisterClass *RC;
04166   if (VT == MVT::i64) {
04167     SelectOpc = AArch64::CSELXr;
04168     RC = &AArch64::GPR64RegClass;
04169   } else {
04170     SelectOpc = AArch64::CSELWr;
04171     RC = &AArch64::GPR32RegClass;
04172   }
04173   unsigned SelectReg =
04174       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
04175                        Src0IsKill, AArch64CC::LT);
04176   if (!SelectReg)
04177     return false;
04178 
04179   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
04180   // negate the result.
04181   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
04182   unsigned ResultReg;
04183   if (C.isNegative())
04184     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
04185                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
04186   else
04187     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
04188 
04189   if (!ResultReg)
04190     return false;
04191 
04192   updateValueMap(I, ResultReg);
04193   return true;
04194 }
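
// The non-exact power-of-two path above yields, for 'sdiv i32 %x, 4'
// (illustrative):
//   add  w8, w0, #3        ; x + (Pow2 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt    ; use the biased value only for negative x
//   asr  w0, w8, #2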
04195 
04196 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
04197   switch (I->getOpcode()) {
04198   default:
04199     break;
04200   case Instruction::Add:
04201   case Instruction::Sub:
04202     return selectAddSub(I);
04203   case Instruction::Mul:
04204     return selectMul(I);
04205   case Instruction::SDiv:
04206     return selectSDiv(I);
04207   case Instruction::SRem:
04208     if (!selectBinaryOp(I, ISD::SREM))
04209       return selectRem(I, ISD::SREM);
04210     return true;
04211   case Instruction::URem:
04212     if (!selectBinaryOp(I, ISD::UREM))
04213       return selectRem(I, ISD::UREM);
04214     return true;
04215   case Instruction::Shl:
04216   case Instruction::LShr:
04217   case Instruction::AShr:
04218     return selectShift(I);
04219   case Instruction::And:
04220   case Instruction::Or:
04221   case Instruction::Xor:
04222     return selectLogicalOp(I);
04223   case Instruction::Br:
04224     return selectBranch(I);
04225   case Instruction::IndirectBr:
04226     return selectIndirectBr(I);
04227   case Instruction::BitCast:
04228     if (!FastISel::selectBitCast(I))
04229       return selectBitCast(I);
04230     return true;
04231   case Instruction::FPToSI:
04232     if (!selectCast(I, ISD::FP_TO_SINT))
04233       return selectFPToInt(I, /*Signed=*/true);
04234     return true;
04235   case Instruction::FPToUI:
04236     return selectFPToInt(I, /*Signed=*/false);
04237   case Instruction::ZExt:
04238     if (!selectCast(I, ISD::ZERO_EXTEND))
04239       return selectIntExt(I);
04240     return true;
04241   case Instruction::SExt:
04242     if (!selectCast(I, ISD::SIGN_EXTEND))
04243       return selectIntExt(I);
04244     return true;
04245   case Instruction::Trunc:
04246     if (!selectCast(I, ISD::TRUNCATE))
04247       return selectTrunc(I);
04248     return true;
04249   case Instruction::FPExt:
04250     return selectFPExt(I);
04251   case Instruction::FPTrunc:
04252     return selectFPTrunc(I);
04253   case Instruction::SIToFP:
04254     if (!selectCast(I, ISD::SINT_TO_FP))
04255       return selectIntToFP(I, /*Signed=*/true);
04256     return true;
04257   case Instruction::UIToFP:
04258     return selectIntToFP(I, /*Signed=*/false);
04259   case Instruction::Load:
04260     return selectLoad(I);
04261   case Instruction::Store:
04262     return selectStore(I);
04263   case Instruction::FCmp:
04264   case Instruction::ICmp:
04265     return selectCmp(I);
04266   case Instruction::Select:
04267     return selectSelect(I);
04268   case Instruction::Ret:
04269     return selectRet(I);
04270   case Instruction::FRem:
04271     return selectFRem(I);
04272   }
04273 
04274   // Fall back to target-independent instruction selection.
04275   return selectOperator(I, I->getOpcode());
04276   // Silence warnings.
04277   (void)&CC_AArch64_DarwinPCS_VarArg;
04278 }
04279 
04280 namespace llvm {
04281 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
04282                                         const TargetLibraryInfo *LibInfo) {
04283   return new AArch64FastISel(FuncInfo, LibInfo);
04284 }
04285 }