LLVM API Documentation

X86FastISel.cpp
00001 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines the X86-specific support for the FastISel class. Much
00011 // of the target-specific code is generated by tablegen in the file
00012 // X86GenFastISel.inc, which is #included here.
00013 //
00014 //===----------------------------------------------------------------------===//
00015 
00016 #include "X86.h"
00017 #include "X86CallingConv.h"
00018 #include "X86InstrBuilder.h"
00019 #include "X86InstrInfo.h"
00020 #include "X86MachineFunctionInfo.h"
00021 #include "X86RegisterInfo.h"
00022 #include "X86Subtarget.h"
00023 #include "X86TargetMachine.h"
00024 #include "llvm/Analysis/BranchProbabilityInfo.h"
00025 #include "llvm/CodeGen/Analysis.h"
00026 #include "llvm/CodeGen/FastISel.h"
00027 #include "llvm/CodeGen/FunctionLoweringInfo.h"
00028 #include "llvm/CodeGen/MachineConstantPool.h"
00029 #include "llvm/CodeGen/MachineFrameInfo.h"
00030 #include "llvm/CodeGen/MachineRegisterInfo.h"
00031 #include "llvm/IR/CallSite.h"
00032 #include "llvm/IR/CallingConv.h"
00033 #include "llvm/IR/DerivedTypes.h"
00034 #include "llvm/IR/GetElementPtrTypeIterator.h"
00035 #include "llvm/IR/GlobalAlias.h"
00036 #include "llvm/IR/GlobalVariable.h"
00037 #include "llvm/IR/Instructions.h"
00038 #include "llvm/IR/IntrinsicInst.h"
00039 #include "llvm/IR/Operator.h"
00040 #include "llvm/Support/ErrorHandling.h"
00041 #include "llvm/Target/TargetOptions.h"
00042 using namespace llvm;
00043 
00044 namespace {
00045 
00046 class X86FastISel final : public FastISel {
00047   /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
00048   /// make the right decision when generating code for different targets.
00049   const X86Subtarget *Subtarget;
00050 
00051   /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
00052   /// floating point ops.
00053   /// When SSE is available, use it for f32 operations.
00054   /// When SSE2 is available, use it for f64 operations.
00055   bool X86ScalarSSEf64;
00056   bool X86ScalarSSEf32;
00057 
00058 public:
00059   explicit X86FastISel(FunctionLoweringInfo &funcInfo,
00060                        const TargetLibraryInfo *libInfo)
00061     : FastISel(funcInfo, libInfo) {
00062     Subtarget = &TM.getSubtarget<X86Subtarget>();
00063     X86ScalarSSEf64 = Subtarget->hasSSE2();
00064     X86ScalarSSEf32 = Subtarget->hasSSE1();
00065   }
00066 
00067   bool fastSelectInstruction(const Instruction *I) override;
00068 
00069   /// \brief The specified machine instr operand is a vreg, and that
00070   /// vreg is being provided by the specified load instruction.  If possible,
00071   /// try to fold the load as an operand to the instruction, returning true on
00072   /// success.
00073   bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
00074                            const LoadInst *LI) override;
00075 
00076   bool fastLowerArguments() override;
00077   bool fastLowerCall(CallLoweringInfo &CLI) override;
00078   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
00079 
00080 #include "X86GenFastISel.inc"
00081 
00082 private:
00083   bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
00084 
00085   bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
00086                        unsigned &ResultReg);
00087 
00088   bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
00089                         MachineMemOperand *MMO = nullptr, bool Aligned = false);
00090   bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
00091                         const X86AddressMode &AM,
00092                         MachineMemOperand *MMO = nullptr, bool Aligned = false);
00093 
00094   bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
00095                          unsigned &ResultReg);
00096 
00097   bool X86SelectAddress(const Value *V, X86AddressMode &AM);
00098   bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
00099 
00100   bool X86SelectLoad(const Instruction *I);
00101 
00102   bool X86SelectStore(const Instruction *I);
00103 
00104   bool X86SelectRet(const Instruction *I);
00105 
00106   bool X86SelectCmp(const Instruction *I);
00107 
00108   bool X86SelectZExt(const Instruction *I);
00109 
00110   bool X86SelectBranch(const Instruction *I);
00111 
00112   bool X86SelectShift(const Instruction *I);
00113 
00114   bool X86SelectDivRem(const Instruction *I);
00115 
00116   bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
00117 
00118   bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
00119 
00120   bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
00121 
00122   bool X86SelectSelect(const Instruction *I);
00123 
00124   bool X86SelectTrunc(const Instruction *I);
00125 
00126   bool X86SelectFPExt(const Instruction *I);
00127   bool X86SelectFPTrunc(const Instruction *I);
00128 
00129   const X86InstrInfo *getInstrInfo() const {
00130     return getTargetMachine()->getSubtargetImpl()->getInstrInfo();
00131   }
00132   const X86TargetMachine *getTargetMachine() const {
00133     return static_cast<const X86TargetMachine *>(&TM);
00134   }
00135 
00136   bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
00137 
00138   unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
00139   unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
00140   unsigned X86MaterializeGV(const GlobalValue *GV,MVT VT);
00141   unsigned fastMaterializeConstant(const Constant *C) override;
00142 
00143   unsigned fastMaterializeAlloca(const AllocaInst *C) override;
00144 
00145   unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
00146 
00147   /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
00148   /// computed in an SSE register, not on the X87 floating point stack.
00149   bool isScalarFPTypeInSSEReg(EVT VT) const {
00150     return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
00151       (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
00152   }
00153 
00154   bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
00155 
00156   bool IsMemcpySmall(uint64_t Len);
00157 
00158   bool TryEmitSmallMemcpy(X86AddressMode DestAM,
00159                           X86AddressMode SrcAM, uint64_t Len);
00160 
00161   bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
00162                             const Value *Cond);
00163 };
00164 
00165 } // end anonymous namespace.
00166 
00167 static std::pair<X86::CondCode, bool>
00168 getX86ConditionCode(CmpInst::Predicate Predicate) {
00169   X86::CondCode CC = X86::COND_INVALID;
00170   bool NeedSwap = false;
00171   switch (Predicate) {
00172   default: break;
00173   // Floating-point Predicates
00174   case CmpInst::FCMP_UEQ: CC = X86::COND_E;       break;
00175   case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through
00176   case CmpInst::FCMP_OGT: CC = X86::COND_A;       break;
00177   case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through
00178   case CmpInst::FCMP_OGE: CC = X86::COND_AE;      break;
00179   case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through
00180   case CmpInst::FCMP_ULT: CC = X86::COND_B;       break;
00181   case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through
00182   case CmpInst::FCMP_ULE: CC = X86::COND_BE;      break;
00183   case CmpInst::FCMP_ONE: CC = X86::COND_NE;      break;
00184   case CmpInst::FCMP_UNO: CC = X86::COND_P;       break;
00185   case CmpInst::FCMP_ORD: CC = X86::COND_NP;      break;
00186   case CmpInst::FCMP_OEQ: // fall-through
00187   case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
00188 
00189   // Integer Predicates
00190   case CmpInst::ICMP_EQ:  CC = X86::COND_E;       break;
00191   case CmpInst::ICMP_NE:  CC = X86::COND_NE;      break;
00192   case CmpInst::ICMP_UGT: CC = X86::COND_A;       break;
00193   case CmpInst::ICMP_UGE: CC = X86::COND_AE;      break;
00194   case CmpInst::ICMP_ULT: CC = X86::COND_B;       break;
00195   case CmpInst::ICMP_ULE: CC = X86::COND_BE;      break;
00196   case CmpInst::ICMP_SGT: CC = X86::COND_G;       break;
00197   case CmpInst::ICMP_SGE: CC = X86::COND_GE;      break;
00198   case CmpInst::ICMP_SLT: CC = X86::COND_L;       break;
00199   case CmpInst::ICMP_SLE: CC = X86::COND_LE;      break;
00200   }
00201 
00202   return std::make_pair(CC, NeedSwap);
00203 }
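// Example (sketch): for "icmp ugt i32 %a, %b" this returns
// {X86::COND_A, /*NeedSwap=*/false}, so the caller can emit CMP32rr followed
// by SETA/JA. For "fcmp olt double %x, %y" it returns
// {X86::COND_A, /*NeedSwap=*/true}: the caller swaps the operands and uses
// the same unsigned "above" condition after a UCOMISD.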
00204 
00205 static std::pair<unsigned, bool>
00206 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
00207   unsigned CC;
00208   bool NeedSwap = false;
00209 
00210   // SSE Condition code mapping:
00211   //  0 - EQ
00212   //  1 - LT
00213   //  2 - LE
00214   //  3 - UNORD
00215   //  4 - NEQ
00216   //  5 - NLT
00217   //  6 - NLE
00218   //  7 - ORD
00219   switch (Predicate) {
00220   default: llvm_unreachable("Unexpected predicate");
00221   case CmpInst::FCMP_OEQ: CC = 0;          break;
00222   case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
00223   case CmpInst::FCMP_OLT: CC = 1;          break;
00224   case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
00225   case CmpInst::FCMP_OLE: CC = 2;          break;
00226   case CmpInst::FCMP_UNO: CC = 3;          break;
00227   case CmpInst::FCMP_UNE: CC = 4;          break;
00228   case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
00229   case CmpInst::FCMP_UGE: CC = 5;          break;
00230   case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
00231   case CmpInst::FCMP_UGT: CC = 6;          break;
00232   case CmpInst::FCMP_ORD: CC = 7;          break;
00233   case CmpInst::FCMP_UEQ:
00234   case CmpInst::FCMP_ONE: CC = 8;          break;
00235   }
00236 
00237   return std::make_pair(CC, NeedSwap);
00238 }
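// Example (sketch): "fcmp ole float %a, %b" maps to immediate 2 (LE) with no
// operand swap, so a CMPSS/CMPPS with that immediate evaluates the predicate
// directly; "fcmp oge" reuses immediate 2 but with NeedSwap = true, i.e. the
// operands are exchanged first. The value 8 returned for FCMP_UEQ/FCMP_ONE
// falls outside the 0-7 range listed above and is left for the caller to
// handle.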
00239 
00240 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
00241 /// into the user. The condition code will only be updated on success.
00242 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
00243                                        const Value *Cond) {
00244   if (!isa<ExtractValueInst>(Cond))
00245     return false;
00246 
00247   const auto *EV = cast<ExtractValueInst>(Cond);
00248   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
00249     return false;
00250 
00251   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
00252   MVT RetVT;
00253   const Function *Callee = II->getCalledFunction();
00254   Type *RetTy =
00255     cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
00256   if (!isTypeLegal(RetTy, RetVT))
00257     return false;
00258 
00259   if (RetVT != MVT::i32 && RetVT != MVT::i64)
00260     return false;
00261 
00262   X86::CondCode TmpCC;
00263   switch (II->getIntrinsicID()) {
00264   default: return false;
00265   case Intrinsic::sadd_with_overflow:
00266   case Intrinsic::ssub_with_overflow:
00267   case Intrinsic::smul_with_overflow:
00268   case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
00269   case Intrinsic::uadd_with_overflow:
00270   case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
00271   }
00272 
00273   // Check if both instructions are in the same basic block.
00274   if (II->getParent() != I->getParent())
00275     return false;
00276 
00277   // Make sure nothing is in the way
00278   BasicBlock::const_iterator Start = I;
00279   BasicBlock::const_iterator End = II;
00280   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
00281     // We only expect extractvalue instructions between the intrinsic and the
00282     // instruction to be selected.
00283     if (!isa<ExtractValueInst>(Itr))
00284       return false;
00285 
00286     // Check that the extractvalue operand comes from the intrinsic.
00287     const auto *EVI = cast<ExtractValueInst>(Itr);
00288     if (EVI->getAggregateOperand() != II)
00289       return false;
00290   }
00291 
00292   CC = TmpCC;
00293   return true;
00294 }
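// Example of the kind of pattern this folds (illustrative IR):
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum = extractvalue { i32, i1 } %res, 0
//   %ovf = extractvalue { i32, i1 } %res, 1
//   br i1 %ovf, label %overflow, label %cont
// If the intrinsic and the branch live in the same basic block and only
// extractvalue instructions sit between them, CC is set (here to
// X86::COND_O) so the user can test the flags produced by the addition
// directly instead of materializing the i1 result.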
00295 
00296 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
00297   EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
00298   if (evt == MVT::Other || !evt.isSimple())
00299     // Unhandled type. Halt "fast" selection and bail.
00300     return false;
00301 
00302   VT = evt.getSimpleVT();
00303   // For now, require SSE/SSE2 for performing floating-point operations,
00304   // since x87 requires additional work.
00305   if (VT == MVT::f64 && !X86ScalarSSEf64)
00306     return false;
00307   if (VT == MVT::f32 && !X86ScalarSSEf32)
00308     return false;
00309   // Similarly, no f80 support yet.
00310   if (VT == MVT::f80)
00311     return false;
00312   // We only handle legal types. For example, on x86-32 the instruction
00313   // selector contains all of the 64-bit instructions from x86-64,
00314   // under the assumption that i64 won't be used if the target doesn't
00315   // support it.
00316   return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
00317 }
00318 
00319 #include "X86GenCallingConv.inc"
00320 
00321 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
00322 /// The address is described by the pre-computed address mode AM.
00323 /// Return true and the result register by reference if it is possible.
00324 bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
00325                                   MachineMemOperand *MMO, unsigned &ResultReg) {
00326   // Get opcode and regclass of the output for the given load instruction.
00327   unsigned Opc = 0;
00328   const TargetRegisterClass *RC = nullptr;
00329   switch (VT.getSimpleVT().SimpleTy) {
00330   default: return false;
00331   case MVT::i1:
00332   case MVT::i8:
00333     Opc = X86::MOV8rm;
00334     RC  = &X86::GR8RegClass;
00335     break;
00336   case MVT::i16:
00337     Opc = X86::MOV16rm;
00338     RC  = &X86::GR16RegClass;
00339     break;
00340   case MVT::i32:
00341     Opc = X86::MOV32rm;
00342     RC  = &X86::GR32RegClass;
00343     break;
00344   case MVT::i64:
00345     // Must be in x86-64 mode.
00346     Opc = X86::MOV64rm;
00347     RC  = &X86::GR64RegClass;
00348     break;
00349   case MVT::f32:
00350     if (X86ScalarSSEf32) {
00351       Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
00352       RC  = &X86::FR32RegClass;
00353     } else {
00354       Opc = X86::LD_Fp32m;
00355       RC  = &X86::RFP32RegClass;
00356     }
00357     break;
00358   case MVT::f64:
00359     if (X86ScalarSSEf64) {
00360       Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
00361       RC  = &X86::FR64RegClass;
00362     } else {
00363       Opc = X86::LD_Fp64m;
00364       RC  = &X86::RFP64RegClass;
00365     }
00366     break;
00367   case MVT::f80:
00368     // No f80 support yet.
00369     return false;
00370   }
00371 
00372   ResultReg = createResultReg(RC);
00373   MachineInstrBuilder MIB =
00374     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
00375   addFullAddress(MIB, AM);
00376   if (MMO)
00377     MIB->addMemOperand(*FuncInfo.MF, MMO);
00378   return true;
00379 }
00380 
00381 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
00382 /// type VT. The address is described by the pre-computed address mode AM,
00383 /// consisting of a base register or frame index, a scaled index, a
00384 /// displacement, and possibly a GlobalValue. Return true if it is possible.
00385 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
00386                                    const X86AddressMode &AM,
00387                                    MachineMemOperand *MMO, bool Aligned) {
00388   // Get the store opcode for the given value type.
00389   unsigned Opc = 0;
00390   switch (VT.getSimpleVT().SimpleTy) {
00391   case MVT::f80: // No f80 support yet.
00392   default: return false;
00393   case MVT::i1: {
00394     // Mask out all but lowest bit.
00395     unsigned AndResult = createResultReg(&X86::GR8RegClass);
00396     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
00397             TII.get(X86::AND8ri), AndResult)
00398       .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
00399     ValReg = AndResult;
00400   }
00401   // FALLTHROUGH, handling i1 as i8.
00402   case MVT::i8:  Opc = X86::MOV8mr;  break;
00403   case MVT::i16: Opc = X86::MOV16mr; break;
00404   case MVT::i32: Opc = X86::MOV32mr; break;
00405   case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
00406   case MVT::f32:
00407     Opc = X86ScalarSSEf32 ?
00408           (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
00409     break;
00410   case MVT::f64:
00411     Opc = X86ScalarSSEf64 ?
00412           (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
00413     break;
00414   case MVT::v4f32:
00415     if (Aligned)
00416       Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
00417     else
00418       Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
00419     break;
00420   case MVT::v2f64:
00421     if (Aligned)
00422       Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
00423     else
00424       Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
00425     break;
00426   case MVT::v4i32:
00427   case MVT::v2i64:
00428   case MVT::v8i16:
00429   case MVT::v16i8:
00430     if (Aligned)
00431       Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
00432     else
00433       Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
00434     break;
00435   }
00436 
00437   MachineInstrBuilder MIB =
00438     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
00439   addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
00440   if (MMO)
00441     MIB->addMemOperand(*FuncInfo.MF, MMO);
00442 
00443   return true;
00444 }
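// Example (sketch): storing an i1 value first masks the source register with
//   AND8ri %vreg, 1
// and then stores the result with MOV8mr, so only the low bit of the byte
// that reaches memory can ever be set.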
00445 
00446 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
00447                                    const X86AddressMode &AM,
00448                                    MachineMemOperand *MMO, bool Aligned) {
00449   // Handle 'null' like i32/i64 0.
00450   if (isa<ConstantPointerNull>(Val))
00451     Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
00452 
00453   // If this is a store of a simple constant, fold the constant into the store.
00454   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
00455     unsigned Opc = 0;
00456     bool Signed = true;
00457     switch (VT.getSimpleVT().SimpleTy) {
00458     default: break;
00459     case MVT::i1:  Signed = false;     // FALLTHROUGH to handle as i8.
00460     case MVT::i8:  Opc = X86::MOV8mi;  break;
00461     case MVT::i16: Opc = X86::MOV16mi; break;
00462     case MVT::i32: Opc = X86::MOV32mi; break;
00463     case MVT::i64:
00464       // Must be a 32-bit sign extended value.
00465       if (isInt<32>(CI->getSExtValue()))
00466         Opc = X86::MOV64mi32;
00467       break;
00468     }
00469 
00470     if (Opc) {
00471       MachineInstrBuilder MIB =
00472         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
00473       addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
00474                                             : CI->getZExtValue());
00475       if (MMO)
00476         MIB->addMemOperand(*FuncInfo.MF, MMO);
00477       return true;
00478     }
00479   }
00480 
00481   unsigned ValReg = getRegForValue(Val);
00482   if (ValReg == 0)
00483     return false;
00484 
00485   bool ValKill = hasTrivialKill(Val);
00486   return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
00487 }
00488 
00489 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
00490 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
00491 /// ISD::SIGN_EXTEND).
00492 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
00493                                     unsigned Src, EVT SrcVT,
00494                                     unsigned &ResultReg) {
00495   unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
00496                            Src, /*TODO: Kill=*/false);
00497   if (RR == 0)
00498     return false;
00499 
00500   ResultReg = RR;
00501   return true;
00502 }
00503 
00504 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
00505   // Handle constant address.
00506   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
00507     // Can't handle alternate code models yet.
00508     if (TM.getCodeModel() != CodeModel::Small)
00509       return false;
00510 
00511     // Can't handle TLS yet.
00512     if (GV->isThreadLocal())
00513       return false;
00514 
00515     // RIP-relative addresses can't have additional register operands, so if
00516     // we've already folded stuff into the addressing mode, just force the
00517     // global value into its own register, which we can use as the basereg.
00518     if (!Subtarget->isPICStyleRIPRel() ||
00519         (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
00520       // Okay, we've committed to selecting this global. Set up the address.
00521       AM.GV = GV;
00522 
00523       // Allow the subtarget to classify the global.
00524       unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
00525 
00526       // If this reference is relative to the pic base, set it now.
00527       if (isGlobalRelativeToPICBase(GVFlags)) {
00528         // FIXME: How do we know Base.Reg is free??
00529         AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
00530       }
00531 
00532       // Unless the ABI requires an extra load, return a direct reference to
00533       // the global.
00534       if (!isGlobalStubReference(GVFlags)) {
00535         if (Subtarget->isPICStyleRIPRel()) {
00536           // Use rip-relative addressing if we can.  Above we verified that the
00537           // base and index registers are unused.
00538           assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
00539           AM.Base.Reg = X86::RIP;
00540         }
00541         AM.GVOpFlags = GVFlags;
00542         return true;
00543       }
00544 
00545       // Ok, we need to do a load from a stub.  If we've already loaded from
00546       // this stub, reuse the loaded pointer, otherwise emit the load now.
00547       DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
00548       unsigned LoadReg;
00549       if (I != LocalValueMap.end() && I->second != 0) {
00550         LoadReg = I->second;
00551       } else {
00552         // Issue load from stub.
00553         unsigned Opc = 0;
00554         const TargetRegisterClass *RC = nullptr;
00555         X86AddressMode StubAM;
00556         StubAM.Base.Reg = AM.Base.Reg;
00557         StubAM.GV = GV;
00558         StubAM.GVOpFlags = GVFlags;
00559 
00560         // Prepare for inserting code in the local-value area.
00561         SavePoint SaveInsertPt = enterLocalValueArea();
00562 
00563         if (TLI.getPointerTy() == MVT::i64) {
00564           Opc = X86::MOV64rm;
00565           RC  = &X86::GR64RegClass;
00566 
00567           if (Subtarget->isPICStyleRIPRel())
00568             StubAM.Base.Reg = X86::RIP;
00569         } else {
00570           Opc = X86::MOV32rm;
00571           RC  = &X86::GR32RegClass;
00572         }
00573 
00574         LoadReg = createResultReg(RC);
00575         MachineInstrBuilder LoadMI =
00576           BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
00577         addFullAddress(LoadMI, StubAM);
00578 
00579         // Ok, back to normal mode.
00580         leaveLocalValueArea(SaveInsertPt);
00581 
00582         // Prevent loading GV stub multiple times in same MBB.
00583         LocalValueMap[V] = LoadReg;
00584       }
00585 
00586       // Now construct the final address. Note that the Disp, Scale,
00587       // and Index values may already be set here.
00588       AM.Base.Reg = LoadReg;
00589       AM.GV = nullptr;
00590       return true;
00591     }
00592   }
00593 
00594   // If all else fails, try to materialize the value in a register.
00595   if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
00596     if (AM.Base.Reg == 0) {
00597       AM.Base.Reg = getRegForValue(V);
00598       return AM.Base.Reg != 0;
00599     }
00600     if (AM.IndexReg == 0) {
00601       assert(AM.Scale == 1 && "Scale with no index!");
00602       AM.IndexReg = getRegForValue(V);
00603       return AM.IndexReg != 0;
00604     }
00605   }
00606 
00607   return false;
00608 }
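// Example (sketch): with RIP-relative PIC on x86-64, a global that needs a
// stub reference (e.g. an external @g classified as GOTPCREL) is handled by
// emitting, in the local-value area,
//   movq g@GOTPCREL(%rip), %vreg
// and then using %vreg as the base register of the final address with AM.GV
// cleared; the loaded pointer is cached in LocalValueMap so the stub load is
// not repeated within the block.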
00609 
00610 /// X86SelectAddress - Attempt to fill in an address from the given value.
00611 ///
00612 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
00613   SmallVector<const Value *, 32> GEPs;
00614 redo_gep:
00615   const User *U = nullptr;
00616   unsigned Opcode = Instruction::UserOp1;
00617   if (const Instruction *I = dyn_cast<Instruction>(V)) {
00618     // Don't walk into other basic blocks; it's possible we haven't
00619     // visited them yet, so the instructions may not yet be assigned
00620     // virtual registers.
00621     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
00622         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
00623       Opcode = I->getOpcode();
00624       U = I;
00625     }
00626   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
00627     Opcode = C->getOpcode();
00628     U = C;
00629   }
00630 
00631   if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
00632     if (Ty->getAddressSpace() > 255)
00633       // Fast instruction selection doesn't support the special
00634       // address spaces.
00635       return false;
00636 
00637   switch (Opcode) {
00638   default: break;
00639   case Instruction::BitCast:
00640     // Look past bitcasts.
00641     return X86SelectAddress(U->getOperand(0), AM);
00642 
00643   case Instruction::IntToPtr:
00644     // Look past no-op inttoptrs.
00645     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
00646       return X86SelectAddress(U->getOperand(0), AM);
00647     break;
00648 
00649   case Instruction::PtrToInt:
00650     // Look past no-op ptrtoints.
00651     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
00652       return X86SelectAddress(U->getOperand(0), AM);
00653     break;
00654 
00655   case Instruction::Alloca: {
00656     // Do static allocas.
00657     const AllocaInst *A = cast<AllocaInst>(V);
00658     DenseMap<const AllocaInst*, int>::iterator SI =
00659       FuncInfo.StaticAllocaMap.find(A);
00660     if (SI != FuncInfo.StaticAllocaMap.end()) {
00661       AM.BaseType = X86AddressMode::FrameIndexBase;
00662       AM.Base.FrameIndex = SI->second;
00663       return true;
00664     }
00665     break;
00666   }
00667 
00668   case Instruction::Add: {
00669     // Adds of constants are common and easy enough.
00670     if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
00671       uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
00672       // They have to fit in the 32-bit signed displacement field though.
00673       if (isInt<32>(Disp)) {
00674         AM.Disp = (uint32_t)Disp;
00675         return X86SelectAddress(U->getOperand(0), AM);
00676       }
00677     }
00678     break;
00679   }
00680 
00681   case Instruction::GetElementPtr: {
00682     X86AddressMode SavedAM = AM;
00683 
00684     // Pattern-match simple GEPs.
00685     uint64_t Disp = (int32_t)AM.Disp;
00686     unsigned IndexReg = AM.IndexReg;
00687     unsigned Scale = AM.Scale;
00688     gep_type_iterator GTI = gep_type_begin(U);
00689     // Iterate through the indices, folding what we can. Constants can be
00690     // folded, and one dynamic index can be handled, if the scale is supported.
00691     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
00692          i != e; ++i, ++GTI) {
00693       const Value *Op = *i;
00694       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
00695         const StructLayout *SL = DL.getStructLayout(STy);
00696         Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
00697         continue;
00698       }
00699 
00700       // An array/variable index is always of the form i*S where S is the
00701       // constant scale size.  See if we can push the scale into immediates.
00702       uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
00703       for (;;) {
00704         if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
00705           // Constant-offset addressing.
00706           Disp += CI->getSExtValue() * S;
00707           break;
00708         }
00709         if (canFoldAddIntoGEP(U, Op)) {
00710           // A compatible add with a constant operand. Fold the constant.
00711           ConstantInt *CI =
00712             cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
00713           Disp += CI->getSExtValue() * S;
00714           // Iterate on the other operand.
00715           Op = cast<AddOperator>(Op)->getOperand(0);
00716           continue;
00717         }
00718         if (IndexReg == 0 &&
00719             (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
00720             (S == 1 || S == 2 || S == 4 || S == 8)) {
00721           // Scaled-index addressing.
00722           Scale = S;
00723           IndexReg = getRegForGEPIndex(Op).first;
00724           if (IndexReg == 0)
00725             return false;
00726           break;
00727         }
00728         // Unsupported.
00729         goto unsupported_gep;
00730       }
00731     }
00732 
00733     // Check for displacement overflow.
00734     if (!isInt<32>(Disp))
00735       break;
00736 
00737     AM.IndexReg = IndexReg;
00738     AM.Scale = Scale;
00739     AM.Disp = (uint32_t)Disp;
00740     GEPs.push_back(V);
00741 
00742     if (const GetElementPtrInst *GEP =
00743           dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
00744       // Ok, the GEP indices were covered by constant-offset and scaled-index
00745       // addressing. Update the address state and move on to examining the base.
00746       V = GEP;
00747       goto redo_gep;
00748     } else if (X86SelectAddress(U->getOperand(0), AM)) {
00749       return true;
00750     }
00751 
00752     // If we couldn't merge the gep value into this addr mode, revert to
00753     // our address and just match the value instead of completely failing.
00754     AM = SavedAM;
00755 
00756     for (SmallVectorImpl<const Value *>::reverse_iterator
00757            I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
00758       if (handleConstantAddresses(*I, AM))
00759         return true;
00760 
00761     return false;
00762   unsupported_gep:
00763     // Ok, the GEP indices weren't all covered.
00764     break;
00765   }
00766   }
00767 
00768   return handleConstantAddresses(V, AM);
00769 }
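// Example (sketch): for IR along the lines of
//   %idx = getelementptr inbounds [16 x i32]* %buf, i64 0, i64 %i
// the constant index contributes nothing to Disp, the variable index becomes
// IndexReg (the vreg holding %i) with Scale = 4 (the i32 allocation size),
// and %buf supplies the base, so a subsequent load or store can use a single
// [Base + IndexReg*4 + Disp] addressing mode.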
00770 
00771 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
00772 ///
00773 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
00774   const User *U = nullptr;
00775   unsigned Opcode = Instruction::UserOp1;
00776   const Instruction *I = dyn_cast<Instruction>(V);
00777   // Record if the value is defined in the same basic block.
00778   //
00779   // This information is crucial to know whether or not folding an
00780   // operand is valid.
00781   // Indeed, FastISel generates or reuses a virtual register for all
00782   // operands of all instructions it selects. Obviously, the definition and
00783   // its uses must use the same virtual register otherwise the produced
00784   // code is incorrect.
00785   // Before instruction selection, FunctionLoweringInfo::set sets the virtual
00786   // registers for values that are alive across basic blocks. This ensures
00787   // that the values are set consistently across basic blocks, even
00788   // if different instruction selection mechanisms are used (e.g., a mix of
00789   // SDISel and FastISel).
00790   // For values local to a basic block, the instruction selection process
00791   // generates these virtual registers with whatever method is appropriate
00792   // for its needs. In particular, FastISel and SDISel do not share the way
00793   // local virtual registers are set.
00794   // Therefore, it is impossible (or at least unsafe) to share values
00795   // between basic blocks unless they use the same instruction selection
00796   // method, which is not guaranteed for X86.
00797   // Moreover, things like hasOneUse could not be used accurately if we
00798   // allowed references to values across basic blocks while they are not
00799   // alive across basic blocks initially.
00800   bool InMBB = true;
00801   if (I) {
00802     Opcode = I->getOpcode();
00803     U = I;
00804     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
00805   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
00806     Opcode = C->getOpcode();
00807     U = C;
00808   }
00809 
00810   switch (Opcode) {
00811   default: break;
00812   case Instruction::BitCast:
00813     // Look past bitcasts whose operand is in the same BB.
00814     if (InMBB)
00815       return X86SelectCallAddress(U->getOperand(0), AM);
00816     break;
00817 
00818   case Instruction::IntToPtr:
00819     // Look past no-op inttoptrs whose operand is in the same BB.
00820     if (InMBB &&
00821         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
00822       return X86SelectCallAddress(U->getOperand(0), AM);
00823     break;
00824 
00825   case Instruction::PtrToInt:
00826     // Look past no-op ptrtoints whose operand is in the same BB.
00827     if (InMBB &&
00828         TLI.getValueType(U->getType()) == TLI.getPointerTy())
00829       return X86SelectCallAddress(U->getOperand(0), AM);
00830     break;
00831   }
00832 
00833   // Handle constant address.
00834   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
00835     // Can't handle alternate code models yet.
00836     if (TM.getCodeModel() != CodeModel::Small)
00837       return false;
00838 
00839     // RIP-relative addresses can't have additional register operands.
00840     if (Subtarget->isPICStyleRIPRel() &&
00841         (AM.Base.Reg != 0 || AM.IndexReg != 0))
00842       return false;
00843 
00844     // Can't handle DLL Import.
00845     if (GV->hasDLLImportStorageClass())
00846       return false;
00847 
00848     // Can't handle TLS.
00849     if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
00850       if (GVar->isThreadLocal())
00851         return false;
00852 
00853     // Okay, we've committed to selecting this global. Set up the basic address.
00854     AM.GV = GV;
00855 
00856     // No ABI requires an extra load for anything other than DLLImport, which
00857     // we rejected above. Return a direct reference to the global.
00858     if (Subtarget->isPICStyleRIPRel()) {
00859       // Use rip-relative addressing if we can.  Above we verified that the
00860       // base and index registers are unused.
00861       assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
00862       AM.Base.Reg = X86::RIP;
00863     } else if (Subtarget->isPICStyleStubPIC()) {
00864       AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
00865     } else if (Subtarget->isPICStyleGOT()) {
00866       AM.GVOpFlags = X86II::MO_GOTOFF;
00867     }
00868 
00869     return true;
00870   }
00871 
00872   // If all else fails, try to materialize the value in a register.
00873   if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
00874     if (AM.Base.Reg == 0) {
00875       AM.Base.Reg = getRegForValue(V);
00876       return AM.Base.Reg != 0;
00877     }
00878     if (AM.IndexReg == 0) {
00879       assert(AM.Scale == 1 && "Scale with no index!");
00880       AM.IndexReg = getRegForValue(V);
00881       return AM.IndexReg != 0;
00882     }
00883   }
00884 
00885   return false;
00886 }
00887 
00888 
00889 /// X86SelectStore - Select and emit code to implement store instructions.
00890 bool X86FastISel::X86SelectStore(const Instruction *I) {
00891   // Atomic stores need special handling.
00892   const StoreInst *S = cast<StoreInst>(I);
00893 
00894   if (S->isAtomic())
00895     return false;
00896 
00897   const Value *Val = S->getValueOperand();
00898   const Value *Ptr = S->getPointerOperand();
00899 
00900   MVT VT;
00901   if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
00902     return false;
00903 
00904   unsigned Alignment = S->getAlignment();
00905   unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
00906   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
00907     Alignment = ABIAlignment;
00908   bool Aligned = Alignment >= ABIAlignment;
00909 
00910   X86AddressMode AM;
00911   if (!X86SelectAddress(Ptr, AM))
00912     return false;
00913 
00914   return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
00915 }
00916 
00917 /// X86SelectRet - Select and emit code to implement ret instructions.
00918 bool X86FastISel::X86SelectRet(const Instruction *I) {
00919   const ReturnInst *Ret = cast<ReturnInst>(I);
00920   const Function &F = *I->getParent()->getParent();
00921   const X86MachineFunctionInfo *X86MFInfo =
00922       FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
00923 
00924   if (!FuncInfo.CanLowerReturn)
00925     return false;
00926 
00927   CallingConv::ID CC = F.getCallingConv();
00928   if (CC != CallingConv::C &&
00929       CC != CallingConv::Fast &&
00930       CC != CallingConv::X86_FastCall &&
00931       CC != CallingConv::X86_64_SysV)
00932     return false;
00933 
00934   if (Subtarget->isCallingConvWin64(CC))
00935     return false;
00936 
00937   // Don't handle popping bytes on return for now.
00938   if (X86MFInfo->getBytesToPopOnReturn() != 0)
00939     return false;
00940 
00941   // fastcc with -tailcallopt is intended to provide a guaranteed
00942   // tail call optimization. FastISel doesn't know how to do that.
00943   if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
00944     return false;
00945 
00946   // Let SDISel handle vararg functions.
00947   if (F.isVarArg())
00948     return false;
00949 
00950   // Build a list of return value registers.
00951   SmallVector<unsigned, 4> RetRegs;
00952 
00953   if (Ret->getNumOperands() > 0) {
00954     SmallVector<ISD::OutputArg, 4> Outs;
00955     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
00956 
00957     // Analyze the return values, assigning a location to each one.
00958     SmallVector<CCValAssign, 16> ValLocs;
00959     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
00960     CCInfo.AnalyzeReturn(Outs, RetCC_X86);
00961 
00962     const Value *RV = Ret->getOperand(0);
00963     unsigned Reg = getRegForValue(RV);
00964     if (Reg == 0)
00965       return false;
00966 
00967     // Only handle a single return value for now.
00968     if (ValLocs.size() != 1)
00969       return false;
00970 
00971     CCValAssign &VA = ValLocs[0];
00972 
00973     // Don't bother handling odd stuff for now.
00974     if (VA.getLocInfo() != CCValAssign::Full)
00975       return false;
00976     // Only handle register returns for now.
00977     if (!VA.isRegLoc())
00978       return false;
00979 
00980     // The calling-convention tables for x87 returns don't tell
00981     // the whole story.
00982     if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
00983       return false;
00984 
00985     unsigned SrcReg = Reg + VA.getValNo();
00986     EVT SrcVT = TLI.getValueType(RV->getType());
00987     EVT DstVT = VA.getValVT();
00988     // Special handling for extended integers.
00989     if (SrcVT != DstVT) {
00990       if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
00991         return false;
00992 
00993       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
00994         return false;
00995 
00996       assert(DstVT == MVT::i32 && "X86 should always ext to i32");
00997 
00998       if (SrcVT == MVT::i1) {
00999         if (Outs[0].Flags.isSExt())
01000           return false;
01001         SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
01002         SrcVT = MVT::i8;
01003       }
01004       unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
01005                                              ISD::SIGN_EXTEND;
01006       SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
01007                           SrcReg, /*TODO: Kill=*/false);
01008     }
01009 
01010     // Make the copy.
01011     unsigned DstReg = VA.getLocReg();
01012     const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
01013     // Avoid a cross-class copy. This is very unlikely.
01014     if (!SrcRC->contains(DstReg))
01015       return false;
01016     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
01017             DstReg).addReg(SrcReg);
01018 
01019     // Add register to return instruction.
01020     RetRegs.push_back(VA.getLocReg());
01021   }
01022 
01023   // The x86-64 ABI for returning structs by value requires that we copy
01024   // the sret argument into %rax for the return. We saved the argument into
01025   // a virtual register in the entry block, so now we copy the value out
01026   // and into %rax. We also do the same with %eax for Win32.
01027   if (F.hasStructRetAttr() &&
01028       (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
01029     unsigned Reg = X86MFInfo->getSRetReturnReg();
01030     assert(Reg &&
01031            "SRetReturnReg should have been set in LowerFormalArguments()!");
01032     unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
01033     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
01034             RetReg).addReg(Reg);
01035     RetRegs.push_back(RetReg);
01036   }
01037 
01038   // Now emit the RET.
01039   MachineInstrBuilder MIB =
01040     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
01041   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
01042     MIB.addReg(RetRegs[i], RegState::Implicit);
01043   return true;
01044 }
01045 
01046 /// X86SelectLoad - Select and emit code to implement load instructions.
01047 ///
01048 bool X86FastISel::X86SelectLoad(const Instruction *I) {
01049   const LoadInst *LI = cast<LoadInst>(I);
01050 
01051   // Atomic loads need special handling.
01052   if (LI->isAtomic())
01053     return false;
01054 
01055   MVT VT;
01056   if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
01057     return false;
01058 
01059   const Value *Ptr = LI->getPointerOperand();
01060 
01061   X86AddressMode AM;
01062   if (!X86SelectAddress(Ptr, AM))
01063     return false;
01064 
01065   unsigned ResultReg = 0;
01066   if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
01067     return false;
01068 
01069   updateValueMap(I, ResultReg);
01070   return true;
01071 }
01072 
01073 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
01074   bool HasAVX = Subtarget->hasAVX();
01075   bool X86ScalarSSEf32 = Subtarget->hasSSE1();
01076   bool X86ScalarSSEf64 = Subtarget->hasSSE2();
01077 
01078   switch (VT.getSimpleVT().SimpleTy) {
01079   default:       return 0;
01080   case MVT::i8:  return X86::CMP8rr;
01081   case MVT::i16: return X86::CMP16rr;
01082   case MVT::i32: return X86::CMP32rr;
01083   case MVT::i64: return X86::CMP64rr;
01084   case MVT::f32:
01085     return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
01086   case MVT::f64:
01087     return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
01088   }
01089 }
01090 
01091 /// X86ChooseCmpImmediateOpcode - If the RHS of the comparison is the
01092 /// constant RHSC, return an opcode that can fold it into the compare (e.g.
01093 /// CMP32ri); otherwise return 0.
01094 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
01095   switch (VT.getSimpleVT().SimpleTy) {
01096   // Otherwise, we can't fold the immediate into this comparison.
01097   default: return 0;
01098   case MVT::i8: return X86::CMP8ri;
01099   case MVT::i16: return X86::CMP16ri;
01100   case MVT::i32: return X86::CMP32ri;
01101   case MVT::i64:
01102     // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
01103     // field.
01104     if (isInt<32>(RHSC->getSExtValue()))
01105       return X86::CMP64ri32;
01106     return 0;
01107   }
01108 }
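// Example (sketch): for "icmp eq i64 %x, 42" this returns X86::CMP64ri32,
// since 42 fits in a sign-extended 32-bit immediate; for a constant such as
// 0x100000000 it returns 0 and the caller falls back to materializing the
// constant and using CMP64rr.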
01109 
01110 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
01111                                      EVT VT) {
01112   unsigned Op0Reg = getRegForValue(Op0);
01113   if (Op0Reg == 0) return false;
01114 
01115   // Handle 'null' like i32/i64 0.
01116   if (isa<ConstantPointerNull>(Op1))
01117     Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
01118 
01119   // We have two options: compare with register or immediate.  If the RHS of
01120   // the compare is an immediate that we can fold into this compare, use
01121   // CMPri, otherwise use CMPrr.
01122   if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
01123     if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
01124       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CompareImmOpc))
01125         .addReg(Op0Reg)
01126         .addImm(Op1C->getSExtValue());
01127       return true;
01128     }
01129   }
01130 
01131   unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
01132   if (CompareOpc == 0) return false;
01133 
01134   unsigned Op1Reg = getRegForValue(Op1);
01135   if (Op1Reg == 0) return false;
01136   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CompareOpc))
01137     .addReg(Op0Reg)
01138     .addReg(Op1Reg);
01139 
01140   return true;
01141 }
01142 
01143 bool X86FastISel::X86SelectCmp(const Instruction *I) {
01144   const CmpInst *CI = cast<CmpInst>(I);
01145 
01146   MVT VT;
01147   if (!isTypeLegal(I->getOperand(0)->getType(), VT))
01148     return false;
01149 
01150   // Try to optimize or fold the cmp.
01151   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
01152   unsigned ResultReg = 0;
01153   switch (Predicate) {
01154   default: break;
01155   case CmpInst::FCMP_FALSE: {
01156     ResultReg = createResultReg(&X86::GR32RegClass);
01157     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
01158             ResultReg);
01159     ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
01160                                            X86::sub_8bit);
01161     if (!ResultReg)
01162       return false;
01163     break;
01164   }
01165   case CmpInst::FCMP_TRUE: {
01166     ResultReg = createResultReg(&X86::GR8RegClass);
01167     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
01168             ResultReg).addImm(1);
01169     break;
01170   }
01171   }
01172 
01173   if (ResultReg) {
01174     updateValueMap(I, ResultReg);
01175     return true;
01176   }
01177 
01178   const Value *LHS = CI->getOperand(0);
01179   const Value *RHS = CI->getOperand(1);
01180 
01181   // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
01182   // We don't have to materialize a zero constant for this case and can just use
01183   // %x again on the RHS.
01184   if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
01185     const auto *RHSC = dyn_cast<ConstantFP>(RHS);
01186     if (RHSC && RHSC->isNullValue())
01187       RHS = LHS;
01188   }
01189 
01190   // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
01191   static unsigned SETFOpcTable[2][3] = {
01192     { X86::SETEr,  X86::SETNPr, X86::AND8rr },
01193     { X86::SETNEr, X86::SETPr,  X86::OR8rr  }
01194   };
01195   unsigned *SETFOpc = nullptr;
01196   switch (Predicate) {
01197   default: break;
01198   case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
01199   case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
01200   }
01201 
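  // Example (sketch): for "fcmp oeq" the sequence emitted below is roughly
  //   ucomiss %rhs, %lhs
  //   sete   %flag1
  //   setnp  %flag2
  //   andb   %flag2, %flag1      ; equal AND not unordered
  // while "fcmp une" combines SETNE and SETP with OR8rr instead.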
01202   ResultReg = createResultReg(&X86::GR8RegClass);
01203   if (SETFOpc) {
01204     if (!X86FastEmitCompare(LHS, RHS, VT))
01205       return false;
01206 
01207     unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
01208     unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
01209     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
01210             FlagReg1);
01211     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
01212             FlagReg2);
01213     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
01214             ResultReg).addReg(FlagReg1).addReg(FlagReg2);
01215     updateValueMap(I, ResultReg);
01216     return true;
01217   }
01218 
01219   X86::CondCode CC;
01220   bool SwapArgs;
01221   std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
01222   assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
01223   unsigned Opc = X86::getSETFromCond(CC);
01224 
01225   if (SwapArgs)
01226     std::swap(LHS, RHS);
01227 
01228   // Emit a compare of LHS/RHS.
01229   if (!X86FastEmitCompare(LHS, RHS, VT))
01230     return false;
01231 
01232   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
01233   updateValueMap(I, ResultReg);
01234   return true;
01235 }
01236 
01237 bool X86FastISel::X86SelectZExt(const Instruction *I) {
01238   EVT DstVT = TLI.getValueType(I->getType());
01239   if (!TLI.isTypeLegal(DstVT))
01240     return false;
01241 
01242   unsigned ResultReg = getRegForValue(I->getOperand(0));
01243   if (ResultReg == 0)
01244     return false;
01245 
01246   // Handle zero-extension from i1 to i8, which is common.
01247   MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
01248   if (SrcVT.SimpleTy == MVT::i1) {
01249     // Set the high bits to zero.
01250     ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
01251     SrcVT = MVT::i8;
01252 
01253     if (ResultReg == 0)
01254       return false;
01255   }
01256 
01257   if (DstVT == MVT::i64) {
01258     // Handle extension to 64-bits via sub-register shenanigans.
01259     unsigned MovInst;
01260 
01261     switch (SrcVT.SimpleTy) {
01262     case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
01263     case MVT::i16: MovInst = X86::MOVZX32rr16; break;
01264     case MVT::i32: MovInst = X86::MOV32rr;     break;
01265     default: llvm_unreachable("Unexpected zext to i64 source type");
01266     }
01267 
01268     unsigned Result32 = createResultReg(&X86::GR32RegClass);
01269     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
01270       .addReg(ResultReg);
01271 
01272     ResultReg = createResultReg(&X86::GR64RegClass);
01273     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
01274             ResultReg)
01275       .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
01276   } else if (DstVT != MVT::i8) {
01277     ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
01278                            ResultReg, /*Kill=*/true);
01279     if (ResultReg == 0)
01280       return false;
01281   }
01282 
01283   updateValueMap(I, ResultReg);
01284   return true;
01285 }
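// Example (sketch): "zext i32 %x to i64" becomes roughly
//   %r32 = MOV32rr %x
//   %r64 = SUBREG_TO_REG 0, %r32, sub_32bit
// relying on the fact that writing a 32-bit register on x86-64 implicitly
// zeroes the upper 32 bits, so no separate 64-bit zero-extend instruction is
// needed.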
01286 
01287 
01288 bool X86FastISel::X86SelectBranch(const Instruction *I) {
01289   // Unconditional branches are selected by tablegen-generated code.
01290   // Handle a conditional branch.
01291   const BranchInst *BI = cast<BranchInst>(I);
01292   MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
01293   MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
01294 
01295   // Fold the common case of a conditional branch with a comparison
01296   // in the same block (values defined on other blocks may not have
01297   // initialized registers).
01298   X86::CondCode CC;
01299   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
01300     if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
01301       EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
01302 
01303       // Try to optimize or fold the cmp.
01304       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
01305       switch (Predicate) {
01306       default: break;
01307       case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
01308       case CmpInst::FCMP_TRUE:  fastEmitBranch(TrueMBB, DbgLoc); return true;
01309       }
01310 
01311       const Value *CmpLHS = CI->getOperand(0);
01312       const Value *CmpRHS = CI->getOperand(1);
01313 
01314       // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
01315       // 0.0.
01316       // We don't have to materialize a zero constant for this case and can just
01317       // use %x again on the RHS.
01318       if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
01319         const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
01320         if (CmpRHSC && CmpRHSC->isNullValue())
01321           CmpRHS = CmpLHS;
01322       }
01323 
01324       // Try to take advantage of fallthrough opportunities.
01325       if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
01326         std::swap(TrueMBB, FalseMBB);
01327         Predicate = CmpInst::getInversePredicate(Predicate);
01328       }
01329 
01330       // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
01331       // code check. Instead two branch instructions are required to check all
01332       // the flags. First we change the predicate to a supported condition code,
01333       // which will be the first branch. Later one we will emit the second
01334       // branch.
01335       bool NeedExtraBranch = false;
01336       switch (Predicate) {
01337       default: break;
01338       case CmpInst::FCMP_OEQ:
01339         std::swap(TrueMBB, FalseMBB); // fall-through
01340       case CmpInst::FCMP_UNE:
01341         NeedExtraBranch = true;
01342         Predicate = CmpInst::FCMP_ONE;
01343         break;
01344       }
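      // Example (sketch): "br (fcmp une float %x, %y), %t, %f" is emitted
      // roughly as
      //   ucomiss %y, %x
      //   jne %t
      //   jp  %t                 ; extra branch for the unordered case
      //   jmp %f
      // For FCMP_OEQ the successors were swapped above, so the same pair of
      // branches diverts the not-equal and unordered cases, and the original
      // true block is reached only when the operands are ordered and equal.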
01345 
01346       bool SwapArgs;
01347       unsigned BranchOpc;
01348       std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
01349       assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
01350 
01351       BranchOpc = X86::GetCondBranchFromCond(CC);
01352       if (SwapArgs)
01353         std::swap(CmpLHS, CmpRHS);
01354 
01355       // Emit a compare of the LHS and RHS, setting the flags.
01356       if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT))
01357         return false;
01358 
01359       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
01360         .addMBB(TrueMBB);
01361 
01362       // X86 requires a second branch to handle UNE (and OEQ, which is mapped
01363       // to UNE above).
01364       if (NeedExtraBranch) {
01365         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_4))
01366           .addMBB(TrueMBB);
01367       }
01368 
01369       // Obtain the branch weight and add the TrueBB to the successor list.
01370       uint32_t BranchWeight = 0;
01371       if (FuncInfo.BPI)
01372         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
01373                                                    TrueMBB->getBasicBlock());
01374       FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
01375 
01376       // Emits an unconditional branch to the FalseBB, obtains the branch
01377       // weight, and adds it to the successor list.
01378       fastEmitBranch(FalseMBB, DbgLoc);
01379 
01380       return true;
01381     }
01382   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
01383     // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
01384     // typically happen for _Bool and C++ bools.
01385     MVT SourceVT;
01386     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
01387         isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
01388       unsigned TestOpc = 0;
01389       switch (SourceVT.SimpleTy) {
01390       default: break;
01391       case MVT::i8:  TestOpc = X86::TEST8ri; break;
01392       case MVT::i16: TestOpc = X86::TEST16ri; break;
01393       case MVT::i32: TestOpc = X86::TEST32ri; break;
01394       case MVT::i64: TestOpc = X86::TEST64ri32; break;
01395       }
01396       if (TestOpc) {
01397         unsigned OpReg = getRegForValue(TI->getOperand(0));
01398         if (OpReg == 0) return false;
01399         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
01400           .addReg(OpReg).addImm(1);
01401 
01402         unsigned JmpOpc = X86::JNE_4;
01403         if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
01404           std::swap(TrueMBB, FalseMBB);
01405           JmpOpc = X86::JE_4;
01406         }
01407 
01408         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
01409           .addMBB(TrueMBB);
01410         fastEmitBranch(FalseMBB, DbgLoc);
01411         uint32_t BranchWeight = 0;
01412         if (FuncInfo.BPI)
01413           BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
01414                                                      TrueMBB->getBasicBlock());
01415         FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
01416         return true;
01417       }
01418     }
01419   } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
01420     // Fake-request the condition; otherwise the intrinsic might be completely
01421     // optimized away.
01422     unsigned TmpReg = getRegForValue(BI->getCondition());
01423     if (TmpReg == 0)
01424       return false;
01425 
01426     unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
01427 
01428     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
01429       .addMBB(TrueMBB);
01430     fastEmitBranch(FalseMBB, DbgLoc);
01431     uint32_t BranchWeight = 0;
01432     if (FuncInfo.BPI)
01433       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
01434                                                  TrueMBB->getBasicBlock());
01435     FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
01436     return true;
01437   }
01438 
01439   // Otherwise do a clumsy setcc and re-test it.
01440   // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
01441   // in an explicit cast, so make sure to handle that correctly.
01442   unsigned OpReg = getRegForValue(BI->getCondition());
01443   if (OpReg == 0) return false;
01444 
01445   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
01446     .addReg(OpReg).addImm(1);
01447   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_4))
01448     .addMBB(TrueMBB);
01449   fastEmitBranch(FalseMBB, DbgLoc);
01450   uint32_t BranchWeight = 0;
01451   if (FuncInfo.BPI)
01452     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
01453                                                TrueMBB->getBasicBlock());
01454   FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
01455   return true;
01456 }
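
// Illustrative sketch (editorial, not part of the original file): for IR such
// as
//   %c = fcmp oeq double %a, %b
//   br i1 %c, label %t, label %f
// the OEQ case above swaps the successors, rewrites the predicate to ONE, and
// emits roughly
//   ucomisd %xmm_b, %xmm_a
//   jne .LBB_f              ; first branch (COND_NE)
//   jp  .LBB_f              ; extra branch for the unordered case
//   jmp .LBB_t              ; unconditional branch from fastEmitBranch
// Labels and registers here are placeholders; the exact output depends on
// block layout and register allocation.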
01457 
01458 bool X86FastISel::X86SelectShift(const Instruction *I) {
01459   unsigned CReg = 0, OpReg = 0;
01460   const TargetRegisterClass *RC = nullptr;
01461   if (I->getType()->isIntegerTy(8)) {
01462     CReg = X86::CL;
01463     RC = &X86::GR8RegClass;
01464     switch (I->getOpcode()) {
01465     case Instruction::LShr: OpReg = X86::SHR8rCL; break;
01466     case Instruction::AShr: OpReg = X86::SAR8rCL; break;
01467     case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
01468     default: return false;
01469     }
01470   } else if (I->getType()->isIntegerTy(16)) {
01471     CReg = X86::CX;
01472     RC = &X86::GR16RegClass;
01473     switch (I->getOpcode()) {
01474     case Instruction::LShr: OpReg = X86::SHR16rCL; break;
01475     case Instruction::AShr: OpReg = X86::SAR16rCL; break;
01476     case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
01477     default: return false;
01478     }
01479   } else if (I->getType()->isIntegerTy(32)) {
01480     CReg = X86::ECX;
01481     RC = &X86::GR32RegClass;
01482     switch (I->getOpcode()) {
01483     case Instruction::LShr: OpReg = X86::SHR32rCL; break;
01484     case Instruction::AShr: OpReg = X86::SAR32rCL; break;
01485     case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
01486     default: return false;
01487     }
01488   } else if (I->getType()->isIntegerTy(64)) {
01489     CReg = X86::RCX;
01490     RC = &X86::GR64RegClass;
01491     switch (I->getOpcode()) {
01492     case Instruction::LShr: OpReg = X86::SHR64rCL; break;
01493     case Instruction::AShr: OpReg = X86::SAR64rCL; break;
01494     case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
01495     default: return false;
01496     }
01497   } else {
01498     return false;
01499   }
01500 
01501   MVT VT;
01502   if (!isTypeLegal(I->getType(), VT))
01503     return false;
01504 
01505   unsigned Op0Reg = getRegForValue(I->getOperand(0));
01506   if (Op0Reg == 0) return false;
01507 
01508   unsigned Op1Reg = getRegForValue(I->getOperand(1));
01509   if (Op1Reg == 0) return false;
01510   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
01511           CReg).addReg(Op1Reg);
01512 
01513   // The shift instruction uses X86::CL. If we defined a super-register
01514   // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
01515   if (CReg != X86::CL)
01516     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
01517             TII.get(TargetOpcode::KILL), X86::CL)
01518       .addReg(CReg, RegState::Kill);
01519 
01520   unsigned ResultReg = createResultReg(RC);
01521   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
01522     .addReg(Op0Reg);
01523   updateValueMap(I, ResultReg);
01524   return true;
01525 }
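
// Illustrative sketch (editorial): a variable 32-bit shift such as
//   %r = shl i32 %x, %n
// is selected above as a copy of %n into ECX, a subreg KILL narrowing it to
// CL, and SHL32rCL, which reads CL implicitly:
//   COPY %ecx <- %n
//   KILL %cl <- %ecx
//   %r = SHL32rCL %x
// Register names are placeholders for the virtual registers fast-isel uses.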
01526 
01527 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
01528   const static unsigned NumTypes = 4; // i8, i16, i32, i64
01529   const static unsigned NumOps   = 4; // SDiv, SRem, UDiv, URem
01530   const static bool S = true;  // IsSigned
01531   const static bool U = false; // !IsSigned
01532   const static unsigned Copy = TargetOpcode::COPY;
01533   // For the X86 DIV/IDIV instruction, in most cases the dividend
01534   // (numerator) must be in a specific register pair highreg:lowreg,
01535   // producing the quotient in lowreg and the remainder in highreg.
01536   // For most data types, to set up the instruction, the dividend is
01537   // copied into lowreg, and lowreg is sign-extended or zero-extended
01538   // into highreg.  The exception is i8, where the dividend is defined
01539   // as a single register rather than a register pair, and we
01540   // therefore directly sign-extend or zero-extend the dividend into
01541   // lowreg, instead of copying, and ignore the highreg.
01542   const static struct DivRemEntry {
01543     // The following portion depends only on the data type.
01544     const TargetRegisterClass *RC;
01545     unsigned LowInReg;  // low part of the register pair
01546     unsigned HighInReg; // high part of the register pair
01547     // The following portion depends on both the data type and the operation.
01548     struct DivRemResult {
01549       unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
01550       unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
01551                                 // highreg, or copying a zero into highreg.
01552       unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
01553                                 // zero/sign-extending into lowreg for i8.
01554       unsigned DivRemResultReg; // Register containing the desired result.
01555       bool IsOpSigned;          // Whether to use signed or unsigned form.
01556     } ResultTable[NumOps];
01557   } OpTable[NumTypes] = {
01558     { &X86::GR8RegClass,  X86::AX,  0, {
01559         { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AL,  S }, // SDiv
01560         { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AH,  S }, // SRem
01561         { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AL,  U }, // UDiv
01562         { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AH,  U }, // URem
01563       }
01564     }, // i8
01565     { &X86::GR16RegClass, X86::AX,  X86::DX, {
01566         { X86::IDIV16r, X86::CWD,     Copy,            X86::AX,  S }, // SDiv
01567         { X86::IDIV16r, X86::CWD,     Copy,            X86::DX,  S }, // SRem
01568         { X86::DIV16r,  X86::MOV32r0, Copy,            X86::AX,  U }, // UDiv
01569         { X86::DIV16r,  X86::MOV32r0, Copy,            X86::DX,  U }, // URem
01570       }
01571     }, // i16
01572     { &X86::GR32RegClass, X86::EAX, X86::EDX, {
01573         { X86::IDIV32r, X86::CDQ,     Copy,            X86::EAX, S }, // SDiv
01574         { X86::IDIV32r, X86::CDQ,     Copy,            X86::EDX, S }, // SRem
01575         { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EAX, U }, // UDiv
01576         { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EDX, U }, // URem
01577       }
01578     }, // i32
01579     { &X86::GR64RegClass, X86::RAX, X86::RDX, {
01580         { X86::IDIV64r, X86::CQO,     Copy,            X86::RAX, S }, // SDiv
01581         { X86::IDIV64r, X86::CQO,     Copy,            X86::RDX, S }, // SRem
01582         { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RAX, U }, // UDiv
01583         { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RDX, U }, // URem
01584       }
01585     }, // i64
01586   };
01587 
01588   MVT VT;
01589   if (!isTypeLegal(I->getType(), VT))
01590     return false;
01591 
01592   unsigned TypeIndex, OpIndex;
01593   switch (VT.SimpleTy) {
01594   default: return false;
01595   case MVT::i8:  TypeIndex = 0; break;
01596   case MVT::i16: TypeIndex = 1; break;
01597   case MVT::i32: TypeIndex = 2; break;
01598   case MVT::i64: TypeIndex = 3;
01599     if (!Subtarget->is64Bit())
01600       return false;
01601     break;
01602   }
01603 
01604   switch (I->getOpcode()) {
01605   default: llvm_unreachable("Unexpected div/rem opcode");
01606   case Instruction::SDiv: OpIndex = 0; break;
01607   case Instruction::SRem: OpIndex = 1; break;
01608   case Instruction::UDiv: OpIndex = 2; break;
01609   case Instruction::URem: OpIndex = 3; break;
01610   }
01611 
01612   const DivRemEntry &TypeEntry = OpTable[TypeIndex];
01613   const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
01614   unsigned Op0Reg = getRegForValue(I->getOperand(0));
01615   if (Op0Reg == 0)
01616     return false;
01617   unsigned Op1Reg = getRegForValue(I->getOperand(1));
01618   if (Op1Reg == 0)
01619     return false;
01620 
01621   // Move op0 into low-order input register.
01622   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
01623           TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
01624   // Zero-extend or sign-extend into high-order input register.
01625   if (OpEntry.OpSignExtend) {
01626     if (OpEntry.IsOpSigned)
01627       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
01628               TII.get(OpEntry.OpSignExtend));
01629     else {
01630       unsigned Zero32 = createResultReg(&X86::GR32RegClass);
01631       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
01632               TII.get(X86::MOV32r0), Zero32);
01633 
01634       // Copy the zero into the appropriate sub/super/identical physical
01635       // register. Unfortunately the operations needed are not uniform enough to
01636       // fit neatly into the table above.
01637       if (VT.SimpleTy == MVT::i16) {
01638         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
01639                 TII.get(Copy), TypeEntry.HighInReg)
01640           .addReg(Zero32, 0, X86::sub_16bit);
01641       } else if (VT.SimpleTy == MVT::i32) {
01642         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
01643                 TII.get(Copy), TypeEntry.HighInReg)
01644             .addReg(Zero32);
01645       } else if (VT.SimpleTy == MVT::i64) {
01646         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
01647                 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
01648             .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
01649       }
01650     }
01651   }
01652   // Generate the DIV/IDIV instruction.
01653   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
01654           TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
01655   // For i8 remainder, we can't reference AH directly, as we'll end
01656   // up with bogus copies like %R9B = COPY %AH. Reference AX
01657   // instead to prevent AH references in a REX instruction.
01658   //
01659   // The current assumption of the fast register allocator is that isel
01660   // won't generate explicit references to the GPR8_NOREX registers. If
01661   // the allocator and/or the backend get enhanced to be more robust in
01662   // that regard, this can be, and should be, removed.
01663   unsigned ResultReg = 0;
01664   if ((I->getOpcode() == Instruction::SRem ||
01665        I->getOpcode() == Instruction::URem) &&
01666       OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
01667     unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
01668     unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
01669     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
01670             TII.get(Copy), SourceSuperReg).addReg(X86::AX);
01671 
01672     // Shift AX right by 8 bits instead of using AH.
01673     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
01674             ResultSuperReg).addReg(SourceSuperReg).addImm(8);
01675 
01676     // Now reference the 8-bit subreg of the result.
01677     ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
01678                                            /*Kill=*/true, X86::sub_8bit);
01679   }
01680   // Copy the result out of the physreg if we haven't already.
01681   if (!ResultReg) {
01682     ResultReg = createResultReg(TypeEntry.RC);
01683     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
01684         .addReg(OpEntry.DivRemResultReg);
01685   }
01686   updateValueMap(I, ResultReg);
01687 
01688   return true;
01689 }
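
// Illustrative sketch (editorial): a 32-bit signed division
//   %q = sdiv i32 %a, %b
// is selected from OpTable above roughly as
//   COPY %eax <- %a         ; dividend into the low input register
//   CDQ                     ; sign-extend EAX into EDX
//   IDIV32r %b              ; quotient in EAX, remainder in EDX
//   COPY %q <- %eax
// For an i8 remainder on x86-64 the code instead copies AX into a GR16 vreg
// and shifts it right by 8, so AH is never referenced directly.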
01690 
01691 /// \brief Emit a conditional move instruction (if they are supported) to lower
01692 /// the select.
01693 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
01694   // Check if the subtarget supports these instructions.
01695   if (!Subtarget->hasCMov())
01696     return false;
01697 
01698   // FIXME: Add support for i8.
01699   if (RetVT < MVT::i16 || RetVT > MVT::i64)
01700     return false;
01701 
01702   const Value *Cond = I->getOperand(0);
01703   const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
01704   bool NeedTest = true;
01705   X86::CondCode CC = X86::COND_NE;
01706 
01707   // Optimize conditions coming from a compare if both instructions are in the
01708   // same basic block (values defined in other basic blocks may not have
01709   // initialized registers).
01710   const auto *CI = dyn_cast<CmpInst>(Cond);
01711   if (CI && (CI->getParent() == I->getParent())) {
01712     CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
01713 
01714     // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
01715     static unsigned SETFOpcTable[2][3] = {
01716       { X86::SETNPr, X86::SETEr , X86::TEST8rr },
01717       { X86::SETPr,  X86::SETNEr, X86::OR8rr   }
01718     };
01719     unsigned *SETFOpc = nullptr;
01720     switch (Predicate) {
01721     default: break;
01722     case CmpInst::FCMP_OEQ:
01723       SETFOpc = &SETFOpcTable[0][0];
01724       Predicate = CmpInst::ICMP_NE;
01725       break;
01726     case CmpInst::FCMP_UNE:
01727       SETFOpc = &SETFOpcTable[1][0];
01728       Predicate = CmpInst::ICMP_NE;
01729       break;
01730     }
01731 
01732     bool NeedSwap;
01733     std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
01734     assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
01735 
01736     const Value *CmpLHS = CI->getOperand(0);
01737     const Value *CmpRHS = CI->getOperand(1);
01738     if (NeedSwap)
01739       std::swap(CmpLHS, CmpRHS);
01740 
01741     EVT CmpVT = TLI.getValueType(CmpLHS->getType());
01742     // Emit a compare of the LHS and RHS, setting the flags.
01743     if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
01744       return false;
01745 
01746     if (SETFOpc) {
01747       unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
01748       unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
01749       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
01750               FlagReg1);
01751       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
01752               FlagReg2);
01753       auto const &II = TII.get(SETFOpc[2]);
01754       if (II.getNumDefs()) {
01755         unsigned TmpReg = createResultReg(&X86::GR8RegClass);
01756         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
01757           .addReg(FlagReg2).addReg(FlagReg1);
01758       } else {
01759         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
01760           .addReg(FlagReg2).addReg(FlagReg1);
01761       }
01762     }
01763     NeedTest = false;
01764   } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
01765     // Fake-request the condition; otherwise the intrinsic might be completely
01766     // optimized away.
01767     unsigned TmpReg = getRegForValue(Cond);
01768     if (TmpReg == 0)
01769       return false;
01770 
01771     NeedTest = false;
01772   }
01773 
01774   if (NeedTest) {
01775     // Selects operate on i1, but CondReg is 8 bits wide and may contain
01776     // garbage. Only the least significant bit is guaranteed to be accurate,
01777     // so reading more than the LSB may yield non-zero values even when the
01778     // LSB is zero. Therefore, truncate CondReg to i1 for the select by
01779     // performing a TEST against 1.
01780     unsigned CondReg = getRegForValue(Cond);
01781     if (CondReg == 0)
01782       return false;
01783     bool CondIsKill = hasTrivialKill(Cond);
01784 
01785     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
01786       .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
01787   }
01788 
01789   const Value *LHS = I->getOperand(1);
01790   const Value *RHS = I->getOperand(2);
01791 
01792   unsigned RHSReg = getRegForValue(RHS);
01793   bool RHSIsKill = hasTrivialKill(RHS);
01794 
01795   unsigned LHSReg = getRegForValue(LHS);
01796   bool LHSIsKill = hasTrivialKill(LHS);
01797 
01798   if (!LHSReg || !RHSReg)
01799     return false;
01800 
01801   unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
01802   unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
01803                                        LHSReg, LHSIsKill);
01804   updateValueMap(I, ResultReg);
01805   return true;
01806 }
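
// Illustrative sketch (editorial): for IR such as
//   %c = icmp slt i32 %a, %b
//   %r = select i1 %c, i32 %x, i32 %y
// the CMOV path emits approximately
//   CMP32rr %a, %b          ; X86FastEmitCompare sets EFLAGS
//   %r = CMOVL32rr %y, %x   ; start from the false value %y and overwrite it
//                           ; with %x when the condition holds
// The operand order matches the fastEmitInst_rr call above (RHS first, LHS
// second); registers are placeholders for virtual registers.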
01807 
01808 /// \brief Emit SSE instructions to lower the select.
01809 ///
01810 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
01811 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
01812 /// SSE instructions are available.
01813 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
01814   // Optimize conditions coming from a compare if both instructions are in the
01815   // same basic block (values defined in other basic blocks may not have
01816   // initialized registers).
01817   const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
01818   if (!CI || (CI->getParent() != I->getParent()))
01819     return false;
01820 
01821   if (I->getType() != CI->getOperand(0)->getType() ||
01822       !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
01823         (Subtarget->hasSSE2() && RetVT == MVT::f64)    ))
01824     return false;
01825 
01826   const Value *CmpLHS = CI->getOperand(0);
01827   const Value *CmpRHS = CI->getOperand(1);
01828   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
01829 
01830   // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
01831   // We don't have to materialize a zero constant for this case and can just use
01832   // %x again on the RHS.
01833   if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
01834     const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
01835     if (CmpRHSC && CmpRHSC->isNullValue())
01836       CmpRHS = CmpLHS;
01837   }
01838 
01839   unsigned CC;
01840   bool NeedSwap;
01841   std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
01842   if (CC > 7)
01843     return false;
01844 
01845   if (NeedSwap)
01846     std::swap(CmpLHS, CmpRHS);
01847 
01848   static unsigned OpcTable[2][2][4] = {
01849     { { X86::CMPSSrr,  X86::FsANDPSrr,  X86::FsANDNPSrr,  X86::FsORPSrr  },
01850       { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr }  },
01851     { { X86::CMPSDrr,  X86::FsANDPDrr,  X86::FsANDNPDrr,  X86::FsORPDrr  },
01852       { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr }  }
01853   };
01854 
01855   bool HasAVX = Subtarget->hasAVX();
01856   unsigned *Opc = nullptr;
01857   switch (RetVT.SimpleTy) {
01858   default: return false;
01859   case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
01860   case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
01861   }
01862 
01863   const Value *LHS = I->getOperand(1);
01864   const Value *RHS = I->getOperand(2);
01865 
01866   unsigned LHSReg = getRegForValue(LHS);
01867   bool LHSIsKill = hasTrivialKill(LHS);
01868 
01869   unsigned RHSReg = getRegForValue(RHS);
01870   bool RHSIsKill = hasTrivialKill(RHS);
01871 
01872   unsigned CmpLHSReg = getRegForValue(CmpLHS);
01873   bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
01874 
01875   unsigned CmpRHSReg = getRegForValue(CmpRHS);
01876   bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
01877 
01878   if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
01879     return false;
01880 
01881   const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
01882   unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
01883                                      CmpRHSReg, CmpRHSIsKill, CC);
01884   unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
01885                                     LHSReg, LHSIsKill);
01886   unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
01887                                      RHSReg, RHSIsKill);
01888   unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
01889                                        AndReg, /*IsKill=*/true);
01890   updateValueMap(I, ResultReg);
01891   return true;
01892 }
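
// Illustrative sketch (editorial): for IR such as
//   %c = fcmp olt float %a, %b
//   %r = select i1 %c, float %x, float %y
// the branchless lowering above builds the classic CMP/AND/ANDN/OR pattern:
//   mask = CMPSSrr %a, %b, $cc    ; all-ones when %a < %b, else all-zeros
//   t    = FsANDPSrr mask, %x     ; keep %x where the mask is set
//   f    = FsANDNPSrr mask, %y    ; keep %y where the mask is clear
//   %r   = FsORPSrr f, t
// The condition-code immediate comes from getX86SSEConditionCode, and the
// V-prefixed entries in OpcTable are used when AVX is available.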
01893 
01894 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
01895   // These are pseudo CMOV instructions and will be later expanded into control-
01896   // flow.
01897   unsigned Opc;
01898   switch (RetVT.SimpleTy) {
01899   default: return false;
01900   case MVT::i8:  Opc = X86::CMOV_GR8;  break;
01901   case MVT::i16: Opc = X86::CMOV_GR16; break;
01902   case MVT::i32: Opc = X86::CMOV_GR32; break;
01903   case MVT::f32: Opc = X86::CMOV_FR32; break;
01904   case MVT::f64: Opc = X86::CMOV_FR64; break;
01905   }
01906 
01907   const Value *Cond = I->getOperand(0);
01908   X86::CondCode CC = X86::COND_NE;
01909 
01910   // Optimize conditions coming from a compare if both instructions are in the
01911   // same basic block (values defined in other basic blocks may not have
01912   // initialized registers).
01913   const auto *CI = dyn_cast<CmpInst>(Cond);
01914   if (CI && (CI->getParent() == I->getParent())) {
01915     bool NeedSwap;
01916     std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate());
01917     if (CC > X86::LAST_VALID_COND)
01918       return false;
01919 
01920     const Value *CmpLHS = CI->getOperand(0);
01921     const Value *CmpRHS = CI->getOperand(1);
01922 
01923     if (NeedSwap)
01924       std::swap(CmpLHS, CmpRHS);
01925 
01926     EVT CmpVT = TLI.getValueType(CmpLHS->getType());
01927     if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
01928       return false;
01929   } else {
01930     unsigned CondReg = getRegForValue(Cond);
01931     if (CondReg == 0)
01932       return false;
01933     bool CondIsKill = hasTrivialKill(Cond);
01934     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
01935       .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
01936   }
01937 
01938   const Value *LHS = I->getOperand(1);
01939   const Value *RHS = I->getOperand(2);
01940 
01941   unsigned LHSReg = getRegForValue(LHS);
01942   bool LHSIsKill = hasTrivialKill(LHS);
01943 
01944   unsigned RHSReg = getRegForValue(RHS);
01945   bool RHSIsKill = hasTrivialKill(RHS);
01946 
01947   if (!LHSReg || !RHSReg)
01948     return false;
01949 
01950   const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
01951 
01952   unsigned ResultReg =
01953     fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
01954   updateValueMap(I, ResultReg);
01955   return true;
01956 }
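
// Editorial note: the CMOV_GR8/CMOV_FR32/... opcodes selected above are
// pseudo instructions; they are expanded after fast-isel by the target's
// custom inserter (see X86ISelLowering) into explicit control flow, a branch
// plus a PHI, which is why this path also works when neither real CMOVs nor
// SSE selects are usable.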
01957 
01958 bool X86FastISel::X86SelectSelect(const Instruction *I) {
01959   MVT RetVT;
01960   if (!isTypeLegal(I->getType(), RetVT))
01961     return false;
01962 
01963   // Check if we can fold the select.
01964   if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
01965     CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
01966     const Value *Opnd = nullptr;
01967     switch (Predicate) {
01968     default:                              break;
01969     case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
01970     case CmpInst::FCMP_TRUE:  Opnd = I->getOperand(1); break;
01971     }
01972     // No need for a select anymore - this is an unconditional move.
01973     if (Opnd) {
01974       unsigned OpReg = getRegForValue(Opnd);
01975       if (OpReg == 0)
01976         return false;
01977       bool OpIsKill = hasTrivialKill(Opnd);
01978       const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
01979       unsigned ResultReg = createResultReg(RC);
01980       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
01981               TII.get(TargetOpcode::COPY), ResultReg)
01982         .addReg(OpReg, getKillRegState(OpIsKill));
01983       updateValueMap(I, ResultReg);
01984       return true;
01985     }
01986   }
01987 
01988   // First try to use real conditional move instructions.
01989   if (X86FastEmitCMoveSelect(RetVT, I))
01990     return true;
01991 
01992   // Try to use a sequence of SSE instructions to simulate a conditional move.
01993   if (X86FastEmitSSESelect(RetVT, I))
01994     return true;
01995 
01996   // Fall-back to pseudo conditional move instructions, which will be later
01997   // converted to control-flow.
01998   if (X86FastEmitPseudoSelect(RetVT, I))
01999     return true;
02000 
02001   return false;
02002 }
02003 
02004 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
02005   // fpext from float to double.
02006   if (X86ScalarSSEf64 &&
02007       I->getType()->isDoubleTy()) {
02008     const Value *V = I->getOperand(0);
02009     if (V->getType()->isFloatTy()) {
02010       unsigned OpReg = getRegForValue(V);
02011       if (OpReg == 0) return false;
02012       unsigned ResultReg = createResultReg(&X86::FR64RegClass);
02013       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02014               TII.get(X86::CVTSS2SDrr), ResultReg)
02015         .addReg(OpReg);
02016       updateValueMap(I, ResultReg);
02017       return true;
02018     }
02019   }
02020 
02021   return false;
02022 }
02023 
02024 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
02025   if (X86ScalarSSEf64) {
02026     if (I->getType()->isFloatTy()) {
02027       const Value *V = I->getOperand(0);
02028       if (V->getType()->isDoubleTy()) {
02029         unsigned OpReg = getRegForValue(V);
02030         if (OpReg == 0) return false;
02031         unsigned ResultReg = createResultReg(&X86::FR32RegClass);
02032         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02033                 TII.get(X86::CVTSD2SSrr), ResultReg)
02034           .addReg(OpReg);
02035         updateValueMap(I, ResultReg);
02036         return true;
02037       }
02038     }
02039   }
02040 
02041   return false;
02042 }
02043 
02044 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
02045   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
02046   EVT DstVT = TLI.getValueType(I->getType());
02047 
02048   // This code only handles truncation to byte.
02049   if (DstVT != MVT::i8 && DstVT != MVT::i1)
02050     return false;
02051   if (!TLI.isTypeLegal(SrcVT))
02052     return false;
02053 
02054   unsigned InputReg = getRegForValue(I->getOperand(0));
02055   if (!InputReg)
02056     // Unhandled operand.  Halt "fast" selection and bail.
02057     return false;
02058 
02059   if (SrcVT == MVT::i8) {
02060     // Truncate from i8 to i1; no code needed.
02061     updateValueMap(I, InputReg);
02062     return true;
02063   }
02064 
02065   if (!Subtarget->is64Bit()) {
02066     // If we're on x86-32, we can't extract an i8 from a general register.
02067     // First issue a copy to GR16_ABCD or GR32_ABCD.
02068     const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ?
02069       (const TargetRegisterClass*)&X86::GR16_ABCDRegClass :
02070       (const TargetRegisterClass*)&X86::GR32_ABCDRegClass;
02071     unsigned CopyReg = createResultReg(CopyRC);
02072     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
02073             CopyReg).addReg(InputReg);
02074     InputReg = CopyReg;
02075   }
02076 
02077   // Issue an extract_subreg.
02078   unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
02079                                                   InputReg, /*Kill=*/true,
02080                                                   X86::sub_8bit);
02081   if (!ResultReg)
02082     return false;
02083 
02084   updateValueMap(I, ResultReg);
02085   return true;
02086 }
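
// Editorial note: on x86-32 only EAX/EBX/ECX/EDX have an addressable low
// 8-bit subregister, so the extra COPY into GR16_ABCD/GR32_ABCD above makes
// the later sub_8bit extraction legal; on x86-64 every GPR has an 8-bit
// subregister (via REX) and the copy is skipped.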
02087 
02088 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
02089   return Len <= (Subtarget->is64Bit() ? 32 : 16);
02090 }
02091 
02092 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
02093                                      X86AddressMode SrcAM, uint64_t Len) {
02094 
02095   // Make sure we don't bloat code by inlining very large memcpy's.
02096   if (!IsMemcpySmall(Len))
02097     return false;
02098 
02099   bool i64Legal = Subtarget->is64Bit();
02100 
02101   // We don't care about alignment here since we just emit integer accesses.
02102   while (Len) {
02103     MVT VT;
02104     if (Len >= 8 && i64Legal)
02105       VT = MVT::i64;
02106     else if (Len >= 4)
02107       VT = MVT::i32;
02108     else if (Len >= 2)
02109       VT = MVT::i16;
02110     else {
02111       VT = MVT::i8;
02112     }
02113 
02114     unsigned Reg;
02115     bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
02116     RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
02117     assert(RV && "Failed to emit load or store??");
02118 
02119     unsigned Size = VT.getSizeInBits()/8;
02120     Len -= Size;
02121     DestAM.Disp += Size;
02122     SrcAM.Disp += Size;
02123   }
02124 
02125   return true;
02126 }
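
// Illustrative sketch (editorial): on x86-64 a constant 13-byte memcpy that
// passes IsMemcpySmall is unrolled by the loop above into an 8-byte, a 4-byte
// and a 1-byte load/store pair, bumping both displacements after each chunk:
//   movq   (%src), %r   ->  movq %r,   (%dst)
//   movl  8(%src), %e   ->  movl %e,  8(%dst)
//   movb 12(%src), %b   ->  movb %b, 12(%dst)
// Register names are placeholders; fast-isel emits these through
// X86FastEmitLoad/X86FastEmitStore on virtual registers.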
02127 
02128 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
02129   // FIXME: Handle more intrinsics.
02130   switch (II->getIntrinsicID()) {
02131   default: return false;
02132   case Intrinsic::frameaddress: {
02133     Type *RetTy = II->getCalledFunction()->getReturnType();
02134 
02135     MVT VT;
02136     if (!isTypeLegal(RetTy, VT))
02137       return false;
02138 
02139     unsigned Opc;
02140     const TargetRegisterClass *RC = nullptr;
02141 
02142     switch (VT.SimpleTy) {
02143     default: llvm_unreachable("Invalid result type for frameaddress.");
02144     case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
02145     case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
02146     }
02147 
02148     // This needs to be set before we call getFrameRegister, otherwise we get
02149     // the wrong frame register.
02150     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
02151     MFI->setFrameAddressIsTaken(true);
02152 
02153     const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
02154         TM.getSubtargetImpl()->getRegisterInfo());
02155     unsigned FrameReg = RegInfo->getFrameRegister(*(FuncInfo.MF));
02156     assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
02157             (FrameReg == X86::EBP && VT == MVT::i32)) &&
02158            "Invalid Frame Register!");
02159 
02160     // Always make a copy of the frame register to a vreg first, so that we
02161     // never directly reference the frame register (the TwoAddressInstruction-
02162     // Pass doesn't like that).
02163     unsigned SrcReg = createResultReg(RC);
02164     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02165             TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
02166 
02167     // Now recursively load from the frame address.
02168     // movq (%rbp), %rax
02169     // movq (%rax), %rax
02170     // movq (%rax), %rax
02171     // ...
02172     unsigned DestReg;
02173     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
02174     while (Depth--) {
02175       DestReg = createResultReg(RC);
02176       addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02177                            TII.get(Opc), DestReg), SrcReg);
02178       SrcReg = DestReg;
02179     }
02180 
02181     updateValueMap(II, SrcReg);
02182     return true;
02183   }
02184   case Intrinsic::memcpy: {
02185     const MemCpyInst *MCI = cast<MemCpyInst>(II);
02186     // Don't handle volatile or variable length memcpys.
02187     if (MCI->isVolatile())
02188       return false;
02189 
02190     if (isa<ConstantInt>(MCI->getLength())) {
02191       // Small memcpy's are common enough that we want to do them
02192       // without a call if possible.
02193       uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
02194       if (IsMemcpySmall(Len)) {
02195         X86AddressMode DestAM, SrcAM;
02196         if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
02197             !X86SelectAddress(MCI->getRawSource(), SrcAM))
02198           return false;
02199         TryEmitSmallMemcpy(DestAM, SrcAM, Len);
02200         return true;
02201       }
02202     }
02203 
02204     unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
02205     if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
02206       return false;
02207 
02208     if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
02209       return false;
02210 
02211     return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2);
02212   }
02213   case Intrinsic::memset: {
02214     const MemSetInst *MSI = cast<MemSetInst>(II);
02215 
02216     if (MSI->isVolatile())
02217       return false;
02218 
02219     unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
02220     if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
02221       return false;
02222 
02223     if (MSI->getDestAddressSpace() > 255)
02224       return false;
02225 
02226     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
02227   }
02228   case Intrinsic::stackprotector: {
02229     // Emit code to store the stack guard onto the stack.
02230     EVT PtrTy = TLI.getPointerTy();
02231 
02232     const Value *Op1 = II->getArgOperand(0); // The guard's value.
02233     const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
02234 
02235     MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
02236 
02237     // Grab the frame index.
02238     X86AddressMode AM;
02239     if (!X86SelectAddress(Slot, AM)) return false;
02240     if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
02241     return true;
02242   }
02243   case Intrinsic::dbg_declare: {
02244     const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
02245     X86AddressMode AM;
02246     assert(DI->getAddress() && "Null address should be checked earlier!");
02247     if (!X86SelectAddress(DI->getAddress(), AM))
02248       return false;
02249     const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
02250     // FIXME may need to add RegState::Debug to any registers produced,
02251     // although ESP/EBP should be the only ones at the moment.
02252     addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM).
02253       addImm(0).addMetadata(DI->getVariable());
02254     return true;
02255   }
02256   case Intrinsic::trap: {
02257     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
02258     return true;
02259   }
02260   case Intrinsic::sqrt: {
02261     if (!Subtarget->hasSSE1())
02262       return false;
02263 
02264     Type *RetTy = II->getCalledFunction()->getReturnType();
02265 
02266     MVT VT;
02267     if (!isTypeLegal(RetTy, VT))
02268       return false;
02269 
02270     // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
02271     // is not generated by FastISel yet.
02272     // FIXME: Update this code once tablegen can handle it.
02273     static const unsigned SqrtOpc[2][2] = {
02274       {X86::SQRTSSr, X86::VSQRTSSr},
02275       {X86::SQRTSDr, X86::VSQRTSDr}
02276     };
02277     bool HasAVX = Subtarget->hasAVX();
02278     unsigned Opc;
02279     const TargetRegisterClass *RC;
02280     switch (VT.SimpleTy) {
02281     default: return false;
02282     case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
02283     case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
02284     }
02285 
02286     const Value *SrcVal = II->getArgOperand(0);
02287     unsigned SrcReg = getRegForValue(SrcVal);
02288 
02289     if (SrcReg == 0)
02290       return false;
02291 
02292     unsigned ImplicitDefReg = 0;
02293     if (HasAVX) {
02294       ImplicitDefReg = createResultReg(RC);
02295       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02296               TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
02297     }
02298 
02299     unsigned ResultReg = createResultReg(RC);
02300     MachineInstrBuilder MIB;
02301     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
02302                   ResultReg);
02303 
02304     if (ImplicitDefReg)
02305       MIB.addReg(ImplicitDefReg);
02306 
02307     MIB.addReg(SrcReg);
02308 
02309     updateValueMap(II, ResultReg);
02310     return true;
02311   }
02312   case Intrinsic::sadd_with_overflow:
02313   case Intrinsic::uadd_with_overflow:
02314   case Intrinsic::ssub_with_overflow:
02315   case Intrinsic::usub_with_overflow:
02316   case Intrinsic::smul_with_overflow:
02317   case Intrinsic::umul_with_overflow: {
02318     // This implements the basic lowering of the xalu with overflow intrinsics
02319     // into add/sub/mul followed by either seto or setb.
02320     const Function *Callee = II->getCalledFunction();
02321     auto *Ty = cast<StructType>(Callee->getReturnType());
02322     Type *RetTy = Ty->getTypeAtIndex(0U);
02323     Type *CondTy = Ty->getTypeAtIndex(1);
02324 
02325     MVT VT;
02326     if (!isTypeLegal(RetTy, VT))
02327       return false;
02328 
02329     if (VT < MVT::i8 || VT > MVT::i64)
02330       return false;
02331 
02332     const Value *LHS = II->getArgOperand(0);
02333     const Value *RHS = II->getArgOperand(1);
02334 
02335     // Canonicalize immediate to the RHS.
02336     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
02337         isCommutativeIntrinsic(II))
02338       std::swap(LHS, RHS);
02339 
02340     bool UseIncDec = false;
02341     if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
02342       UseIncDec = true;
02343 
02344     unsigned BaseOpc, CondOpc;
02345     switch (II->getIntrinsicID()) {
02346     default: llvm_unreachable("Unexpected intrinsic!");
02347     case Intrinsic::sadd_with_overflow:
02348       BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
02349       CondOpc = X86::SETOr;
02350       break;
02351     case Intrinsic::uadd_with_overflow:
02352       BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
02353     case Intrinsic::ssub_with_overflow:
02354       BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
02355       CondOpc = X86::SETOr;
02356       break;
02357     case Intrinsic::usub_with_overflow:
02358       BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
02359     case Intrinsic::smul_with_overflow:
02360       BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
02361     case Intrinsic::umul_with_overflow:
02362       BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
02363     }
02364 
02365     unsigned LHSReg = getRegForValue(LHS);
02366     if (LHSReg == 0)
02367       return false;
02368     bool LHSIsKill = hasTrivialKill(LHS);
02369 
02370     unsigned ResultReg = 0;
02371     // Check if we have an immediate version.
02372     if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
02373       static const unsigned Opc[2][2][4] = {
02374         { { X86::INC8r, X86::INC16r,    X86::INC32r,    X86::INC64r },
02375           { X86::DEC8r, X86::DEC16r,    X86::DEC32r,    X86::DEC64r }  },
02376         { { X86::INC8r, X86::INC64_16r, X86::INC64_32r, X86::INC64r },
02377           { X86::DEC8r, X86::DEC64_16r, X86::DEC64_32r, X86::DEC64r }  }
02378       };
02379 
02380       if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
02381         ResultReg = createResultReg(TLI.getRegClassFor(VT));
02382         bool Is64Bit = Subtarget->is64Bit();
02383         bool IsDec = BaseOpc == X86ISD::DEC;
02384         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02385                 TII.get(Opc[Is64Bit][IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
02386           .addReg(LHSReg, getKillRegState(LHSIsKill));
02387       } else
02388         ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
02389                                 CI->getZExtValue());
02390     }
02391 
02392     unsigned RHSReg;
02393     bool RHSIsKill;
02394     if (!ResultReg) {
02395       RHSReg = getRegForValue(RHS);
02396       if (RHSReg == 0)
02397         return false;
02398       RHSIsKill = hasTrivialKill(RHS);
02399       ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
02400                               RHSIsKill);
02401     }
02402 
02403     // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
02404     // them manually.
02405     if (BaseOpc == X86ISD::UMUL && !ResultReg) {
02406       static const unsigned MULOpc[] =
02407         { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
02408       static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
02409       // First copy the first operand into AL/AX/EAX/RAX, which is the implicit
02410       // input to the X86::MUL*r instruction.
02411       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02412               TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
02413         .addReg(LHSReg, getKillRegState(LHSIsKill));
02414       ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
02415                                  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
02416     } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
02417       static const unsigned MULOpc[] =
02418         { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
02419       if (VT == MVT::i8) {
02420         // Copy the first operand into AL, which is an implicit input to the
02421         // X86::IMUL8r instruction.
02422         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02423                TII.get(TargetOpcode::COPY), X86::AL)
02424           .addReg(LHSReg, getKillRegState(LHSIsKill));
02425         ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
02426                                    RHSIsKill);
02427       } else
02428         ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
02429                                     TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
02430                                     RHSReg, RHSIsKill);
02431     }
02432 
02433     if (!ResultReg)
02434       return false;
02435 
02436     unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
02437     assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
02438     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
02439             ResultReg2);
02440 
02441     updateValueMap(II, ResultReg, 2);
02442     return true;
02443   }
02444   case Intrinsic::x86_sse_cvttss2si:
02445   case Intrinsic::x86_sse_cvttss2si64:
02446   case Intrinsic::x86_sse2_cvttsd2si:
02447   case Intrinsic::x86_sse2_cvttsd2si64: {
02448     bool IsInputDouble;
02449     switch (II->getIntrinsicID()) {
02450     default: llvm_unreachable("Unexpected intrinsic.");
02451     case Intrinsic::x86_sse_cvttss2si:
02452     case Intrinsic::x86_sse_cvttss2si64:
02453       if (!Subtarget->hasSSE1())
02454         return false;
02455       IsInputDouble = false;
02456       break;
02457     case Intrinsic::x86_sse2_cvttsd2si:
02458     case Intrinsic::x86_sse2_cvttsd2si64:
02459       if (!Subtarget->hasSSE2())
02460         return false;
02461       IsInputDouble = true;
02462       break;
02463     }
02464 
02465     Type *RetTy = II->getCalledFunction()->getReturnType();
02466     MVT VT;
02467     if (!isTypeLegal(RetTy, VT))
02468       return false;
02469 
02470     static const unsigned CvtOpc[2][2][2] = {
02471       { { X86::CVTTSS2SIrr,   X86::VCVTTSS2SIrr   },
02472         { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr }  },
02473       { { X86::CVTTSD2SIrr,   X86::VCVTTSD2SIrr   },
02474         { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr }  }
02475     };
02476     bool HasAVX = Subtarget->hasAVX();
02477     unsigned Opc;
02478     switch (VT.SimpleTy) {
02479     default: llvm_unreachable("Unexpected result type.");
02480     case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
02481     case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
02482     }
02483 
02484     // Check if we can fold insertelement instructions into the convert.
02485     const Value *Op = II->getArgOperand(0);
02486     while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
02487       const Value *Index = IE->getOperand(2);
02488       if (!isa<ConstantInt>(Index))
02489         break;
02490       unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
02491 
02492       if (Idx == 0) {
02493         Op = IE->getOperand(1);
02494         break;
02495       }
02496       Op = IE->getOperand(0);
02497     }
02498 
02499     unsigned Reg = getRegForValue(Op);
02500     if (Reg == 0)
02501       return false;
02502 
02503     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
02504     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
02505       .addReg(Reg);
02506 
02507     updateValueMap(II, ResultReg);
02508     return true;
02509   }
02510   }
02511 }
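
// Illustrative sketch (editorial): the with-overflow intrinsics handled
// above, e.g.
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
// become the plain arithmetic instruction followed by a SETcc into the
// second (consecutive) result register:
//   %sum = ADD32rr %a, %b
//   %ov  = SETOr            ; SETBr for the unsigned add/sub variants
// Adding or subtracting the constant 1 is instead turned into INC/DEC via
// the Opc table in that case.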
02512 
02513 bool X86FastISel::fastLowerArguments() {
02514   if (!FuncInfo.CanLowerReturn)
02515     return false;
02516 
02517   const Function *F = FuncInfo.Fn;
02518   if (F->isVarArg())
02519     return false;
02520 
02521   CallingConv::ID CC = F->getCallingConv();
02522   if (CC != CallingConv::C)
02523     return false;
02524 
02525   if (Subtarget->isCallingConvWin64(CC))
02526     return false;
02527 
02528   if (!Subtarget->is64Bit())
02529     return false;
02530   
02531   // Only handle simple cases: up to 6 i32/i64 and 8 f32/f64 scalar arguments.
02532   unsigned GPRCnt = 0;
02533   unsigned FPRCnt = 0;
02534   unsigned Idx = 0;
02535   for (auto const &Arg : F->args()) {
02536     // The first argument is at index 1.
02537     ++Idx;
02538     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
02539         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
02540         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
02541         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
02542       return false;
02543 
02544     Type *ArgTy = Arg.getType();
02545     if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
02546       return false;
02547 
02548     EVT ArgVT = TLI.getValueType(ArgTy);
02549     if (!ArgVT.isSimple()) return false;
02550     switch (ArgVT.getSimpleVT().SimpleTy) {
02551     default: return false;
02552     case MVT::i32:
02553     case MVT::i64:
02554       ++GPRCnt;
02555       break;
02556     case MVT::f32:
02557     case MVT::f64:
02558       if (!Subtarget->hasSSE1())
02559         return false;
02560       ++FPRCnt;
02561       break;
02562     }
02563 
02564     if (GPRCnt > 6)
02565       return false;
02566 
02567     if (FPRCnt > 8)
02568       return false;
02569   }
02570 
02571   static const MCPhysReg GPR32ArgRegs[] = {
02572     X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
02573   };
02574   static const MCPhysReg GPR64ArgRegs[] = {
02575     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
02576   };
02577   static const MCPhysReg XMMArgRegs[] = {
02578     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02579     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02580   };
02581 
02582   unsigned GPRIdx = 0;
02583   unsigned FPRIdx = 0;
02584   for (auto const &Arg : F->args()) {
02585     MVT VT = TLI.getSimpleValueType(Arg.getType());
02586     const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
02587     unsigned SrcReg;
02588     switch (VT.SimpleTy) {
02589     default: llvm_unreachable("Unexpected value type.");
02590     case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
02591     case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
02592     case MVT::f32: // fall-through
02593     case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
02594     }
02595     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
02596     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
02597     // Without this, EmitLiveInCopies may eliminate the livein if its only
02598     // use is a bitcast (which isn't turned into an instruction).
02599     unsigned ResultReg = createResultReg(RC);
02600     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02601             TII.get(TargetOpcode::COPY), ResultReg)
02602       .addReg(DstReg, getKillRegState(true));
02603     updateValueMap(&Arg, ResultReg);
02604   }
02605   return true;
02606 }
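
// Editorial note: with the SysV x86-64 convention accepted above, a function
// such as
//   define i32 @f(i32 %a, i64 %b, double %c)
// takes %a from EDI, %b from RSI and %c from XMM0; each live-in physical
// register is then copied into a fresh virtual register, as explained by the
// FIXME above.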
02607 
02608 static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
02609                                            CallingConv::ID CC,
02610                                            ImmutableCallSite *CS) {
02611   if (Subtarget->is64Bit())
02612     return 0;
02613   if (Subtarget->getTargetTriple().isOSMSVCRT())
02614     return 0;
02615   if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
02616       CC == CallingConv::HiPE)
02617     return 0;
02618   if (CS && !CS->paramHasAttr(1, Attribute::StructRet))
02619     return 0;
02620   if (CS && CS->paramHasAttr(1, Attribute::InReg))
02621     return 0;
02622   return 4;
02623 }
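
// Editorial note: the helper above returns 4 only for 32-bit, non-MSVCRT
// targets using an ordinary C-like convention where the callee takes an sret
// pointer on the stack; such callees pop that pointer (4 bytes) themselves on
// return, and the caller must account for it when tearing down the call
// frame.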
02624 
02625 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
02626   auto &OutVals       = CLI.OutVals;
02627   auto &OutFlags      = CLI.OutFlags;
02628   auto &OutRegs       = CLI.OutRegs;
02629   auto &Ins           = CLI.Ins;
02630   auto &InRegs        = CLI.InRegs;
02631   CallingConv::ID CC  = CLI.CallConv;
02632   bool &IsTailCall    = CLI.IsTailCall;
02633   bool IsVarArg       = CLI.IsVarArg;
02634   const Value *Callee = CLI.Callee;
02635   const char *SymName = CLI.SymName;
02636 
02637   bool Is64Bit        = Subtarget->is64Bit();
02638   bool IsWin64        = Subtarget->isCallingConvWin64(CC);
02639 
02640   // Handle only C, fastcc, and webkit_js calling conventions for now.
02641   switch (CC) {
02642   default: return false;
02643   case CallingConv::C:
02644   case CallingConv::Fast:
02645   case CallingConv::WebKit_JS:
02646   case CallingConv::X86_FastCall:
02647   case CallingConv::X86_64_Win64:
02648   case CallingConv::X86_64_SysV:
02649     break;
02650   }
02651 
02652   // Allow SelectionDAG isel to handle tail calls.
02653   if (IsTailCall)
02654     return false;
02655 
02656   // fastcc with -tailcallopt is intended to provide a guaranteed
02657   // tail call optimization. Fastisel doesn't know how to do that.
02658   if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
02659     return false;
02660 
02661   // Don't know how to handle Win64 varargs yet. Nothing special is needed for
02662   // x86-32, and the special handling for non-Win64 x86-64 varargs comes later.
02663   if (IsVarArg && IsWin64)
02664     return false;
02665 
02666   // Don't know about inalloca yet.
02667   if (CLI.CS && CLI.CS->hasInAllocaArgument())
02668     return false;
02669 
02670   // Fast-isel doesn't know about callee-pop yet.
02671   if (X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
02672                        TM.Options.GuaranteedTailCallOpt))
02673     return false;
02674 
02675   // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
02676   // instruction. This is safe because it is common to all FastISel supported
02677   // calling conventions on x86.
02678   for (int i = 0, e = OutVals.size(); i != e; ++i) {
02679     Value *&Val = OutVals[i];
02680     ISD::ArgFlagsTy Flags = OutFlags[i];
02681     if (auto *CI = dyn_cast<ConstantInt>(Val)) {
02682       if (CI->getBitWidth() < 32) {
02683         if (Flags.isSExt())
02684           Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
02685         else
02686           Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
02687       }
02688     }
02689 
02690     // Passing bools around ends up doing a trunc to i1 and passing it.
02691     // Codegen this as an argument + "and 1".
02692     if (auto *TI = dyn_cast<TruncInst>(Val)) {
02693       if (TI->getType()->isIntegerTy(1) && CLI.CS &&
02694           (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
02695           TI->hasOneUse()) {
02696         Val = cast<TruncInst>(Val)->getOperand(0);
02697         unsigned ResultReg = getRegForValue(Val);
02698 
02699         if (!ResultReg)
02700           return false;
02701 
02702         MVT ArgVT;
02703         if (!isTypeLegal(Val->getType(), ArgVT))
02704           return false;
02705 
02706         ResultReg =
02707           fastEmit_ri(ArgVT, ArgVT, ISD::AND, ResultReg, Val->hasOneUse(), 1);
02708 
02709         if (!ResultReg)
02710           return false;
02711         updateValueMap(Val, ResultReg);
02712       }
02713     }
02714   }
02715 
02716   // Analyze operands of the call, assigning locations to each operand.
02717   SmallVector<CCValAssign, 16> ArgLocs;
02718   CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
02719 
02720   // Allocate shadow area for Win64
02721   if (IsWin64)
02722     CCInfo.AllocateStack(32, 8);
02723 
02724   SmallVector<MVT, 16> OutVTs;
02725   for (auto *Val : OutVals) {
02726     MVT VT;
02727     if (!isTypeLegal(Val->getType(), VT))
02728       return false;
02729     OutVTs.push_back(VT);
02730   }
02731   CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
02732 
02733   // Get a count of how many bytes are to be pushed on the stack.
02734   unsigned NumBytes = CCInfo.getNextStackOffset();
02735 
02736   // Issue CALLSEQ_START
02737   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
02738   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
02739     .addImm(NumBytes);
02740 
02741   // Walk the register/memloc assignments, inserting copies/loads.
02742   const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
02743       TM.getSubtargetImpl()->getRegisterInfo());
02744   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02745     CCValAssign const &VA = ArgLocs[i];
02746     const Value *ArgVal = OutVals[VA.getValNo()];
02747     MVT ArgVT = OutVTs[VA.getValNo()];
02748 
02749     if (ArgVT == MVT::x86mmx)
02750       return false;
02751 
02752     unsigned ArgReg = getRegForValue(ArgVal);
02753     if (!ArgReg)
02754       return false;
02755 
02756     // Promote the value if needed.
02757     switch (VA.getLocInfo()) {
02758     case CCValAssign::Full: break;
02759     case CCValAssign::SExt: {
02760       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
02761              "Unexpected extend");
02762       bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
02763                                        ArgVT, ArgReg);
02764       assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
02765       ArgVT = VA.getLocVT();
02766       break;
02767     }
02768     case CCValAssign::ZExt: {
02769       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
02770              "Unexpected extend");
02771       bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
02772                                        ArgVT, ArgReg);
02773       assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
02774       ArgVT = VA.getLocVT();
02775       break;
02776     }
02777     case CCValAssign::AExt: {
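            // An any-extend leaves the upper bits unspecified, so if no ANY_EXTEND
            // pattern is available, falling back to either a zero- or sign-extend
            // below is equally correct.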
02778       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
02779              "Unexpected extend");
02780       bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
02781                                        ArgVT, ArgReg);
02782       if (!Emitted)
02783         Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
02784                                     ArgVT, ArgReg);
02785       if (!Emitted)
02786         Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
02787                                     ArgVT, ArgReg);
02788 
02789       assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
02790       ArgVT = VA.getLocVT();
02791       break;
02792     }
02793     case CCValAssign::BCvt: {
02794       ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
02795                           /*TODO: Kill=*/false);
02796       assert(ArgReg && "Failed to emit a bitcast!");
02797       ArgVT = VA.getLocVT();
02798       break;
02799     }
02800     case CCValAssign::VExt:
02801       // VExt has not been implemented, so this should be impossible to reach
02802       // for now.  However, fall back to Selection DAG isel once it is implemented.
02803       return false;
02804     case CCValAssign::FPExt:
02805       llvm_unreachable("Unexpected loc info!");
02806     case CCValAssign::Indirect:
02807       // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
02808       // support this.
02809       return false;
02810     }
02811 
02812     if (VA.isRegLoc()) {
02813       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02814               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
02815       OutRegs.push_back(VA.getLocReg());
02816     } else {
02817       assert(VA.isMemLoc());
02818 
02819       // Don't emit stores for undef values.
02820       if (isa<UndefValue>(ArgVal))
02821         continue;
02822 
02823       unsigned LocMemOffset = VA.getLocMemOffset();
02824       X86AddressMode AM;
02825       AM.Base.Reg = RegInfo->getStackRegister();
02826       AM.Disp = LocMemOffset;
02827       ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
02828       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
02829       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
02830         MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore,
02831         ArgVT.getStoreSize(), Alignment);
02832       if (Flags.isByVal()) {
02833         X86AddressMode SrcAM;
02834         SrcAM.Base.Reg = ArgReg;
02835         if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
02836           return false;
02837       } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
02838         // If this is a really simple value, emit this with the Value* version
02839         // of X86FastEmitStore.  If it isn't simple, we don't want to do this,
02840         // as it can cause us to reevaluate the argument.
02841         if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
02842           return false;
02843       } else {
02844         bool ValIsKill = hasTrivialKill(ArgVal);
02845         if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
02846           return false;
02847       }
02848     }
02849   }
02850 
02851   // ELF / PIC requires the GOT pointer to be live in EBX before making
02852   // function calls through the PLT.
02853   if (Subtarget->isPICStyleGOT()) {
02854     unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
02855     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02856             TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
02857   }
02858 
02859   if (Is64Bit && IsVarArg && !IsWin64) {
02860     // From AMD64 ABI document:
02861     // For calls that may call functions that use varargs or stdargs
02862     // (prototype-less calls or calls to functions containing ellipsis (...) in
02863     // the declaration) %al is used as a hidden argument to specify the number
02864     // of SSE registers used. The contents of %al do not need to match exactly
02865     // the number of registers, but must be an upper bound on the number of SSE
02866     // registers used and must be in the range 0 - 8 inclusive.
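          // For example, a variadic call that passes a single double in XMM0,
          // such as printf("%f\n", d), sets AL to 1 here.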
02867 
02868     // Count the number of XMM registers allocated.
02869     static const MCPhysReg XMMArgRegs[] = {
02870       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
02871       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
02872     };
02873     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
02874     assert((Subtarget->hasSSE1() || !NumXMMRegs)
02875            && "SSE registers cannot be used when SSE is disabled");
02876     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
02877             X86::AL).addImm(NumXMMRegs);
02878   }
02879 
02880   // Materialize callee address in a register. FIXME: GV address can be
02881   // handled with a CALLpcrel32 instead.
02882   X86AddressMode CalleeAM;
02883   if (!X86SelectCallAddress(Callee, CalleeAM))
02884     return false;
02885 
02886   unsigned CalleeOp = 0;
02887   const GlobalValue *GV = nullptr;
02888   if (CalleeAM.GV != nullptr) {
02889     GV = CalleeAM.GV;
02890   } else if (CalleeAM.Base.Reg != 0) {
02891     CalleeOp = CalleeAM.Base.Reg;
02892   } else
02893     return false;
02894 
02895   // Issue the call.
02896   MachineInstrBuilder MIB;
02897   if (CalleeOp) {
02898     // Register-indirect call.
02899     unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
02900     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
02901       .addReg(CalleeOp);
02902   } else {
02903     // Direct call.
02904     assert(GV && "Not a direct call");
02905     unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
02906 
02907     // See if we need any target-specific flags on the GV operand.
02908     unsigned char OpFlags = 0;
02909 
02910     // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
02911     // external symbols must go through the PLT in PIC mode.  If the symbol
02912     // has hidden or protected visibility, or if it is static or local, then
02913     // we don't need to use the PLT - we can directly call it.
02914     if (Subtarget->isTargetELF() &&
02915         TM.getRelocationModel() == Reloc::PIC_ &&
02916         GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
02917       OpFlags = X86II::MO_PLT;
02918     } else if (Subtarget->isPICStyleStubAny() &&
02919                (GV->isDeclaration() || GV->isWeakForLinker()) &&
02920                (!Subtarget->getTargetTriple().isMacOSX() ||
02921                 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
02922       // PC-relative references to external symbols should go through $stub,
02923       // unless we're building with the Leopard linker or later, which
02924       // automatically synthesizes these stubs.
02925       OpFlags = X86II::MO_DARWIN_STUB;
02926     }
02927 
02928     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
02929     if (SymName)
02930       MIB.addExternalSymbol(SymName, OpFlags);
02931     else
02932       MIB.addGlobalAddress(GV, 0, OpFlags);
02933   }
02934 
02935   // Add a register mask operand representing the call-preserved registers.
02936   // Proper defs for return values will be added by setPhysRegsDeadExcept().
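        // The mask records which physical registers are preserved across a call
        // with this calling convention; everything outside it is considered
        // clobbered by the call.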
02937   MIB.addRegMask(TRI.getCallPreservedMask(CC));
02938 
02939   // Add an implicit use GOT pointer in EBX.
02940   if (Subtarget->isPICStyleGOT())
02941     MIB.addReg(X86::EBX, RegState::Implicit);
02942 
02943   if (Is64Bit && IsVarArg && !IsWin64)
02944     MIB.addReg(X86::AL, RegState::Implicit);
02945 
02946   // Add implicit physical register uses to the call.
02947   for (auto Reg : OutRegs)
02948     MIB.addReg(Reg, RegState::Implicit);
02949 
02950   // Issue CALLSEQ_END
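        // The first immediate is the number of bytes the caller allocated for
        // arguments; the second is how many of those bytes the callee pops on
        // return (nonzero for e.g. stdcall/thiscall on 32-bit x86).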
02951   unsigned NumBytesForCalleeToPop =
02952     computeBytesPoppedByCallee(Subtarget, CC, CLI.CS);
02953   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
02954   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
02955     .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
02956 
02957   // Now handle call return values.
02958   SmallVector<CCValAssign, 16> RVLocs;
02959   CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
02960                     CLI.RetTy->getContext());
02961   CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
02962 
02963   // Copy all of the result registers out of their specified physreg.
02964   unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
02965   for (unsigned i = 0; i != RVLocs.size(); ++i) {
02966     CCValAssign &VA = RVLocs[i];
02967     EVT CopyVT = VA.getValVT();
02968     unsigned CopyReg = ResultReg + i;
02969 
02970     // If this is x86-64 (or an 'inreg' return) and SSE is disabled, we can't
02970     // return FP values in SSE registers.
02971     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
02972         ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
02973       report_fatal_error("SSE register return with SSE disabled");
02974     }
02975 
02976     // If we prefer to use the value in xmm registers, copy it out as f80 and
02977     // use a truncate to move it from fp stack reg to xmm reg.
02978     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
02979         isScalarFPTypeInSSEReg(VA.getValVT())) {
02980       CopyVT = MVT::f80;
02981       CopyReg = createResultReg(&X86::RFP80RegClass);
02982     }
02983 
02984     // Copy out the result.
02985     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02986             TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
02987     InRegs.push_back(VA.getLocReg());
02988 
02989     // Round the f80 to the right size, which also moves it to the appropriate
02990     // xmm register. This is accomplished by storing the f80 value in memory
02991     // and then loading it back.
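          // (There is no instruction that moves a value directly between the x87
          // stack and an XMM register, so the round trip through memory is
          // unavoidable here.)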
02992     if (CopyVT != VA.getValVT()) {
02993       EVT ResVT = VA.getValVT();
02994       unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
02995       unsigned MemSize = ResVT.getSizeInBits()/8;
02996       int FI = MFI.CreateStackObject(MemSize, MemSize, false);
02997       addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
02998                                 TII.get(Opc)), FI)
02999         .addReg(CopyReg);
03000       Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
03001       addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03002                                 TII.get(Opc), ResultReg + i), FI);
03003     }
03004   }
03005 
03006   CLI.ResultReg = ResultReg;
03007   CLI.NumResultRegs = RVLocs.size();
03008   CLI.Call = MIB;
03009 
03010   return true;
03011 }
03012 
03013 bool
03014 X86FastISel::fastSelectInstruction(const Instruction *I)  {
03015   switch (I->getOpcode()) {
03016   default: break;
03017   case Instruction::Load:
03018     return X86SelectLoad(I);
03019   case Instruction::Store:
03020     return X86SelectStore(I);
03021   case Instruction::Ret:
03022     return X86SelectRet(I);
03023   case Instruction::ICmp:
03024   case Instruction::FCmp:
03025     return X86SelectCmp(I);
03026   case Instruction::ZExt:
03027     return X86SelectZExt(I);
03028   case Instruction::Br:
03029     return X86SelectBranch(I);
03030   case Instruction::LShr:
03031   case Instruction::AShr:
03032   case Instruction::Shl:
03033     return X86SelectShift(I);
03034   case Instruction::SDiv:
03035   case Instruction::UDiv:
03036   case Instruction::SRem:
03037   case Instruction::URem:
03038     return X86SelectDivRem(I);
03039   case Instruction::Select:
03040     return X86SelectSelect(I);
03041   case Instruction::Trunc:
03042     return X86SelectTrunc(I);
03043   case Instruction::FPExt:
03044     return X86SelectFPExt(I);
03045   case Instruction::FPTrunc:
03046     return X86SelectFPTrunc(I);
03047   case Instruction::IntToPtr: // Deliberate fall-through.
03048   case Instruction::PtrToInt: {
03049     EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
03050     EVT DstVT = TLI.getValueType(I->getType());
03051     if (DstVT.bitsGT(SrcVT))
03052       return X86SelectZExt(I);
03053     if (DstVT.bitsLT(SrcVT))
03054       return X86SelectTrunc(I);
03055     unsigned Reg = getRegForValue(I->getOperand(0));
03056     if (Reg == 0) return false;
03057     updateValueMap(I, Reg);
03058     return true;
03059   }
03060   }
03061 
03062   return false;
03063 }
03064 
03065 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
03066   if (VT > MVT::i64)
03067     return 0;
03068 
03069   uint64_t Imm = CI->getZExtValue();
03070   if (Imm == 0) {
03071     unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
03072     switch (VT.SimpleTy) {
03073     default: llvm_unreachable("Unexpected value type");
03074     case MVT::i1:
03075     case MVT::i8:
03076       return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
03077                                         X86::sub_8bit);
03078     case MVT::i16:
03079       return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
03080                                         X86::sub_16bit);
03081     case MVT::i32:
03082       return SrcReg;
03083     case MVT::i64: {
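            // On x86-64, writing a 32-bit register implicitly zeroes the upper 32
            // bits, so a 64-bit zero is formed from the MOV32r0 above by wrapping
            // it in SUBREG_TO_REG.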
03084       unsigned ResultReg = createResultReg(&X86::GR64RegClass);
03085       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03086               TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
03087         .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
03088       return ResultReg;
03089     }
03090     }
03091   }
03092 
03093   unsigned Opc = 0;
03094   switch (VT.SimpleTy) {
03095   default: llvm_unreachable("Unexpected value type");
03096   case MVT::i1:  VT = MVT::i8; // fall-through
03097   case MVT::i8:  Opc = X86::MOV8ri;  break;
03098   case MVT::i16: Opc = X86::MOV16ri; break;
03099   case MVT::i32: Opc = X86::MOV32ri; break;
03100   case MVT::i64: {
03101     if (isUInt<32>(Imm))
03102       Opc = X86::MOV32ri;
03103     else if (isInt<32>(Imm))
03104       Opc = X86::MOV64ri32;
03105     else
03106       Opc = X86::MOV64ri;
03107     break;
03108   }
03109   }
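        // If the immediate fits in 32 unsigned bits, the shorter MOV32ri is used
        // and the implicit zero-extension of 32-bit writes provides the upper
        // half via SUBREG_TO_REG.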
03110   if (VT == MVT::i64 && Opc == X86::MOV32ri) {
03111     unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
03112     unsigned ResultReg = createResultReg(&X86::GR64RegClass);
03113     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03114             TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
03115       .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
03116     return ResultReg;
03117   }
03118   return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
03119 }
03120 
03121 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
03122   if (CFP->isNullValue())
03123     return fastMaterializeFloatZero(CFP);
03124 
03125   // Can't handle alternate code models yet.
03126   CodeModel::Model CM = TM.getCodeModel();
03127   if (CM != CodeModel::Small && CM != CodeModel::Large)
03128     return 0;
03129 
03130   // Get opcode and regclass of the output for the given load instruction.
03131   unsigned Opc = 0;
03132   const TargetRegisterClass *RC = nullptr;
03133   switch (VT.SimpleTy) {
03134   default: return 0;
03135   case MVT::f32:
03136     if (X86ScalarSSEf32) {
03137       Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
03138       RC  = &X86::FR32RegClass;
03139     } else {
03140       Opc = X86::LD_Fp32m;
03141       RC  = &X86::RFP32RegClass;
03142     }
03143     break;
03144   case MVT::f64:
03145     if (X86ScalarSSEf64) {
03146       Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
03147       RC  = &X86::FR64RegClass;
03148     } else {
03149       Opc = X86::LD_Fp64m;
03150       RC  = &X86::RFP64RegClass;
03151     }
03152     break;
03153   case MVT::f80:
03154     // No f80 support yet.
03155     return 0;
03156   }
03157 
03158   // MachineConstantPool wants an explicit alignment.
03159   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
03160   if (Align == 0) {
03161     // Alignment of vector types. FIXME!
03162     Align = DL.getTypeAllocSize(CFP->getType());
03163   }
03164 
03165   // x86-32 PIC requires a PIC base register for constant pools.
03166   unsigned PICBase = 0;
03167   unsigned char OpFlag = 0;
03168   if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
03169     OpFlag = X86II::MO_PIC_BASE_OFFSET;
03170     PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
03171   } else if (Subtarget->isPICStyleGOT()) {
03172     OpFlag = X86II::MO_GOTOFF;
03173     PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
03174   } else if (Subtarget->isPICStyleRIPRel() &&
03175              TM.getCodeModel() == CodeModel::Small) {
03176     PICBase = X86::RIP;
03177   }
03178 
03179   // Create the load from the constant pool.
03180   unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
03181   unsigned ResultReg = createResultReg(RC);
03182 
03183   if (CM == CodeModel::Large) {
03184     unsigned AddrReg = createResultReg(&X86::GR64RegClass);
03185     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
03186             AddrReg)
03187       .addConstantPoolIndex(CPI, 0, OpFlag);
03188     MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03189                                       TII.get(Opc), ResultReg);
03190     addDirectMem(MIB, AddrReg);
03191     MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
03192       MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
03193       TM.getSubtargetImpl()->getDataLayout()->getPointerSize(), Align);
03194     MIB->addMemOperand(*FuncInfo.MF, MMO);
03195     return ResultReg;
03196   }
03197 
03198   addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03199                                    TII.get(Opc), ResultReg),
03200                            CPI, PICBase, OpFlag);
03201   return ResultReg;
03202 }
03203 
03204 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
03205   // Can't handle alternate code models yet.
03206   if (TM.getCodeModel() != CodeModel::Small)
03207     return 0;
03208 
03209   // Materialize addresses with LEA/MOV instructions.
03210   X86AddressMode AM;
03211   if (X86SelectAddress(GV, AM)) {
03212     // If the expression is just a basereg, then we're done, otherwise we need
03213     // to emit an LEA.
03214     if (AM.BaseType == X86AddressMode::RegBase &&
03215         AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
03216       return AM.Base.Reg;
03217 
03218     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
03219     if (TM.getRelocationModel() == Reloc::Static &&
03220         TLI.getPointerTy() == MVT::i64) {
03221       // The displacement could be more than 32 bits away, so we need to use
03222       // an instruction with a 64-bit immediate.
03223       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
03224               ResultReg)
03225         .addGlobalAddress(GV);
03226     } else {
03227       unsigned Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
03228       addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03229                              TII.get(Opc), ResultReg), AM);
03230     }
03231     return ResultReg;
03232   }
03233   return 0;
03234 }
03235 
03236 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
03237   EVT CEVT = TLI.getValueType(C->getType(), true);
03238 
03239   // Only handle simple types.
03240   if (!CEVT.isSimple())
03241     return 0;
03242   MVT VT = CEVT.getSimpleVT();
03243 
03244   if (const auto *CI = dyn_cast<ConstantInt>(C))
03245     return X86MaterializeInt(CI, VT);
03246   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
03247     return X86MaterializeFP(CFP, VT);
03248   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
03249     return X86MaterializeGV(GV, VT);
03250 
03251   return 0;
03252 }
03253 
03254 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
03255   // Fail on dynamic allocas. At this point, getRegForValue has already
03256   // checked its CSE maps, so if we're here trying to handle a dynamic
03257   // alloca, we're not going to succeed. X86SelectAddress has a
03258   // check for dynamic allocas, because it's called directly from
03259   // various places, but fastMaterializeAlloca also needs a check
03260   // in order to avoid recursion between getRegForValue,
03261   // X86SelectAddress, and fastMaterializeAlloca.
03262   if (!FuncInfo.StaticAllocaMap.count(C))
03263     return 0;
03264   assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
03265 
03266   X86AddressMode AM;
03267   if (!X86SelectAddress(C, AM))
03268     return 0;
03269   unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
03270   const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
03271   unsigned ResultReg = createResultReg(RC);
03272   addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
03273                          TII.get(Opc), ResultReg), AM);
03274   return ResultReg;
03275 }
03276 
03277 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
03278   MVT VT;
03279   if (!isTypeLegal(CF->getType(), VT))
03280     return 0;
03281 
03282   // Get opcode and regclass for the given zero.
03283   unsigned Opc = 0;
03284   const TargetRegisterClass *RC = nullptr;
03285   switch (VT.SimpleTy) {
03286   default: return 0;
03287   case MVT::f32:
03288     if (X86ScalarSSEf32) {
03289       Opc = X86::FsFLD0SS;
03290       RC  = &X86::FR32RegClass;
03291     } else {
03292       Opc = X86::LD_Fp032;
03293       RC  = &X86::RFP32RegClass;
03294     }
03295     break;
03296   case MVT::f64:
03297     if (X86ScalarSSEf64) {
03298       Opc = X86::FsFLD0SD;
03299       RC  = &X86::FR64RegClass;
03300     } else {
03301       Opc = X86::LD_Fp064;
03302       RC  = &X86::RFP64RegClass;
03303     }
03304     break;
03305   case MVT::f80:
03306     // No f80 support yet.
03307     return 0;
03308   }
03309 
03310   unsigned ResultReg = createResultReg(RC);
03311   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
03312   return ResultReg;
03313 }
03314 
03315 
03316 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
03317                                       const LoadInst *LI) {
03318   const Value *Ptr = LI->getPointerOperand();
03319   X86AddressMode AM;
03320   if (!X86SelectAddress(Ptr, AM))
03321     return false;
03322 
03323   const X86InstrInfo &XII = (const X86InstrInfo&)TII;
03324 
03325   unsigned Size = DL.getTypeAllocSize(LI->getType());
03326   unsigned Alignment = LI->getAlignment();
03327 
03328   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
03329     Alignment = DL.getABITypeAlignment(LI->getType());
03330 
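        // Expand the address mode into the standard five x86 memory operands
        // (base, scale, index, displacement, segment) so the load can be folded
        // into MI as a memory operand.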
03331   SmallVector<MachineOperand, 8> AddrOps;
03332   AM.getFullAddress(AddrOps);
03333 
03334   MachineInstr *Result =
03335     XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
03336   if (!Result)
03337     return false;
03338 
03339   Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
03340   FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
03341   MI->eraseFromParent();
03342   return true;
03343 }
03344 
03345 
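      // Target hook: creates the X86 FastISel instance used when fast instruction
      // selection is enabled for a function.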
03346 namespace llvm {
03347   FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
03348                                 const TargetLibraryInfo *libInfo) {
03349     return new X86FastISel(funcInfo, libInfo);
03350   }
03351 }