LLVM API Documentation
00001 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines the X86-specific support for the FastISel class. Much 00011 // of the target-specific code is generated by tablegen in the file 00012 // X86GenFastISel.inc, which is #included here. 00013 // 00014 //===----------------------------------------------------------------------===// 00015 00016 #include "X86.h" 00017 #include "X86CallingConv.h" 00018 #include "X86InstrBuilder.h" 00019 #include "X86InstrInfo.h" 00020 #include "X86MachineFunctionInfo.h" 00021 #include "X86RegisterInfo.h" 00022 #include "X86Subtarget.h" 00023 #include "X86TargetMachine.h" 00024 #include "llvm/Analysis/BranchProbabilityInfo.h" 00025 #include "llvm/CodeGen/Analysis.h" 00026 #include "llvm/CodeGen/FastISel.h" 00027 #include "llvm/CodeGen/FunctionLoweringInfo.h" 00028 #include "llvm/CodeGen/MachineConstantPool.h" 00029 #include "llvm/CodeGen/MachineFrameInfo.h" 00030 #include "llvm/CodeGen/MachineRegisterInfo.h" 00031 #include "llvm/IR/CallSite.h" 00032 #include "llvm/IR/CallingConv.h" 00033 #include "llvm/IR/DerivedTypes.h" 00034 #include "llvm/IR/GetElementPtrTypeIterator.h" 00035 #include "llvm/IR/GlobalAlias.h" 00036 #include "llvm/IR/GlobalVariable.h" 00037 #include "llvm/IR/Instructions.h" 00038 #include "llvm/IR/IntrinsicInst.h" 00039 #include "llvm/IR/Operator.h" 00040 #include "llvm/Support/ErrorHandling.h" 00041 #include "llvm/Target/TargetOptions.h" 00042 using namespace llvm; 00043 00044 namespace { 00045 00046 class X86FastISel final : public FastISel { 00047 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 00048 /// make the right decision when generating code for different targets. 00049 const X86Subtarget *Subtarget; 00050 00051 /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 00052 /// floating point ops. 00053 /// When SSE is available, use it for f32 operations. 00054 /// When SSE2 is available, use it for f64 operations. 00055 bool X86ScalarSSEf64; 00056 bool X86ScalarSSEf32; 00057 00058 public: 00059 explicit X86FastISel(FunctionLoweringInfo &funcInfo, 00060 const TargetLibraryInfo *libInfo) 00061 : FastISel(funcInfo, libInfo) { 00062 Subtarget = &TM.getSubtarget<X86Subtarget>(); 00063 X86ScalarSSEf64 = Subtarget->hasSSE2(); 00064 X86ScalarSSEf32 = Subtarget->hasSSE1(); 00065 } 00066 00067 bool fastSelectInstruction(const Instruction *I) override; 00068 00069 /// \brief The specified machine instr operand is a vreg, and that 00070 /// vreg is being provided by the specified load instruction. If possible, 00071 /// try to fold the load as an operand to the instruction, returning true if 00072 /// possible. 
00073 bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, 00074 const LoadInst *LI) override; 00075 00076 bool fastLowerArguments() override; 00077 bool fastLowerCall(CallLoweringInfo &CLI) override; 00078 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; 00079 00080 #include "X86GenFastISel.inc" 00081 00082 private: 00083 bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT); 00084 00085 bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO, 00086 unsigned &ResultReg); 00087 00088 bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM, 00089 MachineMemOperand *MMO = nullptr, bool Aligned = false); 00090 bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, 00091 const X86AddressMode &AM, 00092 MachineMemOperand *MMO = nullptr, bool Aligned = false); 00093 00094 bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, 00095 unsigned &ResultReg); 00096 00097 bool X86SelectAddress(const Value *V, X86AddressMode &AM); 00098 bool X86SelectCallAddress(const Value *V, X86AddressMode &AM); 00099 00100 bool X86SelectLoad(const Instruction *I); 00101 00102 bool X86SelectStore(const Instruction *I); 00103 00104 bool X86SelectRet(const Instruction *I); 00105 00106 bool X86SelectCmp(const Instruction *I); 00107 00108 bool X86SelectZExt(const Instruction *I); 00109 00110 bool X86SelectBranch(const Instruction *I); 00111 00112 bool X86SelectShift(const Instruction *I); 00113 00114 bool X86SelectDivRem(const Instruction *I); 00115 00116 bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I); 00117 00118 bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I); 00119 00120 bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I); 00121 00122 bool X86SelectSelect(const Instruction *I); 00123 00124 bool X86SelectTrunc(const Instruction *I); 00125 00126 bool X86SelectFPExt(const Instruction *I); 00127 bool X86SelectFPTrunc(const Instruction *I); 00128 00129 const X86InstrInfo *getInstrInfo() const { 00130 return getTargetMachine()->getSubtargetImpl()->getInstrInfo(); 00131 } 00132 const X86TargetMachine *getTargetMachine() const { 00133 return static_cast<const X86TargetMachine *>(&TM); 00134 } 00135 00136 bool handleConstantAddresses(const Value *V, X86AddressMode &AM); 00137 00138 unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT); 00139 unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT); 00140 unsigned X86MaterializeGV(const GlobalValue *GV,MVT VT); 00141 unsigned fastMaterializeConstant(const Constant *C) override; 00142 00143 unsigned fastMaterializeAlloca(const AllocaInst *C) override; 00144 00145 unsigned fastMaterializeFloatZero(const ConstantFP *CF) override; 00146 00147 /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is 00148 /// computed in an SSE register, not on the X87 floating point stack. 00149 bool isScalarFPTypeInSSEReg(EVT VT) const { 00150 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 00151 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 00152 } 00153 00154 bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false); 00155 00156 bool IsMemcpySmall(uint64_t Len); 00157 00158 bool TryEmitSmallMemcpy(X86AddressMode DestAM, 00159 X86AddressMode SrcAM, uint64_t Len); 00160 00161 bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, 00162 const Value *Cond); 00163 }; 00164 00165 } // end anonymous namespace. 
00166 00167 static std::pair<X86::CondCode, bool> 00168 getX86ConditionCode(CmpInst::Predicate Predicate) { 00169 X86::CondCode CC = X86::COND_INVALID; 00170 bool NeedSwap = false; 00171 switch (Predicate) { 00172 default: break; 00173 // Floating-point Predicates 00174 case CmpInst::FCMP_UEQ: CC = X86::COND_E; break; 00175 case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through 00176 case CmpInst::FCMP_OGT: CC = X86::COND_A; break; 00177 case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through 00178 case CmpInst::FCMP_OGE: CC = X86::COND_AE; break; 00179 case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through 00180 case CmpInst::FCMP_ULT: CC = X86::COND_B; break; 00181 case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through 00182 case CmpInst::FCMP_ULE: CC = X86::COND_BE; break; 00183 case CmpInst::FCMP_ONE: CC = X86::COND_NE; break; 00184 case CmpInst::FCMP_UNO: CC = X86::COND_P; break; 00185 case CmpInst::FCMP_ORD: CC = X86::COND_NP; break; 00186 case CmpInst::FCMP_OEQ: // fall-through 00187 case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break; 00188 00189 // Integer Predicates 00190 case CmpInst::ICMP_EQ: CC = X86::COND_E; break; 00191 case CmpInst::ICMP_NE: CC = X86::COND_NE; break; 00192 case CmpInst::ICMP_UGT: CC = X86::COND_A; break; 00193 case CmpInst::ICMP_UGE: CC = X86::COND_AE; break; 00194 case CmpInst::ICMP_ULT: CC = X86::COND_B; break; 00195 case CmpInst::ICMP_ULE: CC = X86::COND_BE; break; 00196 case CmpInst::ICMP_SGT: CC = X86::COND_G; break; 00197 case CmpInst::ICMP_SGE: CC = X86::COND_GE; break; 00198 case CmpInst::ICMP_SLT: CC = X86::COND_L; break; 00199 case CmpInst::ICMP_SLE: CC = X86::COND_LE; break; 00200 } 00201 00202 return std::make_pair(CC, NeedSwap); 00203 } 00204 00205 static std::pair<unsigned, bool> 00206 getX86SSEConditionCode(CmpInst::Predicate Predicate) { 00207 unsigned CC; 00208 bool NeedSwap = false; 00209 00210 // SSE Condition code mapping: 00211 // 0 - EQ 00212 // 1 - LT 00213 // 2 - LE 00214 // 3 - UNORD 00215 // 4 - NEQ 00216 // 5 - NLT 00217 // 6 - NLE 00218 // 7 - ORD 00219 switch (Predicate) { 00220 default: llvm_unreachable("Unexpected predicate"); 00221 case CmpInst::FCMP_OEQ: CC = 0; break; 00222 case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through 00223 case CmpInst::FCMP_OLT: CC = 1; break; 00224 case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through 00225 case CmpInst::FCMP_OLE: CC = 2; break; 00226 case CmpInst::FCMP_UNO: CC = 3; break; 00227 case CmpInst::FCMP_UNE: CC = 4; break; 00228 case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through 00229 case CmpInst::FCMP_UGE: CC = 5; break; 00230 case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through 00231 case CmpInst::FCMP_UGT: CC = 6; break; 00232 case CmpInst::FCMP_ORD: CC = 7; break; 00233 case CmpInst::FCMP_UEQ: 00234 case CmpInst::FCMP_ONE: CC = 8; break; 00235 } 00236 00237 return std::make_pair(CC, NeedSwap); 00238 } 00239 00240 /// \brief Check if it is possible to fold the condition from the XALU intrinsic 00241 /// into the user. The condition code will only be updated on success. 
00242 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, 00243 const Value *Cond) { 00244 if (!isa<ExtractValueInst>(Cond)) 00245 return false; 00246 00247 const auto *EV = cast<ExtractValueInst>(Cond); 00248 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 00249 return false; 00250 00251 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 00252 MVT RetVT; 00253 const Function *Callee = II->getCalledFunction(); 00254 Type *RetTy = 00255 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 00256 if (!isTypeLegal(RetTy, RetVT)) 00257 return false; 00258 00259 if (RetVT != MVT::i32 && RetVT != MVT::i64) 00260 return false; 00261 00262 X86::CondCode TmpCC; 00263 switch (II->getIntrinsicID()) { 00264 default: return false; 00265 case Intrinsic::sadd_with_overflow: 00266 case Intrinsic::ssub_with_overflow: 00267 case Intrinsic::smul_with_overflow: 00268 case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break; 00269 case Intrinsic::uadd_with_overflow: 00270 case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break; 00271 } 00272 00273 // Check if both instructions are in the same basic block. 00274 if (II->getParent() != I->getParent()) 00275 return false; 00276 00277 // Make sure nothing is in the way 00278 BasicBlock::const_iterator Start = I; 00279 BasicBlock::const_iterator End = II; 00280 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 00281 // We only expect extractvalue instructions between the intrinsic and the 00282 // instruction to be selected. 00283 if (!isa<ExtractValueInst>(Itr)) 00284 return false; 00285 00286 // Check that the extractvalue operand comes from the intrinsic. 00287 const auto *EVI = cast<ExtractValueInst>(Itr); 00288 if (EVI->getAggregateOperand() != II) 00289 return false; 00290 } 00291 00292 CC = TmpCC; 00293 return true; 00294 } 00295 00296 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { 00297 EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true); 00298 if (evt == MVT::Other || !evt.isSimple()) 00299 // Unhandled type. Halt "fast" selection and bail. 00300 return false; 00301 00302 VT = evt.getSimpleVT(); 00303 // For now, require SSE/SSE2 for performing floating-point operations, 00304 // since x87 requires additional work. 00305 if (VT == MVT::f64 && !X86ScalarSSEf64) 00306 return false; 00307 if (VT == MVT::f32 && !X86ScalarSSEf32) 00308 return false; 00309 // Similarly, no f80 support yet. 00310 if (VT == MVT::f80) 00311 return false; 00312 // We only handle legal types. For example, on x86-32 the instruction 00313 // selector contains all of the 64-bit instructions from x86-64, 00314 // under the assumption that i64 won't be used if the target doesn't 00315 // support it. 00316 return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT); 00317 } 00318 00319 #include "X86GenCallingConv.inc" 00320 00321 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT. 00322 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. 00323 /// Return true and the result register by reference if it is possible. 00324 bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, 00325 MachineMemOperand *MMO, unsigned &ResultReg) { 00326 // Get opcode and regclass of the output for the given load instruction. 
00327 unsigned Opc = 0; 00328 const TargetRegisterClass *RC = nullptr; 00329 switch (VT.getSimpleVT().SimpleTy) { 00330 default: return false; 00331 case MVT::i1: 00332 case MVT::i8: 00333 Opc = X86::MOV8rm; 00334 RC = &X86::GR8RegClass; 00335 break; 00336 case MVT::i16: 00337 Opc = X86::MOV16rm; 00338 RC = &X86::GR16RegClass; 00339 break; 00340 case MVT::i32: 00341 Opc = X86::MOV32rm; 00342 RC = &X86::GR32RegClass; 00343 break; 00344 case MVT::i64: 00345 // Must be in x86-64 mode. 00346 Opc = X86::MOV64rm; 00347 RC = &X86::GR64RegClass; 00348 break; 00349 case MVT::f32: 00350 if (X86ScalarSSEf32) { 00351 Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm; 00352 RC = &X86::FR32RegClass; 00353 } else { 00354 Opc = X86::LD_Fp32m; 00355 RC = &X86::RFP32RegClass; 00356 } 00357 break; 00358 case MVT::f64: 00359 if (X86ScalarSSEf64) { 00360 Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm; 00361 RC = &X86::FR64RegClass; 00362 } else { 00363 Opc = X86::LD_Fp64m; 00364 RC = &X86::RFP64RegClass; 00365 } 00366 break; 00367 case MVT::f80: 00368 // No f80 support yet. 00369 return false; 00370 } 00371 00372 ResultReg = createResultReg(RC); 00373 MachineInstrBuilder MIB = 00374 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); 00375 addFullAddress(MIB, AM); 00376 if (MMO) 00377 MIB->addMemOperand(*FuncInfo.MF, MMO); 00378 return true; 00379 } 00380 00381 /// X86FastEmitStore - Emit a machine instruction to store a value Val of 00382 /// type VT. The address is either pre-computed, consisted of a base ptr, Ptr 00383 /// and a displacement offset, or a GlobalAddress, 00384 /// i.e. V. Return true if it is possible. 00385 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, 00386 const X86AddressMode &AM, 00387 MachineMemOperand *MMO, bool Aligned) { 00388 // Get opcode and regclass of the output for the given store instruction. 00389 unsigned Opc = 0; 00390 switch (VT.getSimpleVT().SimpleTy) { 00391 case MVT::f80: // No f80 support yet. 00392 default: return false; 00393 case MVT::i1: { 00394 // Mask out all but lowest bit. 00395 unsigned AndResult = createResultReg(&X86::GR8RegClass); 00396 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 00397 TII.get(X86::AND8ri), AndResult) 00398 .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1); 00399 ValReg = AndResult; 00400 } 00401 // FALLTHROUGH, handling i1 as i8. 00402 case MVT::i8: Opc = X86::MOV8mr; break; 00403 case MVT::i16: Opc = X86::MOV16mr; break; 00404 case MVT::i32: Opc = X86::MOV32mr; break; 00405 case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode. 00406 case MVT::f32: 00407 Opc = X86ScalarSSEf32 ? 00408 (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m; 00409 break; 00410 case MVT::f64: 00411 Opc = X86ScalarSSEf64 ? 00412 (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m; 00413 break; 00414 case MVT::v4f32: 00415 if (Aligned) 00416 Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; 00417 else 00418 Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr; 00419 break; 00420 case MVT::v2f64: 00421 if (Aligned) 00422 Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr; 00423 else 00424 Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr; 00425 break; 00426 case MVT::v4i32: 00427 case MVT::v2i64: 00428 case MVT::v8i16: 00429 case MVT::v16i8: 00430 if (Aligned) 00431 Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr; 00432 else 00433 Opc = Subtarget->hasAVX() ? 
X86::VMOVDQUmr : X86::MOVDQUmr; 00434 break; 00435 } 00436 00437 MachineInstrBuilder MIB = 00438 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); 00439 addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill)); 00440 if (MMO) 00441 MIB->addMemOperand(*FuncInfo.MF, MMO); 00442 00443 return true; 00444 } 00445 00446 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, 00447 const X86AddressMode &AM, 00448 MachineMemOperand *MMO, bool Aligned) { 00449 // Handle 'null' like i32/i64 0. 00450 if (isa<ConstantPointerNull>(Val)) 00451 Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext())); 00452 00453 // If this is a store of a simple constant, fold the constant into the store. 00454 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { 00455 unsigned Opc = 0; 00456 bool Signed = true; 00457 switch (VT.getSimpleVT().SimpleTy) { 00458 default: break; 00459 case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8. 00460 case MVT::i8: Opc = X86::MOV8mi; break; 00461 case MVT::i16: Opc = X86::MOV16mi; break; 00462 case MVT::i32: Opc = X86::MOV32mi; break; 00463 case MVT::i64: 00464 // Must be a 32-bit sign extended value. 00465 if (isInt<32>(CI->getSExtValue())) 00466 Opc = X86::MOV64mi32; 00467 break; 00468 } 00469 00470 if (Opc) { 00471 MachineInstrBuilder MIB = 00472 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); 00473 addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue() 00474 : CI->getZExtValue()); 00475 if (MMO) 00476 MIB->addMemOperand(*FuncInfo.MF, MMO); 00477 return true; 00478 } 00479 } 00480 00481 unsigned ValReg = getRegForValue(Val); 00482 if (ValReg == 0) 00483 return false; 00484 00485 bool ValKill = hasTrivialKill(Val); 00486 return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned); 00487 } 00488 00489 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of 00490 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g. 00491 /// ISD::SIGN_EXTEND). 00492 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, 00493 unsigned Src, EVT SrcVT, 00494 unsigned &ResultReg) { 00495 unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, 00496 Src, /*TODO: Kill=*/false); 00497 if (RR == 0) 00498 return false; 00499 00500 ResultReg = RR; 00501 return true; 00502 } 00503 00504 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { 00505 // Handle constant address. 00506 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 00507 // Can't handle alternate code models yet. 00508 if (TM.getCodeModel() != CodeModel::Small) 00509 return false; 00510 00511 // Can't handle TLS yet. 00512 if (GV->isThreadLocal()) 00513 return false; 00514 00515 // RIP-relative addresses can't have additional register operands, so if 00516 // we've already folded stuff into the addressing mode, just force the 00517 // global value into its own register, which we can use as the basereg. 00518 if (!Subtarget->isPICStyleRIPRel() || 00519 (AM.Base.Reg == 0 && AM.IndexReg == 0)) { 00520 // Okay, we've committed to selecting this global. Set up the address. 00521 AM.GV = GV; 00522 00523 // Allow the subtarget to classify the global. 00524 unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); 00525 00526 // If this reference is relative to the pic base, set it now. 00527 if (isGlobalRelativeToPICBase(GVFlags)) { 00528 // FIXME: How do we know Base.Reg is free?? 
00529 AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); 00530 } 00531 00532 // Unless the ABI requires an extra load, return a direct reference to 00533 // the global. 00534 if (!isGlobalStubReference(GVFlags)) { 00535 if (Subtarget->isPICStyleRIPRel()) { 00536 // Use rip-relative addressing if we can. Above we verified that the 00537 // base and index registers are unused. 00538 assert(AM.Base.Reg == 0 && AM.IndexReg == 0); 00539 AM.Base.Reg = X86::RIP; 00540 } 00541 AM.GVOpFlags = GVFlags; 00542 return true; 00543 } 00544 00545 // Ok, we need to do a load from a stub. If we've already loaded from 00546 // this stub, reuse the loaded pointer, otherwise emit the load now. 00547 DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V); 00548 unsigned LoadReg; 00549 if (I != LocalValueMap.end() && I->second != 0) { 00550 LoadReg = I->second; 00551 } else { 00552 // Issue load from stub. 00553 unsigned Opc = 0; 00554 const TargetRegisterClass *RC = nullptr; 00555 X86AddressMode StubAM; 00556 StubAM.Base.Reg = AM.Base.Reg; 00557 StubAM.GV = GV; 00558 StubAM.GVOpFlags = GVFlags; 00559 00560 // Prepare for inserting code in the local-value area. 00561 SavePoint SaveInsertPt = enterLocalValueArea(); 00562 00563 if (TLI.getPointerTy() == MVT::i64) { 00564 Opc = X86::MOV64rm; 00565 RC = &X86::GR64RegClass; 00566 00567 if (Subtarget->isPICStyleRIPRel()) 00568 StubAM.Base.Reg = X86::RIP; 00569 } else { 00570 Opc = X86::MOV32rm; 00571 RC = &X86::GR32RegClass; 00572 } 00573 00574 LoadReg = createResultReg(RC); 00575 MachineInstrBuilder LoadMI = 00576 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg); 00577 addFullAddress(LoadMI, StubAM); 00578 00579 // Ok, back to normal mode. 00580 leaveLocalValueArea(SaveInsertPt); 00581 00582 // Prevent loading GV stub multiple times in same MBB. 00583 LocalValueMap[V] = LoadReg; 00584 } 00585 00586 // Now construct the final address. Note that the Disp, Scale, 00587 // and Index values may already be set here. 00588 AM.Base.Reg = LoadReg; 00589 AM.GV = nullptr; 00590 return true; 00591 } 00592 } 00593 00594 // If all else fails, try to materialize the value in a register. 00595 if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { 00596 if (AM.Base.Reg == 0) { 00597 AM.Base.Reg = getRegForValue(V); 00598 return AM.Base.Reg != 0; 00599 } 00600 if (AM.IndexReg == 0) { 00601 assert(AM.Scale == 1 && "Scale with no index!"); 00602 AM.IndexReg = getRegForValue(V); 00603 return AM.IndexReg != 0; 00604 } 00605 } 00606 00607 return false; 00608 } 00609 00610 /// X86SelectAddress - Attempt to fill in an address from the given value. 00611 /// 00612 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { 00613 SmallVector<const Value *, 32> GEPs; 00614 redo_gep: 00615 const User *U = nullptr; 00616 unsigned Opcode = Instruction::UserOp1; 00617 if (const Instruction *I = dyn_cast<Instruction>(V)) { 00618 // Don't walk into other basic blocks; it's possible we haven't 00619 // visited them yet, so the instructions may not yet be assigned 00620 // virtual registers. 
00621 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) || 00622 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 00623 Opcode = I->getOpcode(); 00624 U = I; 00625 } 00626 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { 00627 Opcode = C->getOpcode(); 00628 U = C; 00629 } 00630 00631 if (PointerType *Ty = dyn_cast<PointerType>(V->getType())) 00632 if (Ty->getAddressSpace() > 255) 00633 // Fast instruction selection doesn't support the special 00634 // address spaces. 00635 return false; 00636 00637 switch (Opcode) { 00638 default: break; 00639 case Instruction::BitCast: 00640 // Look past bitcasts. 00641 return X86SelectAddress(U->getOperand(0), AM); 00642 00643 case Instruction::IntToPtr: 00644 // Look past no-op inttoptrs. 00645 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) 00646 return X86SelectAddress(U->getOperand(0), AM); 00647 break; 00648 00649 case Instruction::PtrToInt: 00650 // Look past no-op ptrtoints. 00651 if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) 00652 return X86SelectAddress(U->getOperand(0), AM); 00653 break; 00654 00655 case Instruction::Alloca: { 00656 // Do static allocas. 00657 const AllocaInst *A = cast<AllocaInst>(V); 00658 DenseMap<const AllocaInst*, int>::iterator SI = 00659 FuncInfo.StaticAllocaMap.find(A); 00660 if (SI != FuncInfo.StaticAllocaMap.end()) { 00661 AM.BaseType = X86AddressMode::FrameIndexBase; 00662 AM.Base.FrameIndex = SI->second; 00663 return true; 00664 } 00665 break; 00666 } 00667 00668 case Instruction::Add: { 00669 // Adds of constants are common and easy enough. 00670 if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { 00671 uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue(); 00672 // They have to fit in the 32-bit signed displacement field though. 00673 if (isInt<32>(Disp)) { 00674 AM.Disp = (uint32_t)Disp; 00675 return X86SelectAddress(U->getOperand(0), AM); 00676 } 00677 } 00678 break; 00679 } 00680 00681 case Instruction::GetElementPtr: { 00682 X86AddressMode SavedAM = AM; 00683 00684 // Pattern-match simple GEPs. 00685 uint64_t Disp = (int32_t)AM.Disp; 00686 unsigned IndexReg = AM.IndexReg; 00687 unsigned Scale = AM.Scale; 00688 gep_type_iterator GTI = gep_type_begin(U); 00689 // Iterate through the indices, folding what we can. Constants can be 00690 // folded, and one dynamic index can be handled, if the scale is supported. 00691 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); 00692 i != e; ++i, ++GTI) { 00693 const Value *Op = *i; 00694 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 00695 const StructLayout *SL = DL.getStructLayout(STy); 00696 Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue()); 00697 continue; 00698 } 00699 00700 // A array/variable index is always of the form i*S where S is the 00701 // constant scale size. See if we can push the scale into immediates. 00702 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); 00703 for (;;) { 00704 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 00705 // Constant-offset addressing. 00706 Disp += CI->getSExtValue() * S; 00707 break; 00708 } 00709 if (canFoldAddIntoGEP(U, Op)) { 00710 // A compatible add with a constant operand. Fold the constant. 00711 ConstantInt *CI = 00712 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); 00713 Disp += CI->getSExtValue() * S; 00714 // Iterate on the other operand. 
00715 Op = cast<AddOperator>(Op)->getOperand(0); 00716 continue; 00717 } 00718 if (IndexReg == 0 && 00719 (!AM.GV || !Subtarget->isPICStyleRIPRel()) && 00720 (S == 1 || S == 2 || S == 4 || S == 8)) { 00721 // Scaled-index addressing. 00722 Scale = S; 00723 IndexReg = getRegForGEPIndex(Op).first; 00724 if (IndexReg == 0) 00725 return false; 00726 break; 00727 } 00728 // Unsupported. 00729 goto unsupported_gep; 00730 } 00731 } 00732 00733 // Check for displacement overflow. 00734 if (!isInt<32>(Disp)) 00735 break; 00736 00737 AM.IndexReg = IndexReg; 00738 AM.Scale = Scale; 00739 AM.Disp = (uint32_t)Disp; 00740 GEPs.push_back(V); 00741 00742 if (const GetElementPtrInst *GEP = 00743 dyn_cast<GetElementPtrInst>(U->getOperand(0))) { 00744 // Ok, the GEP indices were covered by constant-offset and scaled-index 00745 // addressing. Update the address state and move on to examining the base. 00746 V = GEP; 00747 goto redo_gep; 00748 } else if (X86SelectAddress(U->getOperand(0), AM)) { 00749 return true; 00750 } 00751 00752 // If we couldn't merge the gep value into this addr mode, revert back to 00753 // our address and just match the value instead of completely failing. 00754 AM = SavedAM; 00755 00756 for (SmallVectorImpl<const Value *>::reverse_iterator 00757 I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I) 00758 if (handleConstantAddresses(*I, AM)) 00759 return true; 00760 00761 return false; 00762 unsupported_gep: 00763 // Ok, the GEP indices weren't all covered. 00764 break; 00765 } 00766 } 00767 00768 return handleConstantAddresses(V, AM); 00769 } 00770 00771 /// X86SelectCallAddress - Attempt to fill in an address from the given value. 00772 /// 00773 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { 00774 const User *U = nullptr; 00775 unsigned Opcode = Instruction::UserOp1; 00776 const Instruction *I = dyn_cast<Instruction>(V); 00777 // Record if the value is defined in the same basic block. 00778 // 00779 // This information is crucial to know whether or not folding an 00780 // operand is valid. 00781 // Indeed, FastISel generates or reuses a virtual register for all 00782 // operands of all instructions it selects. Obviously, the definition and 00783 // its uses must use the same virtual register otherwise the produced 00784 // code is incorrect. 00785 // Before instruction selection, FunctionLoweringInfo::set sets the virtual 00786 // registers for values that are alive across basic blocks. This ensures 00787 // that the values are consistently set between across basic block, even 00788 // if different instruction selection mechanisms are used (e.g., a mix of 00789 // SDISel and FastISel). 00790 // For values local to a basic block, the instruction selection process 00791 // generates these virtual registers with whatever method is appropriate 00792 // for its needs. In particular, FastISel and SDISel do not share the way 00793 // local virtual registers are set. 00794 // Therefore, this is impossible (or at least unsafe) to share values 00795 // between basic blocks unless they use the same instruction selection 00796 // method, which is not guarantee for X86. 00797 // Moreover, things like hasOneUse could not be used accurately, if we 00798 // allow to reference values across basic blocks whereas they are not 00799 // alive across basic blocks initially. 
00800 bool InMBB = true; 00801 if (I) { 00802 Opcode = I->getOpcode(); 00803 U = I; 00804 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); 00805 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { 00806 Opcode = C->getOpcode(); 00807 U = C; 00808 } 00809 00810 switch (Opcode) { 00811 default: break; 00812 case Instruction::BitCast: 00813 // Look past bitcasts if its operand is in the same BB. 00814 if (InMBB) 00815 return X86SelectCallAddress(U->getOperand(0), AM); 00816 break; 00817 00818 case Instruction::IntToPtr: 00819 // Look past no-op inttoptrs if its operand is in the same BB. 00820 if (InMBB && 00821 TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) 00822 return X86SelectCallAddress(U->getOperand(0), AM); 00823 break; 00824 00825 case Instruction::PtrToInt: 00826 // Look past no-op ptrtoints if its operand is in the same BB. 00827 if (InMBB && 00828 TLI.getValueType(U->getType()) == TLI.getPointerTy()) 00829 return X86SelectCallAddress(U->getOperand(0), AM); 00830 break; 00831 } 00832 00833 // Handle constant address. 00834 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 00835 // Can't handle alternate code models yet. 00836 if (TM.getCodeModel() != CodeModel::Small) 00837 return false; 00838 00839 // RIP-relative addresses can't have additional register operands. 00840 if (Subtarget->isPICStyleRIPRel() && 00841 (AM.Base.Reg != 0 || AM.IndexReg != 0)) 00842 return false; 00843 00844 // Can't handle DLL Import. 00845 if (GV->hasDLLImportStorageClass()) 00846 return false; 00847 00848 // Can't handle TLS. 00849 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) 00850 if (GVar->isThreadLocal()) 00851 return false; 00852 00853 // Okay, we've committed to selecting this global. Set up the basic address. 00854 AM.GV = GV; 00855 00856 // No ABI requires an extra load for anything other than DLLImport, which 00857 // we rejected above. Return a direct reference to the global. 00858 if (Subtarget->isPICStyleRIPRel()) { 00859 // Use rip-relative addressing if we can. Above we verified that the 00860 // base and index registers are unused. 00861 assert(AM.Base.Reg == 0 && AM.IndexReg == 0); 00862 AM.Base.Reg = X86::RIP; 00863 } else if (Subtarget->isPICStyleStubPIC()) { 00864 AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET; 00865 } else if (Subtarget->isPICStyleGOT()) { 00866 AM.GVOpFlags = X86II::MO_GOTOFF; 00867 } 00868 00869 return true; 00870 } 00871 00872 // If all else fails, try to materialize the value in a register. 00873 if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { 00874 if (AM.Base.Reg == 0) { 00875 AM.Base.Reg = getRegForValue(V); 00876 return AM.Base.Reg != 0; 00877 } 00878 if (AM.IndexReg == 0) { 00879 assert(AM.Scale == 1 && "Scale with no index!"); 00880 AM.IndexReg = getRegForValue(V); 00881 return AM.IndexReg != 0; 00882 } 00883 } 00884 00885 return false; 00886 } 00887 00888 00889 /// X86SelectStore - Select and emit code to implement store instructions. 00890 bool X86FastISel::X86SelectStore(const Instruction *I) { 00891 // Atomic stores need special handling. 
00892 const StoreInst *S = cast<StoreInst>(I); 00893 00894 if (S->isAtomic()) 00895 return false; 00896 00897 const Value *Val = S->getValueOperand(); 00898 const Value *Ptr = S->getPointerOperand(); 00899 00900 MVT VT; 00901 if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true)) 00902 return false; 00903 00904 unsigned Alignment = S->getAlignment(); 00905 unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType()); 00906 if (Alignment == 0) // Ensure that codegen never sees alignment 0 00907 Alignment = ABIAlignment; 00908 bool Aligned = Alignment >= ABIAlignment; 00909 00910 X86AddressMode AM; 00911 if (!X86SelectAddress(Ptr, AM)) 00912 return false; 00913 00914 return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned); 00915 } 00916 00917 /// X86SelectRet - Select and emit code to implement ret instructions. 00918 bool X86FastISel::X86SelectRet(const Instruction *I) { 00919 const ReturnInst *Ret = cast<ReturnInst>(I); 00920 const Function &F = *I->getParent()->getParent(); 00921 const X86MachineFunctionInfo *X86MFInfo = 00922 FuncInfo.MF->getInfo<X86MachineFunctionInfo>(); 00923 00924 if (!FuncInfo.CanLowerReturn) 00925 return false; 00926 00927 CallingConv::ID CC = F.getCallingConv(); 00928 if (CC != CallingConv::C && 00929 CC != CallingConv::Fast && 00930 CC != CallingConv::X86_FastCall && 00931 CC != CallingConv::X86_64_SysV) 00932 return false; 00933 00934 if (Subtarget->isCallingConvWin64(CC)) 00935 return false; 00936 00937 // Don't handle popping bytes on return for now. 00938 if (X86MFInfo->getBytesToPopOnReturn() != 0) 00939 return false; 00940 00941 // fastcc with -tailcallopt is intended to provide a guaranteed 00942 // tail call optimization. Fastisel doesn't know how to do that. 00943 if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) 00944 return false; 00945 00946 // Let SDISel handle vararg functions. 00947 if (F.isVarArg()) 00948 return false; 00949 00950 // Build a list of return value registers. 00951 SmallVector<unsigned, 4> RetRegs; 00952 00953 if (Ret->getNumOperands() > 0) { 00954 SmallVector<ISD::OutputArg, 4> Outs; 00955 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); 00956 00957 // Analyze operands of the call, assigning locations to each operand. 00958 SmallVector<CCValAssign, 16> ValLocs; 00959 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 00960 CCInfo.AnalyzeReturn(Outs, RetCC_X86); 00961 00962 const Value *RV = Ret->getOperand(0); 00963 unsigned Reg = getRegForValue(RV); 00964 if (Reg == 0) 00965 return false; 00966 00967 // Only handle a single return value for now. 00968 if (ValLocs.size() != 1) 00969 return false; 00970 00971 CCValAssign &VA = ValLocs[0]; 00972 00973 // Don't bother handling odd stuff for now. 00974 if (VA.getLocInfo() != CCValAssign::Full) 00975 return false; 00976 // Only handle register returns for now. 00977 if (!VA.isRegLoc()) 00978 return false; 00979 00980 // The calling-convention tables for x87 returns don't tell 00981 // the whole story. 00982 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) 00983 return false; 00984 00985 unsigned SrcReg = Reg + VA.getValNo(); 00986 EVT SrcVT = TLI.getValueType(RV->getType()); 00987 EVT DstVT = VA.getValVT(); 00988 // Special handling for extended integers. 
00989 if (SrcVT != DstVT) { 00990 if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16) 00991 return false; 00992 00993 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 00994 return false; 00995 00996 assert(DstVT == MVT::i32 && "X86 should always ext to i32"); 00997 00998 if (SrcVT == MVT::i1) { 00999 if (Outs[0].Flags.isSExt()) 01000 return false; 01001 SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false); 01002 SrcVT = MVT::i8; 01003 } 01004 unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND : 01005 ISD::SIGN_EXTEND; 01006 SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op, 01007 SrcReg, /*TODO: Kill=*/false); 01008 } 01009 01010 // Make the copy. 01011 unsigned DstReg = VA.getLocReg(); 01012 const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); 01013 // Avoid a cross-class copy. This is very unlikely. 01014 if (!SrcRC->contains(DstReg)) 01015 return false; 01016 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), 01017 DstReg).addReg(SrcReg); 01018 01019 // Add register to return instruction. 01020 RetRegs.push_back(VA.getLocReg()); 01021 } 01022 01023 // The x86-64 ABI for returning structs by value requires that we copy 01024 // the sret argument into %rax for the return. We saved the argument into 01025 // a virtual register in the entry block, so now we copy the value out 01026 // and into %rax. We also do the same with %eax for Win32. 01027 if (F.hasStructRetAttr() && 01028 (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) { 01029 unsigned Reg = X86MFInfo->getSRetReturnReg(); 01030 assert(Reg && 01031 "SRetReturnReg should have been set in LowerFormalArguments()!"); 01032 unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; 01033 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), 01034 RetReg).addReg(Reg); 01035 RetRegs.push_back(RetReg); 01036 } 01037 01038 // Now emit the RET. 01039 MachineInstrBuilder MIB = 01040 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL)); 01041 for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) 01042 MIB.addReg(RetRegs[i], RegState::Implicit); 01043 return true; 01044 } 01045 01046 /// X86SelectLoad - Select and emit code to implement load instructions. 01047 /// 01048 bool X86FastISel::X86SelectLoad(const Instruction *I) { 01049 const LoadInst *LI = cast<LoadInst>(I); 01050 01051 // Atomic loads need special handling. 01052 if (LI->isAtomic()) 01053 return false; 01054 01055 MVT VT; 01056 if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true)) 01057 return false; 01058 01059 const Value *Ptr = LI->getPointerOperand(); 01060 01061 X86AddressMode AM; 01062 if (!X86SelectAddress(Ptr, AM)) 01063 return false; 01064 01065 unsigned ResultReg = 0; 01066 if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg)) 01067 return false; 01068 01069 updateValueMap(I, ResultReg); 01070 return true; 01071 } 01072 01073 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { 01074 bool HasAVX = Subtarget->hasAVX(); 01075 bool X86ScalarSSEf32 = Subtarget->hasSSE1(); 01076 bool X86ScalarSSEf64 = Subtarget->hasSSE2(); 01077 01078 switch (VT.getSimpleVT().SimpleTy) { 01079 default: return 0; 01080 case MVT::i8: return X86::CMP8rr; 01081 case MVT::i16: return X86::CMP16rr; 01082 case MVT::i32: return X86::CMP32rr; 01083 case MVT::i64: return X86::CMP64rr; 01084 case MVT::f32: 01085 return X86ScalarSSEf32 ? (HasAVX ? 
X86::VUCOMISSrr : X86::UCOMISSrr) : 0; 01086 case MVT::f64: 01087 return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0; 01088 } 01089 } 01090 01091 /// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS 01092 /// of the comparison, return an opcode that works for the compare (e.g. 01093 /// CMP32ri) otherwise return 0. 01094 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) { 01095 switch (VT.getSimpleVT().SimpleTy) { 01096 // Otherwise, we can't fold the immediate into this comparison. 01097 default: return 0; 01098 case MVT::i8: return X86::CMP8ri; 01099 case MVT::i16: return X86::CMP16ri; 01100 case MVT::i32: return X86::CMP32ri; 01101 case MVT::i64: 01102 // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext 01103 // field. 01104 if ((int)RHSC->getSExtValue() == RHSC->getSExtValue()) 01105 return X86::CMP64ri32; 01106 return 0; 01107 } 01108 } 01109 01110 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, 01111 EVT VT) { 01112 unsigned Op0Reg = getRegForValue(Op0); 01113 if (Op0Reg == 0) return false; 01114 01115 // Handle 'null' like i32/i64 0. 01116 if (isa<ConstantPointerNull>(Op1)) 01117 Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext())); 01118 01119 // We have two options: compare with register or immediate. If the RHS of 01120 // the compare is an immediate that we can fold into this compare, use 01121 // CMPri, otherwise use CMPrr. 01122 if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { 01123 if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) { 01124 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CompareImmOpc)) 01125 .addReg(Op0Reg) 01126 .addImm(Op1C->getSExtValue()); 01127 return true; 01128 } 01129 } 01130 01131 unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget); 01132 if (CompareOpc == 0) return false; 01133 01134 unsigned Op1Reg = getRegForValue(Op1); 01135 if (Op1Reg == 0) return false; 01136 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CompareOpc)) 01137 .addReg(Op0Reg) 01138 .addReg(Op1Reg); 01139 01140 return true; 01141 } 01142 01143 bool X86FastISel::X86SelectCmp(const Instruction *I) { 01144 const CmpInst *CI = cast<CmpInst>(I); 01145 01146 MVT VT; 01147 if (!isTypeLegal(I->getOperand(0)->getType(), VT)) 01148 return false; 01149 01150 // Try to optimize or fold the cmp. 01151 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 01152 unsigned ResultReg = 0; 01153 switch (Predicate) { 01154 default: break; 01155 case CmpInst::FCMP_FALSE: { 01156 ResultReg = createResultReg(&X86::GR32RegClass); 01157 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0), 01158 ResultReg); 01159 ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true, 01160 X86::sub_8bit); 01161 if (!ResultReg) 01162 return false; 01163 break; 01164 } 01165 case CmpInst::FCMP_TRUE: { 01166 ResultReg = createResultReg(&X86::GR8RegClass); 01167 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri), 01168 ResultReg).addImm(1); 01169 break; 01170 } 01171 } 01172 01173 if (ResultReg) { 01174 updateValueMap(I, ResultReg); 01175 return true; 01176 } 01177 01178 const Value *LHS = CI->getOperand(0); 01179 const Value *RHS = CI->getOperand(1); 01180 01181 // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0. 01182 // We don't have to materialize a zero constant for this case and can just use 01183 // %x again on the RHS. 
01184 if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { 01185 const auto *RHSC = dyn_cast<ConstantFP>(RHS); 01186 if (RHSC && RHSC->isNullValue()) 01187 RHS = LHS; 01188 } 01189 01190 // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. 01191 static unsigned SETFOpcTable[2][3] = { 01192 { X86::SETEr, X86::SETNPr, X86::AND8rr }, 01193 { X86::SETNEr, X86::SETPr, X86::OR8rr } 01194 }; 01195 unsigned *SETFOpc = nullptr; 01196 switch (Predicate) { 01197 default: break; 01198 case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break; 01199 case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break; 01200 } 01201 01202 ResultReg = createResultReg(&X86::GR8RegClass); 01203 if (SETFOpc) { 01204 if (!X86FastEmitCompare(LHS, RHS, VT)) 01205 return false; 01206 01207 unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); 01208 unsigned FlagReg2 = createResultReg(&X86::GR8RegClass); 01209 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]), 01210 FlagReg1); 01211 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), 01212 FlagReg2); 01213 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]), 01214 ResultReg).addReg(FlagReg1).addReg(FlagReg2); 01215 updateValueMap(I, ResultReg); 01216 return true; 01217 } 01218 01219 X86::CondCode CC; 01220 bool SwapArgs; 01221 std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate); 01222 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); 01223 unsigned Opc = X86::getSETFromCond(CC); 01224 01225 if (SwapArgs) 01226 std::swap(LHS, RHS); 01227 01228 // Emit a compare of LHS/RHS. 01229 if (!X86FastEmitCompare(LHS, RHS, VT)) 01230 return false; 01231 01232 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); 01233 updateValueMap(I, ResultReg); 01234 return true; 01235 } 01236 01237 bool X86FastISel::X86SelectZExt(const Instruction *I) { 01238 EVT DstVT = TLI.getValueType(I->getType()); 01239 if (!TLI.isTypeLegal(DstVT)) 01240 return false; 01241 01242 unsigned ResultReg = getRegForValue(I->getOperand(0)); 01243 if (ResultReg == 0) 01244 return false; 01245 01246 // Handle zero-extension from i1 to i8, which is common. 01247 MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType()); 01248 if (SrcVT.SimpleTy == MVT::i1) { 01249 // Set the high bits to zero. 01250 ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); 01251 SrcVT = MVT::i8; 01252 01253 if (ResultReg == 0) 01254 return false; 01255 } 01256 01257 if (DstVT == MVT::i64) { 01258 // Handle extension to 64-bits via sub-register shenanigans. 
01259 unsigned MovInst; 01260 01261 switch (SrcVT.SimpleTy) { 01262 case MVT::i8: MovInst = X86::MOVZX32rr8; break; 01263 case MVT::i16: MovInst = X86::MOVZX32rr16; break; 01264 case MVT::i32: MovInst = X86::MOV32rr; break; 01265 default: llvm_unreachable("Unexpected zext to i64 source type"); 01266 } 01267 01268 unsigned Result32 = createResultReg(&X86::GR32RegClass); 01269 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32) 01270 .addReg(ResultReg); 01271 01272 ResultReg = createResultReg(&X86::GR64RegClass); 01273 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG), 01274 ResultReg) 01275 .addImm(0).addReg(Result32).addImm(X86::sub_32bit); 01276 } else if (DstVT != MVT::i8) { 01277 ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND, 01278 ResultReg, /*Kill=*/true); 01279 if (ResultReg == 0) 01280 return false; 01281 } 01282 01283 updateValueMap(I, ResultReg); 01284 return true; 01285 } 01286 01287 01288 bool X86FastISel::X86SelectBranch(const Instruction *I) { 01289 // Unconditional branches are selected by tablegen-generated code. 01290 // Handle a conditional branch. 01291 const BranchInst *BI = cast<BranchInst>(I); 01292 MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 01293 MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 01294 01295 // Fold the common case of a conditional branch with a comparison 01296 // in the same block (values defined on other blocks may not have 01297 // initialized registers). 01298 X86::CondCode CC; 01299 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 01300 if (CI->hasOneUse() && CI->getParent() == I->getParent()) { 01301 EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); 01302 01303 // Try to optimize or fold the cmp. 01304 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 01305 switch (Predicate) { 01306 default: break; 01307 case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true; 01308 case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true; 01309 } 01310 01311 const Value *CmpLHS = CI->getOperand(0); 01312 const Value *CmpRHS = CI->getOperand(1); 01313 01314 // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 01315 // 0.0. 01316 // We don't have to materialize a zero constant for this case and can just 01317 // use %x again on the RHS. 01318 if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { 01319 const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS); 01320 if (CmpRHSC && CmpRHSC->isNullValue()) 01321 CmpRHS = CmpLHS; 01322 } 01323 01324 // Try to take advantage of fallthrough opportunities. 01325 if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { 01326 std::swap(TrueMBB, FalseMBB); 01327 Predicate = CmpInst::getInversePredicate(Predicate); 01328 } 01329 01330 // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition 01331 // code check. Instead two branch instructions are required to check all 01332 // the flags. First we change the predicate to a supported condition code, 01333 // which will be the first branch. Later one we will emit the second 01334 // branch. 
01335 bool NeedExtraBranch = false; 01336 switch (Predicate) { 01337 default: break; 01338 case CmpInst::FCMP_OEQ: 01339 std::swap(TrueMBB, FalseMBB); // fall-through 01340 case CmpInst::FCMP_UNE: 01341 NeedExtraBranch = true; 01342 Predicate = CmpInst::FCMP_ONE; 01343 break; 01344 } 01345 01346 bool SwapArgs; 01347 unsigned BranchOpc; 01348 std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate); 01349 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); 01350 01351 BranchOpc = X86::GetCondBranchFromCond(CC); 01352 if (SwapArgs) 01353 std::swap(CmpLHS, CmpRHS); 01354 01355 // Emit a compare of the LHS and RHS, setting the flags. 01356 if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT)) 01357 return false; 01358 01359 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc)) 01360 .addMBB(TrueMBB); 01361 01362 // X86 requires a second branch to handle UNE (and OEQ, which is mapped 01363 // to UNE above). 01364 if (NeedExtraBranch) { 01365 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_4)) 01366 .addMBB(TrueMBB); 01367 } 01368 01369 // Obtain the branch weight and add the TrueBB to the successor list. 01370 uint32_t BranchWeight = 0; 01371 if (FuncInfo.BPI) 01372 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 01373 TrueMBB->getBasicBlock()); 01374 FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); 01375 01376 // Emits an unconditional branch to the FalseBB, obtains the branch 01377 // weight, and adds it to the successor list. 01378 fastEmitBranch(FalseMBB, DbgLoc); 01379 01380 return true; 01381 } 01382 } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) { 01383 // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which 01384 // typically happen for _Bool and C++ bools. 01385 MVT SourceVT; 01386 if (TI->hasOneUse() && TI->getParent() == I->getParent() && 01387 isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) { 01388 unsigned TestOpc = 0; 01389 switch (SourceVT.SimpleTy) { 01390 default: break; 01391 case MVT::i8: TestOpc = X86::TEST8ri; break; 01392 case MVT::i16: TestOpc = X86::TEST16ri; break; 01393 case MVT::i32: TestOpc = X86::TEST32ri; break; 01394 case MVT::i64: TestOpc = X86::TEST64ri32; break; 01395 } 01396 if (TestOpc) { 01397 unsigned OpReg = getRegForValue(TI->getOperand(0)); 01398 if (OpReg == 0) return false; 01399 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc)) 01400 .addReg(OpReg).addImm(1); 01401 01402 unsigned JmpOpc = X86::JNE_4; 01403 if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { 01404 std::swap(TrueMBB, FalseMBB); 01405 JmpOpc = X86::JE_4; 01406 } 01407 01408 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc)) 01409 .addMBB(TrueMBB); 01410 fastEmitBranch(FalseMBB, DbgLoc); 01411 uint32_t BranchWeight = 0; 01412 if (FuncInfo.BPI) 01413 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 01414 TrueMBB->getBasicBlock()); 01415 FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); 01416 return true; 01417 } 01418 } 01419 } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) { 01420 // Fake request the condition, otherwise the intrinsic might be completely 01421 // optimized away. 
01422 unsigned TmpReg = getRegForValue(BI->getCondition()); 01423 if (TmpReg == 0) 01424 return false; 01425 01426 unsigned BranchOpc = X86::GetCondBranchFromCond(CC); 01427 01428 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc)) 01429 .addMBB(TrueMBB); 01430 fastEmitBranch(FalseMBB, DbgLoc); 01431 uint32_t BranchWeight = 0; 01432 if (FuncInfo.BPI) 01433 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 01434 TrueMBB->getBasicBlock()); 01435 FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); 01436 return true; 01437 } 01438 01439 // Otherwise do a clumsy setcc and re-test it. 01440 // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used 01441 // in an explicit cast, so make sure to handle that correctly. 01442 unsigned OpReg = getRegForValue(BI->getCondition()); 01443 if (OpReg == 0) return false; 01444 01445 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) 01446 .addReg(OpReg).addImm(1); 01447 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_4)) 01448 .addMBB(TrueMBB); 01449 fastEmitBranch(FalseMBB, DbgLoc); 01450 uint32_t BranchWeight = 0; 01451 if (FuncInfo.BPI) 01452 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 01453 TrueMBB->getBasicBlock()); 01454 FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); 01455 return true; 01456 } 01457 01458 bool X86FastISel::X86SelectShift(const Instruction *I) { 01459 unsigned CReg = 0, OpReg = 0; 01460 const TargetRegisterClass *RC = nullptr; 01461 if (I->getType()->isIntegerTy(8)) { 01462 CReg = X86::CL; 01463 RC = &X86::GR8RegClass; 01464 switch (I->getOpcode()) { 01465 case Instruction::LShr: OpReg = X86::SHR8rCL; break; 01466 case Instruction::AShr: OpReg = X86::SAR8rCL; break; 01467 case Instruction::Shl: OpReg = X86::SHL8rCL; break; 01468 default: return false; 01469 } 01470 } else if (I->getType()->isIntegerTy(16)) { 01471 CReg = X86::CX; 01472 RC = &X86::GR16RegClass; 01473 switch (I->getOpcode()) { 01474 case Instruction::LShr: OpReg = X86::SHR16rCL; break; 01475 case Instruction::AShr: OpReg = X86::SAR16rCL; break; 01476 case Instruction::Shl: OpReg = X86::SHL16rCL; break; 01477 default: return false; 01478 } 01479 } else if (I->getType()->isIntegerTy(32)) { 01480 CReg = X86::ECX; 01481 RC = &X86::GR32RegClass; 01482 switch (I->getOpcode()) { 01483 case Instruction::LShr: OpReg = X86::SHR32rCL; break; 01484 case Instruction::AShr: OpReg = X86::SAR32rCL; break; 01485 case Instruction::Shl: OpReg = X86::SHL32rCL; break; 01486 default: return false; 01487 } 01488 } else if (I->getType()->isIntegerTy(64)) { 01489 CReg = X86::RCX; 01490 RC = &X86::GR64RegClass; 01491 switch (I->getOpcode()) { 01492 case Instruction::LShr: OpReg = X86::SHR64rCL; break; 01493 case Instruction::AShr: OpReg = X86::SAR64rCL; break; 01494 case Instruction::Shl: OpReg = X86::SHL64rCL; break; 01495 default: return false; 01496 } 01497 } else { 01498 return false; 01499 } 01500 01501 MVT VT; 01502 if (!isTypeLegal(I->getType(), VT)) 01503 return false; 01504 01505 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 01506 if (Op0Reg == 0) return false; 01507 01508 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 01509 if (Op1Reg == 0) return false; 01510 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), 01511 CReg).addReg(Op1Reg); 01512 01513 // The shift instruction uses X86::CL. If we defined a super-register 01514 // of X86::CL, emit a subreg KILL to precisely describe what we're doing here. 
01515 if (CReg != X86::CL) 01516 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01517 TII.get(TargetOpcode::KILL), X86::CL) 01518 .addReg(CReg, RegState::Kill); 01519 01520 unsigned ResultReg = createResultReg(RC); 01521 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg) 01522 .addReg(Op0Reg); 01523 updateValueMap(I, ResultReg); 01524 return true; 01525 } 01526 01527 bool X86FastISel::X86SelectDivRem(const Instruction *I) { 01528 const static unsigned NumTypes = 4; // i8, i16, i32, i64 01529 const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem 01530 const static bool S = true; // IsSigned 01531 const static bool U = false; // !IsSigned 01532 const static unsigned Copy = TargetOpcode::COPY; 01533 // For the X86 DIV/IDIV instruction, in most cases the dividend 01534 // (numerator) must be in a specific register pair highreg:lowreg, 01535 // producing the quotient in lowreg and the remainder in highreg. 01536 // For most data types, to set up the instruction, the dividend is 01537 // copied into lowreg, and lowreg is sign-extended or zero-extended 01538 // into highreg. The exception is i8, where the dividend is defined 01539 // as a single register rather than a register pair, and we 01540 // therefore directly sign-extend or zero-extend the dividend into 01541 // lowreg, instead of copying, and ignore the highreg. 01542 const static struct DivRemEntry { 01543 // The following portion depends only on the data type. 01544 const TargetRegisterClass *RC; 01545 unsigned LowInReg; // low part of the register pair 01546 unsigned HighInReg; // high part of the register pair 01547 // The following portion depends on both the data type and the operation. 01548 struct DivRemResult { 01549 unsigned OpDivRem; // The specific DIV/IDIV opcode to use. 01550 unsigned OpSignExtend; // Opcode for sign-extending lowreg into 01551 // highreg, or copying a zero into highreg. 01552 unsigned OpCopy; // Opcode for copying dividend into lowreg, or 01553 // zero/sign-extending into lowreg for i8. 01554 unsigned DivRemResultReg; // Register containing the desired result. 01555 bool IsOpSigned; // Whether to use signed or unsigned form. 
01556 } ResultTable[NumOps]; 01557 } OpTable[NumTypes] = { 01558 { &X86::GR8RegClass, X86::AX, 0, { 01559 { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv 01560 { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem 01561 { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv 01562 { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem 01563 } 01564 }, // i8 01565 { &X86::GR16RegClass, X86::AX, X86::DX, { 01566 { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv 01567 { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem 01568 { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv 01569 { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem 01570 } 01571 }, // i16 01572 { &X86::GR32RegClass, X86::EAX, X86::EDX, { 01573 { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv 01574 { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem 01575 { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv 01576 { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem 01577 } 01578 }, // i32 01579 { &X86::GR64RegClass, X86::RAX, X86::RDX, { 01580 { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv 01581 { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem 01582 { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv 01583 { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem 01584 } 01585 }, // i64 01586 }; 01587 01588 MVT VT; 01589 if (!isTypeLegal(I->getType(), VT)) 01590 return false; 01591 01592 unsigned TypeIndex, OpIndex; 01593 switch (VT.SimpleTy) { 01594 default: return false; 01595 case MVT::i8: TypeIndex = 0; break; 01596 case MVT::i16: TypeIndex = 1; break; 01597 case MVT::i32: TypeIndex = 2; break; 01598 case MVT::i64: TypeIndex = 3; 01599 if (!Subtarget->is64Bit()) 01600 return false; 01601 break; 01602 } 01603 01604 switch (I->getOpcode()) { 01605 default: llvm_unreachable("Unexpected div/rem opcode"); 01606 case Instruction::SDiv: OpIndex = 0; break; 01607 case Instruction::SRem: OpIndex = 1; break; 01608 case Instruction::UDiv: OpIndex = 2; break; 01609 case Instruction::URem: OpIndex = 3; break; 01610 } 01611 01612 const DivRemEntry &TypeEntry = OpTable[TypeIndex]; 01613 const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex]; 01614 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 01615 if (Op0Reg == 0) 01616 return false; 01617 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 01618 if (Op1Reg == 0) 01619 return false; 01620 01621 // Move op0 into low-order input register. 01622 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01623 TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg); 01624 // Zero-extend or sign-extend into high-order input register. 01625 if (OpEntry.OpSignExtend) { 01626 if (OpEntry.IsOpSigned) 01627 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01628 TII.get(OpEntry.OpSignExtend)); 01629 else { 01630 unsigned Zero32 = createResultReg(&X86::GR32RegClass); 01631 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01632 TII.get(X86::MOV32r0), Zero32); 01633 01634 // Copy the zero into the appropriate sub/super/identical physical 01635 // register. Unfortunately the operations needed are not uniform enough to 01636 // fit neatly into the table above. 
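  // i16: copy the low 16 bits of the zero into DX,
  // i32: copy the zero directly into EDX,
  // i64: widen the 32-bit zero into RDX with SUBREG_TO_REG.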
01637 if (VT.SimpleTy == MVT::i16) { 01638 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01639 TII.get(Copy), TypeEntry.HighInReg) 01640 .addReg(Zero32, 0, X86::sub_16bit); 01641 } else if (VT.SimpleTy == MVT::i32) { 01642 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01643 TII.get(Copy), TypeEntry.HighInReg) 01644 .addReg(Zero32); 01645 } else if (VT.SimpleTy == MVT::i64) { 01646 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01647 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg) 01648 .addImm(0).addReg(Zero32).addImm(X86::sub_32bit); 01649 } 01650 } 01651 } 01652 // Generate the DIV/IDIV instruction. 01653 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01654 TII.get(OpEntry.OpDivRem)).addReg(Op1Reg); 01655 // For i8 remainder, we can't reference AH directly, as we'll end 01656 // up with bogus copies like %R9B = COPY %AH. Reference AX 01657 // instead to prevent AH references in a REX instruction. 01658 // 01659 // The current assumption of the fast register allocator is that isel 01660 // won't generate explicit references to the GPR8_NOREX registers. If 01661 // the allocator and/or the backend get enhanced to be more robust in 01662 // that regard, this can be, and should be, removed. 01663 unsigned ResultReg = 0; 01664 if ((I->getOpcode() == Instruction::SRem || 01665 I->getOpcode() == Instruction::URem) && 01666 OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) { 01667 unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass); 01668 unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass); 01669 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01670 TII.get(Copy), SourceSuperReg).addReg(X86::AX); 01671 01672 // Shift AX right by 8 bits instead of using AH. 01673 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri), 01674 ResultSuperReg).addReg(SourceSuperReg).addImm(8); 01675 01676 // Now reference the 8-bit subreg of the result. 01677 ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg, 01678 /*Kill=*/true, X86::sub_8bit); 01679 } 01680 // Copy the result out of the physreg if we haven't already. 01681 if (!ResultReg) { 01682 ResultReg = createResultReg(TypeEntry.RC); 01683 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg) 01684 .addReg(OpEntry.DivRemResultReg); 01685 } 01686 updateValueMap(I, ResultReg); 01687 01688 return true; 01689 } 01690 01691 /// \brief Emit a conditional move instruction (if the are supported) to lower 01692 /// the select. 01693 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { 01694 // Check if the subtarget supports these instructions. 01695 if (!Subtarget->hasCMov()) 01696 return false; 01697 01698 // FIXME: Add support for i8. 01699 if (RetVT < MVT::i16 || RetVT > MVT::i64) 01700 return false; 01701 01702 const Value *Cond = I->getOperand(0); 01703 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); 01704 bool NeedTest = true; 01705 X86::CondCode CC = X86::COND_NE; 01706 01707 // Optimize conditions coming from a compare if both instructions are in the 01708 // same basic block (values defined in other basic blocks may not have 01709 // initialized registers). 01710 const auto *CI = dyn_cast<CmpInst>(Cond); 01711 if (CI && (CI->getParent() == I->getParent())) { 01712 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 01713 01714 // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. 
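  // FCMP_OEQ holds when the result is ordered AND equal (SETNP, SETE,
  // combined with TEST8rr); FCMP_UNE holds when it is unordered OR not
  // equal (SETP, SETNE, combined with OR8rr). The table below lists the
  // two setcc opcodes and the combining opcode for each case.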
01715 static unsigned SETFOpcTable[2][3] = { 01716 { X86::SETNPr, X86::SETEr , X86::TEST8rr }, 01717 { X86::SETPr, X86::SETNEr, X86::OR8rr } 01718 }; 01719 unsigned *SETFOpc = nullptr; 01720 switch (Predicate) { 01721 default: break; 01722 case CmpInst::FCMP_OEQ: 01723 SETFOpc = &SETFOpcTable[0][0]; 01724 Predicate = CmpInst::ICMP_NE; 01725 break; 01726 case CmpInst::FCMP_UNE: 01727 SETFOpc = &SETFOpcTable[1][0]; 01728 Predicate = CmpInst::ICMP_NE; 01729 break; 01730 } 01731 01732 bool NeedSwap; 01733 std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate); 01734 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); 01735 01736 const Value *CmpLHS = CI->getOperand(0); 01737 const Value *CmpRHS = CI->getOperand(1); 01738 if (NeedSwap) 01739 std::swap(CmpLHS, CmpRHS); 01740 01741 EVT CmpVT = TLI.getValueType(CmpLHS->getType()); 01742 // Emit a compare of the LHS and RHS, setting the flags. 01743 if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT)) 01744 return false; 01745 01746 if (SETFOpc) { 01747 unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); 01748 unsigned FlagReg2 = createResultReg(&X86::GR8RegClass); 01749 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]), 01750 FlagReg1); 01751 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), 01752 FlagReg2); 01753 auto const &II = TII.get(SETFOpc[2]); 01754 if (II.getNumDefs()) { 01755 unsigned TmpReg = createResultReg(&X86::GR8RegClass); 01756 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg) 01757 .addReg(FlagReg2).addReg(FlagReg1); 01758 } else { 01759 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 01760 .addReg(FlagReg2).addReg(FlagReg1); 01761 } 01762 } 01763 NeedTest = false; 01764 } else if (foldX86XALUIntrinsic(CC, I, Cond)) { 01765 // Fake request the condition, otherwise the intrinsic might be completely 01766 // optimized away. 01767 unsigned TmpReg = getRegForValue(Cond); 01768 if (TmpReg == 0) 01769 return false; 01770 01771 NeedTest = false; 01772 } 01773 01774 if (NeedTest) { 01775 // Selects operate on i1, however, CondReg is 8 bits width and may contain 01776 // garbage. Indeed, only the less significant bit is supposed to be 01777 // accurate. If we read more than the lsb, we may see non-zero values 01778 // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for 01779 // the select. This is achieved by performing TEST against 1. 01780 unsigned CondReg = getRegForValue(Cond); 01781 if (CondReg == 0) 01782 return false; 01783 bool CondIsKill = hasTrivialKill(Cond); 01784 01785 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) 01786 .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1); 01787 } 01788 01789 const Value *LHS = I->getOperand(1); 01790 const Value *RHS = I->getOperand(2); 01791 01792 unsigned RHSReg = getRegForValue(RHS); 01793 bool RHSIsKill = hasTrivialKill(RHS); 01794 01795 unsigned LHSReg = getRegForValue(LHS); 01796 bool LHSIsKill = hasTrivialKill(LHS); 01797 01798 if (!LHSReg || !RHSReg) 01799 return false; 01800 01801 unsigned Opc = X86::getCMovFromCond(CC, RC->getSize()); 01802 unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, 01803 LHSReg, LHSIsKill); 01804 updateValueMap(I, ResultReg); 01805 return true; 01806 } 01807 01808 /// \brief Emit SSE instructions to lower the select. 01809 /// 01810 /// Try to use SSE1/SSE2 instructions to simulate a select without branches. 
01811 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary 01812 /// SSE instructions are available. 01813 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { 01814 // Optimize conditions coming from a compare if both instructions are in the 01815 // same basic block (values defined in other basic blocks may not have 01816 // initialized registers). 01817 const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0)); 01818 if (!CI || (CI->getParent() != I->getParent())) 01819 return false; 01820 01821 if (I->getType() != CI->getOperand(0)->getType() || 01822 !((Subtarget->hasSSE1() && RetVT == MVT::f32) || 01823 (Subtarget->hasSSE2() && RetVT == MVT::f64) )) 01824 return false; 01825 01826 const Value *CmpLHS = CI->getOperand(0); 01827 const Value *CmpRHS = CI->getOperand(1); 01828 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 01829 01830 // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0. 01831 // We don't have to materialize a zero constant for this case and can just use 01832 // %x again on the RHS. 01833 if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { 01834 const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS); 01835 if (CmpRHSC && CmpRHSC->isNullValue()) 01836 CmpRHS = CmpLHS; 01837 } 01838 01839 unsigned CC; 01840 bool NeedSwap; 01841 std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate); 01842 if (CC > 7) 01843 return false; 01844 01845 if (NeedSwap) 01846 std::swap(CmpLHS, CmpRHS); 01847 01848 static unsigned OpcTable[2][2][4] = { 01849 { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr }, 01850 { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } }, 01851 { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }, 01852 { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } } 01853 }; 01854 01855 bool HasAVX = Subtarget->hasAVX(); 01856 unsigned *Opc = nullptr; 01857 switch (RetVT.SimpleTy) { 01858 default: return false; 01859 case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break; 01860 case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break; 01861 } 01862 01863 const Value *LHS = I->getOperand(1); 01864 const Value *RHS = I->getOperand(2); 01865 01866 unsigned LHSReg = getRegForValue(LHS); 01867 bool LHSIsKill = hasTrivialKill(LHS); 01868 01869 unsigned RHSReg = getRegForValue(RHS); 01870 bool RHSIsKill = hasTrivialKill(RHS); 01871 01872 unsigned CmpLHSReg = getRegForValue(CmpLHS); 01873 bool CmpLHSIsKill = hasTrivialKill(CmpLHS); 01874 01875 unsigned CmpRHSReg = getRegForValue(CmpRHS); 01876 bool CmpRHSIsKill = hasTrivialKill(CmpRHS); 01877 01878 if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS) 01879 return false; 01880 01881 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); 01882 unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, 01883 CmpRHSReg, CmpRHSIsKill, CC); 01884 unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false, 01885 LHSReg, LHSIsKill); 01886 unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true, 01887 RHSReg, RHSIsKill); 01888 unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true, 01889 AndReg, /*IsKill=*/true); 01890 updateValueMap(I, ResultReg); 01891 return true; 01892 } 01893 01894 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) { 01895 // These are pseudo CMOV instructions and will be later expanded into control- 01896 // flow. 
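  // The CMOV_* pseudos below take the condition code as an immediate
  // operand and are expanded after fast-isel into explicit control flow
  // (a conditional branch plus a PHI), so this path also covers targets
  // and types that have no real CMOV.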
01897 unsigned Opc; 01898 switch (RetVT.SimpleTy) { 01899 default: return false; 01900 case MVT::i8: Opc = X86::CMOV_GR8; break; 01901 case MVT::i16: Opc = X86::CMOV_GR16; break; 01902 case MVT::i32: Opc = X86::CMOV_GR32; break; 01903 case MVT::f32: Opc = X86::CMOV_FR32; break; 01904 case MVT::f64: Opc = X86::CMOV_FR64; break; 01905 } 01906 01907 const Value *Cond = I->getOperand(0); 01908 X86::CondCode CC = X86::COND_NE; 01909 01910 // Optimize conditions coming from a compare if both instructions are in the 01911 // same basic block (values defined in other basic blocks may not have 01912 // initialized registers). 01913 const auto *CI = dyn_cast<CmpInst>(Cond); 01914 if (CI && (CI->getParent() == I->getParent())) { 01915 bool NeedSwap; 01916 std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate()); 01917 if (CC > X86::LAST_VALID_COND) 01918 return false; 01919 01920 const Value *CmpLHS = CI->getOperand(0); 01921 const Value *CmpRHS = CI->getOperand(1); 01922 01923 if (NeedSwap) 01924 std::swap(CmpLHS, CmpRHS); 01925 01926 EVT CmpVT = TLI.getValueType(CmpLHS->getType()); 01927 if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT)) 01928 return false; 01929 } else { 01930 unsigned CondReg = getRegForValue(Cond); 01931 if (CondReg == 0) 01932 return false; 01933 bool CondIsKill = hasTrivialKill(Cond); 01934 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) 01935 .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1); 01936 } 01937 01938 const Value *LHS = I->getOperand(1); 01939 const Value *RHS = I->getOperand(2); 01940 01941 unsigned LHSReg = getRegForValue(LHS); 01942 bool LHSIsKill = hasTrivialKill(LHS); 01943 01944 unsigned RHSReg = getRegForValue(RHS); 01945 bool RHSIsKill = hasTrivialKill(RHS); 01946 01947 if (!LHSReg || !RHSReg) 01948 return false; 01949 01950 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); 01951 01952 unsigned ResultReg = 01953 fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC); 01954 updateValueMap(I, ResultReg); 01955 return true; 01956 } 01957 01958 bool X86FastISel::X86SelectSelect(const Instruction *I) { 01959 MVT RetVT; 01960 if (!isTypeLegal(I->getType(), RetVT)) 01961 return false; 01962 01963 // Check if we can fold the select. 01964 if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) { 01965 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 01966 const Value *Opnd = nullptr; 01967 switch (Predicate) { 01968 default: break; 01969 case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break; 01970 case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break; 01971 } 01972 // No need for a select anymore - this is an unconditional move. 01973 if (Opnd) { 01974 unsigned OpReg = getRegForValue(Opnd); 01975 if (OpReg == 0) 01976 return false; 01977 bool OpIsKill = hasTrivialKill(Opnd); 01978 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); 01979 unsigned ResultReg = createResultReg(RC); 01980 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01981 TII.get(TargetOpcode::COPY), ResultReg) 01982 .addReg(OpReg, getKillRegState(OpIsKill)); 01983 updateValueMap(I, ResultReg); 01984 return true; 01985 } 01986 } 01987 01988 // First try to use real conditional move instructions. 01989 if (X86FastEmitCMoveSelect(RetVT, I)) 01990 return true; 01991 01992 // Try to use a sequence of SSE instructions to simulate a conditional move. 
01993 if (X86FastEmitSSESelect(RetVT, I)) 01994 return true; 01995 01996 // Fall-back to pseudo conditional move instructions, which will be later 01997 // converted to control-flow. 01998 if (X86FastEmitPseudoSelect(RetVT, I)) 01999 return true; 02000 02001 return false; 02002 } 02003 02004 bool X86FastISel::X86SelectFPExt(const Instruction *I) { 02005 // fpext from float to double. 02006 if (X86ScalarSSEf64 && 02007 I->getType()->isDoubleTy()) { 02008 const Value *V = I->getOperand(0); 02009 if (V->getType()->isFloatTy()) { 02010 unsigned OpReg = getRegForValue(V); 02011 if (OpReg == 0) return false; 02012 unsigned ResultReg = createResultReg(&X86::FR64RegClass); 02013 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02014 TII.get(X86::CVTSS2SDrr), ResultReg) 02015 .addReg(OpReg); 02016 updateValueMap(I, ResultReg); 02017 return true; 02018 } 02019 } 02020 02021 return false; 02022 } 02023 02024 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { 02025 if (X86ScalarSSEf64) { 02026 if (I->getType()->isFloatTy()) { 02027 const Value *V = I->getOperand(0); 02028 if (V->getType()->isDoubleTy()) { 02029 unsigned OpReg = getRegForValue(V); 02030 if (OpReg == 0) return false; 02031 unsigned ResultReg = createResultReg(&X86::FR32RegClass); 02032 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02033 TII.get(X86::CVTSD2SSrr), ResultReg) 02034 .addReg(OpReg); 02035 updateValueMap(I, ResultReg); 02036 return true; 02037 } 02038 } 02039 } 02040 02041 return false; 02042 } 02043 02044 bool X86FastISel::X86SelectTrunc(const Instruction *I) { 02045 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); 02046 EVT DstVT = TLI.getValueType(I->getType()); 02047 02048 // This code only handles truncation to byte. 02049 if (DstVT != MVT::i8 && DstVT != MVT::i1) 02050 return false; 02051 if (!TLI.isTypeLegal(SrcVT)) 02052 return false; 02053 02054 unsigned InputReg = getRegForValue(I->getOperand(0)); 02055 if (!InputReg) 02056 // Unhandled operand. Halt "fast" selection and bail. 02057 return false; 02058 02059 if (SrcVT == MVT::i8) { 02060 // Truncate from i8 to i1; no code needed. 02061 updateValueMap(I, InputReg); 02062 return true; 02063 } 02064 02065 if (!Subtarget->is64Bit()) { 02066 // If we're on x86-32; we can't extract an i8 from a general register. 02067 // First issue a copy to GR16_ABCD or GR32_ABCD. 02068 const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ? 02069 (const TargetRegisterClass*)&X86::GR16_ABCDRegClass : 02070 (const TargetRegisterClass*)&X86::GR32_ABCDRegClass; 02071 unsigned CopyReg = createResultReg(CopyRC); 02072 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), 02073 CopyReg).addReg(InputReg); 02074 InputReg = CopyReg; 02075 } 02076 02077 // Issue an extract_subreg. 02078 unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8, 02079 InputReg, /*Kill=*/true, 02080 X86::sub_8bit); 02081 if (!ResultReg) 02082 return false; 02083 02084 updateValueMap(I, ResultReg); 02085 return true; 02086 } 02087 02088 bool X86FastISel::IsMemcpySmall(uint64_t Len) { 02089 return Len <= (Subtarget->is64Bit() ? 32 : 16); 02090 } 02091 02092 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, 02093 X86AddressMode SrcAM, uint64_t Len) { 02094 02095 // Make sure we don't bloat code by inlining very large memcpy's. 02096 if (!IsMemcpySmall(Len)) 02097 return false; 02098 02099 bool i64Legal = Subtarget->is64Bit(); 02100 02101 // We don't care about alignment here since we just emit integer accesses. 
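  // Copy in the widest chunks available: 8 bytes at a time on x86-64,
  // then 4, 2 and 1. A 13-byte copy on x86-64, for example, becomes one
  // i64, one i32 and one i8 load/store pair.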
02102 while (Len) { 02103 MVT VT; 02104 if (Len >= 8 && i64Legal) 02105 VT = MVT::i64; 02106 else if (Len >= 4) 02107 VT = MVT::i32; 02108 else if (Len >= 2) 02109 VT = MVT::i16; 02110 else { 02111 VT = MVT::i8; 02112 } 02113 02114 unsigned Reg; 02115 bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg); 02116 RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM); 02117 assert(RV && "Failed to emit load or store??"); 02118 02119 unsigned Size = VT.getSizeInBits()/8; 02120 Len -= Size; 02121 DestAM.Disp += Size; 02122 SrcAM.Disp += Size; 02123 } 02124 02125 return true; 02126 } 02127 02128 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 02129 // FIXME: Handle more intrinsics. 02130 switch (II->getIntrinsicID()) { 02131 default: return false; 02132 case Intrinsic::frameaddress: { 02133 Type *RetTy = II->getCalledFunction()->getReturnType(); 02134 02135 MVT VT; 02136 if (!isTypeLegal(RetTy, VT)) 02137 return false; 02138 02139 unsigned Opc; 02140 const TargetRegisterClass *RC = nullptr; 02141 02142 switch (VT.SimpleTy) { 02143 default: llvm_unreachable("Invalid result type for frameaddress."); 02144 case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break; 02145 case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break; 02146 } 02147 02148 // This needs to be set before we call getFrameRegister, otherwise we get 02149 // the wrong frame register. 02150 MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); 02151 MFI->setFrameAddressIsTaken(true); 02152 02153 const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>( 02154 TM.getSubtargetImpl()->getRegisterInfo()); 02155 unsigned FrameReg = RegInfo->getFrameRegister(*(FuncInfo.MF)); 02156 assert(((FrameReg == X86::RBP && VT == MVT::i64) || 02157 (FrameReg == X86::EBP && VT == MVT::i32)) && 02158 "Invalid Frame Register!"); 02159 02160 // Always make a copy of the frame register to to a vreg first, so that we 02161 // never directly reference the frame register (the TwoAddressInstruction- 02162 // Pass doesn't like that). 02163 unsigned SrcReg = createResultReg(RC); 02164 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02165 TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg); 02166 02167 // Now recursively load from the frame address. 02168 // movq (%rbp), %rax 02169 // movq (%rax), %rax 02170 // movq (%rax), %rax 02171 // ... 02172 unsigned DestReg; 02173 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 02174 while (Depth--) { 02175 DestReg = createResultReg(RC); 02176 addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02177 TII.get(Opc), DestReg), SrcReg); 02178 SrcReg = DestReg; 02179 } 02180 02181 updateValueMap(II, SrcReg); 02182 return true; 02183 } 02184 case Intrinsic::memcpy: { 02185 const MemCpyInst *MCI = cast<MemCpyInst>(II); 02186 // Don't handle volatile or variable length memcpys. 02187 if (MCI->isVolatile()) 02188 return false; 02189 02190 if (isa<ConstantInt>(MCI->getLength())) { 02191 // Small memcpy's are common enough that we want to do them 02192 // without a call if possible. 02193 uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue(); 02194 if (IsMemcpySmall(Len)) { 02195 X86AddressMode DestAM, SrcAM; 02196 if (!X86SelectAddress(MCI->getRawDest(), DestAM) || 02197 !X86SelectAddress(MCI->getRawSource(), SrcAM)) 02198 return false; 02199 TryEmitSmallMemcpy(DestAM, SrcAM, Len); 02200 return true; 02201 } 02202 } 02203 02204 unsigned SizeWidth = Subtarget->is64Bit() ? 
64 : 32; 02205 if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth)) 02206 return false; 02207 02208 if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255) 02209 return false; 02210 02211 return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2); 02212 } 02213 case Intrinsic::memset: { 02214 const MemSetInst *MSI = cast<MemSetInst>(II); 02215 02216 if (MSI->isVolatile()) 02217 return false; 02218 02219 unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32; 02220 if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth)) 02221 return false; 02222 02223 if (MSI->getDestAddressSpace() > 255) 02224 return false; 02225 02226 return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); 02227 } 02228 case Intrinsic::stackprotector: { 02229 // Emit code to store the stack guard onto the stack. 02230 EVT PtrTy = TLI.getPointerTy(); 02231 02232 const Value *Op1 = II->getArgOperand(0); // The guard's value. 02233 const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1)); 02234 02235 MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]); 02236 02237 // Grab the frame index. 02238 X86AddressMode AM; 02239 if (!X86SelectAddress(Slot, AM)) return false; 02240 if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; 02241 return true; 02242 } 02243 case Intrinsic::dbg_declare: { 02244 const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); 02245 X86AddressMode AM; 02246 assert(DI->getAddress() && "Null address should be checked earlier!"); 02247 if (!X86SelectAddress(DI->getAddress(), AM)) 02248 return false; 02249 const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); 02250 // FIXME may need to add RegState::Debug to any registers produced, 02251 // although ESP/EBP should be the only ones at the moment. 02252 addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM). 02253 addImm(0).addMetadata(DI->getVariable()); 02254 return true; 02255 } 02256 case Intrinsic::trap: { 02257 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP)); 02258 return true; 02259 } 02260 case Intrinsic::sqrt: { 02261 if (!Subtarget->hasSSE1()) 02262 return false; 02263 02264 Type *RetTy = II->getCalledFunction()->getReturnType(); 02265 02266 MVT VT; 02267 if (!isTypeLegal(RetTy, VT)) 02268 return false; 02269 02270 // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT 02271 // is not generated by FastISel yet. 02272 // FIXME: Update this code once tablegen can handle it. 
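  // Opcode table indexed by [result is f64][target has AVX]. The VEX
  // forms take an extra source operand for the upper vector lanes,
  // which is provided below by an IMPLICIT_DEF.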
02273 static const unsigned SqrtOpc[2][2] = { 02274 {X86::SQRTSSr, X86::VSQRTSSr}, 02275 {X86::SQRTSDr, X86::VSQRTSDr} 02276 }; 02277 bool HasAVX = Subtarget->hasAVX(); 02278 unsigned Opc; 02279 const TargetRegisterClass *RC; 02280 switch (VT.SimpleTy) { 02281 default: return false; 02282 case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break; 02283 case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break; 02284 } 02285 02286 const Value *SrcVal = II->getArgOperand(0); 02287 unsigned SrcReg = getRegForValue(SrcVal); 02288 02289 if (SrcReg == 0) 02290 return false; 02291 02292 unsigned ImplicitDefReg = 0; 02293 if (HasAVX) { 02294 ImplicitDefReg = createResultReg(RC); 02295 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02296 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); 02297 } 02298 02299 unsigned ResultReg = createResultReg(RC); 02300 MachineInstrBuilder MIB; 02301 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), 02302 ResultReg); 02303 02304 if (ImplicitDefReg) 02305 MIB.addReg(ImplicitDefReg); 02306 02307 MIB.addReg(SrcReg); 02308 02309 updateValueMap(II, ResultReg); 02310 return true; 02311 } 02312 case Intrinsic::sadd_with_overflow: 02313 case Intrinsic::uadd_with_overflow: 02314 case Intrinsic::ssub_with_overflow: 02315 case Intrinsic::usub_with_overflow: 02316 case Intrinsic::smul_with_overflow: 02317 case Intrinsic::umul_with_overflow: { 02318 // This implements the basic lowering of the xalu with overflow intrinsics 02319 // into add/sub/mul followed by either seto or setb. 02320 const Function *Callee = II->getCalledFunction(); 02321 auto *Ty = cast<StructType>(Callee->getReturnType()); 02322 Type *RetTy = Ty->getTypeAtIndex(0U); 02323 Type *CondTy = Ty->getTypeAtIndex(1); 02324 02325 MVT VT; 02326 if (!isTypeLegal(RetTy, VT)) 02327 return false; 02328 02329 if (VT < MVT::i8 || VT > MVT::i64) 02330 return false; 02331 02332 const Value *LHS = II->getArgOperand(0); 02333 const Value *RHS = II->getArgOperand(1); 02334 02335 // Canonicalize immediate to the RHS. 02336 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 02337 isCommutativeIntrinsic(II)) 02338 std::swap(LHS, RHS); 02339 02340 bool UseIncDec = false; 02341 if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne()) 02342 UseIncDec = true; 02343 02344 unsigned BaseOpc, CondOpc; 02345 switch (II->getIntrinsicID()) { 02346 default: llvm_unreachable("Unexpected intrinsic!"); 02347 case Intrinsic::sadd_with_overflow: 02348 BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD); 02349 CondOpc = X86::SETOr; 02350 break; 02351 case Intrinsic::uadd_with_overflow: 02352 BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break; 02353 case Intrinsic::ssub_with_overflow: 02354 BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB); 02355 CondOpc = X86::SETOr; 02356 break; 02357 case Intrinsic::usub_with_overflow: 02358 BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break; 02359 case Intrinsic::smul_with_overflow: 02360 BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break; 02361 case Intrinsic::umul_with_overflow: 02362 BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break; 02363 } 02364 02365 unsigned LHSReg = getRegForValue(LHS); 02366 if (LHSReg == 0) 02367 return false; 02368 bool LHSIsKill = hasTrivialKill(LHS); 02369 02370 unsigned ResultReg = 0; 02371 // Check if we have an immediate version. 
02372 if (const auto *CI = dyn_cast<ConstantInt>(RHS)) { 02373 static const unsigned Opc[2][2][4] = { 02374 { { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r }, 02375 { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r } }, 02376 { { X86::INC8r, X86::INC64_16r, X86::INC64_32r, X86::INC64r }, 02377 { X86::DEC8r, X86::DEC64_16r, X86::DEC64_32r, X86::DEC64r } } 02378 }; 02379 02380 if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) { 02381 ResultReg = createResultReg(TLI.getRegClassFor(VT)); 02382 bool Is64Bit = Subtarget->is64Bit(); 02383 bool IsDec = BaseOpc == X86ISD::DEC; 02384 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02385 TII.get(Opc[Is64Bit][IsDec][VT.SimpleTy-MVT::i8]), ResultReg) 02386 .addReg(LHSReg, getKillRegState(LHSIsKill)); 02387 } else 02388 ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill, 02389 CI->getZExtValue()); 02390 } 02391 02392 unsigned RHSReg; 02393 bool RHSIsKill; 02394 if (!ResultReg) { 02395 RHSReg = getRegForValue(RHS); 02396 if (RHSReg == 0) 02397 return false; 02398 RHSIsKill = hasTrivialKill(RHS); 02399 ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg, 02400 RHSIsKill); 02401 } 02402 02403 // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit 02404 // it manually. 02405 if (BaseOpc == X86ISD::UMUL && !ResultReg) { 02406 static const unsigned MULOpc[] = 02407 { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r }; 02408 static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX }; 02409 // First copy the first operand into RAX, which is an implicit input to 02410 // the X86::MUL*r instruction. 02411 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02412 TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8]) 02413 .addReg(LHSReg, getKillRegState(LHSIsKill)); 02414 ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8], 02415 TLI.getRegClassFor(VT), RHSReg, RHSIsKill); 02416 } else if (BaseOpc == X86ISD::SMUL && !ResultReg) { 02417 static const unsigned MULOpc[] = 02418 { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr }; 02419 if (VT == MVT::i8) { 02420 // Copy the first operand into AL, which is an implicit input to the 02421 // X86::IMUL8r instruction. 
02422 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02423 TII.get(TargetOpcode::COPY), X86::AL) 02424 .addReg(LHSReg, getKillRegState(LHSIsKill)); 02425 ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg, 02426 RHSIsKill); 02427 } else 02428 ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8], 02429 TLI.getRegClassFor(VT), LHSReg, LHSIsKill, 02430 RHSReg, RHSIsKill); 02431 } 02432 02433 if (!ResultReg) 02434 return false; 02435 02436 unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy); 02437 assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers."); 02438 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc), 02439 ResultReg2); 02440 02441 updateValueMap(II, ResultReg, 2); 02442 return true; 02443 } 02444 case Intrinsic::x86_sse_cvttss2si: 02445 case Intrinsic::x86_sse_cvttss2si64: 02446 case Intrinsic::x86_sse2_cvttsd2si: 02447 case Intrinsic::x86_sse2_cvttsd2si64: { 02448 bool IsInputDouble; 02449 switch (II->getIntrinsicID()) { 02450 default: llvm_unreachable("Unexpected intrinsic."); 02451 case Intrinsic::x86_sse_cvttss2si: 02452 case Intrinsic::x86_sse_cvttss2si64: 02453 if (!Subtarget->hasSSE1()) 02454 return false; 02455 IsInputDouble = false; 02456 break; 02457 case Intrinsic::x86_sse2_cvttsd2si: 02458 case Intrinsic::x86_sse2_cvttsd2si64: 02459 if (!Subtarget->hasSSE2()) 02460 return false; 02461 IsInputDouble = true; 02462 break; 02463 } 02464 02465 Type *RetTy = II->getCalledFunction()->getReturnType(); 02466 MVT VT; 02467 if (!isTypeLegal(RetTy, VT)) 02468 return false; 02469 02470 static const unsigned CvtOpc[2][2][2] = { 02471 { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr }, 02472 { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } }, 02473 { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr }, 02474 { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } } 02475 }; 02476 bool HasAVX = Subtarget->hasAVX(); 02477 unsigned Opc; 02478 switch (VT.SimpleTy) { 02479 default: llvm_unreachable("Unexpected result type."); 02480 case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break; 02481 case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break; 02482 } 02483 02484 // Check if we can fold insertelement instructions into the convert. 02485 const Value *Op = II->getArgOperand(0); 02486 while (auto *IE = dyn_cast<InsertElementInst>(Op)) { 02487 const Value *Index = IE->getOperand(2); 02488 if (!isa<ConstantInt>(Index)) 02489 break; 02490 unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); 02491 02492 if (Idx == 0) { 02493 Op = IE->getOperand(1); 02494 break; 02495 } 02496 Op = IE->getOperand(0); 02497 } 02498 02499 unsigned Reg = getRegForValue(Op); 02500 if (Reg == 0) 02501 return false; 02502 02503 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 02504 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 02505 .addReg(Reg); 02506 02507 updateValueMap(II, ResultReg); 02508 return true; 02509 } 02510 } 02511 } 02512 02513 bool X86FastISel::fastLowerArguments() { 02514 if (!FuncInfo.CanLowerReturn) 02515 return false; 02516 02517 const Function *F = FuncInfo.Fn; 02518 if (F->isVarArg()) 02519 return false; 02520 02521 CallingConv::ID CC = F->getCallingConv(); 02522 if (CC != CallingConv::C) 02523 return false; 02524 02525 if (Subtarget->isCallingConvWin64(CC)) 02526 return false; 02527 02528 if (!Subtarget->is64Bit()) 02529 return false; 02530 02531 // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. 
02532 unsigned GPRCnt = 0; 02533 unsigned FPRCnt = 0; 02534 unsigned Idx = 0; 02535 for (auto const &Arg : F->args()) { 02536 // The first argument is at index 1. 02537 ++Idx; 02538 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || 02539 F->getAttributes().hasAttribute(Idx, Attribute::InReg) || 02540 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || 02541 F->getAttributes().hasAttribute(Idx, Attribute::Nest)) 02542 return false; 02543 02544 Type *ArgTy = Arg.getType(); 02545 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) 02546 return false; 02547 02548 EVT ArgVT = TLI.getValueType(ArgTy); 02549 if (!ArgVT.isSimple()) return false; 02550 switch (ArgVT.getSimpleVT().SimpleTy) { 02551 default: return false; 02552 case MVT::i32: 02553 case MVT::i64: 02554 ++GPRCnt; 02555 break; 02556 case MVT::f32: 02557 case MVT::f64: 02558 if (!Subtarget->hasSSE1()) 02559 return false; 02560 ++FPRCnt; 02561 break; 02562 } 02563 02564 if (GPRCnt > 6) 02565 return false; 02566 02567 if (FPRCnt > 8) 02568 return false; 02569 } 02570 02571 static const MCPhysReg GPR32ArgRegs[] = { 02572 X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D 02573 }; 02574 static const MCPhysReg GPR64ArgRegs[] = { 02575 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9 02576 }; 02577 static const MCPhysReg XMMArgRegs[] = { 02578 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 02579 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 02580 }; 02581 02582 unsigned GPRIdx = 0; 02583 unsigned FPRIdx = 0; 02584 for (auto const &Arg : F->args()) { 02585 MVT VT = TLI.getSimpleValueType(Arg.getType()); 02586 const TargetRegisterClass *RC = TLI.getRegClassFor(VT); 02587 unsigned SrcReg; 02588 switch (VT.SimpleTy) { 02589 default: llvm_unreachable("Unexpected value type."); 02590 case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break; 02591 case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break; 02592 case MVT::f32: // fall-through 02593 case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break; 02594 } 02595 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 02596 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 02597 // Without this, EmitLiveInCopies may eliminate the livein if its only 02598 // use is a bitcast (which isn't turned into an instruction). 
02599 unsigned ResultReg = createResultReg(RC); 02600 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02601 TII.get(TargetOpcode::COPY), ResultReg) 02602 .addReg(DstReg, getKillRegState(true)); 02603 updateValueMap(&Arg, ResultReg); 02604 } 02605 return true; 02606 } 02607 02608 static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget, 02609 CallingConv::ID CC, 02610 ImmutableCallSite *CS) { 02611 if (Subtarget->is64Bit()) 02612 return 0; 02613 if (Subtarget->getTargetTriple().isOSMSVCRT()) 02614 return 0; 02615 if (CC == CallingConv::Fast || CC == CallingConv::GHC || 02616 CC == CallingConv::HiPE) 02617 return 0; 02618 if (CS && !CS->paramHasAttr(1, Attribute::StructRet)) 02619 return 0; 02620 if (CS && CS->paramHasAttr(1, Attribute::InReg)) 02621 return 0; 02622 return 4; 02623 } 02624 02625 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { 02626 auto &OutVals = CLI.OutVals; 02627 auto &OutFlags = CLI.OutFlags; 02628 auto &OutRegs = CLI.OutRegs; 02629 auto &Ins = CLI.Ins; 02630 auto &InRegs = CLI.InRegs; 02631 CallingConv::ID CC = CLI.CallConv; 02632 bool &IsTailCall = CLI.IsTailCall; 02633 bool IsVarArg = CLI.IsVarArg; 02634 const Value *Callee = CLI.Callee; 02635 const char *SymName = CLI.SymName; 02636 02637 bool Is64Bit = Subtarget->is64Bit(); 02638 bool IsWin64 = Subtarget->isCallingConvWin64(CC); 02639 02640 // Handle only C, fastcc, and webkit_js calling conventions for now. 02641 switch (CC) { 02642 default: return false; 02643 case CallingConv::C: 02644 case CallingConv::Fast: 02645 case CallingConv::WebKit_JS: 02646 case CallingConv::X86_FastCall: 02647 case CallingConv::X86_64_Win64: 02648 case CallingConv::X86_64_SysV: 02649 break; 02650 } 02651 02652 // Allow SelectionDAG isel to handle tail calls. 02653 if (IsTailCall) 02654 return false; 02655 02656 // fastcc with -tailcallopt is intended to provide a guaranteed 02657 // tail call optimization. Fastisel doesn't know how to do that. 02658 if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) 02659 return false; 02660 02661 // Don't know how to handle Win64 varargs yet. Nothing special needed for 02662 // x86-32. Special handling for x86-64 is implemented. 02663 if (IsVarArg && IsWin64) 02664 return false; 02665 02666 // Don't know about inalloca yet. 02667 if (CLI.CS && CLI.CS->hasInAllocaArgument()) 02668 return false; 02669 02670 // Fast-isel doesn't know about callee-pop yet. 02671 if (X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg, 02672 TM.Options.GuaranteedTailCallOpt)) 02673 return false; 02674 02675 // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra 02676 // instruction. This is safe because it is common to all FastISel supported 02677 // calling conventions on x86. 02678 for (int i = 0, e = OutVals.size(); i != e; ++i) { 02679 Value *&Val = OutVals[i]; 02680 ISD::ArgFlagsTy Flags = OutFlags[i]; 02681 if (auto *CI = dyn_cast<ConstantInt>(Val)) { 02682 if (CI->getBitWidth() < 32) { 02683 if (Flags.isSExt()) 02684 Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext())); 02685 else 02686 Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext())); 02687 } 02688 } 02689 02690 // Passing bools around ends up doing a trunc to i1 and passing it. 02691 // Codegen this as an argument + "and 1". 
02692 if (auto *TI = dyn_cast<TruncInst>(Val)) { 02693 if (TI->getType()->isIntegerTy(1) && CLI.CS && 02694 (TI->getParent() == CLI.CS->getInstruction()->getParent()) && 02695 TI->hasOneUse()) { 02696 Val = cast<TruncInst>(Val)->getOperand(0); 02697 unsigned ResultReg = getRegForValue(Val); 02698 02699 if (!ResultReg) 02700 return false; 02701 02702 MVT ArgVT; 02703 if (!isTypeLegal(Val->getType(), ArgVT)) 02704 return false; 02705 02706 ResultReg = 02707 fastEmit_ri(ArgVT, ArgVT, ISD::AND, ResultReg, Val->hasOneUse(), 1); 02708 02709 if (!ResultReg) 02710 return false; 02711 updateValueMap(Val, ResultReg); 02712 } 02713 } 02714 } 02715 02716 // Analyze operands of the call, assigning locations to each operand. 02717 SmallVector<CCValAssign, 16> ArgLocs; 02718 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext()); 02719 02720 // Allocate shadow area for Win64 02721 if (IsWin64) 02722 CCInfo.AllocateStack(32, 8); 02723 02724 SmallVector<MVT, 16> OutVTs; 02725 for (auto *Val : OutVals) { 02726 MVT VT; 02727 if (!isTypeLegal(Val->getType(), VT)) 02728 return false; 02729 OutVTs.push_back(VT); 02730 } 02731 CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86); 02732 02733 // Get a count of how many bytes are to be pushed on the stack. 02734 unsigned NumBytes = CCInfo.getNextStackOffset(); 02735 02736 // Issue CALLSEQ_START 02737 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 02738 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 02739 .addImm(NumBytes); 02740 02741 // Walk the register/memloc assignments, inserting copies/loads. 02742 const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>( 02743 TM.getSubtargetImpl()->getRegisterInfo()); 02744 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 02745 CCValAssign const &VA = ArgLocs[i]; 02746 const Value *ArgVal = OutVals[VA.getValNo()]; 02747 MVT ArgVT = OutVTs[VA.getValNo()]; 02748 02749 if (ArgVT == MVT::x86mmx) 02750 return false; 02751 02752 unsigned ArgReg = getRegForValue(ArgVal); 02753 if (!ArgReg) 02754 return false; 02755 02756 // Promote the value if needed. 
02757 switch (VA.getLocInfo()) { 02758 case CCValAssign::Full: break; 02759 case CCValAssign::SExt: { 02760 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && 02761 "Unexpected extend"); 02762 bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg, 02763 ArgVT, ArgReg); 02764 assert(Emitted && "Failed to emit a sext!"); (void)Emitted; 02765 ArgVT = VA.getLocVT(); 02766 break; 02767 } 02768 case CCValAssign::ZExt: { 02769 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && 02770 "Unexpected extend"); 02771 bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg, 02772 ArgVT, ArgReg); 02773 assert(Emitted && "Failed to emit a zext!"); (void)Emitted; 02774 ArgVT = VA.getLocVT(); 02775 break; 02776 } 02777 case CCValAssign::AExt: { 02778 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && 02779 "Unexpected extend"); 02780 bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg, 02781 ArgVT, ArgReg); 02782 if (!Emitted) 02783 Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg, 02784 ArgVT, ArgReg); 02785 if (!Emitted) 02786 Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg, 02787 ArgVT, ArgReg); 02788 02789 assert(Emitted && "Failed to emit a aext!"); (void)Emitted; 02790 ArgVT = VA.getLocVT(); 02791 break; 02792 } 02793 case CCValAssign::BCvt: { 02794 ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg, 02795 /*TODO: Kill=*/false); 02796 assert(ArgReg && "Failed to emit a bitcast!"); 02797 ArgVT = VA.getLocVT(); 02798 break; 02799 } 02800 case CCValAssign::VExt: 02801 // VExt has not been implemented, so this should be impossible to reach 02802 // for now. However, fallback to Selection DAG isel once implemented. 02803 return false; 02804 case CCValAssign::FPExt: 02805 llvm_unreachable("Unexpected loc info!"); 02806 case CCValAssign::Indirect: 02807 // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully 02808 // support this. 02809 return false; 02810 } 02811 02812 if (VA.isRegLoc()) { 02813 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02814 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 02815 OutRegs.push_back(VA.getLocReg()); 02816 } else { 02817 assert(VA.isMemLoc()); 02818 02819 // Don't emit stores for undef values. 02820 if (isa<UndefValue>(ArgVal)) 02821 continue; 02822 02823 unsigned LocMemOffset = VA.getLocMemOffset(); 02824 X86AddressMode AM; 02825 AM.Base.Reg = RegInfo->getStackRegister(); 02826 AM.Disp = LocMemOffset; 02827 ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()]; 02828 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); 02829 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 02830 MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore, 02831 ArgVT.getStoreSize(), Alignment); 02832 if (Flags.isByVal()) { 02833 X86AddressMode SrcAM; 02834 SrcAM.Base.Reg = ArgReg; 02835 if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize())) 02836 return false; 02837 } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) { 02838 // If this is a really simple value, emit this with the Value* version 02839 // of X86FastEmitStore. If it isn't simple, we don't want to do this, 02840 // as it can cause us to reevaluate the argument. 
02841 if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO)) 02842 return false; 02843 } else { 02844 bool ValIsKill = hasTrivialKill(ArgVal); 02845 if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO)) 02846 return false; 02847 } 02848 } 02849 } 02850 02851 // ELF / PIC requires GOT in the EBX register before function calls via PLT 02852 // GOT pointer. 02853 if (Subtarget->isPICStyleGOT()) { 02854 unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); 02855 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02856 TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base); 02857 } 02858 02859 if (Is64Bit && IsVarArg && !IsWin64) { 02860 // From AMD64 ABI document: 02861 // For calls that may call functions that use varargs or stdargs 02862 // (prototype-less calls or calls to functions containing ellipsis (...) in 02863 // the declaration) %al is used as hidden argument to specify the number 02864 // of SSE registers used. The contents of %al do not need to match exactly 02865 // the number of registers, but must be an ubound on the number of SSE 02866 // registers used and is in the range 0 - 8 inclusive. 02867 02868 // Count the number of XMM registers allocated. 02869 static const MCPhysReg XMMArgRegs[] = { 02870 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 02871 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 02872 }; 02873 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 02874 assert((Subtarget->hasSSE1() || !NumXMMRegs) 02875 && "SSE registers cannot be used when SSE is disabled"); 02876 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri), 02877 X86::AL).addImm(NumXMMRegs); 02878 } 02879 02880 // Materialize callee address in a register. FIXME: GV address can be 02881 // handled with a CALLpcrel32 instead. 02882 X86AddressMode CalleeAM; 02883 if (!X86SelectCallAddress(Callee, CalleeAM)) 02884 return false; 02885 02886 unsigned CalleeOp = 0; 02887 const GlobalValue *GV = nullptr; 02888 if (CalleeAM.GV != nullptr) { 02889 GV = CalleeAM.GV; 02890 } else if (CalleeAM.Base.Reg != 0) { 02891 CalleeOp = CalleeAM.Base.Reg; 02892 } else 02893 return false; 02894 02895 // Issue the call. 02896 MachineInstrBuilder MIB; 02897 if (CalleeOp) { 02898 // Register-indirect call. 02899 unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r; 02900 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)) 02901 .addReg(CalleeOp); 02902 } else { 02903 // Direct call. 02904 assert(GV && "Not a direct call"); 02905 unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; 02906 02907 // See if we need any target-specific flags on the GV operand. 02908 unsigned char OpFlags = 0; 02909 02910 // On ELF targets, in both X86-64 and X86-32 mode, direct calls to 02911 // external symbols most go through the PLT in PIC mode. If the symbol 02912 // has hidden or protected visibility, or if it is static or local, then 02913 // we don't need to use the PLT - we can directly call it. 
02914 if (Subtarget->isTargetELF() && 02915 TM.getRelocationModel() == Reloc::PIC_ && 02916 GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { 02917 OpFlags = X86II::MO_PLT; 02918 } else if (Subtarget->isPICStyleStubAny() && 02919 (GV->isDeclaration() || GV->isWeakForLinker()) && 02920 (!Subtarget->getTargetTriple().isMacOSX() || 02921 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { 02922 // PC-relative references to external symbols should go through $stub, 02923 // unless we're building with the leopard linker or later, which 02924 // automatically synthesizes these stubs. 02925 OpFlags = X86II::MO_DARWIN_STUB; 02926 } 02927 02928 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); 02929 if (SymName) 02930 MIB.addExternalSymbol(SymName, OpFlags); 02931 else 02932 MIB.addGlobalAddress(GV, 0, OpFlags); 02933 } 02934 02935 // Add a register mask operand representing the call-preserved registers. 02936 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 02937 MIB.addRegMask(TRI.getCallPreservedMask(CC)); 02938 02939 // Add an implicit use GOT pointer in EBX. 02940 if (Subtarget->isPICStyleGOT()) 02941 MIB.addReg(X86::EBX, RegState::Implicit); 02942 02943 if (Is64Bit && IsVarArg && !IsWin64) 02944 MIB.addReg(X86::AL, RegState::Implicit); 02945 02946 // Add implicit physical register uses to the call. 02947 for (auto Reg : OutRegs) 02948 MIB.addReg(Reg, RegState::Implicit); 02949 02950 // Issue CALLSEQ_END 02951 unsigned NumBytesForCalleeToPop = 02952 computeBytesPoppedByCallee(Subtarget, CC, CLI.CS); 02953 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 02954 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 02955 .addImm(NumBytes).addImm(NumBytesForCalleeToPop); 02956 02957 // Now handle call return values. 02958 SmallVector<CCValAssign, 16> RVLocs; 02959 CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, 02960 CLI.RetTy->getContext()); 02961 CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86); 02962 02963 // Copy all of the result registers out of their specified physreg. 02964 unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy); 02965 for (unsigned i = 0; i != RVLocs.size(); ++i) { 02966 CCValAssign &VA = RVLocs[i]; 02967 EVT CopyVT = VA.getValVT(); 02968 unsigned CopyReg = ResultReg + i; 02969 02970 // If this is x86-64, and we disabled SSE, we can't return FP values 02971 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && 02972 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { 02973 report_fatal_error("SSE register return with SSE disabled"); 02974 } 02975 02976 // If we prefer to use the value in xmm registers, copy it out as f80 and 02977 // use a truncate to move it from fp stack reg to xmm reg. 02978 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && 02979 isScalarFPTypeInSSEReg(VA.getValVT())) { 02980 CopyVT = MVT::f80; 02981 CopyReg = createResultReg(&X86::RFP80RegClass); 02982 } 02983 02984 // Copy out the result. 02985 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02986 TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg()); 02987 InRegs.push_back(VA.getLocReg()); 02988 02989 // Round the f80 to the right size, which also moves it to the appropriate 02990 // xmm register. This is accomplished by storing the f80 value in memory 02991 // and then loading it back. 02992 if (CopyVT != VA.getValVT()) { 02993 EVT ResVT = VA.getValVT(); 02994 unsigned Opc = ResVT == MVT::f32 ? 
X86::ST_Fp80m32 : X86::ST_Fp80m64; 02995 unsigned MemSize = ResVT.getSizeInBits()/8; 02996 int FI = MFI.CreateStackObject(MemSize, MemSize, false); 02997 addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02998 TII.get(Opc)), FI) 02999 .addReg(CopyReg); 03000 Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm; 03001 addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03002 TII.get(Opc), ResultReg + i), FI); 03003 } 03004 } 03005 03006 CLI.ResultReg = ResultReg; 03007 CLI.NumResultRegs = RVLocs.size(); 03008 CLI.Call = MIB; 03009 03010 return true; 03011 } 03012 03013 bool 03014 X86FastISel::fastSelectInstruction(const Instruction *I) { 03015 switch (I->getOpcode()) { 03016 default: break; 03017 case Instruction::Load: 03018 return X86SelectLoad(I); 03019 case Instruction::Store: 03020 return X86SelectStore(I); 03021 case Instruction::Ret: 03022 return X86SelectRet(I); 03023 case Instruction::ICmp: 03024 case Instruction::FCmp: 03025 return X86SelectCmp(I); 03026 case Instruction::ZExt: 03027 return X86SelectZExt(I); 03028 case Instruction::Br: 03029 return X86SelectBranch(I); 03030 case Instruction::LShr: 03031 case Instruction::AShr: 03032 case Instruction::Shl: 03033 return X86SelectShift(I); 03034 case Instruction::SDiv: 03035 case Instruction::UDiv: 03036 case Instruction::SRem: 03037 case Instruction::URem: 03038 return X86SelectDivRem(I); 03039 case Instruction::Select: 03040 return X86SelectSelect(I); 03041 case Instruction::Trunc: 03042 return X86SelectTrunc(I); 03043 case Instruction::FPExt: 03044 return X86SelectFPExt(I); 03045 case Instruction::FPTrunc: 03046 return X86SelectFPTrunc(I); 03047 case Instruction::IntToPtr: // Deliberate fall-through. 03048 case Instruction::PtrToInt: { 03049 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); 03050 EVT DstVT = TLI.getValueType(I->getType()); 03051 if (DstVT.bitsGT(SrcVT)) 03052 return X86SelectZExt(I); 03053 if (DstVT.bitsLT(SrcVT)) 03054 return X86SelectTrunc(I); 03055 unsigned Reg = getRegForValue(I->getOperand(0)); 03056 if (Reg == 0) return false; 03057 updateValueMap(I, Reg); 03058 return true; 03059 } 03060 } 03061 03062 return false; 03063 } 03064 03065 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) { 03066 if (VT > MVT::i64) 03067 return 0; 03068 03069 uint64_t Imm = CI->getZExtValue(); 03070 if (Imm == 0) { 03071 unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass); 03072 switch (VT.SimpleTy) { 03073 default: llvm_unreachable("Unexpected value type"); 03074 case MVT::i1: 03075 case MVT::i8: 03076 return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true, 03077 X86::sub_8bit); 03078 case MVT::i16: 03079 return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true, 03080 X86::sub_16bit); 03081 case MVT::i32: 03082 return SrcReg; 03083 case MVT::i64: { 03084 unsigned ResultReg = createResultReg(&X86::GR64RegClass); 03085 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03086 TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg) 03087 .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit); 03088 return ResultReg; 03089 } 03090 } 03091 } 03092 03093 unsigned Opc = 0; 03094 switch (VT.SimpleTy) { 03095 default: llvm_unreachable("Unexpected value type"); 03096 case MVT::i1: VT = MVT::i8; // fall-through 03097 case MVT::i8: Opc = X86::MOV8ri; break; 03098 case MVT::i16: Opc = X86::MOV16ri; break; 03099 case MVT::i32: Opc = X86::MOV32ri; break; 03100 case MVT::i64: { 03101 if (isUInt<32>(Imm)) 03102 Opc = X86::MOV32ri; 03103 else if (isInt<32>(Imm)) 
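  // A sign-extended 32-bit immediate fits the shorter MOV64ri32 encoding;
  // an immediate that really needs all 64 bits falls through to the
  // 10-byte MOV64ri below.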
03104 Opc = X86::MOV64ri32; 03105 else 03106 Opc = X86::MOV64ri; 03107 break; 03108 } 03109 } 03110 if (VT == MVT::i64 && Opc == X86::MOV32ri) { 03111 unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm); 03112 unsigned ResultReg = createResultReg(&X86::GR64RegClass); 03113 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 03114 TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg) 03115 .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit); 03116 return ResultReg; 03117 } 03118 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); 03119 } 03120 03121 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) { 03122 if (CFP->isNullValue()) 03123 return fastMaterializeFloatZero(CFP); 03124 03125 // Can't handle alternate code models yet. 03126 CodeModel::Model CM = TM.getCodeModel(); 03127 if (CM != CodeModel::Small && CM != CodeModel::Large) 03128 return 0; 03129 03130 // Get opcode and regclass of the output for the given load instruction. 03131 unsigned Opc = 0; 03132 const TargetRegisterClass *RC = nullptr; 03133 switch (VT.SimpleTy) { 03134 default: return 0; 03135 case MVT::f32: 03136 if (X86ScalarSSEf32) { 03137 Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm; 03138 RC = &X86::FR32RegClass; 03139 } else { 03140 Opc = X86::LD_Fp32m; 03141 RC = &X86::RFP32RegClass; 03142 } 03143 break; 03144 case MVT::f64: 03145 if (X86ScalarSSEf64) { 03146 Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm; 03147 RC = &X86::FR64RegClass; 03148 } else { 03149 Opc = X86::LD_Fp64m; 03150 RC = &X86::RFP64RegClass; 03151 } 03152 break; 03153 case MVT::f80: 03154 // No f80 support yet. 03155 return 0; 03156 } 03157 03158 // MachineConstantPool wants an explicit alignment. 03159 unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); 03160 if (Align == 0) { 03161 // Alignment of vector types. FIXME! 03162 Align = DL.getTypeAllocSize(CFP->getType()); 03163 } 03164 03165 // x86-32 PIC requires a PIC base register for constant pools. 03166 unsigned PICBase = 0; 03167 unsigned char OpFlag = 0; 03168 if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic 03169 OpFlag = X86II::MO_PIC_BASE_OFFSET; 03170 PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); 03171 } else if (Subtarget->isPICStyleGOT()) { 03172 OpFlag = X86II::MO_GOTOFF; 03173 PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); 03174 } else if (Subtarget->isPICStyleRIPRel() && 03175 TM.getCodeModel() == CodeModel::Small) { 03176 PICBase = X86::RIP; 03177 } 03178 03179 // Create the load from the constant pool. 
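  // In the small code model the constant-pool slot is referenced directly
  // by the load (roughly 'movss .LCPI0_0(%rip), %xmm0', going through the
  // PIC base computed above when one is required); the large code model
  // first materializes the pool address with MOV64ri and loads through
  // that register.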
  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
  unsigned ResultReg = createResultReg(RC);

  if (CM == CodeModel::Large) {
    unsigned AddrReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
            AddrReg)
      .addConstantPoolIndex(CPI, 0, OpFlag);
    MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                      TII.get(Opc), ResultReg);
    addDirectMem(MIB, AddrReg);
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
        TM.getSubtargetImpl()->getDataLayout()->getPointerSize(), Align);
    MIB->addMemOperand(*FuncInfo.MF, MMO);
    return ResultReg;
  }

  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                   TII.get(Opc), ResultReg),
                           CPI, PICBase, OpFlag);
  return ResultReg;
}

unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
  // Can't handle alternate code models yet.
  if (TM.getCodeModel() != CodeModel::Small)
    return 0;

  // Materialize addresses with LEA/MOV instructions.
  X86AddressMode AM;
  if (X86SelectAddress(GV, AM)) {
    // If the expression is just a basereg, then we're done, otherwise we need
    // to emit an LEA.
    if (AM.BaseType == X86AddressMode::RegBase &&
        AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
      return AM.Base.Reg;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    if (TM.getRelocationModel() == Reloc::Static &&
        TLI.getPointerTy() == MVT::i64) {
      // The displacement could be more than 32 bits away, so we need to use
      // an instruction with a 64-bit immediate.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
              ResultReg)
        .addGlobalAddress(GV);
    } else {
      unsigned Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                             TII.get(Opc), ResultReg), AM);
    }
    return ResultReg;
  }
  return 0;
}

unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return X86MaterializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return X86MaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return X86MaterializeGV(GV, VT);

  return 0;
}

unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but fastMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and fastMaterializeAlloca.
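  // Static allocas sit at a fixed frame-index offset, so their address can be
  // produced by a single LEA off the frame reference built below
  // (illustratively, something like "leaq <offset>(%rsp), %rax" once frame
  // indices have been resolved).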
  if (!FuncInfo.StaticAllocaMap.count(C))
    return 0;
  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM))
    return 0;
  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
  const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
  unsigned ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                         TII.get(Opc), ResultReg), AM);
  return ResultReg;
}

unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
  MVT VT;
  if (!isTypeLegal(CF->getType(), VT))
    return 0;

  // Get opcode and regclass for the given zero.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.SimpleTy) {
  default: return 0;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = X86::FsFLD0SS;
      RC = &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp032;
      RC = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = X86::FsFLD0SD;
      RC = &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp064;
      RC = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  return ResultReg;
}


bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  const Value *Ptr = LI->getPointerOperand();
  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  const X86InstrInfo &XII = (const X86InstrInfo&)TII;

  unsigned Size = DL.getTypeAllocSize(LI->getType());
  unsigned Alignment = LI->getAlignment();

  if (Alignment == 0) // Ensure that codegen never sees alignment 0
    Alignment = DL.getABITypeAlignment(LI->getType());

  SmallVector<MachineOperand, 8> AddrOps;
  AM.getFullAddress(AddrOps);

  MachineInstr *Result =
    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
  if (!Result)
    return false;

  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
  FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
  MI->eraseFromParent();
  return true;
}


namespace llvm {
  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    return new X86FastISel(funcInfo, libInfo);
  }
}
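For reference, this factory is what ties the class above into fast instruction
selection: X86TargetLowering::createFastISel (in X86ISelLowering.cpp) forwards
its arguments here so that SelectionDAGISel can construct the FastISel instance
when fast-isel is enabled (typically at -O0). A minimal sketch of that
forwarding, assuming the declarations from X86ISelLowering.h:

FastISel *
X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  // Delegate to the X86 FastISel factory defined in this file.
  return X86::createFastISel(funcInfo, libInfo);
}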