//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDValue's instead of register numbers for the leaves of the matched
  /// tree.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;
    int Base_FrameIndex;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    int JT;
    unsigned Align;             // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
      : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
        Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
        JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {
    }

    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             JT != -1 || BlockAddr != nullptr;
    }

    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

    /// isRIPRelative - Return true if this addressing mode is already RIP
    /// relative.
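    /// (In practice this means the base register is a RegisterSDNode holding
    /// X86::RIP, which is how MatchWrapper and MatchAddress record a %rip
    /// base.)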
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode())
        Base_Reg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
             << " Scale" << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode())
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Align << '\n';
    }
#endif
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel final : public SelectionDAGISel {
    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// OptForSize - If true, selector should try to optimize for code size
    /// instead of performance.
    bool OptForSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel),
        Subtarget(&tm.getSubtarget<X86Subtarget>()),
        OptForSize(false) {}

    const char *getPassName() const override {
      return "X86 DAG->DAG Instruction Selection";
    }

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Reset the subtarget each time through.
      Subtarget = &TM.getSubtarget<X86Subtarget>();
      SelectionDAGISel::runOnMachineFunction(MF);
      return true;
    }

    void EmitFunctionEntryCode() override;

    bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;

    void PreprocessISelDAG() override;

    inline bool immSext8(SDNode *N) const {
      return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
    }

    // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
    // sign extended field.
    inline bool i64immSExt32(SDNode *N) const {
      uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
      return (int64_t)v == (int32_t)v;
    }

    // Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDNode *N) override;
    SDNode *SelectGather(SDNode *N, unsigned Opc);
    SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);

    bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool SelectMOV64Imm32(SDValue N, SDValue &Imm);
    bool SelectLEAAddr(SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool SelectLEA64_32Addr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
    bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool SelectScalarSSELoad(SDNode *Root, SDValue N,
                             SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &NodeWithChain);

    bool TryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      char ConstraintCode,
                                      std::vector<SDValue> &OutOps) override;

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                   SDValue &Scale, SDValue &Index,
                                   SDValue &Disp, SDValue &Segment) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
                                    getTargetLowering()->getPointerTy()) :
        AM.Base_Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES) {
        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - Return an SDNode that returns the value of
    /// the global base register. Output instructions required to
    /// initialize the global base register, if necessary.
    ///
    SDNode *getGlobalBaseReg();

    /// getTargetMachine - Return a reference to the TargetMachine, casted
    /// to the target-specific type.
    const X86TargetMachine &getTargetMachine() const {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
    /// to the target-specific type.
    const X86InstrInfo *getInstrInfo() const {
      return getTargetMachine().getSubtargetImpl()->getInstrInfo();
    }
  };
}


bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (!N.hasOneUse())
    return false;

  if (N.getOpcode() != ISD::LOAD)
    return true;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::SUB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is a 8-bit immediate we should fold the immediate
      // instead. This reduces code size.
      // e.g.
      // movl 4(%esp), %eax
      // addl $4, %eax
      // vs.
      // movl $4, %eax
      // addl 4(%esp), %eax
      // The former is 2 bytes shorter. In case where the increment is 1, then
      // the saving can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      // movl    %gs:0, %eax
      // leal    i@NTPOFF(%eax), %eax
      // instead of
      // movl    $i@NTPOFF, %eax
      // addl    %gs:0, %eax
      // if the block also has an access to a second TLS address this will save
      // a load.
      // FIXME: This is probably also true for non-TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }
  }

  return true;
}

/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
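/// Per the surrounding comments, the goal is to place the load directly on the
/// call's chain so the call-address load can be folded into the call; see
/// isCalleeLoad below for the safety checks that make this legal.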
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
    Ops.clear();
    Ops.push_back(NewChain);
  }
  for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
    Ops.push_back(OrigChain.getOperand(i));
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  unsigned NumOps = Call.getNode()->getNumOperands();
  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  for (unsigned i = 1, e = NumOps; i != e; ++i)
    Ops.push_back(Call.getOperand(i));
  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}

/// isCalleeLoad - Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  // The transformation is somewhat dangerous if the call's chain was glued to
  // the call. After MoveBelowOrigChain the load is moved between the call and
  // the chain, this can create a cycle if the load is not folded. So it is
  // *really* important that we are sure the load will be folded.
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (!Chain.getNumOperands())
    return false;
  // Since we are not checking for AA here, conservatively abort if the chain
  // writes to memory. It's not safe to move the callee (a load) across a store.
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
    return false;
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}

void X86DAGToDAGISel::PreprocessISelDAG() {
  // OptForSize is used in pattern predicates that isel is matching.
  OptForSize = MF->getFunction()->getAttributes().
    hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
        // Only do this when the target doesn't favor register indirect
        // calls.
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          // Only do this if the load can be folded into TC_RETURN.
          (Subtarget->is64Bit() ||
           getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to be store and
    // load to the stack. This is a gross hack. We would like to simply mark
    // these as being illegal, but when we do that, legalize produces these when
    // it expands calls, then expands these in the same legalize pass. We would
    // like dag combine to be able to hack on these between the call expansion
    // and the node legalization. As such this pass basically does "really
    // late" legalization of these inline with the X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    MVT SrcVT = N->getOperand(0).getSimpleValueType();
    MVT DstVT = N->getSimpleValueType(0);

    // If any of the sources are vectors, no fp stack involved.
    if (SrcVT.isVector() || DstVT.isVector())
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    const X86TargetLowering *X86Lowering =
        static_cast<const X86TargetLowering *>(getTargetLowering());
    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore. SSE can fold direct loads into other
    // operations. Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ?
              SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    SDLoc dl(N);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, MachinePointerInfo(), MemVT,
                                          false, false, 0);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        MachinePointerInfo(),
                                        MemVT, false, false, false, 0);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created. This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}


/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                             MachineFrameInfo *MFI) {
  const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
  if (Subtarget->isTargetCygMing()) {
    unsigned CallOp =
      Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
    BuildMI(BB, DebugLoc(),
            TII->get(CallOp)).addExternalSymbol("__main");
  }
}

void X86DAGToDAGISel::EmitFunctionEntryCode() {
  // If this is main, emit special code for main.
  if (const Function *Fn = MF->getFunction())
    if (Fn->hasExternalLinkage() && Fn->getName() == "main")
      EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
}

static bool isDispSafeForFrameIndex(int64_t Val) {
  // On 64-bit platforms, we can run into an issue where a frame index
  // includes a displacement that, when added to the explicit displacement,
  // will overflow the displacement field. Assuming that the frame index
  // displacement fits into a 31-bit integer (which is only slightly more
  // aggressive than the current fundamental assumption that it fits into
  // a 32-bit integer), a 31-bit disp should always be safe.
  return isInt<31>(Val);
}

bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {
  int64_t Val = AM.Disp + Offset;
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit()) {
    if (!X86::isOffsetSuitableForCodeModel(Val, M,
                                           AM.hasSymbolicDisplacement()))
      return true;
    // In addition to the checks required for a register base, check that
    // we do not try to use an unsafe Disp with a frame index.
    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
        !isDispSafeForFrameIndex(Val))
      return true;
  }
  AM.Disp = Val;
  return false;

}

bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
  SDValue Address = N->getOperand(1);

  // load gs:0 -> GS segment register.
  // load fs:0 -> FS segment register.
  //
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
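  // (Address space 256 marks %gs-relative and 257 marks %fs-relative pointers,
  // which is what the switch below keys on.)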
  // For more information see http://people.redhat.com/drepper/tls.pdf
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
        Subtarget->isTargetLinux())
      switch (N->getPointerInfo().getAddrSpace()) {
      case 256:
        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
        return false;
      case 257:
        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
        return false;
      }

  return true;
}

/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
/// into an addressing mode. These wrap things that will resolve down into a
/// symbol reference. If no match is possible, this returns true, otherwise it
/// returns false.
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  SDValue N0 = N.getOperand(0);
  CodeModel::Model M = TM.getCodeModel();

  // Handle X86-64 rip-relative addresses. We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      (M == CodeModel::Small || M == CodeModel::Kernel)) {
    // Base and index reg must be 0 in order to use %rip as base.
    if (AM.hasBaseOrIndexReg())
      return true;
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.GV = G->getGlobal();
      AM.SymbolFlags = G->getTargetFlags();
      if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.SymbolFlags = CP->getTargetFlags();
      if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.BlockAddr = BA->getBlockAddress();
      AM.SymbolFlags = BA->getTargetFlags();
      if (FoldOffsetIntoAddress(BA->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else
      llvm_unreachable("Unhandled symbol reference node.");

    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit
  // mode, this only applies to a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      M == CodeModel::Small || M == CodeModel::Kernel) {
    assert(N.getOpcode() != X86ISD::WrapperRIP &&
           "RIP-relative addressing already handled");
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      AM.BlockAddr = BA->getBlockAddress();
      AM.Disp += BA->getOffset();
      AM.SymbolFlags = BA->getTargetFlags();
    } else
      llvm_unreachable("Unhandled symbol reference node.");
    return false;
  }

  return true;
}

/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (MatchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr &&
      AM.IndexReg.getNode() == nullptr &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}

// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  if (N.getNode()->getNodeId() == -1 ||
      N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
    DAG.RepositionNode(Pos.getNode(), N.getNode());
    N.getNode()->setNodeId(Pos.getNode()->getNodeId());
  }
}

// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
// safe. This allows us to convert the shift and and into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed.
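// A worked example (illustrative only, with C1 == 2): the pattern
//   (X >> 6) & 0x3fc
// becomes
//   ((X >> 8) & 0xff) << 2,
// so the AND can be selected as an h-register extract (e.g. %ah when X lives
// in %eax) and the trailing shift-by-2 becomes a scale of 4 in the address.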
static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask,
                                      SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
      !Shift.hasOneUse())
    return true;

  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue Eight = DAG.getConstant(8, MVT::i8);
  SDValue NewMask = DAG.getConstant(0xff, VT);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  InsertDAGNode(DAG, N, Eight);
  InsertDAGNode(DAG, N, Srl);
  InsertDAGNode(DAG, N, NewMask);
  InsertDAGNode(DAG, N, And);
  InsertDAGNode(DAG, N, ShlCount);
  InsertDAGNode(DAG, N, Shl);
  DAG.ReplaceAllUsesWith(N, Shl);
  AM.IndexReg = And;
  AM.Scale = (1 << ScaleLog);
  return false;
}

// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        uint64_t Mask,
                                        SDValue Shift, SDValue X,
                                        X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SHL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  // Not likely to be profitable if either the AND or SHIFT node has more
  // than one use (unless all uses are for address computation). Besides,
  // isel mechanism requires their node ids to be reused.
  if (!N.hasOneUse() || !Shift.hasOneUse())
    return true;

  // Verify that the shift amount is something we can fold.
  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  InsertDAGNode(DAG, N, NewMask);
  InsertDAGNode(DAG, N, NewAnd);
  InsertDAGNode(DAG, N, NewShift);
  DAG.ReplaceAllUsesWith(N, NewShift);

  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;
  return false;
}

// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %rcx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask,
                                    SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  unsigned MaskLZ = countLeadingZeros(Mask);
  unsigned MaskTZ = countTrailingZeros(Mask);

  // The amount of shift we're trying to fit into the addressing mode is taken
  // from the trailing zeros of the mask.
  unsigned AMShiftAmt = MaskTZ;

  // There is nothing we can do here unless the mask is removing some bits.
  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;

  // We also need to ensure that mask is a continuous run of bits.
  if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;

  // Scale the leading zero count down based on the actual size of the value.
  // Also scale it down based on the size of the shift.
  MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;

  // The final check is to ensure that any masked out high bits of X are
  // already known to be zero. Otherwise, the mask has a semantic impact
  // other than masking out a couple of low bits. Unfortunately, because of
  // the mask, zero extensions will be removed from operands in some cases.
  // This code works extra hard to look through extensions because we can
  // replace them with zero extensions cheaply if necessary.
  bool ReplacingAnyExtend = false;
  if (X.getOpcode() == ISD::ANY_EXTEND) {
    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();
    // Assume that we'll replace the any-extend with a zero-extend, and
    // narrow the search to the extended value.
    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ?
               0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;
  }
  APInt MaskedHighBits =
    APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
  APInt KnownZero, KnownOne;
  DAG.computeKnownBits(X, KnownZero, KnownOne);
  if (MaskedHighBits != KnownZero) return true;

  // We've identified a pattern that can be transformed into a single shift
  // and an addressing mode. Make it so.
  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);
    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
    InsertDAGNode(DAG, N, NewX);
    X = NewX;
  }
  SDLoc DL(N);
  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8);
  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  InsertDAGNode(DAG, N, NewSRLAmt);
  InsertDAGNode(DAG, N, NewSRL);
  InsertDAGNode(DAG, N, NewSHLAmt);
  InsertDAGNode(DAG, N, NewSHL);
  DAG.ReplaceAllUsesWith(N, NewSHL);

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewSRL;
  return false;
}

bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  SDLoc dl(N);
  DEBUG({
      dbgs() << "MatchAddress: ";
      AM.dump();
    });
  // Limit recursion.
  if (Depth > 5)
    return MatchAddressBase(N, AM);

  // If this is already a %rip relative address, we can only merge immediates
  // into it. Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements. It isn't very important, but this should be fixed for
    // consistency.
    if (!AM.ES && AM.JT != -1) return true;

    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
      if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
        return false;
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!FoldOffsetIntoAddress(Val, AM))
      return false;
    break;
  }

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    if (!MatchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
      return false;
    break;

  case ISD::FrameIndex:
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;

    if (ConstantSDNode
          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getNode()->getOperand(0);

        // Okay, we know that we have a scale by now. However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
          AM.IndexReg = ShVal.getNode()->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
          uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
          if (!FoldOffsetIntoAddress(Disp, AM))
            return false;
        }

        AM.IndexReg = ShVal;
        return false;
      }
    }
    break;

  case ISD::SRL: {
    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue And = N.getOperand(0);
    if (And.getOpcode() != ISD::AND) break;
    SDValue X = And.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    // The mask used for the transform is expected to be post-shift, but we
    // found the shift first so just apply the shift to the mask before passing
    // it down.
    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
        !isa<ConstantSDNode>(And.getOperand(1)))
      break;
    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);

    // Try to fold the mask and shift into the scale, and return false if we
    // succeed.
    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
      return false;
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
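    // (Result number 0 of [SU]MUL_LOHI is the low half of the product, so the
    // check below only lets the low-part use fall through to the MUL handling.)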
    if (N.getResNo() != 0) break;
    // FALL THROUGH
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr) {
      if (ConstantSDNode
            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getNode()->getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now. However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
            Reg = MulVal.getNode()->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
            if (FoldOffsetIntoAddress(Disp, AM))
              Reg = N.getNode()->getOperand(0);
          } else {
            Reg = N.getNode()->getOperand(0);
          }

          AM.IndexReg = AM.Base_Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address and
    // the index field with the index field unused, use -B as the index.
    // This is a win if A has multiple parts that can be folded into
    // the address. Also, this saves a mov if the base register has
    // other uses, since it avoids a two-address sub instruction, however
    // it costs an additional mov if the index register has other uses.

    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }

    int Cost = 0;
    SDValue RHS = Handle.getValue().getNode()->getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base_Reg.getNode() &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;
    AM.Scale = 1;

    // Insert the new nodes into the topological ordering.
    InsertDAGNode(*CurDAG, N, Zero);
    InsertDAGNode(*CurDAG, N, Neg);
    return false;
  }

  case ISD::ADD: {
    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    X86ISelAddressMode Backup = AM;
    if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
        !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
      return false;
    AM = Backup;

    // Try again after commuting the operands.
    if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
        !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
      return false;
    AM = Backup;

    // If we couldn't fold both operands into the address at the same time,
    // see if we can just put each operand into a register and fold at least
    // the add.
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        !AM.Base_Reg.getNode() &&
        !AM.IndexReg.getNode()) {
      N = Handle.getValue();
      AM.Base_Reg = N.getOperand(0);
      AM.IndexReg = N.getOperand(1);
      AM.Scale = 1;
      return false;
    }
    N = Handle.getValue();
    break;
  }

  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (CurDAG->isBaseWithConstantOffset(N)) {
      X86ISelAddressMode Backup = AM;
      ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));

      // Start with the LHS as an addr mode.
      if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
          !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
        return false;
      AM = Backup;
    }
    break;

  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue Shift = N.getOperand(0);
    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
    SDValue X = Shift.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    if (!isa<ConstantSDNode>(N.getOperand(1)))
      break;
    uint64_t Mask = N.getConstantOperandVal(1);

    // Try to fold the mask and shift into an extract and scale.
    if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to fold the mask and shift directly into the scale.
    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to swap the mask and shift to place shifts which can be done as
    // a scale on the outside of the mask.
    if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
      return false;
    break;
  }
  }

  return MatchAddressBase(N, AM);
}

/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (!AM.IndexReg.getNode()) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base_Reg = N;
  return false;
}

/// SelectAddr - returns true if it is able to pattern match an addressing
/// mode. It returns the operands which make up the maximal addressing mode it
/// can match by reference.
///
/// Parent is the parent node of the addr operand that is being matched. It
/// is always a load, store, atomic node, or null. It is only null when
/// checking memory operands for inline asm nodes.
bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;

  if (Parent &&
      // This list of opcodes are all the nodes that have an "addr:$ptr" operand
      // that are not a MemSDNode, and thus don't have proper addrspace info.
      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
      Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
      Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
      Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
    unsigned AddrSpace =
      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
    // AddrSpace 256 -> GS, 257 -> FS.
    if (AddrSpace == 256)
      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    if (AddrSpace == 257)
      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
  }

  if (MatchAddress(N, AM))
    return false;

  MVT VT = N.getSimpleValueType();
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base_Reg.getNode())
      AM.Base_Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
///
/// We also return:
///   PatternChainNode: this is the matched node that has a chain input and
///   output.
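///   In the code below this is either the non-extending load feeding the
///   SCALAR_TO_VECTOR, or the load behind a VZEXT_MOVL of such a node.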
bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
                                          SDValue N, SDValue &Base,
                                          SDValue &Scale, SDValue &Index,
                                          SDValue &Disp, SDValue &Segment,
                                          SDValue &PatternNodeWithChain) {
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    PatternNodeWithChain = N.getOperand(0);
    if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
        PatternNodeWithChain.hasOneUse() &&
        IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
        IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
      LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
      if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
        return false;
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements. This is a vector shuffle from the zero vector.
  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(0).getNode()->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
      N.getOperand(0).getOperand(0).hasOneUse() &&
      IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
      IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
    // Okay, this is a zero extending load. Fold it.
    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
    if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
      return false;
    PatternNodeWithChain = SDValue(LD, 0);
    return true;
  }
  return false;
}


bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CN->getZExtValue();
    if ((uint32_t)ImmVal != (uint64_t)ImmVal)
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, MVT::i64);
    return true;
  }

  // In static codegen with small code model, we can get the address of a label
  // into a register with 'movl'. TableGen has already made sure we're looking
  // at a label of some kind.
  assert(N->getOpcode() == X86ISD::Wrapper &&
         "Unexpected node type for MOV32ri64");
  N = N.getOperand(0);

  if (N->getOpcode() != ISD::TargetConstantPool &&
      N->getOpcode() != ISD::TargetJumpTable &&
      N->getOpcode() != ISD::TargetGlobalAddress &&
      N->getOpcode() != ISD::TargetExternalSymbol &&
      N->getOpcode() != ISD::TargetBlockAddress)
    return false;

  Imm = N;
  return TM.getCodeModel() == CodeModel::Small;
}

bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
                                         SDValue &Scale, SDValue &Index,
                                         SDValue &Disp, SDValue &Segment) {
  if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment))
    return false;

  SDLoc DL(N);
  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
  if (RN && RN->getReg() == 0)
    Base = CurDAG->getRegister(0, MVT::i64);
  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
    // Base could already be %rip, particularly in the x32 ABI.
    Base = SDValue(CurDAG->getMachineNode(
                       TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
                       CurDAG->getTargetConstant(0, MVT::i64),
                       Base,
                       CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
                   0);
  }

  RN = dyn_cast<RegisterSDNode>(Index);
  if (RN && RN->getReg() == 0)
    Index = CurDAG->getRegister(0, MVT::i64);
  else {
    assert(Index.getValueType() == MVT::i32 &&
           "Expect to be extending 32-bit registers for use in LEA");
    Index = SDValue(CurDAG->getMachineNode(
                        TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
                        CurDAG->getTargetConstant(0, MVT::i64),
                        Index,
                        CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
                    0);
  }

  return true;
}

/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
                                    SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp,
                                    SDValue &Segment) {
  X86ISelAddressMode AM;

  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
  // segments.
  SDValue Copy = AM.Segment;
  SDValue T = CurDAG->getRegister(0, MVT::i32);
  AM.Segment = T;
  if (MatchAddress(N, AM))
    return false;
  assert (T == AM.Segment);
  AM.Segment = Copy;

  MVT VT = N.getSimpleValueType();
  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    if (AM.Base_Reg.getNode())
      Complexity = 1;
    else
      AM.Base_Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.getNode())
    Complexity++;
  else
    AM.IndexReg = CurDAG->getRegister(0, VT);

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
  // a simple shift.
  if (AM.Scale > 1)
    Complexity++;

  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // to a LEA. This is determined with some experimentation but is by no means
  // optimal (especially for code size consideration). LEA is nice because of
  // its three-address nature. Tweak the cost function again when we can run
  // convertToThreeAddress() at register allocation time.
  if (AM.hasSymbolicDisplacement()) {
    // For X86-64, we should always use lea to materialize RIP relative
    // addresses.
    if (Subtarget->is64Bit())
      Complexity = 4;
    else
      Complexity += 2;
  }

  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
    Complexity++;

  // If it isn't worth using an LEA, reject it.
  if (Complexity <= 2)
    return false;

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
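/// The addressing mode built below is (symbol + %ebx*1) on 32-bit targets,
/// with %ebx presumably holding the GOT pointer required by the 32-bit TLS
/// call sequence, and a bare symbol displacement on 64-bit targets.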
01524 bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, 01525 SDValue &Scale, SDValue &Index, 01526 SDValue &Disp, SDValue &Segment) { 01527 assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); 01528 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); 01529 01530 X86ISelAddressMode AM; 01531 AM.GV = GA->getGlobal(); 01532 AM.Disp += GA->getOffset(); 01533 AM.Base_Reg = CurDAG->getRegister(0, N.getValueType()); 01534 AM.SymbolFlags = GA->getTargetFlags(); 01535 01536 if (N.getValueType() == MVT::i32) { 01537 AM.Scale = 1; 01538 AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); 01539 } else { 01540 AM.IndexReg = CurDAG->getRegister(0, MVT::i64); 01541 } 01542 01543 getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 01544 return true; 01545 } 01546 01547 01548 bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, 01549 SDValue &Base, SDValue &Scale, 01550 SDValue &Index, SDValue &Disp, 01551 SDValue &Segment) { 01552 if (!ISD::isNON_EXTLoad(N.getNode()) || 01553 !IsProfitableToFold(N, P, P) || 01554 !IsLegalToFold(N, P, P, OptLevel)) 01555 return false; 01556 01557 return SelectAddr(N.getNode(), 01558 N.getOperand(1), Base, Scale, Index, Disp, Segment); 01559 } 01560 01561 /// getGlobalBaseReg - Return an SDNode that returns the value of 01562 /// the global base register. Output instructions required to 01563 /// initialize the global base register, if necessary. 01564 /// 01565 SDNode *X86DAGToDAGISel::getGlobalBaseReg() { 01566 unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); 01567 return CurDAG->getRegister(GlobalBaseReg, 01568 getTargetLowering()->getPointerTy()).getNode(); 01569 } 01570 01571 /// Atomic opcode table 01572 /// 01573 enum AtomicOpc { 01574 ADD, 01575 SUB, 01576 INC, 01577 DEC, 01578 OR, 01579 AND, 01580 XOR, 01581 AtomicOpcEnd 01582 }; 01583 01584 enum AtomicSz { 01585 ConstantI8, 01586 I8, 01587 SextConstantI16, 01588 ConstantI16, 01589 I16, 01590 SextConstantI32, 01591 ConstantI32, 01592 I32, 01593 SextConstantI64, 01594 ConstantI64, 01595 I64, 01596 AtomicSzEnd 01597 }; 01598 01599 static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { 01600 { 01601 X86::LOCK_ADD8mi, 01602 X86::LOCK_ADD8mr, 01603 X86::LOCK_ADD16mi8, 01604 X86::LOCK_ADD16mi, 01605 X86::LOCK_ADD16mr, 01606 X86::LOCK_ADD32mi8, 01607 X86::LOCK_ADD32mi, 01608 X86::LOCK_ADD32mr, 01609 X86::LOCK_ADD64mi8, 01610 X86::LOCK_ADD64mi32, 01611 X86::LOCK_ADD64mr, 01612 }, 01613 { 01614 X86::LOCK_SUB8mi, 01615 X86::LOCK_SUB8mr, 01616 X86::LOCK_SUB16mi8, 01617 X86::LOCK_SUB16mi, 01618 X86::LOCK_SUB16mr, 01619 X86::LOCK_SUB32mi8, 01620 X86::LOCK_SUB32mi, 01621 X86::LOCK_SUB32mr, 01622 X86::LOCK_SUB64mi8, 01623 X86::LOCK_SUB64mi32, 01624 X86::LOCK_SUB64mr, 01625 }, 01626 { 01627 0, 01628 X86::LOCK_INC8m, 01629 0, 01630 0, 01631 X86::LOCK_INC16m, 01632 0, 01633 0, 01634 X86::LOCK_INC32m, 01635 0, 01636 0, 01637 X86::LOCK_INC64m, 01638 }, 01639 { 01640 0, 01641 X86::LOCK_DEC8m, 01642 0, 01643 0, 01644 X86::LOCK_DEC16m, 01645 0, 01646 0, 01647 X86::LOCK_DEC32m, 01648 0, 01649 0, 01650 X86::LOCK_DEC64m, 01651 }, 01652 { 01653 X86::LOCK_OR8mi, 01654 X86::LOCK_OR8mr, 01655 X86::LOCK_OR16mi8, 01656 X86::LOCK_OR16mi, 01657 X86::LOCK_OR16mr, 01658 X86::LOCK_OR32mi8, 01659 X86::LOCK_OR32mi, 01660 X86::LOCK_OR32mr, 01661 X86::LOCK_OR64mi8, 01662 X86::LOCK_OR64mi32, 01663 X86::LOCK_OR64mr, 01664 }, 01665 { 01666 X86::LOCK_AND8mi, 01667 X86::LOCK_AND8mr, 01668 X86::LOCK_AND16mi8, 01669 X86::LOCK_AND16mi, 01670 X86::LOCK_AND16mr, 01671 X86::LOCK_AND32mi8, 01672 
X86::LOCK_AND32mi, 01673 X86::LOCK_AND32mr, 01674 X86::LOCK_AND64mi8, 01675 X86::LOCK_AND64mi32, 01676 X86::LOCK_AND64mr, 01677 }, 01678 { 01679 X86::LOCK_XOR8mi, 01680 X86::LOCK_XOR8mr, 01681 X86::LOCK_XOR16mi8, 01682 X86::LOCK_XOR16mi, 01683 X86::LOCK_XOR16mr, 01684 X86::LOCK_XOR32mi8, 01685 X86::LOCK_XOR32mi, 01686 X86::LOCK_XOR32mr, 01687 X86::LOCK_XOR64mi8, 01688 X86::LOCK_XOR64mi32, 01689 X86::LOCK_XOR64mr, 01690 } 01691 }; 01692 01693 // Return the target constant operand for atomic-load-op and do simple 01694 // translations, such as from atomic-load-add to lock-sub. The return value is 01695 // one of the following 3 cases: 01696 // + target-constant, the operand could be supported as a target constant. 01697 // + empty, the operand is not needed any more with the new op selected. 01698 // + non-empty, otherwise. 01699 static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, 01700 SDLoc dl, 01701 enum AtomicOpc &Op, MVT NVT, 01702 SDValue Val) { 01703 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) { 01704 int64_t CNVal = CN->getSExtValue(); 01705 // Quit if not 32-bit imm. 01706 if ((int32_t)CNVal != CNVal) 01707 return Val; 01708 // For atomic-load-add, we could do some optimizations. 01709 if (Op == ADD) { 01710 // Translate to INC/DEC if ADD by 1 or -1. 01711 if ((CNVal == 1) || (CNVal == -1)) { 01712 Op = (CNVal == 1) ? INC : DEC; 01713 // No more constant operand after being translated into INC/DEC. 01714 return SDValue(); 01715 } 01716 // Translate to SUB if ADD by negative value. 01717 if (CNVal < 0) { 01718 Op = SUB; 01719 CNVal = -CNVal; 01720 } 01721 } 01722 return CurDAG->getTargetConstant(CNVal, NVT); 01723 } 01724 01725 // If the value operand is single-used, try to optimize it. 01726 if (Op == ADD && Val.hasOneUse()) { 01727 // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x). 01728 if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) { 01729 Op = SUB; 01730 return Val.getOperand(1); 01731 } 01732 // A special case for i16, which needs truncating as, in most cases, it's 01733 // promoted to i32. We will translate 01734 // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x)) 01735 if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 && 01736 Val.getOperand(0).getOpcode() == ISD::SUB && 01737 X86::isZeroNode(Val.getOperand(0).getOperand(0))) { 01738 Op = SUB; 01739 Val = Val.getOperand(0); 01740 return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT, 01741 Val.getOperand(1)); 01742 } 01743 } 01744 01745 return Val; 01746 } 01747 01748 SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { 01749 if (Node->hasAnyUseOfValue(0)) 01750 return nullptr; 01751 01752 SDLoc dl(Node); 01753 01754 // Optimize common patterns for __sync_or_and_fetch and similar arith 01755 // operations where the result is not used. This allows us to use the "lock" 01756 // version of the arithmetic instruction. 01757 SDValue Chain = Node->getOperand(0); 01758 SDValue Ptr = Node->getOperand(1); 01759 SDValue Val = Node->getOperand(2); 01760 SDValue Base, Scale, Index, Disp, Segment; 01761 if (!SelectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment)) 01762 return nullptr; 01763 01764 // Which index into the table. 
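// For example, an i32 __sync_fetch_and_add(p, 1) whose result is unused
// reaches this point as ATOMIC_LOAD_ADD; getAtomicLoadArithTargetConstant
// rewrites Op to INC and drops the constant operand, so the lookup below
// yields AtomicOpcTbl[INC][I32] == X86::LOCK_INC32m, i.e. a single
// "lock incl (mem)" with no value result (the dead result is satisfied by the
// IMPLICIT_DEF added at the end of this function).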
01765 enum AtomicOpc Op; 01766 switch (Node->getOpcode()) { 01767 default: 01768 return nullptr; 01769 case ISD::ATOMIC_LOAD_OR: 01770 Op = OR; 01771 break; 01772 case ISD::ATOMIC_LOAD_AND: 01773 Op = AND; 01774 break; 01775 case ISD::ATOMIC_LOAD_XOR: 01776 Op = XOR; 01777 break; 01778 case ISD::ATOMIC_LOAD_ADD: 01779 Op = ADD; 01780 break; 01781 } 01782 01783 Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val); 01784 bool isUnOp = !Val.getNode(); 01785 bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); 01786 01787 unsigned Opc = 0; 01788 switch (NVT.SimpleTy) { 01789 default: return nullptr; 01790 case MVT::i8: 01791 if (isCN) 01792 Opc = AtomicOpcTbl[Op][ConstantI8]; 01793 else 01794 Opc = AtomicOpcTbl[Op][I8]; 01795 break; 01796 case MVT::i16: 01797 if (isCN) { 01798 if (immSext8(Val.getNode())) 01799 Opc = AtomicOpcTbl[Op][SextConstantI16]; 01800 else 01801 Opc = AtomicOpcTbl[Op][ConstantI16]; 01802 } else 01803 Opc = AtomicOpcTbl[Op][I16]; 01804 break; 01805 case MVT::i32: 01806 if (isCN) { 01807 if (immSext8(Val.getNode())) 01808 Opc = AtomicOpcTbl[Op][SextConstantI32]; 01809 else 01810 Opc = AtomicOpcTbl[Op][ConstantI32]; 01811 } else 01812 Opc = AtomicOpcTbl[Op][I32]; 01813 break; 01814 case MVT::i64: 01815 if (isCN) { 01816 if (immSext8(Val.getNode())) 01817 Opc = AtomicOpcTbl[Op][SextConstantI64]; 01818 else if (i64immSExt32(Val.getNode())) 01819 Opc = AtomicOpcTbl[Op][ConstantI64]; 01820 } else 01821 Opc = AtomicOpcTbl[Op][I64]; 01822 break; 01823 } 01824 01825 assert(Opc != 0 && "Invalid arith lock transform!"); 01826 01827 // Building the new node. 01828 SDValue Ret; 01829 if (isUnOp) { 01830 SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Chain }; 01831 Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); 01832 } else { 01833 SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Val, Chain }; 01834 Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); 01835 } 01836 01837 // Copying the MachineMemOperand. 01838 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 01839 MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); 01840 cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); 01841 01842 // We need to have two outputs as that is what the original instruction had. 01843 // So we add a dummy, undefined output. This is safe as we checked first 01844 // that no-one uses our output anyway. 01845 SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 01846 dl, NVT), 0); 01847 SDValue RetVals[] = { Undef, Ret }; 01848 return CurDAG->getMergeValues(RetVals, dl).getNode(); 01849 } 01850 01851 /// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has 01852 /// any uses which require the SF or OF bits to be accurate. 01853 static bool HasNoSignedComparisonUses(SDNode *N) { 01854 // Examine each user of the node. 01855 for (SDNode::use_iterator UI = N->use_begin(), 01856 UE = N->use_end(); UI != UE; ++UI) { 01857 // Only examine CopyToReg uses. 01858 if (UI->getOpcode() != ISD::CopyToReg) 01859 return false; 01860 // Only examine CopyToReg uses that copy to EFLAGS. 01861 if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != 01862 X86::EFLAGS) 01863 return false; 01864 // Examine each user of the CopyToReg use. 01865 for (SDNode::use_iterator FlagUI = UI->use_begin(), 01866 FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { 01867 // Only examine the Flag result. 01868 if (FlagUI.getUse().getResNo() != 1) continue; 01869 // Anything unusual: assume conservatively. 
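// The switch below whitelists only the unsigned and equality/parity condition
// forms (A, AE, B, BE, E, NE, P, NP as SETcc, Jcc and CMOVcc). Those
// conditions are computed from CF, ZF and PF only, so they remain correct if
// a wider compare is later narrowed; any user that reads SF or OF (the signed
// conditions such as SETL, JG or CMOVGE) causes a conservative "false".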
01870 if (!FlagUI->isMachineOpcode()) return false; 01871 // Examine the opcode of the user. 01872 switch (FlagUI->getMachineOpcode()) { 01873 // These comparisons don't treat the most significant bit specially. 01874 case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr: 01875 case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: 01876 case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: 01877 case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: 01878 case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: 01879 case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: 01880 case X86::CMOVA16rr: case X86::CMOVA16rm: 01881 case X86::CMOVA32rr: case X86::CMOVA32rm: 01882 case X86::CMOVA64rr: case X86::CMOVA64rm: 01883 case X86::CMOVAE16rr: case X86::CMOVAE16rm: 01884 case X86::CMOVAE32rr: case X86::CMOVAE32rm: 01885 case X86::CMOVAE64rr: case X86::CMOVAE64rm: 01886 case X86::CMOVB16rr: case X86::CMOVB16rm: 01887 case X86::CMOVB32rr: case X86::CMOVB32rm: 01888 case X86::CMOVB64rr: case X86::CMOVB64rm: 01889 case X86::CMOVBE16rr: case X86::CMOVBE16rm: 01890 case X86::CMOVBE32rr: case X86::CMOVBE32rm: 01891 case X86::CMOVBE64rr: case X86::CMOVBE64rm: 01892 case X86::CMOVE16rr: case X86::CMOVE16rm: 01893 case X86::CMOVE32rr: case X86::CMOVE32rm: 01894 case X86::CMOVE64rr: case X86::CMOVE64rm: 01895 case X86::CMOVNE16rr: case X86::CMOVNE16rm: 01896 case X86::CMOVNE32rr: case X86::CMOVNE32rm: 01897 case X86::CMOVNE64rr: case X86::CMOVNE64rm: 01898 case X86::CMOVNP16rr: case X86::CMOVNP16rm: 01899 case X86::CMOVNP32rr: case X86::CMOVNP32rm: 01900 case X86::CMOVNP64rr: case X86::CMOVNP64rm: 01901 case X86::CMOVP16rr: case X86::CMOVP16rm: 01902 case X86::CMOVP32rr: case X86::CMOVP32rm: 01903 case X86::CMOVP64rr: case X86::CMOVP64rm: 01904 continue; 01905 // Anything else: assume conservatively. 01906 default: return false; 01907 } 01908 } 01909 } 01910 return true; 01911 } 01912 01913 /// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode 01914 /// is suitable for doing the {load; increment or decrement; store} to modify 01915 /// transformation. 01916 static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, 01917 SDValue StoredVal, SelectionDAG *CurDAG, 01918 LoadSDNode* &LoadNode, SDValue &InputChain) { 01919 01920 // is the value stored the result of a DEC or INC? 01921 if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false; 01922 01923 // is the stored value result 0 of the load? 01924 if (StoredVal.getResNo() != 0) return false; 01925 01926 // are there other uses of the loaded value than the inc or dec? 01927 if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false; 01928 01929 // is the store non-extending and non-indexed? 01930 if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal()) 01931 return false; 01932 01933 SDValue Load = StoredVal->getOperand(0); 01934 // Is the stored value a non-extending and non-indexed load? 01935 if (!ISD::isNormalLoad(Load.getNode())) return false; 01936 01937 // Return LoadNode by reference. 01938 LoadNode = cast<LoadSDNode>(Load); 01939 // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8) 01940 EVT LdVT = LoadNode->getMemoryVT(); 01941 if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && 01942 LdVT != MVT::i8) 01943 return false; 01944 01945 // Is store the only read of the loaded value? 
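// Taken together, the checks in this function accept exactly the DAG shape
//
//   (store (X86ISD::INC/DEC (load addr)), addr)
//
// where the loaded value has no other users and the store's chain reaches the
// load (directly or through a TokenFactor). The payoff is in the ISD::STORE
// case of Select() below, which collapses the whole chain into a single
// read-modify-write INC/DEC through memory.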
01946 if (!Load.hasOneUse()) 01947 return false; 01948 01949 // Is the address of the store the same as the load? 01950 if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || 01951 LoadNode->getOffset() != StoreNode->getOffset()) 01952 return false; 01953 01954 // Check if the chain is produced by the load or is a TokenFactor with 01955 // the load output chain as an operand. Return InputChain by reference. 01956 SDValue Chain = StoreNode->getChain(); 01957 01958 bool ChainCheck = false; 01959 if (Chain == Load.getValue(1)) { 01960 ChainCheck = true; 01961 InputChain = LoadNode->getChain(); 01962 } else if (Chain.getOpcode() == ISD::TokenFactor) { 01963 SmallVector<SDValue, 4> ChainOps; 01964 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { 01965 SDValue Op = Chain.getOperand(i); 01966 if (Op == Load.getValue(1)) { 01967 ChainCheck = true; 01968 continue; 01969 } 01970 01971 // Make sure using Op as part of the chain would not cause a cycle here. 01972 // In theory, we could check whether the chain node is a predecessor of 01973 // the load. But that can be very expensive. Instead visit the uses and 01974 // make sure they all have smaller node id than the load. 01975 int LoadId = LoadNode->getNodeId(); 01976 for (SDNode::use_iterator UI = Op.getNode()->use_begin(), 01977 UE = UI->use_end(); UI != UE; ++UI) { 01978 if (UI.getUse().getResNo() != 0) 01979 continue; 01980 if (UI->getNodeId() > LoadId) 01981 return false; 01982 } 01983 01984 ChainOps.push_back(Op); 01985 } 01986 01987 if (ChainCheck) 01988 // Make a new TokenFactor with all the other input chains except 01989 // for the load. 01990 InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), 01991 MVT::Other, ChainOps); 01992 } 01993 if (!ChainCheck) 01994 return false; 01995 01996 return true; 01997 } 01998 01999 /// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory 02000 /// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC. 02001 static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { 02002 if (Opc == X86ISD::DEC) { 02003 if (LdVT == MVT::i64) return X86::DEC64m; 02004 if (LdVT == MVT::i32) return X86::DEC32m; 02005 if (LdVT == MVT::i16) return X86::DEC16m; 02006 if (LdVT == MVT::i8) return X86::DEC8m; 02007 } else { 02008 assert(Opc == X86ISD::INC && "unrecognized opcode"); 02009 if (LdVT == MVT::i64) return X86::INC64m; 02010 if (LdVT == MVT::i32) return X86::INC32m; 02011 if (LdVT == MVT::i16) return X86::INC16m; 02012 if (LdVT == MVT::i8) return X86::INC8m; 02013 } 02014 llvm_unreachable("unrecognized size for LdVT"); 02015 } 02016 02017 /// SelectGather - Customized ISel for GATHER operations. 
02018 /// 02019 SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { 02020 // Operands of Gather: VSrc, Base, VIdx, VMask, Scale 02021 SDValue Chain = Node->getOperand(0); 02022 SDValue VSrc = Node->getOperand(2); 02023 SDValue Base = Node->getOperand(3); 02024 SDValue VIdx = Node->getOperand(4); 02025 SDValue VMask = Node->getOperand(5); 02026 ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6)); 02027 if (!Scale) 02028 return nullptr; 02029 02030 SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(), 02031 MVT::Other); 02032 02033 // Memory Operands: Base, Scale, Index, Disp, Segment 02034 SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32); 02035 SDValue Segment = CurDAG->getRegister(0, MVT::i32); 02036 const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, 02037 Disp, Segment, VMask, Chain}; 02038 SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), VTs, Ops); 02039 // Node has 2 outputs: VDst and MVT::Other. 02040 // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. 02041 // We replace VDst of Node with VDst of ResNode, and Other of Node with Other 02042 // of ResNode. 02043 ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); 02044 ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2)); 02045 return ResNode; 02046 } 02047 02048 SDNode *X86DAGToDAGISel::Select(SDNode *Node) { 02049 MVT NVT = Node->getSimpleValueType(0); 02050 unsigned Opc, MOpc; 02051 unsigned Opcode = Node->getOpcode(); 02052 SDLoc dl(Node); 02053 02054 DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); 02055 02056 if (Node->isMachineOpcode()) { 02057 DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); 02058 Node->setNodeId(-1); 02059 return nullptr; // Already selected. 02060 } 02061 02062 switch (Opcode) { 02063 default: break; 02064 case ISD::INTRINSIC_W_CHAIN: { 02065 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 02066 switch (IntNo) { 02067 default: break; 02068 case Intrinsic::x86_avx2_gather_d_pd: 02069 case Intrinsic::x86_avx2_gather_d_pd_256: 02070 case Intrinsic::x86_avx2_gather_q_pd: 02071 case Intrinsic::x86_avx2_gather_q_pd_256: 02072 case Intrinsic::x86_avx2_gather_d_ps: 02073 case Intrinsic::x86_avx2_gather_d_ps_256: 02074 case Intrinsic::x86_avx2_gather_q_ps: 02075 case Intrinsic::x86_avx2_gather_q_ps_256: 02076 case Intrinsic::x86_avx2_gather_d_q: 02077 case Intrinsic::x86_avx2_gather_d_q_256: 02078 case Intrinsic::x86_avx2_gather_q_q: 02079 case Intrinsic::x86_avx2_gather_q_q_256: 02080 case Intrinsic::x86_avx2_gather_d_d: 02081 case Intrinsic::x86_avx2_gather_d_d_256: 02082 case Intrinsic::x86_avx2_gather_q_d: 02083 case Intrinsic::x86_avx2_gather_q_d_256: { 02084 if (!Subtarget->hasAVX2()) 02085 break; 02086 unsigned Opc; 02087 switch (IntNo) { 02088 default: llvm_unreachable("Impossible intrinsic"); 02089 case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break; 02090 case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break; 02091 case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break; 02092 case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break; 02093 case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break; 02094 case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break; 02095 case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break; 02096 case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break; 02097 case Intrinsic::x86_avx2_gather_d_q: Opc = 
X86::VPGATHERDQrm; break; 02098 case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break; 02099 case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break; 02100 case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break; 02101 case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break; 02102 case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break; 02103 case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break; 02104 case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break; 02105 } 02106 SDNode *RetVal = SelectGather(Node, Opc); 02107 if (RetVal) 02108 // We already called ReplaceUses inside SelectGather. 02109 return nullptr; 02110 break; 02111 } 02112 } 02113 break; 02114 } 02115 case X86ISD::GlobalBaseReg: 02116 return getGlobalBaseReg(); 02117 02118 02119 case ISD::ATOMIC_LOAD_XOR: 02120 case ISD::ATOMIC_LOAD_AND: 02121 case ISD::ATOMIC_LOAD_OR: 02122 case ISD::ATOMIC_LOAD_ADD: { 02123 SDNode *RetVal = SelectAtomicLoadArith(Node, NVT); 02124 if (RetVal) 02125 return RetVal; 02126 break; 02127 } 02128 case ISD::AND: 02129 case ISD::OR: 02130 case ISD::XOR: { 02131 // For operations of the form (x << C1) op C2, check if we can use a smaller 02132 // encoding for C2 by transforming it into (x op (C2>>C1)) << C1. 02133 SDValue N0 = Node->getOperand(0); 02134 SDValue N1 = Node->getOperand(1); 02135 02136 if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse()) 02137 break; 02138 02139 // i8 is unshrinkable, i16 should be promoted to i32. 02140 if (NVT != MVT::i32 && NVT != MVT::i64) 02141 break; 02142 02143 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); 02144 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 02145 if (!Cst || !ShlCst) 02146 break; 02147 02148 int64_t Val = Cst->getSExtValue(); 02149 uint64_t ShlVal = ShlCst->getZExtValue(); 02150 02151 // Make sure that we don't change the operation by removing bits. 02152 // This only matters for OR and XOR, AND is unaffected. 02153 uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1; 02154 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) 02155 break; 02156 02157 unsigned ShlOp, Op; 02158 MVT CstVT = NVT; 02159 02160 // Check the minimum bitwidth for the new constant. 02161 // TODO: AND32ri is the same as AND64ri32 with zext imm. 02162 // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr 02163 // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32. 02164 if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal)) 02165 CstVT = MVT::i8; 02166 else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal)) 02167 CstVT = MVT::i32; 02168 02169 // Bail if there is no smaller encoding. 02170 if (NVT == CstVT) 02171 break; 02172 02173 switch (NVT.SimpleTy) { 02174 default: llvm_unreachable("Unsupported VT!"); 02175 case MVT::i32: 02176 assert(CstVT == MVT::i8); 02177 ShlOp = X86::SHL32ri; 02178 02179 switch (Opcode) { 02180 default: llvm_unreachable("Impossible opcode"); 02181 case ISD::AND: Op = X86::AND32ri8; break; 02182 case ISD::OR: Op = X86::OR32ri8; break; 02183 case ISD::XOR: Op = X86::XOR32ri8; break; 02184 } 02185 break; 02186 case MVT::i64: 02187 assert(CstVT == MVT::i8 || CstVT == MVT::i32); 02188 ShlOp = X86::SHL64ri; 02189 02190 switch (Opcode) { 02191 default: llvm_unreachable("Impossible opcode"); 02192 case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break; 02193 case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break; 02194 case ISD::XOR: Op = CstVT==MVT::i8? 
X86::XOR64ri8 : X86::XOR64ri32; break; 02195 } 02196 break; 02197 } 02198 02199 // Emit the smaller op and the shift. 02200 SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT); 02201 SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); 02202 return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), 02203 getI8Imm(ShlVal)); 02204 } 02205 case X86ISD::UMUL: { 02206 SDValue N0 = Node->getOperand(0); 02207 SDValue N1 = Node->getOperand(1); 02208 02209 unsigned LoReg; 02210 switch (NVT.SimpleTy) { 02211 default: llvm_unreachable("Unsupported VT!"); 02212 case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break; 02213 case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break; 02214 case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break; 02215 case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break; 02216 } 02217 02218 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, 02219 N0, SDValue()).getValue(1); 02220 02221 SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); 02222 SDValue Ops[] = {N1, InFlag}; 02223 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); 02224 02225 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); 02226 ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); 02227 ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); 02228 return nullptr; 02229 } 02230 02231 case ISD::SMUL_LOHI: 02232 case ISD::UMUL_LOHI: { 02233 SDValue N0 = Node->getOperand(0); 02234 SDValue N1 = Node->getOperand(1); 02235 02236 bool isSigned = Opcode == ISD::SMUL_LOHI; 02237 bool hasBMI2 = Subtarget->hasBMI2(); 02238 if (!isSigned) { 02239 switch (NVT.SimpleTy) { 02240 default: llvm_unreachable("Unsupported VT!"); 02241 case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; 02242 case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; 02243 case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r; 02244 MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break; 02245 case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r; 02246 MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break; 02247 } 02248 } else { 02249 switch (NVT.SimpleTy) { 02250 default: llvm_unreachable("Unsupported VT!"); 02251 case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; 02252 case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; 02253 case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; 02254 case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; 02255 } 02256 } 02257 02258 unsigned SrcReg, LoReg, HiReg; 02259 switch (Opc) { 02260 default: llvm_unreachable("Unknown MUL opcode!"); 02261 case X86::IMUL8r: 02262 case X86::MUL8r: 02263 SrcReg = LoReg = X86::AL; HiReg = X86::AH; 02264 break; 02265 case X86::IMUL16r: 02266 case X86::MUL16r: 02267 SrcReg = LoReg = X86::AX; HiReg = X86::DX; 02268 break; 02269 case X86::IMUL32r: 02270 case X86::MUL32r: 02271 SrcReg = LoReg = X86::EAX; HiReg = X86::EDX; 02272 break; 02273 case X86::IMUL64r: 02274 case X86::MUL64r: 02275 SrcReg = LoReg = X86::RAX; HiReg = X86::RDX; 02276 break; 02277 case X86::MULX32rr: 02278 SrcReg = X86::EDX; LoReg = HiReg = 0; 02279 break; 02280 case X86::MULX64rr: 02281 SrcReg = X86::RDX; LoReg = HiReg = 0; 02282 break; 02283 } 02284 02285 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 02286 bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 02287 // Multiply is commmutative. 
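// If the load sits on the left-hand side instead, the operands are swapped
// below and folding is retried; whichever operand is not folded is the one
// copied into the fixed source register (AL/AX/EAX/RAX, or EDX/RDX for the
// flag-preserving BMI2 MULX forms) before the multiply is emitted.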
02288 if (!foldedLoad) { 02289 foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 02290 if (foldedLoad) 02291 std::swap(N0, N1); 02292 } 02293 02294 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg, 02295 N0, SDValue()).getValue(1); 02296 SDValue ResHi, ResLo; 02297 02298 if (foldedLoad) { 02299 SDValue Chain; 02300 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 02301 InFlag }; 02302 if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { 02303 SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); 02304 SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); 02305 ResHi = SDValue(CNode, 0); 02306 ResLo = SDValue(CNode, 1); 02307 Chain = SDValue(CNode, 2); 02308 InFlag = SDValue(CNode, 3); 02309 } else { 02310 SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); 02311 SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); 02312 Chain = SDValue(CNode, 0); 02313 InFlag = SDValue(CNode, 1); 02314 } 02315 02316 // Update the chain. 02317 ReplaceUses(N1.getValue(1), Chain); 02318 } else { 02319 SDValue Ops[] = { N1, InFlag }; 02320 if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { 02321 SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); 02322 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); 02323 ResHi = SDValue(CNode, 0); 02324 ResLo = SDValue(CNode, 1); 02325 InFlag = SDValue(CNode, 2); 02326 } else { 02327 SDVTList VTs = CurDAG->getVTList(MVT::Glue); 02328 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); 02329 InFlag = SDValue(CNode, 0); 02330 } 02331 } 02332 02333 // Prevent use of AH in a REX instruction by referencing AX instead. 02334 if (HiReg == X86::AH && Subtarget->is64Bit() && 02335 !SDValue(Node, 1).use_empty()) { 02336 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 02337 X86::AX, MVT::i16, InFlag); 02338 InFlag = Result.getValue(2); 02339 // Get the low part if needed. Don't use getCopyFromReg for aliasing 02340 // registers. 02341 if (!SDValue(Node, 0).use_empty()) 02342 ReplaceUses(SDValue(Node, 1), 02343 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 02344 02345 // Shift AX down 8 bits. 02346 Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, 02347 Result, 02348 CurDAG->getTargetConstant(8, MVT::i8)), 0); 02349 // Then truncate it down to i8. 02350 ReplaceUses(SDValue(Node, 1), 02351 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 02352 } 02353 // Copy the low half of the result, if it is needed. 02354 if (!SDValue(Node, 0).use_empty()) { 02355 if (!ResLo.getNode()) { 02356 assert(LoReg && "Register for low half is not defined!"); 02357 ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, 02358 InFlag); 02359 InFlag = ResLo.getValue(2); 02360 } 02361 ReplaceUses(SDValue(Node, 0), ResLo); 02362 DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); 02363 } 02364 // Copy the high half of the result, if it is needed. 
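// (When the high half lives in AH on a 64-bit target, the block above has
// already rewritten those uses: an instruction carrying a REX prefix cannot
// encode AH/BH/CH/DH, so the value is recovered by copying AX out, shifting
// the copy right by 8 with SHR16ri, and taking its low byte via sub_8bit,
// rather than by reading AH directly.)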
02365 if (!SDValue(Node, 1).use_empty()) { 02366 if (!ResHi.getNode()) { 02367 assert(HiReg && "Register for high half is not defined!"); 02368 ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, 02369 InFlag); 02370 InFlag = ResHi.getValue(2); 02371 } 02372 ReplaceUses(SDValue(Node, 1), ResHi); 02373 DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); 02374 } 02375 02376 return nullptr; 02377 } 02378 02379 case ISD::SDIVREM: 02380 case ISD::UDIVREM: { 02381 SDValue N0 = Node->getOperand(0); 02382 SDValue N1 = Node->getOperand(1); 02383 02384 bool isSigned = Opcode == ISD::SDIVREM; 02385 if (!isSigned) { 02386 switch (NVT.SimpleTy) { 02387 default: llvm_unreachable("Unsupported VT!"); 02388 case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; 02389 case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; 02390 case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; 02391 case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; 02392 } 02393 } else { 02394 switch (NVT.SimpleTy) { 02395 default: llvm_unreachable("Unsupported VT!"); 02396 case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; 02397 case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; 02398 case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; 02399 case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; 02400 } 02401 } 02402 02403 unsigned LoReg, HiReg, ClrReg; 02404 unsigned SExtOpcode; 02405 switch (NVT.SimpleTy) { 02406 default: llvm_unreachable("Unsupported VT!"); 02407 case MVT::i8: 02408 LoReg = X86::AL; ClrReg = HiReg = X86::AH; 02409 SExtOpcode = X86::CBW; 02410 break; 02411 case MVT::i16: 02412 LoReg = X86::AX; HiReg = X86::DX; 02413 ClrReg = X86::DX; 02414 SExtOpcode = X86::CWD; 02415 break; 02416 case MVT::i32: 02417 LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; 02418 SExtOpcode = X86::CDQ; 02419 break; 02420 case MVT::i64: 02421 LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; 02422 SExtOpcode = X86::CQO; 02423 break; 02424 } 02425 02426 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 02427 bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 02428 bool signBitIsZero = CurDAG->SignBitIsZero(N0); 02429 02430 SDValue InFlag; 02431 if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) { 02432 // Special case for div8, just use a move with zero extension to AX to 02433 // clear the upper 8 bits (AH). 02434 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; 02435 if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { 02436 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; 02437 Move = 02438 SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, 02439 MVT::Other, Ops), 0); 02440 Chain = Move.getValue(1); 02441 ReplaceUses(N0.getValue(1), Chain); 02442 } else { 02443 Move = 02444 SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0); 02445 Chain = CurDAG->getEntryNode(); 02446 } 02447 Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue()); 02448 InFlag = Chain.getValue(1); 02449 } else { 02450 InFlag = 02451 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, 02452 LoReg, N0, SDValue()).getValue(1); 02453 if (isSigned && !signBitIsZero) { 02454 // Sign extend the low part into the high part. 02455 InFlag = 02456 SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0); 02457 } else { 02458 // Zero out the high part, effectively zero extending the input. 
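// DIV/IDIV divide the double-width dividend held in DX:AX, EDX:EAX or
// RDX:RAX, so for an unsigned divide (or one whose sign bit is known zero)
// the high register must contain zero. MOV32r0 (the xor-with-itself idiom)
// produces a 32-bit zero; the switch below then narrows it with
// EXTRACT_SUBREG for i16 or widens it with SUBREG_TO_REG for i64, a 32-bit
// zero already being a valid 64-bit zero.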
02459 SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0); 02460 switch (NVT.SimpleTy) { 02461 case MVT::i16: 02462 ClrNode = 02463 SDValue(CurDAG->getMachineNode( 02464 TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode, 02465 CurDAG->getTargetConstant(X86::sub_16bit, MVT::i32)), 02466 0); 02467 break; 02468 case MVT::i32: 02469 break; 02470 case MVT::i64: 02471 ClrNode = 02472 SDValue(CurDAG->getMachineNode( 02473 TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, 02474 CurDAG->getTargetConstant(0, MVT::i64), ClrNode, 02475 CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)), 02476 0); 02477 break; 02478 default: 02479 llvm_unreachable("Unexpected division source"); 02480 } 02481 02482 InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, 02483 ClrNode, InFlag).getValue(1); 02484 } 02485 } 02486 02487 if (foldedLoad) { 02488 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 02489 InFlag }; 02490 SDNode *CNode = 02491 CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); 02492 InFlag = SDValue(CNode, 1); 02493 // Update the chain. 02494 ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); 02495 } else { 02496 InFlag = 02497 SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0); 02498 } 02499 02500 // Prevent use of AH in a REX instruction by referencing AX instead. 02501 // Shift it down 8 bits. 02502 // 02503 // The current assumption of the register allocator is that isel 02504 // won't generate explicit references to the GPR8_NOREX registers. If 02505 // the allocator and/or the backend get enhanced to be more robust in 02506 // that regard, this can be, and should be, removed. 02507 if (HiReg == X86::AH && Subtarget->is64Bit() && 02508 !SDValue(Node, 1).use_empty()) { 02509 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 02510 X86::AX, MVT::i16, InFlag); 02511 InFlag = Result.getValue(2); 02512 02513 // If we also need AL (the quotient), get it by extracting a subreg from 02514 // Result. The fast register allocator does not like multiple CopyFromReg 02515 // nodes using aliasing registers. 02516 if (!SDValue(Node, 0).use_empty()) 02517 ReplaceUses(SDValue(Node, 0), 02518 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 02519 02520 // Shift AX right by 8 bits instead of using AH. 02521 Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, 02522 Result, 02523 CurDAG->getTargetConstant(8, MVT::i8)), 02524 0); 02525 ReplaceUses(SDValue(Node, 1), 02526 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 02527 } 02528 // Copy the division (low) result, if it is needed. 02529 if (!SDValue(Node, 0).use_empty()) { 02530 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 02531 LoReg, NVT, InFlag); 02532 InFlag = Result.getValue(2); 02533 ReplaceUses(SDValue(Node, 0), Result); 02534 DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); 02535 } 02536 // Copy the remainder (high) result, if it is needed. 02537 if (!SDValue(Node, 1).use_empty()) { 02538 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 02539 HiReg, NVT, InFlag); 02540 InFlag = Result.getValue(2); 02541 ReplaceUses(SDValue(Node, 1), Result); 02542 DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); 02543 } 02544 return nullptr; 02545 } 02546 02547 case X86ISD::CMP: 02548 case X86ISD::SUB: { 02549 // Sometimes a SUB is used to perform comparison. 
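// An X86ISD::SUB whose arithmetic result (value #0) is unused is equivalent
// to a CMP: only its EFLAGS result matters, so the TEST narrowings below can
// be applied to it as well. If the subtraction result does have uses, the
// node must be selected as a real SUB and none of these rewrites apply.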
02550 if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0)) 02551 // This node is not a CMP. 02552 break; 02553 SDValue N0 = Node->getOperand(0); 02554 SDValue N1 = Node->getOperand(1); 02555 02556 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && 02557 HasNoSignedComparisonUses(Node)) { 02558 // Look for (X86cmp (truncate $op, i1), 0) and try to convert to a 02559 // smaller encoding 02560 if (Opcode == X86ISD::CMP && N0.getValueType() == MVT::i1 && 02561 X86::isZeroNode(N1)) { 02562 SDValue Reg = N0.getOperand(0); 02563 SDValue Imm = CurDAG->getTargetConstant(1, MVT::i8); 02564 02565 // Emit testb 02566 if (Reg.getScalarValueSizeInBits() > 8) 02567 Reg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Reg); 02568 // Emit a testb. 02569 SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, 02570 Reg, Imm); 02571 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); 02572 return nullptr; 02573 } 02574 02575 N0 = N0.getOperand(0); 02576 } 02577 // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to 02578 // use a smaller encoding. 02579 // Look past the truncate if CMP is the only use of it. 02580 if ((N0.getNode()->getOpcode() == ISD::AND || 02581 (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) && 02582 N0.getNode()->hasOneUse() && 02583 N0.getValueType() != MVT::i8 && 02584 X86::isZeroNode(N1)) { 02585 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1)); 02586 if (!C) break; 02587 02588 // For example, convert "testl %eax, $8" to "testb %al, $8" 02589 if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 && 02590 (!(C->getZExtValue() & 0x80) || 02591 HasNoSignedComparisonUses(Node))) { 02592 SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8); 02593 SDValue Reg = N0.getNode()->getOperand(0); 02594 02595 // On x86-32, only the ABCD registers have 8-bit subregisters. 02596 if (!Subtarget->is64Bit()) { 02597 const TargetRegisterClass *TRC; 02598 switch (N0.getSimpleValueType().SimpleTy) { 02599 case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; 02600 case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; 02601 default: llvm_unreachable("Unsupported TEST operand type!"); 02602 } 02603 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); 02604 Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, 02605 Reg.getValueType(), Reg, RC), 0); 02606 } 02607 02608 // Extract the l-register. 02609 SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, 02610 MVT::i8, Reg); 02611 02612 // Emit a testb. 02613 SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, 02614 Subreg, Imm); 02615 // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has 02616 // one, do not call ReplaceAllUsesWith. 02617 ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), 02618 SDValue(NewNode, 0)); 02619 return nullptr; 02620 } 02621 02622 // For example, "testl %eax, $2048" to "testb %ah, $8". 02623 if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 && 02624 (!(C->getZExtValue() & 0x8000) || 02625 HasNoSignedComparisonUses(Node))) { 02626 // Shift the immediate right by 8 bits. 02627 SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8, 02628 MVT::i8); 02629 SDValue Reg = N0.getNode()->getOperand(0); 02630 02631 // Put the value in an ABCD register. 
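// Only the A, B, C and D registers have an addressable high-byte subregister
// (AH, BH, CH, DH), so before sub_8bit_hi can be extracted the operand is
// constrained to the matching GR16_ABCD/GR32_ABCD/GR64_ABCD class with a
// COPY_TO_REGCLASS node; the plain low-byte case above only needs this on
// 32-bit targets, where the non-ABCD registers have no 8-bit subregister at
// all.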
02632 const TargetRegisterClass *TRC; 02633 switch (N0.getSimpleValueType().SimpleTy) { 02634 case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break; 02635 case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; 02636 case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; 02637 default: llvm_unreachable("Unsupported TEST operand type!"); 02638 } 02639 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); 02640 Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, 02641 Reg.getValueType(), Reg, RC), 0); 02642 02643 // Extract the h-register. 02644 SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, 02645 MVT::i8, Reg); 02646 02647 // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only 02648 // target GR8_NOREX registers, so make sure the register class is 02649 // forced. 02650 SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, 02651 MVT::i32, Subreg, ShiftedImm); 02652 // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has 02653 // one, do not call ReplaceAllUsesWith. 02654 ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), 02655 SDValue(NewNode, 0)); 02656 return nullptr; 02657 } 02658 02659 // For example, "testl %eax, $32776" to "testw %ax, $32776". 02660 if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 && 02661 N0.getValueType() != MVT::i16 && 02662 (!(C->getZExtValue() & 0x8000) || 02663 HasNoSignedComparisonUses(Node))) { 02664 SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16); 02665 SDValue Reg = N0.getNode()->getOperand(0); 02666 02667 // Extract the 16-bit subregister. 02668 SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, 02669 MVT::i16, Reg); 02670 02671 // Emit a testw. 02672 SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, 02673 Subreg, Imm); 02674 // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has 02675 // one, do not call ReplaceAllUsesWith. 02676 ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), 02677 SDValue(NewNode, 0)); 02678 return nullptr; 02679 } 02680 02681 // For example, "testq %rax, $268468232" to "testl %eax, $268468232". 02682 if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 && 02683 N0.getValueType() == MVT::i64 && 02684 (!(C->getZExtValue() & 0x80000000) || 02685 HasNoSignedComparisonUses(Node))) { 02686 SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); 02687 SDValue Reg = N0.getNode()->getOperand(0); 02688 02689 // Extract the 32-bit subregister. 02690 SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl, 02691 MVT::i32, Reg); 02692 02693 // Emit a testl. 02694 SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, 02695 Subreg, Imm); 02696 // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has 02697 // one, do not call ReplaceAllUsesWith. 02698 ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), 02699 SDValue(NewNode, 0)); 02700 return nullptr; 02701 } 02702 } 02703 break; 02704 } 02705 case ISD::STORE: { 02706 // Change a chain of {load; incr or dec; store} of the same value into 02707 // a simple increment or decrement through memory of that value, if the 02708 // uses of the modified value and its address are suitable. 02709 // The DEC64m tablegen pattern is currently not able to match the case where 02710 // the EFLAGS on the original DEC are used. (This also applies to 02711 // {INC,DEC}X{64,32,16,8}.) 
02712 // We'll need to improve tablegen to allow flags to be transferred from a 02713 // node in the pattern to the result node. probably with a new keyword 02714 // for example, we have this 02715 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", 02716 // [(store (add (loadi64 addr:$dst), -1), addr:$dst), 02717 // (implicit EFLAGS)]>; 02718 // but maybe need something like this 02719 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", 02720 // [(store (add (loadi64 addr:$dst), -1), addr:$dst), 02721 // (transferrable EFLAGS)]>; 02722 02723 StoreSDNode *StoreNode = cast<StoreSDNode>(Node); 02724 SDValue StoredVal = StoreNode->getOperand(1); 02725 unsigned Opc = StoredVal->getOpcode(); 02726 02727 LoadSDNode *LoadNode = nullptr; 02728 SDValue InputChain; 02729 if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG, 02730 LoadNode, InputChain)) 02731 break; 02732 02733 SDValue Base, Scale, Index, Disp, Segment; 02734 if (!SelectAddr(LoadNode, LoadNode->getBasePtr(), 02735 Base, Scale, Index, Disp, Segment)) 02736 break; 02737 02738 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2); 02739 MemOp[0] = StoreNode->getMemOperand(); 02740 MemOp[1] = LoadNode->getMemOperand(); 02741 const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; 02742 EVT LdVT = LoadNode->getMemoryVT(); 02743 unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); 02744 MachineSDNode *Result = CurDAG->getMachineNode(newOpc, 02745 SDLoc(Node), 02746 MVT::i32, MVT::Other, Ops); 02747 Result->setMemRefs(MemOp, MemOp + 2); 02748 02749 ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); 02750 ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); 02751 02752 return Result; 02753 } 02754 } 02755 02756 SDNode *ResNode = SelectCode(Node); 02757 02758 DEBUG(dbgs() << "=> "; 02759 if (ResNode == nullptr || ResNode == Node) 02760 Node->dump(CurDAG); 02761 else 02762 ResNode->dump(CurDAG); 02763 dbgs() << '\n'); 02764 02765 return ResNode; 02766 } 02767 02768 bool X86DAGToDAGISel:: 02769 SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, 02770 std::vector<SDValue> &OutOps) { 02771 SDValue Op0, Op1, Op2, Op3, Op4; 02772 switch (ConstraintCode) { 02773 case 'o': // offsetable ?? 02774 case 'v': // not offsetable ?? 02775 default: return true; 02776 case 'm': // memory 02777 if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) 02778 return true; 02779 break; 02780 } 02781 02782 OutOps.push_back(Op0); 02783 OutOps.push_back(Op1); 02784 OutOps.push_back(Op2); 02785 OutOps.push_back(Op3); 02786 OutOps.push_back(Op4); 02787 return false; 02788 } 02789 02790 /// createX86ISelDag - This pass converts a legalized DAG into a 02791 /// X86-specific DAG, ready for instruction scheduling. 02792 /// 02793 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, 02794 CodeGenOpt::Level OptLevel) { 02795 return new X86DAGToDAGISel(TM, OptLevel); 02796 }
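// A minimal sketch of how this entry point is normally wired into the backend
// pipeline. The X86PassConfig, addInstSelector and getOptLevel names below are
// assumed from the TargetPassConfig interface of this LLVM version (they live
// in X86TargetMachine.cpp, not in this file):
//
//   bool X86PassConfig::addInstSelector() {
//     // Install the DAG-to-DAG instruction selector defined above.
//     addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
//     ...
//     return false;
//   }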