// LLVM API Documentation (doxygen page-header residue from the HTML export)
//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//

#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "isel"

/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;

// Command-line knob backing LimitFloatPrecision via cl::location; defaults
// to 0, i.e. full precision (no low-precision inline expansions).
static cl::opt<unsigned, true>
LimitFPPrecision("limit-float-precision",
                 cl::desc("Generate low-precision inline sequences "
                          "for some float libcalls"),
                 cl::location(LimitFloatPrecision),
                 cl::init(0));

// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach, and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
//   %buffer = alloca [4096 x i8]
//   %data = load [4096 x i8]* %argPtr
//   store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;

// Forward declaration; defined below, mutually used by getCopyFromParts.
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent.  If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).  ISD::DELETED_NODE is used as the "no assertion"
/// sentinel for AssertOp.
static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
                                const SDValue *Parts,
                                unsigned NumParts, MVT PartVT, EVT ValueVT,
                                const Value *V,
                                ISD::NodeType AssertOp = ISD::DELETED_NODE) {
  // Vectors are assembled by the dedicated helper.
  if (ValueVT.isVector())
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
                                  PartVT, ValueVT, V);

  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  if (NumParts > 1) {
    // Assemble the value from multiple parts.
    if (ValueVT.isInteger()) {
      unsigned PartBits = PartVT.getSizeInBits();
      unsigned ValueBits = ValueVT.getSizeInBits();

      // Assemble the power of 2 part.  If NumParts is not a power of two,
      // RoundParts is the largest power of two below it; the remaining
      // "odd" parts are handled afterwards.
      unsigned RoundParts = NumParts & (NumParts - 1) ?
        1 << Log2_32(NumParts) : NumParts;
      unsigned RoundBits = PartBits * RoundParts;
      EVT RoundVT = RoundBits == ValueBits ?
        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
      SDValue Lo, Hi;

      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);

      if (RoundParts > 2) {
        // Recursively assemble each half of the power-of-two region.
        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
                              PartVT, HalfVT, V);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
                              RoundParts / 2, PartVT, HalfVT, V);
      } else {
        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
      }

      // Parts are stored memory-order; on big-endian targets the first
      // part is the most-significant half.
      if (TLI.isBigEndian())
        std::swap(Lo, Hi);

      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);

      if (RoundParts < NumParts) {
        // Assemble the trailing non-power-of-2 part.
        unsigned OddParts = NumParts - RoundParts;
        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
        Hi = getCopyFromParts(DAG, DL,
                              Parts + RoundParts, OddParts, PartVT, OddVT, V);

        // Combine the round and odd parts: zero-extend the low piece,
        // shift the high piece into place, then OR them together.
        Lo = Val;
        if (TLI.isBigEndian())
          std::swap(Lo, Hi);
        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
        Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
                         DAG.getConstant(Lo.getValueType().getSizeInBits(),
                                         TLI.getPointerTy()));
        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
      }
    } else if (PartVT.isFloatingPoint()) {
      // FP split into multiple FP parts (for ppcf128).
      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
             "Unexpected split");
      SDValue Lo, Hi;
      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
      if (TLI.hasBigEndianPartOrdering(ValueVT))
        std::swap(Lo, Hi);
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
    } else {
      // FP split into integer parts (soft fp): assemble as an integer of
      // the same width, then fall through to the bitcast below.
      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
             !PartVT.isVector() && "Unexpected split");
      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
    }
  }

  // There is now one part, held in Val.  Correct it to match ValueVT.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isInteger() && ValueVT.isInteger()) {
    if (ValueVT.bitsLT(PartEVT)) {
      // For a truncate, see if we have any information to
      // indicate whether the truncated bits will always be
      // zero or sign-extension.
      if (AssertOp != ISD::DELETED_NODE)
        Val = DAG.getNode(AssertOp, DL, PartEVT, Val,
                          DAG.getValueType(ValueVT));
      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    }
    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
  }

  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
    // FP_ROUND's are always exact here (the trailing constant 1 is the
    // "no information lost" flag operand).
    if (ValueVT.bitsLT(Val.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
                         DAG.getTargetConstant(1, TLI.getPointerTy()));

    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
  }

  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  llvm_unreachable("Unknown mismatch!");
}
00196 if (AssertOp != ISD::DELETED_NODE) 00197 Val = DAG.getNode(AssertOp, DL, PartEVT, Val, 00198 DAG.getValueType(ValueVT)); 00199 return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); 00200 } 00201 return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); 00202 } 00203 00204 if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { 00205 // FP_ROUND's are always exact here. 00206 if (ValueVT.bitsLT(Val.getValueType())) 00207 return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, 00208 DAG.getTargetConstant(1, TLI.getPointerTy())); 00209 00210 return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); 00211 } 00212 00213 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) 00214 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 00215 00216 llvm_unreachable("Unknown mismatch!"); 00217 } 00218 00219 static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, 00220 const Twine &ErrMsg) { 00221 const Instruction *I = dyn_cast_or_null<Instruction>(V); 00222 if (!V) 00223 return Ctx.emitError(ErrMsg); 00224 00225 const char *AsmError = ", possible invalid constraint for vector type"; 00226 if (const CallInst *CI = dyn_cast<CallInst>(I)) 00227 if (isa<InlineAsm>(CI->getCalledValue())) 00228 return Ctx.emitError(I, ErrMsg + AsmError); 00229 00230 return Ctx.emitError(I, ErrMsg); 00231 } 00232 00233 /// getCopyFromPartsVector - Create a value that contains the specified legal 00234 /// parts combined into the value they represent. If the parts combine to a 00235 /// type larger then ValueVT then AssertOp can be used to specify whether the 00236 /// extra bits are known to be zero (ISD::AssertZext) or sign extended from 00237 /// ValueVT (ISD::AssertSext). 
/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent.  If the parts combine to a
/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V) {
  assert(ValueVT.isVector() && "Not a vector value");
  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  // Handle a multi-element vector.
  if (NumParts > 1) {
    EVT IntermediateVT;
    MVT RegisterVT;
    unsigned NumIntermediates;
    // Ask the target how ValueVT was legalized so we can reassemble it the
    // same way it was split.
    unsigned NumRegs =
    TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                               NumIntermediates, RegisterVT);
    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
    NumParts = NumRegs; // Silence a compiler warning.
    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
    assert(RegisterVT == Parts[0].getSimpleValueType() &&
           "Part type doesn't match part!");

    // Assemble the parts into intermediate operands.
    SmallVector<SDValue, 8> Ops(NumIntermediates);
    if (NumIntermediates == NumParts) {
      // If the register was not expanded, truncate or copy the value,
      // as appropriate.
      for (unsigned i = 0; i != NumParts; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
                                  PartVT, IntermediateVT, V);
    } else if (NumParts > 0) {
      // If the intermediate type was expanded, build the intermediate
      // operands from the parts.
      assert(NumParts % NumIntermediates == 0 &&
             "Must expand into a divisible number of parts!");
      unsigned Factor = NumParts / NumIntermediates;
      for (unsigned i = 0; i != NumIntermediates; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
                                  PartVT, IntermediateVT, V);
    }

    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
    // intermediate operands.
    Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                : ISD::BUILD_VECTOR,
                      DL, ValueVT, Ops);
  }

  // There is now one part, held in Val.  Correct it to match ValueVT.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isVector()) {
    // If the element type of the source/dest vectors are the same, but the
    // parts vector has more elements than the value vector, then we have a
    // vector widening case (e.g. <2 x float> -> <4 x float>).  Extract the
    // elements we want.
    if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
      assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
             "Cannot narrow, it would be a lossy transformation");
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                         DAG.getConstant(0, TLI.getVectorIdxTy()));
    }

    // Vector/Vector bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

    assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
           "Cannot handle this kind of promotion");
    // Promoted vector extract: element count matches, element width differs.
    bool Smaller = ValueVT.bitsLE(PartEVT);
    return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
                       DL, ValueVT, Val);

  }

  // Trivial bitcast if the types are the same size and the destination
  // vector type is legal.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
      TLI.isTypeLegal(ValueVT))
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  // Handle cases such as i8 -> <1 x i1>.  Anything with more than one
  // element cannot be rebuilt from a single scalar part, so diagnose and
  // return undef rather than crash.
  if (ValueVT.getVectorNumElements() != 1) {
    diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
                                      "non-trivial scalar-to-vector conversion");
    return DAG.getUNDEF(ValueVT);
  }

  // One-element vector whose element type differs from the scalar part:
  // adjust the scalar's width first, then wrap it below.
  if (ValueVT.getVectorNumElements() == 1 &&
      ValueVT.getVectorElementType() != PartEVT) {
    bool Smaller = ValueVT.bitsLE(PartEVT);
    Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
                      DL, ValueVT.getScalarType(), Val);
  }

  return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
}
/// getCopyToPartsVector forward declaration; defined below.
static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V);

/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts.  If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
/// Parts must point at NumParts writable SDValue slots.
static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
                           SDValue Val, SDValue *Parts, unsigned NumParts,
                           MVT PartVT, const Value *V,
                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
  EVT ValueVT = Val.getValueType();

  // Handle the vector case separately.
  if (ValueVT.isVector())
    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned PartBits = PartVT.getSizeInBits();
  unsigned OrigNumParts = NumParts;
  assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");

  if (NumParts == 0)
    return;

  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
  EVT PartEVT = PartVT;
  if (PartEVT == ValueVT) {
    assert(NumParts == 1 && "No-op copy with multiple parts!");
    Parts[0] = Val;
    return;
  }

  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
    // If the parts cover more bits than the value has, promote the value.
    if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
      assert(NumParts == 1 && "Do not know what to promote to!");
      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
    } else {
      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
             ValueVT.isInteger() &&
             "Unknown mismatch!");
      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
      if (PartVT == MVT::x86mmx)
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }
  } else if (PartBits == ValueVT.getSizeInBits()) {
    // Different types of the same size.
    assert(NumParts == 1 && PartEVT != ValueVT);
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
    // If the parts cover fewer bits than the value has, truncate the value.
    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
           ValueVT.isInteger() &&
           "Unknown mismatch!");
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    if (PartVT == MVT::x86mmx)
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  }

  // The value may have changed - recompute ValueVT.
  ValueVT = Val.getValueType();
  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
         "Failed to tile the value with PartVT!");

  if (NumParts == 1) {
    if (PartEVT != ValueVT)
      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
                                        "scalar-to-vector conversion failed");

    Parts[0] = Val;
    return;
  }

  // Expand the value into multiple parts.
  if (NumParts & (NumParts - 1)) {
    // The number of parts is not a power of 2.  Split off and copy the tail
    // (the high OddParts parts), then fall through to bisect the remaining
    // power-of-two RoundParts.
    assert(PartVT.isInteger() && ValueVT.isInteger() &&
           "Do not know what to expand to!");
    unsigned RoundParts = 1 << Log2_32(NumParts);
    unsigned RoundBits = RoundParts * PartBits;
    unsigned OddParts = NumParts - RoundParts;
    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
                                 DAG.getIntPtrConstant(RoundBits));
    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);

    if (TLI.isBigEndian())
      // The odd parts were reversed by getCopyToParts - unreverse them.
      std::reverse(Parts + RoundParts, Parts + NumParts);

    NumParts = RoundParts;
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
  }

  // The number of parts is a power of 2.  Repeatedly bisect the value using
  // EXTRACT_ELEMENT.
  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
                         EVT::getIntegerVT(*DAG.getContext(),
                                           ValueVT.getSizeInBits()),
                         Val);

  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
    for (unsigned i = 0; i < NumParts; i += StepSize) {
      unsigned ThisBits = StepSize * PartBits / 2;
      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
      SDValue &Part0 = Parts[i];
      SDValue &Part1 = Parts[i+StepSize/2];

      // Split Part0 into its low (operand 0) and high (operand 1) halves.
      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(1));
      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(0));

      if (ThisBits == PartBits && ThisVT != PartVT) {
        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
      }
    }
  }

  // Parts were produced little-endian-first; reverse for big-endian targets.
  if (TLI.isBigEndian())
    std::reverse(Parts, Parts + OrigNumParts);
}
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V) {
  EVT ValueVT = Val.getValueType();
  assert(ValueVT.isVector() && "Not a vector");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (NumParts == 1) {
    EVT PartEVT = PartVT;
    if (PartEVT == ValueVT) {
      // Nothing to do.
    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
      // Bitconvert vector->vector case.
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
               PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
      EVT ElementVT = PartVT.getVectorElementType();
      // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
      // undef elements.
      SmallVector<SDValue, 16> Ops;
      for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                                  ElementVT, Val, DAG.getConstant(i,
                                                  TLI.getVectorIdxTy())));

      for (unsigned i = ValueVT.getVectorNumElements(),
           e = PartVT.getVectorNumElements(); i != e; ++i)
        Ops.push_back(DAG.getUNDEF(ElementVT));

      Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops);

      // FIXME: Use CONCAT for 2x -> 4x.

      //SDValue UndefElts = DAG.getUNDEF(VectorTy);
      //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType().bitsGE(
                 ValueVT.getVectorElementType()) &&
               PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {

      // Promoted vector extract: same element count, wider elements.
      bool Smaller = PartEVT.bitsLE(ValueVT);
      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
                        DL, PartVT, Val);
    } else{
      // Vector -> scalar conversion.
      assert(ValueVT.getVectorNumElements() == 1 &&
             "Only trivial vector-to-scalar conversions should get here!");
      Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                        PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy()));

      bool Smaller = ValueVT.bitsLE(PartVT);
      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
                        DL, PartVT, Val);
    }

    Parts[0] = Val;
    return;
  }

  // Handle a multi-element vector.  Ask the target how ValueVT is legalized
  // so the split mirrors what getCopyFromPartsVector will reassemble.
  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
  unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
                                                IntermediateVT,
                                                NumIntermediates, RegisterVT);
  unsigned NumElements = ValueVT.getVectorNumElements();

  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
  NumParts = NumRegs; // Silence a compiler warning.
  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");

  // Split the vector into intermediate operands (subvectors when the
  // intermediate type is itself a vector, otherwise single elements).
  SmallVector<SDValue, 8> Ops(NumIntermediates);
  for (unsigned i = 0; i != NumIntermediates; ++i) {
    if (IntermediateVT.isVector())
      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
                           IntermediateVT, Val,
                           DAG.getConstant(i * (NumElements / NumIntermediates),
                                           TLI.getVectorIdxTy()));
    else
      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                           IntermediateVT, Val,
                           DAG.getConstant(i, TLI.getVectorIdxTy()));
  }

  // Split the intermediate operands into legal parts.
  if (NumParts == NumIntermediates) {
    // If the register was not expanded, promote or copy the value,
    // as appropriate.
    for (unsigned i = 0; i != NumParts; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
  } else if (NumParts > 0) {
    // If the intermediate type was expanded, split each value into
    // legal parts.
    assert(NumParts % NumIntermediates == 0 &&
           "Must expand into a divisible number of parts!");
    unsigned Factor = NumParts / NumIntermediates;
    for (unsigned i = 0; i != NumIntermediates; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
  }
}
namespace {
/// RegsForValue - This struct represents the registers (physical or virtual)
/// that a particular set of values is assigned, and the type information
/// about the value.  The most common situation is to represent one value at a
/// time, but struct or array values are handled element-wise as multiple
/// values.  The splitting of aggregates is performed recursively, so that we
/// never have aggregate-typed registers.  The values at this point do not
/// necessarily have legal types, so each value may require one or more
/// registers of some legal type.
///
struct RegsForValue {
  /// ValueVTs - The value types of the values, which may not be legal, and
  /// may need be promoted or synthesized from one or more registers.
  ///
  SmallVector<EVT, 4> ValueVTs;

  /// RegVTs - The value types of the registers.  This is the same size as
  /// ValueVTs and it records, for each value, what the type of the assigned
  /// register or registers are.  (Individual values are never synthesized
  /// from more than one type of register.)
  ///
  /// With virtual registers, the contents of RegVTs is redundant with TLI's
  /// getRegisterType member function, however when with physical registers
  /// it is necessary to have a separate record of the types.
  ///
  SmallVector<MVT, 4> RegVTs;

  /// Regs - This list holds the registers assigned to the values.
  /// Each legal or promoted value requires one register, and each
  /// expanded value requires multiple registers.
  ///
  SmallVector<unsigned, 4> Regs;

  RegsForValue() {}

  /// Construct for a single value held in the given register set.
  RegsForValue(const SmallVector<unsigned, 4> &regs,
               MVT regvt, EVT valuevt)
    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}

  /// Construct from an IR type: computes the value types of Ty and assigns
  /// a contiguous run of registers starting at Reg, one per legal part.
  RegsForValue(LLVMContext &Context, const TargetLowering &tli,
               unsigned Reg, Type *Ty) {
    ComputeValueVTs(tli, Ty, ValueVTs);

    for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
      EVT ValueVT = ValueVTs[Value];
      unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
      MVT RegisterVT = tli.getRegisterType(Context, ValueVT);
      for (unsigned i = 0; i != NumRegs; ++i)
        Regs.push_back(Reg + i);
      RegVTs.push_back(RegisterVT);
      Reg += NumRegs;
    }
  }

  /// append - Add the specified values to this one.
  void append(const RegsForValue &RHS) {
    ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
    RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
    Regs.append(RHS.Regs.begin(), RHS.Regs.end());
  }

  /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
  /// this value and returns the result as a ValueVTs value.  This uses
  /// Chain/Flag as the input and updates them for the output Chain/Flag.
  /// If the Flag pointer is NULL, no flag is used.
  SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
                          SDLoc dl,
                          SDValue &Chain, SDValue *Flag,
                          const Value *V = nullptr) const;

  /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
  /// specified value into the registers specified by this object.  This uses
  /// Chain/Flag as the input and updates them for the output Chain/Flag.
  /// If the Flag pointer is NULL, no flag is used.
  void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
                     SDValue &Chain, SDValue *Flag, const Value *V) const;

  /// AddInlineAsmOperands - Add this value to the specified inlineasm node
  /// operand list.  This adds the code marker, matching input operand index
  /// (if applicable), and includes the number of values added into it.
  void AddInlineAsmOperands(unsigned Kind,
                            bool HasMatching, unsigned MatchingIdx,
                            SelectionDAG &DAG,
                            std::vector<SDValue> &Ops) const;
};
} // end anonymous namespace
/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
/// this value and returns the result as a ValueVT value.  This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                      FunctionLoweringInfo &FuncInfo,
                                      SDLoc dl,
                                      SDValue &Chain, SDValue *Flag,
                                      const Value *V) const {
  // A Value with type {} or [0 x %t] needs no registers.
  if (ValueVTs.empty())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Assemble the legal parts into the final values.
  SmallVector<SDValue, 4> Values(ValueVTs.size());
  SmallVector<SDValue, 8> Parts;
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    // Copy the legal parts from the registers.
    EVT ValueVT = ValueVTs[Value];
    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
    MVT RegisterVT = RegVTs[Value];

    Parts.resize(NumRegs);
    for (unsigned i = 0; i != NumRegs; ++i) {
      SDValue P;
      if (!Flag) {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
      } else {
        // Glued copy: result 2 of CopyFromReg is the outgoing glue value.
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
        *Flag = P.getValue(2);
      }

      // Result 1 of CopyFromReg is the output chain; thread it through so
      // the copies are ordered.
      Chain = P.getValue(1);
      Parts[i] = P;

      // If the source register was virtual and if we know something about it,
      // add an assert node.
      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
          !RegisterVT.isInteger() || RegisterVT.isVector())
        continue;

      const FunctionLoweringInfo::LiveOutInfo *LOI =
        FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
      if (!LOI)
        continue;

      unsigned RegSize = RegisterVT.getSizeInBits();
      unsigned NumSignBits = LOI->NumSignBits;
      unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();

      if (NumZeroBits == RegSize) {
        // The current value is a zero.
        // Explicitly express that as it would be easier for
        // optimizations to kick in.
        Parts[i] = DAG.getConstant(0, RegisterVT);
        continue;
      }

      // FIXME: We capture more information than the dag can represent.  For
      // now, just use the tightest assertzext/assertsext possible.
      bool isSExt = true;
      EVT FromVT(MVT::Other);
      if (NumSignBits == RegSize)
        isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
      else if (NumZeroBits >= RegSize-1)
        isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
      else if (NumSignBits > RegSize-8)
        isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
      else if (NumZeroBits >= RegSize-8)
        isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
      else if (NumSignBits > RegSize-16)
        isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
      else if (NumZeroBits >= RegSize-16)
        isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
      else if (NumSignBits > RegSize-32)
        isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
      else if (NumZeroBits >= RegSize-32)
        isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
      else
        continue;

      // Add an assertion node.
      assert(FromVT != MVT::Other);
      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
                             RegisterVT, P, DAG.getValueType(FromVT));
    }

    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
                                     NumRegs, RegisterVT, ValueVT, V);
    Part += NumRegs;
    Parts.clear();
  }

  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
/// specified value into the registers specified by this object.  This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
                                 SDValue &Chain, SDValue *Flag,
                                 const Value *V) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Get the list of the value's legal parts.
  unsigned NumRegs = Regs.size();
  SmallVector<SDValue, 8> Parts(NumRegs);
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    EVT ValueVT = ValueVTs[Value];
    unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
    MVT RegisterVT = RegVTs[Value];
    // Prefer a zero-extend when the target says it is free for this value;
    // otherwise let the extension kind be anything.
    ISD::NodeType ExtendKind =
      TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND;

    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
                   &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
    Part += NumParts;
  }

  // Copy the parts into the registers.
  SmallVector<SDValue, 8> Chains(NumRegs);
  for (unsigned i = 0; i != NumRegs; ++i) {
    SDValue Part;
    if (!Flag) {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
    } else {
      // Glued copy: result 1 of CopyToReg is the outgoing glue value.
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
      *Flag = Part.getValue(1);
    }

    Chains[i] = Part.getValue(0);
  }

  if (NumRegs == 1 || Flag)
    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
    // flagged to it.  That is the CopyToReg nodes and the user are considered
    // a single scheduling unit.  If we create a TokenFactor and return it as
    // chain, then the TokenFactor is both a predecessor (operand) of the
    // user as well as a successor (the TF operands are flagged to the user).
    //   c1, f1 = CopyToReg
    //   c2, f2 = CopyToReg
    //   c3     = TokenFactor c1, c2
    //   ...
    //          = op c3, ..., f2
    Chain = Chains[NumRegs-1];
  else
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
00817 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, 00818 unsigned MatchingIdx, 00819 SelectionDAG &DAG, 00820 std::vector<SDValue> &Ops) const { 00821 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 00822 00823 unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); 00824 if (HasMatching) 00825 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); 00826 else if (!Regs.empty() && 00827 TargetRegisterInfo::isVirtualRegister(Regs.front())) { 00828 // Put the register class of the virtual registers in the flag word. That 00829 // way, later passes can recompute register class constraints for inline 00830 // assembly as well as normal instructions. 00831 // Don't do this for tied operands that can use the regclass information 00832 // from the def. 00833 const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 00834 const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); 00835 Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); 00836 } 00837 00838 SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); 00839 Ops.push_back(Res); 00840 00841 unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); 00842 for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { 00843 unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); 00844 MVT RegisterVT = RegVTs[Value]; 00845 for (unsigned i = 0; i != NumRegs; ++i) { 00846 assert(Reg < Regs.size() && "Mismatch in # registers expected"); 00847 unsigned TheReg = Regs[Reg++]; 00848 Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); 00849 00850 if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { 00851 // If we clobbered the stack pointer, MFI should know about it. 
00852 assert(DAG.getMachineFunction().getFrameInfo()-> 00853 hasInlineAsmWithSPAdjust()); 00854 } 00855 } 00856 } 00857 } 00858 00859 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, 00860 const TargetLibraryInfo *li) { 00861 AA = &aa; 00862 GFI = gfi; 00863 LibInfo = li; 00864 DL = DAG.getSubtarget().getDataLayout(); 00865 Context = DAG.getContext(); 00866 LPadToCallSiteMap.clear(); 00867 } 00868 00869 /// clear - Clear out the current SelectionDAG and the associated 00870 /// state and prepare this SelectionDAGBuilder object to be used 00871 /// for a new block. This doesn't clear out information about 00872 /// additional blocks that are needed to complete switch lowering 00873 /// or PHI node updating; that information is cleared out as it is 00874 /// consumed. 00875 void SelectionDAGBuilder::clear() { 00876 NodeMap.clear(); 00877 UnusedArgNodeMap.clear(); 00878 PendingLoads.clear(); 00879 PendingExports.clear(); 00880 CurInst = nullptr; 00881 HasTailCall = false; 00882 SDNodeOrder = LowestSDNodeOrder; 00883 } 00884 00885 /// clearDanglingDebugInfo - Clear the dangling debug information 00886 /// map. This function is separated from the clear so that debug 00887 /// information that is dangling in a basic block can be properly 00888 /// resolved in a different basic block. This allows the 00889 /// SelectionDAG to resolve dangling debug information attached 00890 /// to PHI nodes. 00891 void SelectionDAGBuilder::clearDanglingDebugInfo() { 00892 DanglingDebugInfoMap.clear(); 00893 } 00894 00895 /// getRoot - Return the current virtual root of the Selection DAG, 00896 /// flushing any PendingLoad items. This must be done before emitting 00897 /// a store or any other node that may need to be ordered after any 00898 /// prior load instructions. 
00899 /// 00900 SDValue SelectionDAGBuilder::getRoot() { 00901 if (PendingLoads.empty()) 00902 return DAG.getRoot(); 00903 00904 if (PendingLoads.size() == 1) { 00905 SDValue Root = PendingLoads[0]; 00906 DAG.setRoot(Root); 00907 PendingLoads.clear(); 00908 return Root; 00909 } 00910 00911 // Otherwise, we have to make a token factor node. 00912 SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 00913 PendingLoads); 00914 PendingLoads.clear(); 00915 DAG.setRoot(Root); 00916 return Root; 00917 } 00918 00919 /// getControlRoot - Similar to getRoot, but instead of flushing all the 00920 /// PendingLoad items, flush all the PendingExports items. It is necessary 00921 /// to do this before emitting a terminator instruction. 00922 /// 00923 SDValue SelectionDAGBuilder::getControlRoot() { 00924 SDValue Root = DAG.getRoot(); 00925 00926 if (PendingExports.empty()) 00927 return Root; 00928 00929 // Turn all of the CopyToReg chains into one factored node. 00930 if (Root.getOpcode() != ISD::EntryToken) { 00931 unsigned i = 0, e = PendingExports.size(); 00932 for (; i != e; ++i) { 00933 assert(PendingExports[i].getNode()->getNumOperands() > 1); 00934 if (PendingExports[i].getNode()->getOperand(0) == Root) 00935 break; // Don't add the root if we already indirectly depend on it. 00936 } 00937 00938 if (i == e) 00939 PendingExports.push_back(Root); 00940 } 00941 00942 Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 00943 PendingExports); 00944 PendingExports.clear(); 00945 DAG.setRoot(Root); 00946 return Root; 00947 } 00948 00949 void SelectionDAGBuilder::visit(const Instruction &I) { 00950 // Set up outgoing PHI node register values before emitting the terminator. 
00951 if (isa<TerminatorInst>(&I)) 00952 HandlePHINodesInSuccessorBlocks(I.getParent()); 00953 00954 ++SDNodeOrder; 00955 00956 CurInst = &I; 00957 00958 visit(I.getOpcode(), I); 00959 00960 if (!isa<TerminatorInst>(&I) && !HasTailCall) 00961 CopyToExportRegsIfNeeded(&I); 00962 00963 CurInst = nullptr; 00964 } 00965 00966 void SelectionDAGBuilder::visitPHI(const PHINode &) { 00967 llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); 00968 } 00969 00970 void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { 00971 // Note: this doesn't use InstVisitor, because it has to work with 00972 // ConstantExpr's in addition to instructions. 00973 switch (Opcode) { 00974 default: llvm_unreachable("Unknown instruction type encountered!"); 00975 // Build the switch statement using the Instruction.def file. 00976 #define HANDLE_INST(NUM, OPCODE, CLASS) \ 00977 case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; 00978 #include "llvm/IR/Instruction.def" 00979 } 00980 } 00981 00982 // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, 00983 // generate the debug data structures now that we've seen its definition. 00984 void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, 00985 SDValue Val) { 00986 DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; 00987 if (DDI.getDI()) { 00988 const DbgValueInst *DI = DDI.getDI(); 00989 DebugLoc dl = DDI.getdl(); 00990 unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); 00991 MDNode *Variable = DI->getVariable(); 00992 uint64_t Offset = DI->getOffset(); 00993 // A dbg.value for an alloca is always indirect. 
00994 bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; 00995 SDDbgValue *SDV; 00996 if (Val.getNode()) { 00997 if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, Val)) { 00998 SDV = DAG.getDbgValue(Variable, Val.getNode(), 00999 Val.getResNo(), IsIndirect, 01000 Offset, dl, DbgSDNodeOrder); 01001 DAG.AddDbgValue(SDV, Val.getNode(), false); 01002 } 01003 } else 01004 DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); 01005 DanglingDebugInfoMap[V] = DanglingDebugInfo(); 01006 } 01007 } 01008 01009 /// getValue - Return an SDValue for the given Value. 01010 SDValue SelectionDAGBuilder::getValue(const Value *V) { 01011 // If we already have an SDValue for this value, use it. It's important 01012 // to do this first, so that we don't create a CopyFromReg if we already 01013 // have a regular SDValue. 01014 SDValue &N = NodeMap[V]; 01015 if (N.getNode()) return N; 01016 01017 // If there's a virtual register allocated and initialized for this 01018 // value, use it. 01019 DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); 01020 if (It != FuncInfo.ValueMap.end()) { 01021 unsigned InReg = It->second; 01022 RegsForValue RFV(*DAG.getContext(), 01023 *TM.getSubtargetImpl()->getTargetLowering(), InReg, 01024 V->getType()); 01025 SDValue Chain = DAG.getEntryNode(); 01026 N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); 01027 resolveDanglingDebugInfo(V, N); 01028 return N; 01029 } 01030 01031 // Otherwise create a new SDValue and remember it. 01032 SDValue Val = getValueImpl(V); 01033 NodeMap[V] = Val; 01034 resolveDanglingDebugInfo(V, Val); 01035 return Val; 01036 } 01037 01038 /// getNonRegisterValue - Return an SDValue for the given Value, but 01039 /// don't look in FuncInfo.ValueMap for a virtual register. 01040 SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { 01041 // If we already have an SDValue for this value, use it. 
01042 SDValue &N = NodeMap[V]; 01043 if (N.getNode()) return N; 01044 01045 // Otherwise create a new SDValue and remember it. 01046 SDValue Val = getValueImpl(V); 01047 NodeMap[V] = Val; 01048 resolveDanglingDebugInfo(V, Val); 01049 return Val; 01050 } 01051 01052 /// getValueImpl - Helper function for getValue and getNonRegisterValue. 01053 /// Create an SDValue for the given value. 01054 SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { 01055 const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); 01056 01057 if (const Constant *C = dyn_cast<Constant>(V)) { 01058 EVT VT = TLI->getValueType(V->getType(), true); 01059 01060 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) 01061 return DAG.getConstant(*CI, VT); 01062 01063 if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 01064 return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); 01065 01066 if (isa<ConstantPointerNull>(C)) { 01067 unsigned AS = V->getType()->getPointerAddressSpace(); 01068 return DAG.getConstant(0, TLI->getPointerTy(AS)); 01069 } 01070 01071 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 01072 return DAG.getConstantFP(*CFP, VT); 01073 01074 if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) 01075 return DAG.getUNDEF(VT); 01076 01077 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { 01078 visit(CE->getOpcode(), *CE); 01079 SDValue N1 = NodeMap[V]; 01080 assert(N1.getNode() && "visit didn't populate the NodeMap!"); 01081 return N1; 01082 } 01083 01084 if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) { 01085 SmallVector<SDValue, 4> Constants; 01086 for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); 01087 OI != OE; ++OI) { 01088 SDNode *Val = getValue(*OI).getNode(); 01089 // If the operand is an empty aggregate, there are no values. 01090 if (!Val) continue; 01091 // Add each leaf value from the operand to the Constants list 01092 // to form a flattened list of all the values. 
01093 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) 01094 Constants.push_back(SDValue(Val, i)); 01095 } 01096 01097 return DAG.getMergeValues(Constants, getCurSDLoc()); 01098 } 01099 01100 if (const ConstantDataSequential *CDS = 01101 dyn_cast<ConstantDataSequential>(C)) { 01102 SmallVector<SDValue, 4> Ops; 01103 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { 01104 SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); 01105 // Add each leaf value from the operand to the Constants list 01106 // to form a flattened list of all the values. 01107 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) 01108 Ops.push_back(SDValue(Val, i)); 01109 } 01110 01111 if (isa<ArrayType>(CDS->getType())) 01112 return DAG.getMergeValues(Ops, getCurSDLoc()); 01113 return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), 01114 VT, Ops); 01115 } 01116 01117 if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { 01118 assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && 01119 "Unknown struct or array constant!"); 01120 01121 SmallVector<EVT, 4> ValueVTs; 01122 ComputeValueVTs(*TLI, C->getType(), ValueVTs); 01123 unsigned NumElts = ValueVTs.size(); 01124 if (NumElts == 0) 01125 return SDValue(); // empty struct 01126 SmallVector<SDValue, 4> Constants(NumElts); 01127 for (unsigned i = 0; i != NumElts; ++i) { 01128 EVT EltVT = ValueVTs[i]; 01129 if (isa<UndefValue>(C)) 01130 Constants[i] = DAG.getUNDEF(EltVT); 01131 else if (EltVT.isFloatingPoint()) 01132 Constants[i] = DAG.getConstantFP(0, EltVT); 01133 else 01134 Constants[i] = DAG.getConstant(0, EltVT); 01135 } 01136 01137 return DAG.getMergeValues(Constants, getCurSDLoc()); 01138 } 01139 01140 if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) 01141 return DAG.getBlockAddress(BA, VT); 01142 01143 VectorType *VecTy = cast<VectorType>(V->getType()); 01144 unsigned NumElements = VecTy->getNumElements(); 01145 01146 // Now that we know the number and type of the 
elements, get that number of 01147 // elements into the Ops array based on what kind of constant it is. 01148 SmallVector<SDValue, 16> Ops; 01149 if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { 01150 for (unsigned i = 0; i != NumElements; ++i) 01151 Ops.push_back(getValue(CV->getOperand(i))); 01152 } else { 01153 assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); 01154 EVT EltVT = TLI->getValueType(VecTy->getElementType()); 01155 01156 SDValue Op; 01157 if (EltVT.isFloatingPoint()) 01158 Op = DAG.getConstantFP(0, EltVT); 01159 else 01160 Op = DAG.getConstant(0, EltVT); 01161 Ops.assign(NumElements, Op); 01162 } 01163 01164 // Create a BUILD_VECTOR node. 01165 return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); 01166 } 01167 01168 // If this is a static alloca, generate it as the frameindex instead of 01169 // computation. 01170 if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 01171 DenseMap<const AllocaInst*, int>::iterator SI = 01172 FuncInfo.StaticAllocaMap.find(AI); 01173 if (SI != FuncInfo.StaticAllocaMap.end()) 01174 return DAG.getFrameIndex(SI->second, TLI->getPointerTy()); 01175 } 01176 01177 // If this is an instruction which fast-isel has deferred, select it now. 
01178 if (const Instruction *Inst = dyn_cast<Instruction>(V)) { 01179 unsigned InReg = FuncInfo.InitializeRegForValue(Inst); 01180 RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); 01181 SDValue Chain = DAG.getEntryNode(); 01182 return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); 01183 } 01184 01185 llvm_unreachable("Can't get register for value!"); 01186 } 01187 01188 void SelectionDAGBuilder::visitRet(const ReturnInst &I) { 01189 const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); 01190 SDValue Chain = getControlRoot(); 01191 SmallVector<ISD::OutputArg, 8> Outs; 01192 SmallVector<SDValue, 8> OutVals; 01193 01194 if (!FuncInfo.CanLowerReturn) { 01195 unsigned DemoteReg = FuncInfo.DemoteRegister; 01196 const Function *F = I.getParent()->getParent(); 01197 01198 // Emit a store of the return value through the virtual register. 01199 // Leave Outs empty so that LowerReturn won't try to load return 01200 // registers the usual way. 01201 SmallVector<EVT, 1> PtrValueVTs; 01202 ComputeValueVTs(*TLI, PointerType::getUnqual(F->getReturnType()), 01203 PtrValueVTs); 01204 01205 SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); 01206 SDValue RetOp = getValue(I.getOperand(0)); 01207 01208 SmallVector<EVT, 4> ValueVTs; 01209 SmallVector<uint64_t, 4> Offsets; 01210 ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); 01211 unsigned NumValues = ValueVTs.size(); 01212 01213 SmallVector<SDValue, 4> Chains(NumValues); 01214 for (unsigned i = 0; i != NumValues; ++i) { 01215 SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), 01216 RetPtr.getValueType(), RetPtr, 01217 DAG.getIntPtrConstant(Offsets[i])); 01218 Chains[i] = 01219 DAG.getStore(Chain, getCurSDLoc(), 01220 SDValue(RetOp.getNode(), RetOp.getResNo() + i), 01221 // FIXME: better loc info would be nice. 
01222 Add, MachinePointerInfo(), false, false, 0); 01223 } 01224 01225 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), 01226 MVT::Other, Chains); 01227 } else if (I.getNumOperands() != 0) { 01228 SmallVector<EVT, 4> ValueVTs; 01229 ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); 01230 unsigned NumValues = ValueVTs.size(); 01231 if (NumValues) { 01232 SDValue RetOp = getValue(I.getOperand(0)); 01233 for (unsigned j = 0, f = NumValues; j != f; ++j) { 01234 EVT VT = ValueVTs[j]; 01235 01236 ISD::NodeType ExtendKind = ISD::ANY_EXTEND; 01237 01238 const Function *F = I.getParent()->getParent(); 01239 if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 01240 Attribute::SExt)) 01241 ExtendKind = ISD::SIGN_EXTEND; 01242 else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 01243 Attribute::ZExt)) 01244 ExtendKind = ISD::ZERO_EXTEND; 01245 01246 if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) 01247 VT = TLI->getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind); 01248 01249 unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), VT); 01250 MVT PartVT = TLI->getRegisterType(*DAG.getContext(), VT); 01251 SmallVector<SDValue, 4> Parts(NumParts); 01252 getCopyToParts(DAG, getCurSDLoc(), 01253 SDValue(RetOp.getNode(), RetOp.getResNo() + j), 01254 &Parts[0], NumParts, PartVT, &I, ExtendKind); 01255 01256 // 'inreg' on function refers to return value 01257 ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); 01258 if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 01259 Attribute::InReg)) 01260 Flags.setInReg(); 01261 01262 // Propagate extension type if any 01263 if (ExtendKind == ISD::SIGN_EXTEND) 01264 Flags.setSExt(); 01265 else if (ExtendKind == ISD::ZERO_EXTEND) 01266 Flags.setZExt(); 01267 01268 for (unsigned i = 0; i < NumParts; ++i) { 01269 Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), 01270 VT, /*isfixed=*/true, 0, 0)); 01271 OutVals.push_back(Parts[i]); 01272 } 01273 } 01274 } 01275 } 01276 
01277 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); 01278 CallingConv::ID CallConv = 01279 DAG.getMachineFunction().getFunction()->getCallingConv(); 01280 Chain = TM.getSubtargetImpl()->getTargetLowering()->LowerReturn( 01281 Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); 01282 01283 // Verify that the target's LowerReturn behaved as expected. 01284 assert(Chain.getNode() && Chain.getValueType() == MVT::Other && 01285 "LowerReturn didn't return a valid chain!"); 01286 01287 // Update the DAG with the new chain value resulting from return lowering. 01288 DAG.setRoot(Chain); 01289 } 01290 01291 /// CopyToExportRegsIfNeeded - If the given value has virtual registers 01292 /// created for it, emit nodes to copy the value into the virtual 01293 /// registers. 01294 void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { 01295 // Skip empty types 01296 if (V->getType()->isEmptyTy()) 01297 return; 01298 01299 DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); 01300 if (VMI != FuncInfo.ValueMap.end()) { 01301 assert(!V->use_empty() && "Unused value assigned virtual registers!"); 01302 CopyValueToVirtualRegister(V, VMI->second); 01303 } 01304 } 01305 01306 /// ExportFromCurrentBlock - If this condition isn't known to be exported from 01307 /// the current basic block, add it to ValueMap now so that we'll get a 01308 /// CopyTo/FromReg. 01309 void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { 01310 // No need to export constants. 01311 if (!isa<Instruction>(V) && !isa<Argument>(V)) return; 01312 01313 // Already exported? 01314 if (FuncInfo.isExportedInst(V)) return; 01315 01316 unsigned Reg = FuncInfo.InitializeRegForValue(V); 01317 CopyValueToVirtualRegister(V, Reg); 01318 } 01319 01320 bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, 01321 const BasicBlock *FromBB) { 01322 // The operands of the setcc have to be in this block. 
We don't know 01323 // how to export them from some other block. 01324 if (const Instruction *VI = dyn_cast<Instruction>(V)) { 01325 // Can export from current BB. 01326 if (VI->getParent() == FromBB) 01327 return true; 01328 01329 // Is already exported, noop. 01330 return FuncInfo.isExportedInst(V); 01331 } 01332 01333 // If this is an argument, we can export it if the BB is the entry block or 01334 // if it is already exported. 01335 if (isa<Argument>(V)) { 01336 if (FromBB == &FromBB->getParent()->getEntryBlock()) 01337 return true; 01338 01339 // Otherwise, can only export this if it is already exported. 01340 return FuncInfo.isExportedInst(V); 01341 } 01342 01343 // Otherwise, constants can always be exported. 01344 return true; 01345 } 01346 01347 /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. 01348 uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, 01349 const MachineBasicBlock *Dst) const { 01350 BranchProbabilityInfo *BPI = FuncInfo.BPI; 01351 if (!BPI) 01352 return 0; 01353 const BasicBlock *SrcBB = Src->getBasicBlock(); 01354 const BasicBlock *DstBB = Dst->getBasicBlock(); 01355 return BPI->getEdgeWeight(SrcBB, DstBB); 01356 } 01357 01358 void SelectionDAGBuilder:: 01359 addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, 01360 uint32_t Weight /* = 0 */) { 01361 if (!Weight) 01362 Weight = getEdgeWeight(Src, Dst); 01363 Src->addSuccessor(Dst, Weight); 01364 } 01365 01366 01367 static bool InBlock(const Value *V, const BasicBlock *BB) { 01368 if (const Instruction *I = dyn_cast<Instruction>(V)) 01369 return I->getParent() == BB; 01370 return true; 01371 } 01372 01373 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. 01374 /// This function emits a branch and is used at the leaves of an OR or an 01375 /// AND operator tree. 
01376 /// 01377 void 01378 SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, 01379 MachineBasicBlock *TBB, 01380 MachineBasicBlock *FBB, 01381 MachineBasicBlock *CurBB, 01382 MachineBasicBlock *SwitchBB, 01383 uint32_t TWeight, 01384 uint32_t FWeight) { 01385 const BasicBlock *BB = CurBB->getBasicBlock(); 01386 01387 // If the leaf of the tree is a comparison, merge the condition into 01388 // the caseblock. 01389 if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { 01390 // The operands of the cmp have to be in this block. We don't know 01391 // how to export them from some other block. If this is the first block 01392 // of the sequence, no exporting is needed. 01393 if (CurBB == SwitchBB || 01394 (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && 01395 isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { 01396 ISD::CondCode Condition; 01397 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { 01398 Condition = getICmpCondCode(IC->getPredicate()); 01399 } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { 01400 Condition = getFCmpCondCode(FC->getPredicate()); 01401 if (TM.Options.NoNaNsFPMath) 01402 Condition = getFCmpCodeWithoutNaN(Condition); 01403 } else { 01404 Condition = ISD::SETEQ; // silence warning. 01405 llvm_unreachable("Unknown compare instruction"); 01406 } 01407 01408 CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, 01409 TBB, FBB, CurBB, TWeight, FWeight); 01410 SwitchCases.push_back(CB); 01411 return; 01412 } 01413 } 01414 01415 // Create a CaseBlock record representing this branch. 01416 CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), 01417 nullptr, TBB, FBB, CurBB, TWeight, FWeight); 01418 SwitchCases.push_back(CB); 01419 } 01420 01421 /// Scale down both weights to fit into uint32_t. 01422 static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { 01423 uint64_t NewMax = (NewTrue > NewFalse) ? 
NewTrue : NewFalse; 01424 uint32_t Scale = (NewMax / UINT32_MAX) + 1; 01425 NewTrue = NewTrue / Scale; 01426 NewFalse = NewFalse / Scale; 01427 } 01428 01429 /// FindMergedConditions - If Cond is an expression like 01430 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, 01431 MachineBasicBlock *TBB, 01432 MachineBasicBlock *FBB, 01433 MachineBasicBlock *CurBB, 01434 MachineBasicBlock *SwitchBB, 01435 unsigned Opc, uint32_t TWeight, 01436 uint32_t FWeight) { 01437 // If this node is not part of the or/and tree, emit it as a branch. 01438 const Instruction *BOp = dyn_cast<Instruction>(Cond); 01439 if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || 01440 (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || 01441 BOp->getParent() != CurBB->getBasicBlock() || 01442 !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || 01443 !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { 01444 EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, 01445 TWeight, FWeight); 01446 return; 01447 } 01448 01449 // Create TmpBB after CurBB. 01450 MachineFunction::iterator BBI = CurBB; 01451 MachineFunction &MF = DAG.getMachineFunction(); 01452 MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); 01453 CurBB->getParent()->insert(++BBI, TmpBB); 01454 01455 if (Opc == Instruction::Or) { 01456 // Codegen X | Y as: 01457 // BB1: 01458 // jmp_if_X TBB 01459 // jmp TmpBB 01460 // TmpBB: 01461 // jmp_if_Y TBB 01462 // jmp FBB 01463 // 01464 01465 // We have flexibility in setting Prob for BB1 and Prob for TmpBB. 01466 // The requirement is that 01467 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) 01468 // = TrueProb for orignal BB. 01469 // Assuming the orignal weights are A and B, one choice is to set BB1's 01470 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice 01471 // assumes that 01472 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. 
01473 // Another choice is to assume TrueProb for BB1 equals to TrueProb for 01474 // TmpBB, but the math is more complicated. 01475 01476 uint64_t NewTrueWeight = TWeight; 01477 uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight; 01478 ScaleWeights(NewTrueWeight, NewFalseWeight); 01479 // Emit the LHS condition. 01480 FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, 01481 NewTrueWeight, NewFalseWeight); 01482 01483 NewTrueWeight = TWeight; 01484 NewFalseWeight = 2 * (uint64_t)FWeight; 01485 ScaleWeights(NewTrueWeight, NewFalseWeight); 01486 // Emit the RHS condition into TmpBB. 01487 FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, 01488 NewTrueWeight, NewFalseWeight); 01489 } else { 01490 assert(Opc == Instruction::And && "Unknown merge op!"); 01491 // Codegen X & Y as: 01492 // BB1: 01493 // jmp_if_X TmpBB 01494 // jmp FBB 01495 // TmpBB: 01496 // jmp_if_Y TBB 01497 // jmp FBB 01498 // 01499 // This requires creation of TmpBB after CurBB. 01500 01501 // We have flexibility in setting Prob for BB1 and Prob for TmpBB. 01502 // The requirement is that 01503 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) 01504 // = FalseProb for orignal BB. 01505 // Assuming the orignal weights are A and B, one choice is to set BB1's 01506 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice 01507 // assumes that 01508 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. 01509 01510 uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; 01511 uint64_t NewFalseWeight = FWeight; 01512 ScaleWeights(NewTrueWeight, NewFalseWeight); 01513 // Emit the LHS condition. 01514 FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, 01515 NewTrueWeight, NewFalseWeight); 01516 01517 NewTrueWeight = 2 * (uint64_t)TWeight; 01518 NewFalseWeight = FWeight; 01519 ScaleWeights(NewTrueWeight, NewFalseWeight); 01520 // Emit the RHS condition into TmpBB. 
01521 FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, 01522 NewTrueWeight, NewFalseWeight); 01523 } 01524 } 01525 01526 /// If the set of cases should be emitted as a series of branches, return true. 01527 /// If we should emit this as a bunch of and/or'd together conditions, return 01528 /// false. 01529 bool 01530 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) { 01531 if (Cases.size() != 2) return true; 01532 01533 // If this is two comparisons of the same values or'd or and'd together, they 01534 // will get folded into a single comparison, so don't emit two blocks. 01535 if ((Cases[0].CmpLHS == Cases[1].CmpLHS && 01536 Cases[0].CmpRHS == Cases[1].CmpRHS) || 01537 (Cases[0].CmpRHS == Cases[1].CmpLHS && 01538 Cases[0].CmpLHS == Cases[1].CmpRHS)) { 01539 return false; 01540 } 01541 01542 // Handle: (X != null) | (Y != null) --> (X|Y) != 0 01543 // Handle: (X == null) & (Y == null) --> (X|Y) == 0 01544 if (Cases[0].CmpRHS == Cases[1].CmpRHS && 01545 Cases[0].CC == Cases[1].CC && 01546 isa<Constant>(Cases[0].CmpRHS) && 01547 cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { 01548 if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) 01549 return false; 01550 if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) 01551 return false; 01552 } 01553 01554 return true; 01555 } 01556 01557 void SelectionDAGBuilder::visitBr(const BranchInst &I) { 01558 MachineBasicBlock *BrMBB = FuncInfo.MBB; 01559 01560 // Update machine-CFG edges. 01561 MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; 01562 01563 // Figure out which block is immediately after the current one. 01564 MachineBasicBlock *NextBlock = nullptr; 01565 MachineFunction::iterator BBI = BrMBB; 01566 if (++BBI != FuncInfo.MF->end()) 01567 NextBlock = BBI; 01568 01569 if (I.isUnconditional()) { 01570 // Update machine-CFG edges. 
    BrMBB->addSuccessor(Succ0MBB);

    // If this is not a fall-through branch or optimizations are switched off,
    // emit the branch.  (At -O0 we keep explicit branches so the layout is
    // not relied upon.)
    if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None)
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
                              MVT::Other, getControlRoot(),
                              DAG.getBasicBlock(Succ0MBB)));

    return;
  }

  // If this condition is one of the special cases we handle, do special stuff
  // now.
  const Value *CondVal = I.getCondition();
  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];

  // If this is a series of conditions that are or'd or and'd together, emit
  // this as a sequence of branches instead of setcc's with and/or operations.
  // As long as jumps are not expensive, this should improve performance.
  // For example, instead of something like:
  //     cmp A, B
  //     C = seteq
  //     cmp D, E
  //     F = setle
  //     or C, F
  //     jnz foo
  // Emit:
  //     cmp A, B
  //     je foo
  //     cmp D, E
  //     jle foo
  //
  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
    if (!TM.getSubtargetImpl()->getTargetLowering()->isJumpExpensive() &&
        BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And ||
                             BOp->getOpcode() == Instruction::Or)) {
      // FindMergedConditions populates SwitchCases with one CaseBlock per
      // emitted comparison.
      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
                           BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB),
                           getEdgeWeight(BrMBB, Succ1MBB));
      // If the compares in later blocks need to use values not currently
      // exported from this block, export them now.  This block should always
      // be the first entry.
      assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");

      // Allow some cases to be rejected.
      if (ShouldEmitAsBranches(SwitchCases)) {
        for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
          ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
          ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
        }

        // Emit the branch for this block.  The remaining CaseBlocks are
        // emitted later by SDISel when it reaches their MBBs.
        visitSwitchCase(SwitchCases[0], BrMBB);
        SwitchCases.erase(SwitchCases.begin());
        return;
      }

      // Okay, we decided not to do this, remove any inserted MBB's and clear
      // SwitchCases.  Entry 0 is BrMBB itself, so start erasing at 1.
      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
        FuncInfo.MF->erase(SwitchCases[i].ThisBB);

      SwitchCases.clear();
    }
  }

  // Create a CaseBlock record representing this branch.
  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
               nullptr, Succ0MBB, Succ1MBB, BrMBB);

  // Use visitSwitchCase to actually insert the fast branch sequence for this
  // cond branch.
  visitSwitchCase(CB, BrMBB);
}

/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
                                          MachineBasicBlock *SwitchBB) {
  SDValue Cond;
  SDValue CondLHS = getValue(CB.CmpLHS);
  SDLoc dl = getCurSDLoc();

  // Build the setcc now.
  if (!CB.CmpMHS) {
    // Fold "(X == true)" to X and "(X == false)" to !X to
    // handle common cases produced by branch lowering.
    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
        CB.CC == ISD::SETEQ)
      Cond = CondLHS;
    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
             CB.CC == ISD::SETEQ) {
      // XOR with 1 implements logical-not of the i1 condition.
      SDValue True = DAG.getConstant(1, CondLHS.getValueType());
      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
    } else
      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
  } else {
    // Three-operand form: test CmpMHS against the inclusive range
    // [CmpLHS, CmpRHS].
    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");

    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
    const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();

    SDValue CmpOp = getValue(CB.CmpMHS);
    EVT VT = CmpOp.getValueType();

    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
      // Low end is the signed minimum, so a single signed upper-bound check
      // suffices.
      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
                          ISD::SETLE);
    } else {
      // Bias by Low so the range test becomes a single unsigned compare.
      SDValue SUB = DAG.getNode(ISD::SUB, dl,
                                VT, CmpOp, DAG.getConstant(Low, VT));
      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
                          DAG.getConstant(High-Low, VT), ISD::SETULE);
    }
  }

  // Update successor info
  addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
  // TrueBB and FalseBB are always different unless the incoming IR is
  // degenerate. This only happens when running llc on weird IR.
  if (CB.TrueBB != CB.FalseBB)
    addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);

  // Set NextBlock to be the MBB immediately after the current one, if any.
  // This is used to avoid emitting unnecessary branches to the next block.
  MachineBasicBlock *NextBlock = nullptr;
  MachineFunction::iterator BBI = SwitchBB;
  if (++BBI != FuncInfo.MF->end())
    NextBlock = BBI;

  // If the lhs block is the next block, invert the condition so that we can
  // fall through to the lhs instead of the rhs block.
  if (CB.TrueBB == NextBlock) {
    std::swap(CB.TrueBB, CB.FalseBB);
    SDValue True = DAG.getConstant(1, Cond.getValueType());
    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
  }

  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                               MVT::Other, getControlRoot(), Cond,
                               DAG.getBasicBlock(CB.TrueBB));

  // Insert the false branch. Do this even if it's a fall through branch,
  // this makes it easier to do DAG optimizations which require inverting
  // the branch condition.
  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
                       DAG.getBasicBlock(CB.FalseBB));

  DAG.setRoot(BrCond);
}

/// visitJumpTable - Emit JumpTable node in the current MBB
void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
  // Emit the code for the jump table
  assert(JT.Reg != -1U && "Should lower JT Header first!");
  EVT PTy = TM.getSubtargetImpl()->getTargetLowering()->getPointerTy();
  // The index was computed and copied into JT.Reg by visitJumpTableHeader.
  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
                                     JT.Reg, PTy);
  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
                                    MVT::Other, Index.getValue(1),
                                    Table, Index);
  DAG.setRoot(BrJumpTable);
}

/// visitJumpTableHeader - This function emits necessary code to produce index
/// in the JumpTable from switch case.
void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
                                               JumpTableHeader &JTH,
                                               MachineBasicBlock *SwitchBB) {
  // Subtract the lowest switch case value from the value being switched on and
  // conditional branch to default mbb if the result is greater than the
  // difference between smallest and largest cases.
  SDValue SwitchOp = getValue(JTH.SValue);
  EVT VT = SwitchOp.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp,
                            DAG.getConstant(JTH.First, VT));

  // The SDNode we just created, which holds the value being switched on minus
  // the smallest case value, needs to be copied to a virtual register so it
  // can be used as an index into the jump table in a subsequent basic block.
  // This value may be smaller or larger than the target's pointer type, and
  // therefore require extension or truncating.
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI->getPointerTy());

  unsigned JumpTableReg = FuncInfo.CreateReg(TLI->getPointerTy());
  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(),
                                    JumpTableReg, SwitchOp);
  JT.Reg = JumpTableReg;

  // Emit the range check for the jump table, and branch to the default block
  // for the switch statement if the value being switched on exceeds the largest
  // case in the switch.  Note the check uses the un-truncated Sub value.
  SDValue CMP = DAG.getSetCC(getCurSDLoc(),
                             TLI->getSetCCResultType(*DAG.getContext(),
                                                     Sub.getValueType()),
                             Sub,
                             DAG.getConstant(JTH.Last - JTH.First,VT),
                             ISD::SETUGT);

  // Set NextBlock to be the MBB immediately after the current one, if any.
  // This is used to avoid emitting unnecessary branches to the next block.
  MachineBasicBlock *NextBlock = nullptr;
  MachineFunction::iterator BBI = SwitchBB;

  if (++BBI != FuncInfo.MF->end())
    NextBlock = BBI;

  SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(),
                               MVT::Other, CopyTo, CMP,
                               DAG.getBasicBlock(JT.Default));

  // Chain the unconditional branch off BrCond so the range check stays on
  // the chain leading to the root.
  if (JT.MBB != NextBlock)
    BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond,
                         DAG.getBasicBlock(JT.MBB));

  DAG.setRoot(BrCond);
}

/// Codegen a new tail for a stack protector check ParentMBB which has had its
/// tail spliced into a stack protector check success bb.
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
                                                  MachineBasicBlock *ParentBB) {

  // First create the loads to the guard/stack slot for the comparison.
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  EVT PtrTy = TLI->getPointerTy();

  MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo();
  int FI = MFI->getStackProtectorIndex();

  const Value *IRGuard = SPD.getGuard();
  SDValue GuardPtr = getValue(IRGuard);
  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);

  unsigned Align =
    TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType());

  SDValue Guard;

  // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the
  // guard value from the virtual register holding the value. Otherwise, emit a
  // volatile load to retrieve the stack guard value.
  unsigned GuardReg = SPD.getGuardReg();

  if (GuardReg && TLI->useLoadStackGuardNode())
    Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg,
                               PtrTy);
  else
    // Volatile load: the guard value must be re-read, not CSE'd or hoisted.
    Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
                        GuardPtr, MachinePointerInfo(IRGuard, 0),
                        true, false, false, Align);

  SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
                                  StackSlotPtr,
                                  MachinePointerInfo::getFixedStack(FI),
                                  true, false, false, Align);

  // Perform the comparison via a subtract/getsetcc.
  EVT VT = Guard.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot);

  SDValue Cmp = DAG.getSetCC(getCurSDLoc(),
                             TLI->getSetCCResultType(*DAG.getContext(),
                                                     Sub.getValueType()),
                             Sub, DAG.getConstant(0, VT),
                             ISD::SETNE);

  // If the sub is not 0, then we know the guard/stackslot do not equal, so
  // branch to failure MBB.
  SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(),
                               MVT::Other, StackSlot.getOperand(0),
                               Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
  // Otherwise branch to success MBB.
  SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(),
                           MVT::Other, BrCond,
                           DAG.getBasicBlock(SPD.getSuccessMBB()));

  DAG.setRoot(Br);
}

/// Codegen the failure basic block for a stack protector check.
///
/// A failure stack protector machine basic block consists simply of a call to
/// __stack_chk_fail().
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  // makeLibCall returns {result, chain}; only the chain matters for a void
  // call that does not return.
  SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL,
                                   MVT::isVoid, nullptr, 0, false,
                                   getCurSDLoc(), false, false).second;
  DAG.setRoot(Chain);
}

/// visitBitTestHeader - This function emits necessary code to produce value
/// suitable for "bit tests"
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
                                             MachineBasicBlock *SwitchBB) {
  // Subtract the minimum value
  SDValue SwitchOp = getValue(B.SValue);
  EVT VT = SwitchOp.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp,
                            DAG.getConstant(B.First, VT));

  // Check range
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  SDValue RangeCmp = DAG.getSetCC(getCurSDLoc(),
                                  TLI->getSetCCResultType(*DAG.getContext(),
                                                          Sub.getValueType()),
                                  Sub, DAG.getConstant(B.Range, VT),
                                  ISD::SETUGT);

  // Determine the type of the test operands.
  bool UsePtrType = false;
  if (!TLI->isTypeLegal(VT))
    UsePtrType = true;
  else {
    for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
      if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
        // Switch table case range are encoded into series of masks.
        // Just use pointer type, it's guaranteed to fit.
        UsePtrType = true;
        break;
      }
  }
  if (UsePtrType) {
    VT = TLI->getPointerTy();
    Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT);
  }

  B.RegVT = VT.getSimpleVT();
  B.Reg = FuncInfo.CreateReg(B.RegVT);
  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(),
                                    B.Reg, Sub);

  // Set NextBlock to be the MBB immediately after the current one, if any.
01917 // This is used to avoid emitting unnecessary branches to the next block. 01918 MachineBasicBlock *NextBlock = nullptr; 01919 MachineFunction::iterator BBI = SwitchBB; 01920 if (++BBI != FuncInfo.MF->end()) 01921 NextBlock = BBI; 01922 01923 MachineBasicBlock* MBB = B.Cases[0].ThisBB; 01924 01925 addSuccessorWithWeight(SwitchBB, B.Default); 01926 addSuccessorWithWeight(SwitchBB, MBB); 01927 01928 SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 01929 MVT::Other, CopyTo, RangeCmp, 01930 DAG.getBasicBlock(B.Default)); 01931 01932 if (MBB != NextBlock) 01933 BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, 01934 DAG.getBasicBlock(MBB)); 01935 01936 DAG.setRoot(BrRange); 01937 } 01938 01939 /// visitBitTestCase - this function produces one "bit test" 01940 void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, 01941 MachineBasicBlock* NextMBB, 01942 uint32_t BranchWeightToNext, 01943 unsigned Reg, 01944 BitTestCase &B, 01945 MachineBasicBlock *SwitchBB) { 01946 MVT VT = BB.RegVT; 01947 SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), 01948 Reg, VT); 01949 SDValue Cmp; 01950 unsigned PopCount = CountPopulation_64(B.Mask); 01951 const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); 01952 if (PopCount == 1) { 01953 // Testing for a single bit; just compare the shift count with what it 01954 // would need to be to shift a 1 bit in that position. 01955 Cmp = DAG.getSetCC(getCurSDLoc(), 01956 TLI->getSetCCResultType(*DAG.getContext(), VT), 01957 ShiftOp, 01958 DAG.getConstant(countTrailingZeros(B.Mask), VT), 01959 ISD::SETEQ); 01960 } else if (PopCount == BB.Range) { 01961 // There is only one zero bit in the range, test for it directly. 
01962 Cmp = DAG.getSetCC(getCurSDLoc(), 01963 TLI->getSetCCResultType(*DAG.getContext(), VT), 01964 ShiftOp, 01965 DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), 01966 ISD::SETNE); 01967 } else { 01968 // Make desired shift 01969 SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, 01970 DAG.getConstant(1, VT), ShiftOp); 01971 01972 // Emit bit tests and jumps 01973 SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), 01974 VT, SwitchVal, DAG.getConstant(B.Mask, VT)); 01975 Cmp = DAG.getSetCC(getCurSDLoc(), 01976 TLI->getSetCCResultType(*DAG.getContext(), VT), 01977 AndOp, DAG.getConstant(0, VT), 01978 ISD::SETNE); 01979 } 01980 01981 // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. 01982 addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); 01983 // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. 01984 addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); 01985 01986 SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 01987 MVT::Other, getControlRoot(), 01988 Cmp, DAG.getBasicBlock(B.TargetBB)); 01989 01990 // Set NextBlock to be the MBB immediately after the current one, if any. 01991 // This is used to avoid emitting unnecessary branches to the next block. 01992 MachineBasicBlock *NextBlock = nullptr; 01993 MachineFunction::iterator BBI = SwitchBB; 01994 if (++BBI != FuncInfo.MF->end()) 01995 NextBlock = BBI; 01996 01997 if (NextMBB != NextBlock) 01998 BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, 01999 DAG.getBasicBlock(NextMBB)); 02000 02001 DAG.setRoot(BrAnd); 02002 } 02003 02004 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { 02005 MachineBasicBlock *InvokeMBB = FuncInfo.MBB; 02006 02007 // Retrieve successors. 
  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];

  const Value *Callee(I.getCalledValue());
  const Function *Fn = dyn_cast<Function>(Callee);
  if (isa<InlineAsm>(Callee))
    visitInlineAsm(&I);
  else if (Fn && Fn->isIntrinsic()) {
    // @llvm.donothing is the only intrinsic that may be invoked.
    assert(Fn->getIntrinsicID() == Intrinsic::donothing);
    // Ignore invokes to @llvm.donothing: jump directly to the next BB.
  } else
    LowerCallTo(&I, getValue(Callee), false, LandingPad);

  // If the value of the invoke is used outside of its defining block, make it
  // available as a virtual register.
  CopyToExportRegsIfNeeded(&I);

  // Update successor info
  addSuccessorWithWeight(InvokeMBB, Return);
  addSuccessorWithWeight(InvokeMBB, LandingPad);

  // Drop into normal successor.
  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
                          MVT::Other, getControlRoot(),
                          DAG.getBasicBlock(Return)));
}

void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
  // 'resume' is rewritten earlier (e.g. by DwarfEHPrepare) and must not
  // reach instruction selection.
  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}

/// Lower a landingpad instruction: record EH info and materialize the
/// exception pointer/selector values from their live-in virtual registers.
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
  assert(FuncInfo.MBB->isLandingPad() &&
         "Call to landingpad not in landing pad!");

  MachineBasicBlock *MBB = FuncInfo.MBB;
  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
  AddLandingPadInfo(LP, MMI, MBB);

  // If there aren't registers to copy the values into (e.g., during SjLj
  // exceptions), then don't bother to create these DAG nodes.
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  if (TLI->getExceptionPointerRegister() == 0 &&
      TLI->getExceptionSelectorRegister() == 0)
    return;

  SmallVector<EVT, 2> ValueVTs;
  ComputeValueVTs(*TLI, LP.getType(), ValueVTs);
  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");

  // Get the two live-in registers as SDValues. The physregs have already been
  // copied into virtual registers.
  SDValue Ops[2];
  Ops[0] = DAG.getZExtOrTrunc(
      DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
                         FuncInfo.ExceptionPointerVirtReg, TLI->getPointerTy()),
      getCurSDLoc(), ValueVTs[0]);
  Ops[1] = DAG.getZExtOrTrunc(
      DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
                         FuncInfo.ExceptionSelectorVirtReg, TLI->getPointerTy()),
      getCurSDLoc(), ValueVTs[1]);

  // Merge into one.
  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                            DAG.getVTList(ValueVTs), Ops);
  setValue(&LP, Res);
}

/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
/// small case ranges).
bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
                                                 CaseRecVector& WorkList,
                                                 const Value* SV,
                                                 MachineBasicBlock *Default,
                                                 MachineBasicBlock *SwitchBB) {
  // Size is the number of Cases represented by this range.
  size_t Size = CR.Range.second - CR.Range.first;
  if (Size > 3)
    return false;

  // Get the MachineFunction which holds the current MBB. This is used when
  // inserting any additional MBBs necessary to represent the switch.
  MachineFunction *CurMF = FuncInfo.MF;

  // Figure out which block is immediately after the current one.
  MachineBasicBlock *NextBlock = nullptr;
  MachineFunction::iterator BBI = CR.CaseBB;

  if (++BBI != FuncInfo.MF->end())
    NextBlock = BBI;

  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  // If any two of the cases has the same destination, and if one value
  // is the same as the other, but has one bit unset that the other has set,
  // use bit manipulation to do two compares at once.  For example:
  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
  // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
  // TODO: Handle cases where CR.CaseBB != SwitchBB.
  if (Size == 2 && CR.CaseBB == SwitchBB) {
    Case &Small = *CR.Range.first;
    Case &Big = *(CR.Range.second-1);

    if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
      const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
      const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();

      // Check that there is only one bit different.
      if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
          (SmallValue | BigValue) == BigValue) {
        // Isolate the common bit.
        APInt CommonBit = BigValue & ~SmallValue;
        assert((SmallValue | CommonBit) == BigValue &&
               CommonBit.countPopulation() == 1 && "Not a common bit?");

        SDValue CondLHS = getValue(SV);
        EVT VT = CondLHS.getValueType();
        SDLoc DL = getCurSDLoc();

        // (X | CommonBit) == BigValue tests both case values at once.
        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
                                 DAG.getConstant(CommonBit, VT));
        SDValue Cond = DAG.getSetCC(DL, MVT::i1,
                                    Or, DAG.getConstant(BigValue, VT),
                                    ISD::SETEQ);

        // Update successor info.
        // Both Small and Big will jump to Small.BB, so we sum up the weights.
        addSuccessorWithWeight(SwitchBB, Small.BB,
                               Small.ExtraWeight + Big.ExtraWeight);
        addSuccessorWithWeight(SwitchBB, Default,
          // The default destination is the first successor in IR.
          BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);

        // Insert the true branch.
        SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
                                     getControlRoot(), Cond,
                                     DAG.getBasicBlock(Small.BB));

        // Insert the false branch.
        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
                             DAG.getBasicBlock(Default));

        DAG.setRoot(BrCond);
        return true;
      }
    }
  }

  // Order cases by weight so the most likely case will be checked first.
  // Simple O(n^2) insertion sort is fine: Size is at most 3 here.
  uint32_t UnhandledWeights = 0;
  if (BPI) {
    for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
      uint32_t IWeight = I->ExtraWeight;
      UnhandledWeights += IWeight;
      for (CaseItr J = CR.Range.first; J < I; ++J) {
        uint32_t JWeight = J->ExtraWeight;
        if (IWeight > JWeight)
          std::swap(*I, *J);
      }
    }
  }
  // Rearrange the case blocks so that the last one falls through if possible.
  Case &BackCase = *(CR.Range.second-1);
  if (Size > 1 &&
      NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
    // The last case block won't fall through into 'NextBlock' if we emit the
    // branches in this order.  See if rearranging a case value would help.
    // We start at the bottom as it's the case with the least weight.
    for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I)
      if (I->BB == NextBlock) {
        std::swap(*I, BackCase);
        break;
      }
  }

  // Create a CaseBlock record representing a conditional branch to
  // the Case's target mbb if the value being switched on SV is equal
  // to C.
  MachineBasicBlock *CurBlock = CR.CaseBB;
  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
    MachineBasicBlock *FallThrough;
    if (I != E-1) {
      FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
      CurMF->insert(BBI, FallThrough);

      // Put SV in a virtual register to make it available from the new blocks.
      ExportFromCurrentBlock(SV);
    } else {
      // If the last case doesn't match, go to the default block.
      FallThrough = Default;
    }

    const Value *RHS, *LHS, *MHS;
    ISD::CondCode CC;
    if (I->High == I->Low) {
      // This is just a small case range :) containing exactly 1 case
      CC = ISD::SETEQ;
      LHS = SV; RHS = I->High; MHS = nullptr;
    } else {
      CC = ISD::SETLE;
      LHS = I->Low; MHS = SV; RHS = I->High;
    }

    // The false weight should be sum of all un-handled cases.
    UnhandledWeights -= I->ExtraWeight;
    CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
                 /* me */ CurBlock,
                 /* trueweight */ I->ExtraWeight,
                 /* falseweight */ UnhandledWeights);

    // If emitting the first comparison, just call visitSwitchCase to emit the
    // code into the current block.  Otherwise, push the CaseBlock onto the
    // vector to be later processed by SDISel, and insert the node's MBB
    // before the next MBB.
    if (CurBlock == SwitchBB)
      visitSwitchCase(CB, SwitchBB);
    else
      SwitchCases.push_back(CB);

    CurBlock = FallThrough;
  }

  return true;
}

/// Return true if the target can lower a jump table (either directly via
/// BR_JT or through an indirect branch).
static inline bool areJTsAllowed(const TargetLowering &TLI) {
  return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
         TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
}

/// Number of values in the inclusive range [First, Last], computed in a
/// width wide enough (signed-extended + 1 bit) to avoid overflow.
static APInt ComputeRange(const APInt &First, const APInt &Last) {
  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
  APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
  return (LastExt - FirstExt + 1ULL);
}

/// handleJTSwitchCase - Emit jumptable for current switch case range
bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
                                             CaseRecVector &WorkList,
                                             const Value *SV,
                                             MachineBasicBlock *Default,
                                             MachineBasicBlock *SwitchBB) {
  Case& FrontCase = *CR.Range.first;
  Case& BackCase = *(CR.Range.second-1);

  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
  const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();

  APInt TSize(First.getBitWidth(), 0);
  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
    TSize += I->size();

  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  if (!areJTsAllowed(*TLI) || TSize.ult(TLI->getMinimumJumpTableEntries()))
    return false;

  APInt Range = ComputeRange(First, Last);
  // The density is TSize / Range. Require at least 40%.
  // It should not be possible for IntTSize to saturate for sane code, but make
  // sure we handle Range saturation correctly.
  // Clamp both to UINT64_MAX/10 so the *10 / *4 below cannot overflow.
  uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
  uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
  if (IntTSize * 10 < IntRange * 4)
    return false;

  DEBUG(dbgs() << "Lowering jump table\n"
               << "First entry: " << First << ". Last entry: " << Last << '\n'
               << "Range: " << Range << ". Size: " << TSize << ".\n\n");

  // Get the MachineFunction which holds the current MBB.  This is used when
  // inserting any additional MBBs necessary to represent the switch.
  MachineFunction *CurMF = FuncInfo.MF;

  // Figure out which block is immediately after the current one.
  MachineFunction::iterator BBI = CR.CaseBB;
  ++BBI;

  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();

  // Create a new basic block to hold the code for loading the address
  // of the jump table, and jumping to it.  Update successor information;
  // we will either branch to the default case for the switch, or the jump
  // table.
  MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
  CurMF->insert(BBI, JumpTableBB);

  addSuccessorWithWeight(CR.CaseBB, Default);
  addSuccessorWithWeight(CR.CaseBB, JumpTableBB);

  // Build a vector of destination BBs, corresponding to each target
  // of the jump table.  If the value of the jump table slot corresponds to
  // a case statement, push the case's BB onto the vector, otherwise, push
  // the default BB.  Note I only advances once TEI passes a case's High.
  std::vector<MachineBasicBlock*> DestBBs;
  APInt TEI = First;
  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
    const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
    const APInt &High = cast<ConstantInt>(I->High)->getValue();

    if (Low.sle(TEI) && TEI.sle(High)) {
      DestBBs.push_back(I->BB);
      if (TEI==High)
        ++I;
    } else {
      DestBBs.push_back(Default);
    }
  }

  // Calculate weight for each unique destination in CR.
  DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
  if (FuncInfo.BPI)
    for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
      DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
          DestWeights.find(I->BB);
      if (Itr != DestWeights.end())
        Itr->second += I->ExtraWeight;
      else
        DestWeights[I->BB] = I->ExtraWeight;
    }

  // Update successor info. Add one edge to each unique successor.
  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
         E = DestBBs.end(); I != E; ++I) {
    if (!SuccsHandled[(*I)->getNumber()]) {
      SuccsHandled[(*I)->getNumber()] = true;
      DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
          DestWeights.find(*I);
      addSuccessorWithWeight(JumpTableBB, *I,
                             Itr != DestWeights.end() ? Itr->second : 0);
    }
  }

  // Create a jump table index for this jump table.
  unsigned JTEncoding = TLI->getJumpTableEncoding();
  unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
                       ->createJumpTableIndex(DestBBs);

  // Set the jump table information so that we can codegen it as a second
  // MachineBasicBlock.  JT.Reg (-1U) is filled in by visitJumpTableHeader.
  JumpTable JT(-1U, JTI, JumpTableBB, Default);
  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
  if (CR.CaseBB == SwitchBB)
    visitJumpTableHeader(JT, JTH, SwitchBB);

  JTCases.push_back(JumpTableBlock(JTH, JT));
  return true;
}

/// handleBTSplitSwitchCase - emit comparison and split binary search tree into
/// 2 subtrees.
bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
                                                  CaseRecVector& WorkList,
                                                  const Value* SV,
                                                  MachineBasicBlock* Default,
                                                  MachineBasicBlock* SwitchBB) {
  // Get the MachineFunction which holds the current MBB.  This is used when
  // inserting any additional MBBs necessary to represent the switch.
  MachineFunction *CurMF = FuncInfo.MF;

  // Figure out which block is immediately after the current one.
  MachineFunction::iterator BBI = CR.CaseBB;
  ++BBI;

  Case& FrontCase = *CR.Range.first;
  Case& BackCase = *(CR.Range.second-1);
  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();

  // Size is the number of Cases represented by this range.
  unsigned Size = CR.Range.second - CR.Range.first;

  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
  const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
  double FMetric = 0;
  CaseItr Pivot = CR.Range.first + Size/2;

  // Select optimal pivot, maximizing sum density of LHS and RHS. This will
  // (heuristically) allow us to emit JumpTable's later.
  APInt TSize(First.getBitWidth(), 0);
  for (CaseItr I = CR.Range.first, E = CR.Range.second;
       I!=E; ++I)
    TSize += I->size();

  APInt LSize = FrontCase.size();
  APInt RSize = TSize-LSize;
  DEBUG(dbgs() << "Selecting best pivot: \n"
               << "First: " << First << ", Last: " << Last <<'\n'
               << "LSize: " << LSize << ", RSize: " << RSize << '\n');
  // Try every split point between adjacent cases; keep the one with the
  // highest metric (gap size weighted by the densities of the two halves).
  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
       J!=E; ++I, ++J) {
    const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
    const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
    APInt Range = ComputeRange(LEnd, RBegin);
    assert((Range - 2ULL).isNonNegative() &&
           "Invalid case distance");
    // Use volatile double here to avoid excess precision issues on some hosts,
    // e.g. that use 80-bit X87 registers.
    volatile double LDensity =
       (double)LSize.roundToDouble() /
                           (LEnd - First + 1ULL).roundToDouble();
    volatile double RDensity =
      (double)RSize.roundToDouble() /
                           (Last - RBegin + 1ULL).roundToDouble();
    volatile double Metric = Range.logBase2()*(LDensity+RDensity);
    // Should always split in some non-trivial place
    DEBUG(dbgs() <<"=>Step\n"
                 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
                 << "LDensity: " << LDensity
                 << ", RDensity: " << RDensity << '\n'
                 << "Metric: " << Metric << '\n');
    if (FMetric < Metric) {
      Pivot = J;
      FMetric = Metric;
      DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
    }

    LSize += J->size();
    RSize -= J->size();
  }

  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  if (areJTsAllowed(*TLI)) {
    // If our case is dense we *really* should handle it earlier!
    assert((FMetric > 0) && "Should handle dense range earlier!");
  } else {
    // No jump tables possible, so density doesn't matter: split in the middle.
    Pivot = CR.Range.first + Size/2;
  }

  CaseRange LHSR(CR.Range.first, Pivot);
  CaseRange RHSR(Pivot, CR.Range.second);
  const Constant *C = Pivot->Low;
  MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr;

  // We know that we branch to the LHS if the Value being switched on is
  // less than the Pivot value, C.  We use this to optimize our binary
  // tree a bit, by recognizing that if SV is greater than or equal to the
  // LHS's Case Value, and that Case Value is exactly one less than the
  // Pivot's Value, then we can branch directly to the LHS's Target,
  // rather than creating a leaf node for it.
  if ((LHSR.second - LHSR.first) == 1 &&
      LHSR.first->High == CR.GE &&
      cast<ConstantInt>(C)->getValue() ==
      (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
    TrueBB = LHSR.first->BB;
  } else {
    TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
    CurMF->insert(BBI, TrueBB);
    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));

    // Put SV in a virtual register to make it available from the new blocks.
    ExportFromCurrentBlock(SV);
  }

  // Similar to the optimization above, if the Value being switched on is
  // known to be less than the Constant CR.LT, and the current Case Value
  // is CR.LT - 1, then we can branch directly to the target block for
  // the current Case Value, rather than emitting a RHS leaf node for it.
  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
      cast<ConstantInt>(RHSR.first->Low)->getValue() ==
      (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
    FalseBB = RHSR.first->BB;
  } else {
    FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
    CurMF->insert(BBI, FalseBB);
    WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));

    // Put SV in a virtual register to make it available from the new blocks.
    ExportFromCurrentBlock(SV);
  }

  // Create a CaseBlock record representing a conditional branch to
  // the LHS node if the value being switched on SV is less than C.
  // Otherwise, branch to RHS.
  CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB);

  if (CR.CaseBB == SwitchBB)
    visitSwitchCase(CB, SwitchBB);
  else
    SwitchCases.push_back(CB);

  return true;
}

/// handleBitTestsSwitchCase - if current case range has few destination and
/// range span less, than machine word bitwidth, encode case range into series
/// of masks and emit bit tests with these masks.
bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
                                                   CaseRecVector& WorkList,
                                                   const Value* SV,
                                                   MachineBasicBlock* Default,
                                                   MachineBasicBlock* SwitchBB) {
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  EVT PTy = TLI->getPointerTy();
  unsigned IntPtrBits = PTy.getSizeInBits();

  Case& FrontCase = *CR.Range.first;
  Case& BackCase = *(CR.Range.second-1);

  // Get the MachineFunction which holds the current MBB.  This is used when
  // inserting any additional MBBs necessary to represent the switch.
  MachineFunction *CurMF = FuncInfo.MF;

  // If target does not have legal shift left, do not emit bit tests at all.
02510 if (!TLI->isOperationLegal(ISD::SHL, PTy)) 02511 return false; 02512 02513 size_t numCmps = 0; 02514 for (CaseItr I = CR.Range.first, E = CR.Range.second; 02515 I!=E; ++I) { 02516 // Single case counts one, case range - two. 02517 numCmps += (I->Low == I->High ? 1 : 2); 02518 } 02519 02520 // Count unique destinations 02521 SmallSet<MachineBasicBlock*, 4> Dests; 02522 for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { 02523 Dests.insert(I->BB); 02524 if (Dests.size() > 3) 02525 // Don't bother the code below, if there are too much unique destinations 02526 return false; 02527 } 02528 DEBUG(dbgs() << "Total number of unique destinations: " 02529 << Dests.size() << '\n' 02530 << "Total number of comparisons: " << numCmps << '\n'); 02531 02532 // Compute span of values. 02533 const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue(); 02534 const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue(); 02535 APInt cmpRange = maxValue - minValue; 02536 02537 DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' 02538 << "Low bound: " << minValue << '\n' 02539 << "High bound: " << maxValue << '\n'); 02540 02541 if (cmpRange.uge(IntPtrBits) || 02542 (!(Dests.size() == 1 && numCmps >= 3) && 02543 !(Dests.size() == 2 && numCmps >= 5) && 02544 !(Dests.size() >= 3 && numCmps >= 6))) 02545 return false; 02546 02547 DEBUG(dbgs() << "Emitting bit tests\n"); 02548 APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); 02549 02550 // Optimize the case where all the case values fit in a 02551 // word without having to subtract minValue. In this case, 02552 // we can optimize away the subtraction. 
02553 if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { 02554 cmpRange = maxValue; 02555 } else { 02556 lowBound = minValue; 02557 } 02558 02559 CaseBitsVector CasesBits; 02560 unsigned i, count = 0; 02561 02562 for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { 02563 MachineBasicBlock* Dest = I->BB; 02564 for (i = 0; i < count; ++i) 02565 if (Dest == CasesBits[i].BB) 02566 break; 02567 02568 if (i == count) { 02569 assert((count < 3) && "Too much destinations to test!"); 02570 CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/)); 02571 count++; 02572 } 02573 02574 const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue(); 02575 const APInt& highValue = cast<ConstantInt>(I->High)->getValue(); 02576 02577 uint64_t lo = (lowValue - lowBound).getZExtValue(); 02578 uint64_t hi = (highValue - lowBound).getZExtValue(); 02579 CasesBits[i].ExtraWeight += I->ExtraWeight; 02580 02581 for (uint64_t j = lo; j <= hi; j++) { 02582 CasesBits[i].Mask |= 1ULL << j; 02583 CasesBits[i].Bits++; 02584 } 02585 02586 } 02587 std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp()); 02588 02589 BitTestInfo BTC; 02590 02591 // Figure out which block is immediately after the current one. 02592 MachineFunction::iterator BBI = CR.CaseBB; 02593 ++BBI; 02594 02595 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); 02596 02597 DEBUG(dbgs() << "Cases:\n"); 02598 for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { 02599 DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask 02600 << ", Bits: " << CasesBits[i].Bits 02601 << ", BB: " << CasesBits[i].BB << '\n'); 02602 02603 MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); 02604 CurMF->insert(BBI, CaseBB); 02605 BTC.push_back(BitTestCase(CasesBits[i].Mask, 02606 CaseBB, 02607 CasesBits[i].BB, CasesBits[i].ExtraWeight)); 02608 02609 // Put SV in a virtual register to make it available from the new blocks. 
02610 ExportFromCurrentBlock(SV); 02611 } 02612 02613 BitTestBlock BTB(lowBound, cmpRange, SV, 02614 -1U, MVT::Other, (CR.CaseBB == SwitchBB), 02615 CR.CaseBB, Default, BTC); 02616 02617 if (CR.CaseBB == SwitchBB) 02618 visitBitTestHeader(BTB, SwitchBB); 02619 02620 BitTestCases.push_back(BTB); 02621 02622 return true; 02623 } 02624 02625 /// Clusterify - Transform simple list of Cases into list of CaseRange's 02626 size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, 02627 const SwitchInst& SI) { 02628 size_t numCmps = 0; 02629 02630 BranchProbabilityInfo *BPI = FuncInfo.BPI; 02631 // Start with "simple" cases 02632 for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); 02633 i != e; ++i) { 02634 const BasicBlock *SuccBB = i.getCaseSuccessor(); 02635 MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; 02636 02637 uint32_t ExtraWeight = 02638 BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0; 02639 02640 Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), 02641 SMBB, ExtraWeight)); 02642 } 02643 std::sort(Cases.begin(), Cases.end(), CaseCmp()); 02644 02645 // Merge case into clusters 02646 if (Cases.size() >= 2) 02647 // Must recompute end() each iteration because it may be 02648 // invalidated by erase if we hold on to it 02649 for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); 02650 J != Cases.end(); ) { 02651 const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); 02652 const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); 02653 MachineBasicBlock* nextBB = J->BB; 02654 MachineBasicBlock* currentBB = I->BB; 02655 02656 // If the two neighboring cases go to the same destination, merge them 02657 // into a single case. 
02658 if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { 02659 I->High = J->High; 02660 I->ExtraWeight += J->ExtraWeight; 02661 J = Cases.erase(J); 02662 } else { 02663 I = J++; 02664 } 02665 } 02666 02667 for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { 02668 if (I->Low != I->High) 02669 // A range counts double, since it requires two compares. 02670 ++numCmps; 02671 } 02672 02673 return numCmps; 02674 } 02675 02676 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, 02677 MachineBasicBlock *Last) { 02678 // Update JTCases. 02679 for (unsigned i = 0, e = JTCases.size(); i != e; ++i) 02680 if (JTCases[i].first.HeaderBB == First) 02681 JTCases[i].first.HeaderBB = Last; 02682 02683 // Update BitTestCases. 02684 for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) 02685 if (BitTestCases[i].Parent == First) 02686 BitTestCases[i].Parent = Last; 02687 } 02688 02689 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { 02690 MachineBasicBlock *SwitchMBB = FuncInfo.MBB; 02691 02692 // Figure out which block is immediately after the current one. 02693 MachineBasicBlock *NextBlock = nullptr; 02694 MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; 02695 02696 // If there is only the default destination, branch to it if it is not the 02697 // next basic block. Otherwise, just fall through. 02698 if (!SI.getNumCases()) { 02699 // Update machine-CFG edges. 02700 02701 // If this is not a fall-through branch, emit the branch. 02702 SwitchMBB->addSuccessor(Default); 02703 if (Default != NextBlock) 02704 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), 02705 MVT::Other, getControlRoot(), 02706 DAG.getBasicBlock(Default))); 02707 02708 return; 02709 } 02710 02711 // If there are any non-default case statements, create a vector of Cases 02712 // representing each one, and sort the vector so that we can efficiently 02713 // create a binary search tree from them. 
02714 CaseVector Cases; 02715 size_t numCmps = Clusterify(Cases, SI); 02716 DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() 02717 << ". Total compares: " << numCmps << '\n'); 02718 (void)numCmps; 02719 02720 // Get the Value to be switched on and default basic blocks, which will be 02721 // inserted into CaseBlock records, representing basic blocks in the binary 02722 // search tree. 02723 const Value *SV = SI.getCondition(); 02724 02725 // Push the initial CaseRec onto the worklist 02726 CaseRecVector WorkList; 02727 WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr, 02728 CaseRange(Cases.begin(),Cases.end()))); 02729 02730 while (!WorkList.empty()) { 02731 // Grab a record representing a case range to process off the worklist 02732 CaseRec CR = WorkList.back(); 02733 WorkList.pop_back(); 02734 02735 if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) 02736 continue; 02737 02738 // If the range has few cases (two or less) emit a series of specific 02739 // tests. 02740 if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) 02741 continue; 02742 02743 // If the switch has more than N blocks, and is at least 40% dense, and the 02744 // target supports indirect branches, then emit a jump table rather than 02745 // lowering the switch to a binary tree of conditional branches. 02746 // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries(). 02747 if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) 02748 continue; 02749 02750 // Emit binary tree. We need to pick a pivot, and push left and right ranges 02751 // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. 02752 handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB); 02753 } 02754 } 02755 02756 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { 02757 MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; 02758 02759 // Update machine-CFG edges with unique successors. 
02760 SmallSet<BasicBlock*, 32> Done; 02761 for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { 02762 BasicBlock *BB = I.getSuccessor(i); 02763 bool Inserted = Done.insert(BB); 02764 if (!Inserted) 02765 continue; 02766 02767 MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; 02768 addSuccessorWithWeight(IndirectBrMBB, Succ); 02769 } 02770 02771 DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), 02772 MVT::Other, getControlRoot(), 02773 getValue(I.getAddress()))); 02774 } 02775 02776 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { 02777 if (DAG.getTarget().Options.TrapUnreachable) 02778 DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); 02779 } 02780 02781 void SelectionDAGBuilder::visitFSub(const User &I) { 02782 // -0.0 - X --> fneg 02783 Type *Ty = I.getType(); 02784 if (isa<Constant>(I.getOperand(0)) && 02785 I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { 02786 SDValue Op2 = getValue(I.getOperand(1)); 02787 setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), 02788 Op2.getValueType(), Op2)); 02789 return; 02790 } 02791 02792 visitBinary(I, ISD::FSUB); 02793 } 02794 02795 void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { 02796 SDValue Op1 = getValue(I.getOperand(0)); 02797 SDValue Op2 = getValue(I.getOperand(1)); 02798 02799 bool nuw = false; 02800 bool nsw = false; 02801 bool exact = false; 02802 if (const OverflowingBinaryOperator *OFBinOp = 02803 dyn_cast<const OverflowingBinaryOperator>(&I)) { 02804 nuw = OFBinOp->hasNoUnsignedWrap(); 02805 nsw = OFBinOp->hasNoSignedWrap(); 02806 } 02807 if (const PossiblyExactOperator *ExactOp = 02808 dyn_cast<const PossiblyExactOperator>(&I)) 02809 exact = ExactOp->isExact(); 02810 02811 SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), 02812 Op1, Op2, nuw, nsw, exact); 02813 setValue(&I, BinNodeValue); 02814 } 02815 02816 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { 02817 
SDValue Op1 = getValue(I.getOperand(0)); 02818 SDValue Op2 = getValue(I.getOperand(1)); 02819 02820 EVT ShiftTy = TM.getSubtargetImpl()->getTargetLowering()->getShiftAmountTy( 02821 Op2.getValueType()); 02822 02823 // Coerce the shift amount to the right type if we can. 02824 if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { 02825 unsigned ShiftSize = ShiftTy.getSizeInBits(); 02826 unsigned Op2Size = Op2.getValueType().getSizeInBits(); 02827 SDLoc DL = getCurSDLoc(); 02828 02829 // If the operand is smaller than the shift count type, promote it. 02830 if (ShiftSize > Op2Size) 02831 Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); 02832 02833 // If the operand is larger than the shift count type but the shift 02834 // count type has enough bits to represent any shift value, truncate 02835 // it now. This is a common case and it exposes the truncate to 02836 // optimization early. 02837 else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) 02838 Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); 02839 // Otherwise we'll need to temporarily settle for some other convenient 02840 // type. Type legalization will make adjustments once the shiftee is split. 
02841 else 02842 Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); 02843 } 02844 02845 bool nuw = false; 02846 bool nsw = false; 02847 bool exact = false; 02848 02849 if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) { 02850 02851 if (const OverflowingBinaryOperator *OFBinOp = 02852 dyn_cast<const OverflowingBinaryOperator>(&I)) { 02853 nuw = OFBinOp->hasNoUnsignedWrap(); 02854 nsw = OFBinOp->hasNoSignedWrap(); 02855 } 02856 if (const PossiblyExactOperator *ExactOp = 02857 dyn_cast<const PossiblyExactOperator>(&I)) 02858 exact = ExactOp->isExact(); 02859 } 02860 02861 SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, 02862 nuw, nsw, exact); 02863 setValue(&I, Res); 02864 } 02865 02866 void SelectionDAGBuilder::visitSDiv(const User &I) { 02867 SDValue Op1 = getValue(I.getOperand(0)); 02868 SDValue Op2 = getValue(I.getOperand(1)); 02869 02870 // Turn exact SDivs into multiplications. 02871 // FIXME: This should be in DAGCombiner, but it doesn't have access to the 02872 // exact bit. 
02873 if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && 02874 !isa<ConstantSDNode>(Op1) && 02875 isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) 02876 setValue(&I, TM.getSubtargetImpl()->getTargetLowering()->BuildExactSDIV( 02877 Op1, Op2, getCurSDLoc(), DAG)); 02878 else 02879 setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), 02880 Op1, Op2)); 02881 } 02882 02883 void SelectionDAGBuilder::visitICmp(const User &I) { 02884 ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; 02885 if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) 02886 predicate = IC->getPredicate(); 02887 else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) 02888 predicate = ICmpInst::Predicate(IC->getPredicate()); 02889 SDValue Op1 = getValue(I.getOperand(0)); 02890 SDValue Op2 = getValue(I.getOperand(1)); 02891 ISD::CondCode Opcode = getICmpCondCode(predicate); 02892 02893 EVT DestVT = 02894 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 02895 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); 02896 } 02897 02898 void SelectionDAGBuilder::visitFCmp(const User &I) { 02899 FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; 02900 if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) 02901 predicate = FC->getPredicate(); 02902 else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) 02903 predicate = FCmpInst::Predicate(FC->getPredicate()); 02904 SDValue Op1 = getValue(I.getOperand(0)); 02905 SDValue Op2 = getValue(I.getOperand(1)); 02906 ISD::CondCode Condition = getFCmpCondCode(predicate); 02907 if (TM.Options.NoNaNsFPMath) 02908 Condition = getFCmpCodeWithoutNaN(Condition); 02909 EVT DestVT = 02910 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 02911 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); 02912 } 02913 02914 void SelectionDAGBuilder::visitSelect(const User &I) { 02915 SmallVector<EVT, 4> ValueVTs; 02916 
ComputeValueVTs(*TM.getSubtargetImpl()->getTargetLowering(), I.getType(), 02917 ValueVTs); 02918 unsigned NumValues = ValueVTs.size(); 02919 if (NumValues == 0) return; 02920 02921 SmallVector<SDValue, 4> Values(NumValues); 02922 SDValue Cond = getValue(I.getOperand(0)); 02923 SDValue TrueVal = getValue(I.getOperand(1)); 02924 SDValue FalseVal = getValue(I.getOperand(2)); 02925 ISD::NodeType OpCode = Cond.getValueType().isVector() ? 02926 ISD::VSELECT : ISD::SELECT; 02927 02928 for (unsigned i = 0; i != NumValues; ++i) 02929 Values[i] = DAG.getNode(OpCode, getCurSDLoc(), 02930 TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), 02931 Cond, 02932 SDValue(TrueVal.getNode(), 02933 TrueVal.getResNo() + i), 02934 SDValue(FalseVal.getNode(), 02935 FalseVal.getResNo() + i)); 02936 02937 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 02938 DAG.getVTList(ValueVTs), Values)); 02939 } 02940 02941 void SelectionDAGBuilder::visitTrunc(const User &I) { 02942 // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 02943 SDValue N = getValue(I.getOperand(0)); 02944 EVT DestVT = 02945 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 02946 setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); 02947 } 02948 02949 void SelectionDAGBuilder::visitZExt(const User &I) { 02950 // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). 02951 // ZExt also can't be a cast to bool for same reason. So, nothing much to do 02952 SDValue N = getValue(I.getOperand(0)); 02953 EVT DestVT = 02954 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 02955 setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); 02956 } 02957 02958 void SelectionDAGBuilder::visitSExt(const User &I) { 02959 // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). 02960 // SExt also can't be a cast to bool for same reason. 
So, nothing much to do 02961 SDValue N = getValue(I.getOperand(0)); 02962 EVT DestVT = 02963 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 02964 setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); 02965 } 02966 02967 void SelectionDAGBuilder::visitFPTrunc(const User &I) { 02968 // FPTrunc is never a no-op cast, no need to check 02969 SDValue N = getValue(I.getOperand(0)); 02970 const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); 02971 EVT DestVT = TLI->getValueType(I.getType()); 02972 setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), 02973 DestVT, N, 02974 DAG.getTargetConstant(0, TLI->getPointerTy()))); 02975 } 02976 02977 void SelectionDAGBuilder::visitFPExt(const User &I) { 02978 // FPExt is never a no-op cast, no need to check 02979 SDValue N = getValue(I.getOperand(0)); 02980 EVT DestVT = 02981 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 02982 setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); 02983 } 02984 02985 void SelectionDAGBuilder::visitFPToUI(const User &I) { 02986 // FPToUI is never a no-op cast, no need to check 02987 SDValue N = getValue(I.getOperand(0)); 02988 EVT DestVT = 02989 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 02990 setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); 02991 } 02992 02993 void SelectionDAGBuilder::visitFPToSI(const User &I) { 02994 // FPToSI is never a no-op cast, no need to check 02995 SDValue N = getValue(I.getOperand(0)); 02996 EVT DestVT = 02997 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 02998 setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); 02999 } 03000 03001 void SelectionDAGBuilder::visitUIToFP(const User &I) { 03002 // UIToFP is never a no-op cast, no need to check 03003 SDValue N = getValue(I.getOperand(0)); 03004 EVT DestVT = 03005 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 03006 
setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); 03007 } 03008 03009 void SelectionDAGBuilder::visitSIToFP(const User &I) { 03010 // SIToFP is never a no-op cast, no need to check 03011 SDValue N = getValue(I.getOperand(0)); 03012 EVT DestVT = 03013 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 03014 setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); 03015 } 03016 03017 void SelectionDAGBuilder::visitPtrToInt(const User &I) { 03018 // What to do depends on the size of the integer and the size of the pointer. 03019 // We can either truncate, zero extend, or no-op, accordingly. 03020 SDValue N = getValue(I.getOperand(0)); 03021 EVT DestVT = 03022 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 03023 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); 03024 } 03025 03026 void SelectionDAGBuilder::visitIntToPtr(const User &I) { 03027 // What to do depends on the size of the integer and the size of the pointer. 03028 // We can either truncate, zero extend, or no-op, accordingly. 03029 SDValue N = getValue(I.getOperand(0)); 03030 EVT DestVT = 03031 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 03032 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); 03033 } 03034 03035 void SelectionDAGBuilder::visitBitCast(const User &I) { 03036 SDValue N = getValue(I.getOperand(0)); 03037 EVT DestVT = 03038 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 03039 03040 // BitCast assures us that source and destination are the same size so this is 03041 // either a BITCAST or a no-op. 03042 if (DestVT != N.getValueType()) 03043 setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), 03044 DestVT, N)); // convert types. 03045 // Check if the original LLVM IR Operand was a ConstantInt, because getValue() 03046 // might fold any kind of constant expression to an integer constant and that 03047 // is not what we are looking for. 
Only regcognize a bitcast of a genuine 03048 // constant integer as an opaque constant. 03049 else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) 03050 setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, 03051 /*isOpaque*/true)); 03052 else 03053 setValue(&I, N); // noop cast. 03054 } 03055 03056 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { 03057 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 03058 const Value *SV = I.getOperand(0); 03059 SDValue N = getValue(SV); 03060 EVT DestVT = 03061 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 03062 03063 unsigned SrcAS = SV->getType()->getPointerAddressSpace(); 03064 unsigned DestAS = I.getType()->getPointerAddressSpace(); 03065 03066 if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) 03067 N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); 03068 03069 setValue(&I, N); 03070 } 03071 03072 void SelectionDAGBuilder::visitInsertElement(const User &I) { 03073 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 03074 SDValue InVec = getValue(I.getOperand(0)); 03075 SDValue InVal = getValue(I.getOperand(1)); 03076 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), 03077 getCurSDLoc(), TLI.getVectorIdxTy()); 03078 setValue(&I, 03079 DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), 03080 TM.getSubtargetImpl()->getTargetLowering()->getValueType( 03081 I.getType()), 03082 InVec, InVal, InIdx)); 03083 } 03084 03085 void SelectionDAGBuilder::visitExtractElement(const User &I) { 03086 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 03087 SDValue InVec = getValue(I.getOperand(0)); 03088 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), 03089 getCurSDLoc(), TLI.getVectorIdxTy()); 03090 setValue(&I, 03091 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), 03092 TM.getSubtargetImpl()->getTargetLowering()->getValueType( 03093 I.getType()), 03094 InVec, InIdx)); 03095 } 03096 03097 // Utility for 
visitShuffleVector - Return true if every element in Mask, 03098 // beginning from position Pos and ending in Pos+Size, falls within the 03099 // specified sequential range [L, L+Pos). or is undef. 03100 static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, 03101 unsigned Pos, unsigned Size, int Low) { 03102 for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) 03103 if (Mask[i] >= 0 && Mask[i] != Low) 03104 return false; 03105 return true; 03106 } 03107 03108 void SelectionDAGBuilder::visitShuffleVector(const User &I) { 03109 SDValue Src1 = getValue(I.getOperand(0)); 03110 SDValue Src2 = getValue(I.getOperand(1)); 03111 03112 SmallVector<int, 8> Mask; 03113 ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); 03114 unsigned MaskNumElts = Mask.size(); 03115 03116 const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); 03117 EVT VT = TLI->getValueType(I.getType()); 03118 EVT SrcVT = Src1.getValueType(); 03119 unsigned SrcNumElts = SrcVT.getVectorNumElements(); 03120 03121 if (SrcNumElts == MaskNumElts) { 03122 setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, 03123 &Mask[0])); 03124 return; 03125 } 03126 03127 // Normalize the shuffle vector since mask and vector length don't match. 03128 if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { 03129 // Mask is longer than the source vectors and is a multiple of the source 03130 // vectors. We can use concatenate vector to make the mask and vectors 03131 // lengths match. 03132 if (SrcNumElts*2 == MaskNumElts) { 03133 // First check for Src1 in low and Src2 in high 03134 if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && 03135 isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { 03136 // The shuffle is concatenating two vectors together. 
03137 setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), 03138 VT, Src1, Src2)); 03139 return; 03140 } 03141 // Then check for Src2 in low and Src1 in high 03142 if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && 03143 isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { 03144 // The shuffle is concatenating two vectors together. 03145 setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), 03146 VT, Src2, Src1)); 03147 return; 03148 } 03149 } 03150 03151 // Pad both vectors with undefs to make them the same length as the mask. 03152 unsigned NumConcat = MaskNumElts / SrcNumElts; 03153 bool Src1U = Src1.getOpcode() == ISD::UNDEF; 03154 bool Src2U = Src2.getOpcode() == ISD::UNDEF; 03155 SDValue UndefVal = DAG.getUNDEF(SrcVT); 03156 03157 SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); 03158 SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal); 03159 MOps1[0] = Src1; 03160 MOps2[0] = Src2; 03161 03162 Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, 03163 getCurSDLoc(), VT, MOps1); 03164 Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, 03165 getCurSDLoc(), VT, MOps2); 03166 03167 // Readjust mask for new input vector length. 03168 SmallVector<int, 8> MappedOps; 03169 for (unsigned i = 0; i != MaskNumElts; ++i) { 03170 int Idx = Mask[i]; 03171 if (Idx >= (int)SrcNumElts) 03172 Idx -= SrcNumElts - MaskNumElts; 03173 MappedOps.push_back(Idx); 03174 } 03175 03176 setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, 03177 &MappedOps[0])); 03178 return; 03179 } 03180 03181 if (SrcNumElts > MaskNumElts) { 03182 // Analyze the access pattern of the vector to see if we can extract 03183 // two subvectors and do the shuffle. The analysis is done by calculating 03184 // the range of elements the mask access on both vectors. 
03185 int MinRange[2] = { static_cast<int>(SrcNumElts), 03186 static_cast<int>(SrcNumElts)}; 03187 int MaxRange[2] = {-1, -1}; 03188 03189 for (unsigned i = 0; i != MaskNumElts; ++i) { 03190 int Idx = Mask[i]; 03191 unsigned Input = 0; 03192 if (Idx < 0) 03193 continue; 03194 03195 if (Idx >= (int)SrcNumElts) { 03196 Input = 1; 03197 Idx -= SrcNumElts; 03198 } 03199 if (Idx > MaxRange[Input]) 03200 MaxRange[Input] = Idx; 03201 if (Idx < MinRange[Input]) 03202 MinRange[Input] = Idx; 03203 } 03204 03205 // Check if the access is smaller than the vector size and can we find 03206 // a reasonable extract index. 03207 int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not 03208 // Extract. 03209 int StartIdx[2]; // StartIdx to extract from 03210 for (unsigned Input = 0; Input < 2; ++Input) { 03211 if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { 03212 RangeUse[Input] = 0; // Unused 03213 StartIdx[Input] = 0; 03214 continue; 03215 } 03216 03217 // Find a good start index that is a multiple of the mask length. Then 03218 // see if the rest of the elements are in range. 03219 StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; 03220 if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && 03221 StartIdx[Input] + MaskNumElts <= SrcNumElts) 03222 RangeUse[Input] = 1; // Extract from a multiple of the mask length. 03223 } 03224 03225 if (RangeUse[0] == 0 && RangeUse[1] == 0) { 03226 setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. 03227 return; 03228 } 03229 if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { 03230 // Extract appropriate subvector and generate a vector shuffle 03231 for (unsigned Input = 0; Input < 2; ++Input) { 03232 SDValue &Src = Input == 0 ? Src1 : Src2; 03233 if (RangeUse[Input] == 0) 03234 Src = DAG.getUNDEF(VT); 03235 else 03236 Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, 03237 Src, DAG.getConstant(StartIdx[Input], 03238 TLI->getVectorIdxTy())); 03239 } 03240 03241 // Calculate new mask. 
      // Remap mask indices onto the two extracted subvectors: Src1 elements
      // are rebased on StartIdx[0]; Src2 elements additionally account for
      // Src2 now starting at position MaskNumElts in the shuffle inputs.
      SmallVector<int, 8> MappedOps;
      for (unsigned i = 0; i != MaskNumElts; ++i) {
        int Idx = Mask[i];
        if (Idx >= 0) {
          if (Idx < (int)SrcNumElts)
            Idx -= StartIdx[0];
          else
            Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
        }
        MappedOps.push_back(Idx);
      }

      setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
                                        &MappedOps[0]));
      return;
    }
  }

  // We can't use either concat vectors or extract subvectors so fall back to
  // replacing the shuffle with extract and build vector.
  EVT EltVT = VT.getVectorElementType();
  EVT IdxVT = TLI->getVectorIdxTy();
  SmallVector<SDValue,8> Ops;
  for (unsigned i = 0; i != MaskNumElts; ++i) {
    int Idx = Mask[i];
    SDValue Res;

    if (Idx < 0) {
      // An undef mask element lowers to an undef scalar.
      Res = DAG.getUNDEF(EltVT);
    } else {
      // Indices >= SrcNumElts select from the second source vector.
      SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
      if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;

      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
                        EltVT, Src, DAG.getConstant(Idx, IdxVT));
    }

    Ops.push_back(Res);
  }

  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops));
}

/// visitInsertValue - Lower an insertvalue instruction by building the full
/// list of per-register aggregate values, substituting the inserted value(s)
/// starting at the linearized index, and merging them with MERGE_VALUES.
/// Undef sources avoid materializing the corresponding values.
void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
  const Value *Op0 = I.getOperand(0);  // The aggregate being inserted into.
  const Value *Op1 = I.getOperand(1);  // The value being inserted.
  Type *AggTy = I.getType();
  Type *ValTy = Op1->getType();
  bool IntoUndef = isa<UndefValue>(Op0);
  bool FromUndef = isa<UndefValue>(Op1);

  // Flatten the (possibly nested) index list into a single linear position
  // within the aggregate's register-value sequence.
  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());

  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  SmallVector<EVT, 4> AggValueVTs;
  ComputeValueVTs(*TLI, AggTy, AggValueVTs);
  SmallVector<EVT, 4> ValValueVTs;
  ComputeValueVTs(*TLI, ValTy, ValValueVTs);

  unsigned NumAggValues = AggValueVTs.size();
  unsigned NumValValues = ValValueVTs.size();
  SmallVector<SDValue, 4> Values(NumAggValues);

  SDValue Agg = getValue(Op0);
  unsigned i = 0;
  // Copy the beginning value(s) from the original aggregate.
  for (; i != LinearIndex; ++i)
    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                SDValue(Agg.getNode(), Agg.getResNo() + i);
  // Copy values from the inserted value(s).  NumValValues may be zero for an
  // empty inserted object, in which case this region is skipped entirely.
  if (NumValValues) {
    SDValue Val = getValue(Op1);
    for (; i != LinearIndex + NumValValues; ++i)
      Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
  }
  // Copy remaining value(s) from the original aggregate.
  for (; i != NumAggValues; ++i)
    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                SDValue(Agg.getNode(), Agg.getResNo() + i);

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                           DAG.getVTList(AggValueVTs), Values));
}

/// visitExtractValue - Lower an extractvalue instruction by selecting the
/// register values that make up the extracted member (located via the
/// linearized index) and merging them with MERGE_VALUES.
void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
  const Value *Op0 = I.getOperand(0);  // The aggregate being extracted from.
  Type *AggTy = Op0->getType();
  Type *ValTy = I.getType();
  bool OutOfUndef = isa<UndefValue>(Op0);

  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());

  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  SmallVector<EVT, 4> ValValueVTs;
  ComputeValueVTs(*TLI, ValTy, ValValueVTs);

  unsigned NumValValues = ValValueVTs.size();

  // Ignore a extractvalue that produces an empty object: map it to a
  // placeholder undef (the MVT::Other type carries no data).
  if (!NumValValues) {
    setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
    return;
  }

  SmallVector<SDValue, 4> Values(NumValValues);

  SDValue Agg = getValue(Op0);
  // Copy out the selected value(s).
  // Each extracted register value is either the corresponding result of the
  // aggregate's node, or undef if the whole aggregate is undef.
  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
    Values[i - LinearIndex] =
      OutOfUndef ?
        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
        SDValue(Agg.getNode(), Agg.getResNo() + i);

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                           DAG.getVTList(ValValueVTs), Values));
}

/// visitGetElementPtr - Lower a getelementptr instruction to explicit pointer
/// arithmetic: the base pointer plus a running offset accumulated over each
/// index.  Struct fields contribute a constant layout offset; sequential
/// (array/vector/pointer) indices contribute Idx * ElementSize.
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
  Value *Op0 = I.getOperand(0);
  // Note that the pointer operand may be a vector of pointers. Take the scalar
  // element which holds a pointer.
  Type *Ty = Op0->getType()->getScalarType();
  unsigned AS = Ty->getPointerAddressSpace();
  SDValue N = getValue(Op0);

  for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
       OI != E; ++OI) {
    const Value *Idx = *OI;
    if (StructType *StTy = dyn_cast<StructType>(Ty)) {
      // Struct indices are always constants; field 0 has offset 0 so no ADD
      // is needed for it.
      unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
      if (Field) {
        // N = N + Offset
        uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
        N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
                        DAG.getConstant(Offset, N.getValueType()));
      }

      Ty = StTy->getElementType(Field);
    } else {
      Ty = cast<SequentialType>(Ty)->getElementType();

      // If this is a constant subscript, handle it quickly.
      const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero()) continue;
        // Fold Idx * ElementSize into a single constant offset.
        uint64_t Offs =
            DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
        SDValue OffsVal;
        EVT PTy = TLI->getPointerTy(AS);
        unsigned PtrBits = PTy.getSizeInBits();
        // The product was computed in 64 bits; truncate down to the pointer
        // width when pointers are narrower than 64 bits.
        if (PtrBits < 64)
          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
                                DAG.getConstant(Offs, MVT::i64));
        else
          OffsVal = DAG.getConstant(Offs, PTy);

        N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
                        OffsVal);
        continue;
      }

      // N = N + Idx * ElementSize;
      APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
                                DL->getTypeAllocSize(Ty));
      SDValue IdxN = getValue(Idx);

      // If the index is smaller or larger than intptr_t, truncate or extend
      // it.  GEP indices are sign-extended per the IR semantics.
      IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType());

      // If this is a multiply by a power of two, turn it into a shl
      // immediately.  This is a very common case.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          unsigned Amt = ElementSize.logBase2();
          IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(),
                             N.getValueType(), IdxN,
                             DAG.getConstant(Amt, IdxN.getValueType()));
        } else {
          SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType());
          IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(),
                             N.getValueType(), IdxN, Scale);
        }
      }

      N = DAG.getNode(ISD::ADD, getCurSDLoc(),
                      N.getValueType(), N, IdxN);
    }
  }

  setValue(&I, N);
}

/// visitAlloca - Lower a dynamically-sized alloca to a DYNAMIC_STACKALLOC
/// node; statically-sized entry-block allocas are handled elsewhere via
/// FuncInfo.StaticAllocaMap.
void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
  // If this is a fixed sized alloca in the entry block of the function,
  // allocate it statically on the stack.
  if (FuncInfo.StaticAllocaMap.count(&I))
    return;   // getValue will auto-populate this.

  Type *Ty = I.getAllocatedType();
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
  // Honor whichever is stricter: the IR's requested alignment or the
  // target's preferred alignment for the allocated type.
  unsigned Align =
    std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty),
             I.getAlignment());

  SDValue AllocSize = getValue(I.getArraySize());

  // AllocSize (the element count) is zero-extended/truncated to pointer
  // width, then multiplied by the element size in bytes.
  EVT IntPtr = TLI->getPointerTy();
  if (AllocSize.getValueType() != IntPtr)
    AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr);

  AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr,
                          AllocSize,
                          DAG.getConstant(TySize, IntPtr));

  // Handle alignment.  If the requested alignment is less than or equal to
  // the stack alignment, ignore it.  If the size is greater than or equal to
  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
  unsigned StackAlign =
      TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
  if (Align <= StackAlign)
    Align = 0;   // Align == 0 means "no over-alignment needed".

  // Round the size of the allocation up to the stack alignment size
  // by add SA-1 to the size.
  AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(),
                          AllocSize.getValueType(), AllocSize,
                          DAG.getIntPtrConstant(StackAlign-1));

  // Mask out the low bits for alignment purposes.
  AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(),
                          AllocSize.getValueType(), AllocSize,
                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));

  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops);
  setValue(&I, DSA);
  // Chain the stack adjustment so later memory operations are ordered
  // after it.
  DAG.setRoot(DSA.getValue(1));

  assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects());
}

/// visitLoad - Lower a (non-atomic) load, splitting multi-register results
/// into a series of chained loads merged with MERGE_VALUES.  Volatile loads
/// are serialized against all side effects; loads of constant memory are not
/// serialized at all.
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
  if (I.isAtomic())
    return visitAtomicLoad(I);

  const Value *SV = I.getOperand(0);
  SDValue Ptr = getValue(SV);

  Type *Ty = I.getType();

  bool isVolatile = I.isVolatile();
  bool isNonTemporal = I.getMetadata("nontemporal") != nullptr;
  bool isInvariant = I.getMetadata("invariant.load") != nullptr;
  unsigned Alignment = I.getAlignment();

  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);
  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

  // A single IR load may lower to several register-sized loads at
  // byte offsets computed by ComputeValueVTs.
  SmallVector<EVT, 4> ValueVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(*TM.getSubtargetImpl()->getTargetLowering(), Ty, ValueVTs,
                  &Offsets);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0)
    return;   // Empty aggregate: nothing to load.

  SDValue Root;
  bool ConstantMemory = false;
  if (isVolatile || NumValues > MaxParallelChains)
    // Serialize volatile loads with other side effects.
    Root = getRoot();
  else if (AA->pointsToConstantMemory(
             AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) {
    // Do not serialize (non-volatile) loads of constant memory with anything.
    Root = DAG.getEntryNode();
    ConstantMemory = true;
  } else {
    // Do not serialize non-volatile loads against each other.
    Root = DAG.getRoot();
  }

  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  if (isVolatile)
    // Give the target a chance to insert target-specific sequencing before
    // a volatile load.
    Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG);

  SmallVector<SDValue, 4> Values(NumValues);
  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
                                          NumValues));
  EVT PtrVT = Ptr.getValueType();
  unsigned ChainI = 0;
  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
    // Serializing loads here may result in excessive register pressure, and
    // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
    // could recover a bit by hoisting nodes upward in the chain by recognizing
    // they are side-effect free or do not alias. The optimizer should really
    // avoid this case by converting large object/array copies to llvm.memcpy
    // (MaxParallelChains should always remain as failsafe).
    if (ChainI == MaxParallelChains) {
      assert(PendingLoads.empty() && "PendingLoads must be serialized first");
      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                                  makeArrayRef(Chains.data(), ChainI));
      Root = Chain;
      ChainI = 0;
    }
    // Address of the i-th piece: base pointer plus its byte offset.
    SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(),
                            PtrVT, Ptr,
                            DAG.getConstant(Offsets[i], PtrVT));
    SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root,
                            A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
                            isNonTemporal, isInvariant, Alignment, AAInfo,
                            Ranges);

    Values[i] = L;
    Chains[ChainI] = L.getValue(1);
  }

  if (!ConstantMemory) {
    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                                makeArrayRef(Chains.data(), ChainI));
    // Volatile loads update the root immediately; ordinary loads are queued
    // in PendingLoads so independent loads are not serialized.
    if (isVolatile)
      DAG.setRoot(Chain);
    else
      PendingLoads.push_back(Chain);
  }

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                           DAG.getVTList(ValueVTs), Values));
}

/// visitStore - Lower a (non-atomic) store, splitting a multi-register source
/// into a series of stores at the offsets computed by ComputeValueVTs and
/// token-factoring their chains into the new root.
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
  if (I.isAtomic())
    return visitAtomicStore(I);

  const Value *SrcV = I.getOperand(0);
  const Value *PtrV = I.getOperand(1);

  SmallVector<EVT, 4> ValueVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(*TM.getSubtargetImpl()->getTargetLowering(), SrcV->getType(),
                  ValueVTs, &Offsets);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0)
    return;   // Storing an empty aggregate is a no-op.

  // Get the lowered operands. Note that we do this after
  // checking if NumResults is zero, because with zero results
  // the operands won't have values in the map.
  SDValue Src = getValue(SrcV);
  SDValue Ptr = getValue(PtrV);

  SDValue Root = getRoot();
  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
                                          NumValues));
  EVT PtrVT = Ptr.getValueType();
  bool isVolatile = I.isVolatile();
  bool isNonTemporal = I.getMetadata("nontemporal") != nullptr;
  unsigned Alignment = I.getAlignment();

  AAMDNodes AAInfo;
  I.getAAMetadata(AAInfo);

  unsigned ChainI = 0;
  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
    // See visitLoad comments.
    // Cap the number of parallel store chains, collapsing them into a
    // TokenFactor when the limit is reached (see visitLoad for rationale).
    if (ChainI == MaxParallelChains) {
      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                                  makeArrayRef(Chains.data(), ChainI));
      Root = Chain;
      ChainI = 0;
    }
    SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr,
                              DAG.getConstant(Offsets[i], PtrVT));
    SDValue St = DAG.getStore(Root, getCurSDLoc(),
                              SDValue(Src.getNode(), Src.getResNo() + i),
                              Add, MachinePointerInfo(PtrV, Offsets[i]),
                              isVolatile, isNonTemporal, Alignment, AAInfo);
    Chains[ChainI] = St;
  }

  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                                  makeArrayRef(Chains.data(), ChainI));
  DAG.setRoot(StoreNode);
}

/// InsertFenceForAtomic - Emit the ATOMIC_FENCE required before (Before=true)
/// or after (Before=false) an atomic operation on targets that want explicit
/// fences, downgrading the ordering to the fence actually needed on that
/// side; returns Chain unchanged when no fence is required.
static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
                                    SynchronizationScope Scope,
                                    bool Before, SDLoc dl,
                                    SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  // Fence, if necessary
  if (Before) {
    // Only the release half of the ordering matters before the operation.
    if (Order == AcquireRelease || Order == SequentiallyConsistent)
      Order = Release;
    else if (Order == Acquire || Order == Monotonic || Order == Unordered)
      return Chain;
  } else {
    // Only the acquire half matters after the operation.
    if (Order == AcquireRelease)
      Order = Acquire;
    else if (Order == Release || Order == Monotonic || Order == Unordered)
      return Chain;
  }
  SDValue Ops[3];
  Ops[0] = Chain;
  Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
  Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
  return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops);
}

/// visitAtomicCmpXchg - Lower cmpxchg to ATOMIC_CMP_SWAP_WITH_SUCCESS.  On
/// targets that insert explicit fences, the node itself is emitted Monotonic
/// and fences are placed around it.
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
  SDLoc dl = getCurSDLoc();
  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
  AtomicOrdering FailureOrder = I.getFailureOrdering();
  SynchronizationScope Scope = I.getSynchScope();

  SDValue InChain = getRoot();

  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  if (TLI->getInsertFencesForAtomic())
    InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl,
                                   DAG, *TLI);

  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
  // Results: the loaded value, an i1 success flag, and the output chain.
  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
  SDValue L = DAG.getAtomicCmpSwap(
      ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
      getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
      getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
      0 /* Alignment */,
      TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
      TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope);

  SDValue OutChain = L.getValue(2);

  if (TLI->getInsertFencesForAtomic())
    OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl,
                                    DAG, *TLI);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}

/// visitAtomicRMW - Lower atomicrmw by mapping the IR operation onto the
/// corresponding ISD::ATOMIC_* node, with explicit fences around a Monotonic
/// node on targets that request them.
void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
  SDLoc dl = getCurSDLoc();
  ISD::NodeType NT;
  switch (I.getOperation()) {
  default: llvm_unreachable("Unknown atomicrmw operation");
  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
  case AtomicRMWInst::Add:  NT = ISD::ATOMIC_LOAD_ADD; break;
  case AtomicRMWInst::Sub:  NT = ISD::ATOMIC_LOAD_SUB; break;
  case AtomicRMWInst::And:  NT = ISD::ATOMIC_LOAD_AND; break;
  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
  case AtomicRMWInst::Or:   NT = ISD::ATOMIC_LOAD_OR; break;
  case AtomicRMWInst::Xor:  NT = ISD::ATOMIC_LOAD_XOR; break;
  case AtomicRMWInst::Max:  NT = ISD::ATOMIC_LOAD_MAX; break;
  case AtomicRMWInst::Min:  NT = ISD::ATOMIC_LOAD_MIN; break;
  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
  }
  AtomicOrdering Order =
      I.getOrdering();
  SynchronizationScope Scope = I.getSynchScope();

  SDValue InChain = getRoot();

  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  if (TLI->getInsertFencesForAtomic())
    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
                                   DAG, *TLI);

  SDValue L =
    DAG.getAtomic(NT, dl,
                  getValue(I.getValOperand()).getSimpleValueType(),
                  InChain,
                  getValue(I.getPointerOperand()),
                  getValue(I.getValOperand()),
                  I.getPointerOperand(), 0 /* Alignment */,
                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
                  Scope);

  SDValue OutChain = L.getValue(1);

  if (TLI->getInsertFencesForAtomic())
    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
                                    DAG, *TLI);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}

/// visitFence - Lower an IR fence to an ATOMIC_FENCE node carrying the
/// ordering and synchronization scope as constant operands.
void SelectionDAGBuilder::visitFence(const FenceInst &I) {
  SDLoc dl = getCurSDLoc();
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  SDValue Ops[3];
  Ops[0] = getRoot();
  Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy());
  Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy());
  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}

/// visitAtomicLoad - Lower an atomic load to an ATOMIC_LOAD node with a
/// volatile machine memory operand; rejects loads narrower-aligned than
/// their type and, on fence-inserting targets, emits a trailing fence.
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
  SDLoc dl = getCurSDLoc();
  AtomicOrdering Order = I.getOrdering();
  SynchronizationScope Scope = I.getSynchScope();

  SDValue InChain = getRoot();

  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  EVT VT = TLI->getValueType(I.getType());

  if (I.getAlignment() < VT.getSizeInBits() / 8)
    report_fatal_error("Cannot generate unaligned atomic load");

  MachineMemOperand *MMO =
      DAG.getMachineFunction().
          getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
                               MachineMemOperand::MOVolatile |
                               MachineMemOperand::MOLoad,
                               VT.getStoreSize(),
                               I.getAlignment() ? I.getAlignment() :
                                                  DAG.getEVTAlignment(VT));

  InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG);
  SDValue L =
      DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
                    getValue(I.getPointerOperand()), MMO,
                    TLI->getInsertFencesForAtomic() ? Monotonic : Order,
                    Scope);

  SDValue OutChain = L.getValue(1);

  if (TLI->getInsertFencesForAtomic())
    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
                                    DAG, *TLI);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}

/// visitAtomicStore - Lower an atomic store to an ATOMIC_STORE node; rejects
/// stores narrower-aligned than their type and, on fence-inserting targets,
/// wraps the Monotonic node in explicit fences.
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
  SDLoc dl = getCurSDLoc();

  AtomicOrdering Order = I.getOrdering();
  SynchronizationScope Scope = I.getSynchScope();

  SDValue InChain = getRoot();

  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  EVT VT = TLI->getValueType(I.getValueOperand()->getType());

  if (I.getAlignment() < VT.getSizeInBits() / 8)
    report_fatal_error("Cannot generate unaligned atomic store");

  if (TLI->getInsertFencesForAtomic())
    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
                                   DAG, *TLI);

  SDValue OutChain =
    DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
                  InChain,
                  getValue(I.getPointerOperand()),
                  getValue(I.getValueOperand()),
                  I.getPointerOperand(), I.getAlignment(),
                  TLI->getInsertFencesForAtomic() ?
                    Monotonic : Order,
                  Scope);

  if (TLI->getInsertFencesForAtomic())
    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
                                    DAG, *TLI);

  DAG.setRoot(OutChain);
}

/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.  Picks INTRINSIC_WO_CHAIN / INTRINSIC_W_CHAIN / INTRINSIC_VOID (or a
/// target memory-intrinsic node) based on whether the call accesses memory
/// and produces a value.
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
                                               unsigned Intrinsic) {
  bool HasChain = !I.doesNotAccessMemory();
  bool OnlyLoad = HasChain && I.onlyReadsMemory();

  // Build the operand list.
  SmallVector<SDValue, 8> Ops;
  if (HasChain) {   // If this intrinsic has side-effects, chainify it.
    if (OnlyLoad) {
      // We don't need to serialize loads against other loads.
      Ops.push_back(DAG.getRoot());
    } else {
      Ops.push_back(getRoot());
    }
  }

  // Info is set by getTgtMemInstrinsic
  TargetLowering::IntrinsicInfo Info;
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  bool IsTgtIntrinsic = TLI->getTgtMemIntrinsic(Info, I, Intrinsic);

  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
      Info.opc == ISD::INTRINSIC_W_CHAIN)
    Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI->getPointerTy()));

  // Add all operands of the call to the operand list.
  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
    SDValue Op = getValue(I.getArgOperand(i));
    Ops.push_back(Op);
  }

  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(*TLI, I.getType(), ValueVTs);

  // The chain result, when present, is always the last result.
  if (HasChain)
    ValueVTs.push_back(MVT::Other);

  SDVTList VTs = DAG.getVTList(ValueVTs);

  // Create the node.
  SDValue Result;
  if (IsTgtIntrinsic) {
    // This is target intrinsic that touches memory
    Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(),
                                     VTs, Ops, Info.memVT,
                                   MachinePointerInfo(Info.ptrVal, Info.offset),
                                     Info.align, Info.vol,
                                     Info.readMem, Info.writeMem, Info.size);
  } else if (!HasChain) {
    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
  } else if (!I.getType()->isVoidTy()) {
    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
  } else {
    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
  }

  if (HasChain) {
    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
    // Read-only intrinsics are queued like ordinary loads; others update
    // the root so later side effects are ordered after them.
    if (OnlyLoad)
      PendingLoads.push_back(Chain);
    else
      DAG.setRoot(Chain);
  }

  if (!I.getType()->isVoidTy()) {
    if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
      // Reconcile the node's vector result type with the IR type via bitcast.
      EVT VT = TLI->getValueType(PTy);
      Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
    }

    setValue(&I, Result);
  }
}

/// GetSignificand - Get the significand and build it into a floating-point
/// number with exponent of 1:
///
///   Op = (Op & 0x007fffff) | 0x3f800000;
///
/// where Op is the hexadecimal representation of floating point value.
static SDValue
GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) {
  // Keep the 23 mantissa bits and force the biased exponent to 127 (i.e. 1.0
  // <= result < 2.0), then reinterpret the bits as f32.
  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                           DAG.getConstant(0x007fffff, MVT::i32));
  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
                           DAG.getConstant(0x3f800000, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}

/// GetExponent - Get the exponent:
///
///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
///
/// where Op is the hexadecimal representation of floating point value.
static SDValue
GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
            SDLoc dl) {
  // Isolate the 8 exponent bits, shift them down, remove the IEEE-754 bias
  // of 127, and convert the signed result to f32.
  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                           DAG.getConstant(0x7f800000, MVT::i32));
  SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
                           DAG.getConstant(23, TLI.getPointerTy()));
  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
                           DAG.getConstant(127, MVT::i32));
  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
}

/// getF32Constant - Get 32-bit floating point constant, given its IEEE-754
/// single-precision bit pattern.
static SDValue
getF32Constant(SelectionDAG &DAG, unsigned Flt) {
  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)),
                           MVT::f32);
}

/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.  Computes exp(X) as 2^IntegerPart * 2^FractionalPart
/// with the fractional power approximated by a minimax polynomial chosen by
/// LimitFloatPrecision; otherwise falls back to a plain FEXP node.
static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                         const TargetLowering &TLI) {
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {

    // Put the exponent in the right bit position for later addition to the
    // final result:
    //
    //   #define LOG2OFe 1.4426950f
    //   IntegerPartOfX = ((int32_t)(X * LOG2OFe));
    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
                             getF32Constant(DAG, 0x3fb8aa3b));
    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);

    //   FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);

    //   IntegerPartOfX <<= 23;
    // (moves the integer part into the f32 exponent field for the final
    // integer-domain addition below)
    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
                                 DAG.getConstant(23, TLI.getPointerTy()));

    // Approximate 2^FractionalPartOfX with a polynomial whose degree depends
    // on the requested precision; the hex constants are the f32 bit patterns
    // of the coefficients quoted in each comment.
    SDValue TwoToFracPartOfX;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   TwoToFractionalPartOfX =
      //     0.997535578f +
      //       (0.735607626f + 0.252464424f * x) * x;
      //
      // error 0.0144103317, which is 6 bits
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3e814304));
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3f3c50c8));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                     getF32Constant(DAG, 0x3f7f5e7e));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   TwoToFractionalPartOfX =
      //     0.999892986f +
      //       (0.696457318f +
      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
      //
      // 0.000107046256 error, which is 13 to 14 bits
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3da235e3));
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3e65b8f3));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x3f324b07));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                     getF32Constant(DAG, 0x3f7ff8fd));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   TwoToFractionalPartOfX =
      //     0.999999982f +
      //       (0.693148872f +
      //         (0.240227044f +
      //           (0.554906021e-1f +
      //             (0.961591928e-2f +
      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
      //
      // error 2.47208000*10^(-7), which is better than 18 bits
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3924b03e));
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3ab24b87));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 04013 getF32Constant(DAG, 0x3c1d8c17)); 04014 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 04015 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 04016 getF32Constant(DAG, 0x3d634a1d)); 04017 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 04018 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 04019 getF32Constant(DAG, 0x3e75fe14)); 04020 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 04021 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, 04022 getF32Constant(DAG, 0x3f317234)); 04023 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); 04024 TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, 04025 getF32Constant(DAG, 0x3f800000)); 04026 } 04027 04028 // Add the exponent into the result in integer domain. 04029 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX); 04030 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 04031 DAG.getNode(ISD::ADD, dl, MVT::i32, 04032 t13, IntegerPartOfX)); 04033 } 04034 04035 // No special expansion. 04036 return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); 04037 } 04038 04039 /// expandLog - Lower a log intrinsic. Handles the special sequences for 04040 /// limited-precision mode. 04041 static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, 04042 const TargetLowering &TLI) { 04043 if (Op.getValueType() == MVT::f32 && 04044 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 04045 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 04046 04047 // Scale the exponent by log(2) [0.69314718f]. 04048 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); 04049 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, 04050 getF32Constant(DAG, 0x3f317218)); 04051 04052 // Get the significand and build it into a floating-point number with 04053 // exponent of 1. 
04054 SDValue X = GetSignificand(DAG, Op1, dl); 04055 04056 SDValue LogOfMantissa; 04057 if (LimitFloatPrecision <= 6) { 04058 // For floating-point precision of 6: 04059 // 04060 // LogofMantissa = 04061 // -1.1609546f + 04062 // (1.4034025f - 0.23903021f * x) * x; 04063 // 04064 // error 0.0034276066, which is better than 8 bits 04065 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 04066 getF32Constant(DAG, 0xbe74c456)); 04067 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 04068 getF32Constant(DAG, 0x3fb3a2b1)); 04069 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 04070 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 04071 getF32Constant(DAG, 0x3f949a29)); 04072 } else if (LimitFloatPrecision <= 12) { 04073 // For floating-point precision of 12: 04074 // 04075 // LogOfMantissa = 04076 // -1.7417939f + 04077 // (2.8212026f + 04078 // (-1.4699568f + 04079 // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; 04080 // 04081 // error 0.000061011436, which is 14 bits 04082 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 04083 getF32Constant(DAG, 0xbd67b6d6)); 04084 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 04085 getF32Constant(DAG, 0x3ee4f4b8)); 04086 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 04087 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 04088 getF32Constant(DAG, 0x3fbc278b)); 04089 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 04090 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 04091 getF32Constant(DAG, 0x40348e95)); 04092 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 04093 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 04094 getF32Constant(DAG, 0x3fdef31a)); 04095 } else { // LimitFloatPrecision <= 18 04096 // For floating-point precision of 18: 04097 // 04098 // LogOfMantissa = 04099 // -2.1072184f + 04100 // (4.2372794f + 04101 // (-3.7029485f + 04102 // (2.2781945f + 04103 // (-0.87823314f + 04104 // (0.19073739f - 0.17809712e-1f * x) * x) * x) 
      //                                          * x) * x)*x;
      //
      // error 0.0000023660568, which is better than 18 bits
      // The hex values are the IEEE-754 f32 bit patterns of the minimax
      // coefficients above; the polynomial is evaluated in Horner form.
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbc91e5ac));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3e4350aa));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3f60d3e3));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x4011cdf0));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x406cfd1c));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                               getF32Constant(DAG, 0x408797cb));
      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
                                  getF32Constant(DAG, 0x4006dcab));
    }

    // Recombine: the exponent contribution (computed above this block) plus
    // the polynomial approximation of log of the mantissa.
    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
  }

  // No special expansion.
  return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
}

/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode (LimitFloatPrecision in (0, 18]); otherwise emits a
/// plain ISD::FLOG2 node.
static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    // Reinterpret the f32 bits as i32 so the exponent and significand fields
    // can be extracted by the helpers below.
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);

    // Get the exponent.
    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);

    // Get the significand and build it into a floating-point number with
    // exponent of 1.
    SDValue X = GetSignificand(DAG, Op1, dl);

    // Different possible minimax approximations of significand in
    // floating-point for various degrees of accuracy over [1,2].
    SDValue Log2ofMantissa;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
      //
      // error 0.0049451742, which is more than 7 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbeb08fe0));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x40019463));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                                   getF32Constant(DAG, 0x3fd6633d));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   Log2ofMantissa =
      //     -2.51285454f +
      //       (4.07009056f +
      //         (-2.12067489f +
      //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
      //
      // error 0.0000876136000, which is better than 13 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbda7262e));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3f25280b));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x4007b923));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x40823e2f));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                                   getF32Constant(DAG, 0x4020d29c));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   Log2ofMantissa =
      //     -3.0400495f +
      //       (6.1129976f +
      //         (-5.3420409f +
      //           (3.2865683f +
      //             (-1.2669343f +
      //               (0.27515199f -
      //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
      //
      // error 0.0000018516, which is better than 18 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbcd2769e));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3e8ce0b9));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3fa22ae7));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x40525723));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x40aaf200));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                               getF32Constant(DAG, 0x40c39dad));
      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
                                   getF32Constant(DAG, 0x4042902c));
    }

    // log2(x) for x = 2^e * m is e + log2(m).
    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
  }

  // No special expansion.
  return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
}

/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode; otherwise emits a plain ISD::FLOG10 node.
static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                           const TargetLowering &TLI) {
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);

    // Scale the exponent by log10(2) [0.30102999f].
    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
    // 0x3e9a209a is the f32 bit pattern of log10(2) = 0.30102999f; this
    // converts the binary exponent's contribution into log10 units.
    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
                                        getF32Constant(DAG, 0x3e9a209a));

    // Get the significand and build it into a floating-point number with
    // exponent of 1.
    SDValue X = GetSignificand(DAG, Op1, dl);

    // Minimax polynomial approximations of log10 of the mantissa over [1,2],
    // picked by the requested precision; hex values are f32 bit patterns of
    // the coefficients, evaluated in Horner form.
    SDValue Log10ofMantissa;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   Log10ofMantissa =
      //     -0.50419619f +
      //       (0.60948995f - 0.10380950f * x) * x;
      //
      // error 0.0014886165, which is 6 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbdd49a13));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3f1c0789));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                                    getF32Constant(DAG, 0x3f011300));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   Log10ofMantissa =
      //     -0.64831180f +
      //       (0.91751397f +
      //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
      //
      // error 0.00019228036, which is better than 12 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3d431f31));
      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3ea21fb2));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3f6ae232));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
                                    getF32Constant(DAG, 0x3f25f7c3));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   Log10ofMantissa =
      //     -0.84299375f +
      //       (1.5327582f +
      //         (-1.0688956f +
      //           (0.49102474f +
      //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
      //
      // error 0.0000037995730, which is better than 18 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3c5d51ce));
      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3e00685a));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3efb6798));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x3f88d192));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x3fc4316c));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
                                    getF32Constant(DAG, 0x3f57ce70));
    }

    // log10(x) for x = 2^e * m is e*log10(2) + log10(m).
    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
  }

  // No special expansion.
  return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
}

/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    // Split x into integer and fractional parts: 2^x = 2^int(x) * 2^frac(x).
    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);

    // FractionalPartOfX = x - (float)IntegerPartOfX;
    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);

    // IntegerPartOfX <<= 23;
    // Move the integer part into the f32 exponent field so it can be folded
    // into the result with an integer add at the end.
    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
                                 DAG.getConstant(23, TLI.getPointerTy()));

    // Approximate 2^frac(x) with a minimax polynomial; hex values are f32
    // bit patterns of the coefficients, evaluated in Horner form.
    SDValue TwoToFractionalPartOfX;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   TwoToFractionalPartOfX =
      //     0.997535578f +
      //       (0.735607626f + 0.252464424f * x) * x;
      //
      // error 0.0144103317, which is 6 bits
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3e814304));
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3f3c50c8));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                           getF32Constant(DAG, 0x3f7f5e7e));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   TwoToFractionalPartOfX =
      //     0.999892986f +
      //       (0.696457318f +
      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
      //
      // error 0.000107046256, which is 13 to 14 bits
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3da235e3));
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3e65b8f3));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x3f324b07));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                           getF32Constant(DAG, 0x3f7ff8fd));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   TwoToFractionalPartOfX =
      //     0.999999982f +
      //       (0.693148872f +
      //         (0.240227044f +
      //           (0.554906021e-1f +
      //             (0.961591928e-2f +
      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
      // error 2.47208000*10^(-7), which is better than 18 bits
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3924b03e));
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3ab24b87));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x3c1d8c17));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x3d634a1d));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                               getF32Constant(DAG, 0x3e75fe14));
      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
                                getF32Constant(DAG, 0x3f317234));
      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
      // 0x3f800000 is 1.0f.
      TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
                                           getF32Constant(DAG, 0x3f800000));
    }

    // Add the exponent into the result in integer domain.
    SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
                              TwoToFractionalPartOfX);
    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
                       DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   t13, IntegerPartOfX));
  }

  // No special expansion.
  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
}

/// expandPow - Lower a pow intrinsic. Handles the special sequences for
/// limited-precision mode with x == 10.0f (i.e. pow(10, y) lowered as
/// exp2(y * log2(10))); anything else becomes a plain ISD::FPOW node.
static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
                         SelectionDAG &DAG, const TargetLowering &TLI) {
  bool IsExp10 = false;
  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    // Only the exact constant base 10.0f gets the special expansion.
    if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
      APFloat Ten(10.0f);
      IsExp10 = LHSC->isExactlyValue(Ten);
    }
  }

  if (IsExp10) {
    // Put the exponent in the right bit position for later addition to the
    // final result:
    //
    //   #define LOG2OF10 3.3219281f
    //   IntegerPartOfX = (int32_t)(x * LOG2OF10);
    // 0x40549a78 is the f32 bit pattern of log2(10).
    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
                             getF32Constant(DAG, 0x40549a78));
    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);

    // FractionalPartOfX = x - (float)IntegerPartOfX;
    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);

    // IntegerPartOfX <<= 23;
    // Shift into the f32 exponent field for the final integer-domain add.
    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
                                 DAG.getConstant(23, TLI.getPointerTy()));

    // Same 2^frac(x) minimax polynomials as in expandExp2 above.
    SDValue TwoToFractionalPartOfX;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   twoToFractionalPartOfX =
      //     0.997535578f +
      //       (0.735607626f + 0.252464424f * x) * x;
      //
      // error 0.0144103317, which is 6 bits
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3e814304));
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3f3c50c8));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                           getF32Constant(DAG, 0x3f7f5e7e));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   TwoToFractionalPartOfX =
      //     0.999892986f +
      //       (0.696457318f +
      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
      //
      // error 0.000107046256, which is 13 to 14 bits
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3da235e3));
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3e65b8f3));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x3f324b07));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                           getF32Constant(DAG, 0x3f7ff8fd));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   TwoToFractionalPartOfX =
      //     0.999999982f +
      //       (0.693148872f +
      //         (0.240227044f +
      //           (0.554906021e-1f +
      //             (0.961591928e-2f +
      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
      // error 2.47208000*10^(-7), which is better than 18 bits
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3924b03e));
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3ab24b87));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x3c1d8c17));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x3d634a1d));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                               getF32Constant(DAG, 0x3e75fe14));
      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
                                getF32Constant(DAG, 0x3f317234));
      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
      TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
                                           getF32Constant(DAG, 0x3f800000));
    }

    // Fold the shifted integer part into the exponent field via integer add.
    SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX);
    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
                       DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   t13, IntegerPartOfX));
  }

  // No special expansion.
  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
}


/// ExpandPowI - Expand a llvm.powi intrinsic.
static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
                          SelectionDAG &DAG) {
  // If RHS is a constant, we can expand this out to a multiplication tree,
  // otherwise we end up lowering to a call to __powidf2 (for example). When
  // optimizing for size, we only want to do this if the expansion would produce
  // a small number of multiplies, otherwise we do the full expansion.
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    // Get the exponent as a positive value.
    unsigned Val = RHSC->getSExtValue();
    if ((int)Val < 0) Val = -Val;

    // powi(x, 0) -> 1.0
    if (Val == 0)
      return DAG.getConstantFP(1.0, LHS.getValueType());

    const Function *F = DAG.getMachineFunction().getFunction();
    if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                         Attribute::OptimizeForSize) ||
        // If optimizing for size, don't insert too many multiplies. This
        // inserts up to 5 multiplies.
        CountPopulation_32(Val)+Log2_32(Val) < 7) {
      // We use the simple binary decomposition method to generate the multiply
      // sequence.
      // There are more optimal ways to do this (for example,
      // powi(x,15) generates one more multiply than it should), but this has
      // the benefit of being both really simple and much better than a libcall.
      SDValue Res;  // Logically starts equal to 1.0
      SDValue CurSquare = LHS;
      // Square-and-multiply: CurSquare holds LHS^(2^i) on iteration i; each
      // set bit of Val multiplies it into the accumulated result.
      while (Val) {
        if (Val & 1) {
          if (Res.getNode())
            Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
          else
            Res = CurSquare;  // 1.0*CurSquare.
        }

        CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
                                CurSquare, CurSquare);
        Val >>= 1;
      }

      // If the original was negative, invert the result, producing 1/(x*x*x).
      if (RHSC->getSExtValue() < 0)
        Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
                          DAG.getConstantFP(1.0, LHS.getValueType()), Res);
      return Res;
    }
  }

  // Otherwise, expand to a libcall.
  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}

// getTruncatedArgReg - Find underlying register used for a truncated
// argument: peels TRUNCATE (AssertZext/AssertSext (CopyFromReg reg)) chains.
// Returns 0 when the pattern does not match.
static unsigned getTruncatedArgReg(const SDValue &N) {
  if (N.getOpcode() != ISD::TRUNCATE)
    return 0;

  const SDValue &Ext = N.getOperand(0);
  if (Ext.getOpcode() == ISD::AssertZext ||
      Ext.getOpcode() == ISD::AssertSext) {
    const SDValue &CFR = Ext.getOperand(0);
    // CopyFromReg's operand 1 is the register being copied from.
    if (CFR.getOpcode() == ISD::CopyFromReg)
      return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
    // Nested truncates: keep peeling.
    if (CFR.getOpcode() == ISD::TRUNCATE)
      return getTruncatedArgReg(CFR);
  }
  return 0;
}

/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
/// argument, create the corresponding DBG_VALUE machine instruction for it now.
/// At the end of instruction selection, they will be inserted to the entry BB.
bool
SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
                                              int64_t Offset, bool IsIndirect,
                                              const SDValue &N) {
  // Only IR-level function arguments are handled here.
  const Argument *Arg = dyn_cast<Argument>(V);
  if (!Arg)
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();

  // Ignore inlined function arguments here.
  DIVariable DV(Variable);
  if (DV.isInlinedFnArgument(MF.getFunction()))
    return false;

  // Try, in order: a recorded frame index, a (virtual or live-in physical)
  // register from N, the ValueMap, and finally a frame index under a load.
  Optional<MachineOperand> Op;
  // Some arguments' frame index is recorded during argument lowering.
  if (int FI = FuncInfo.getArgumentFrameIndex(Arg))
    Op = MachineOperand::CreateFI(FI);

  if (!Op && N.getNode()) {
    unsigned Reg;
    if (N.getOpcode() == ISD::CopyFromReg)
      Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
    else
      Reg = getTruncatedArgReg(N);
    if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      // Prefer the incoming physical register when the vreg is a live-in
      // copy; presumably this gives a better location in the entry block —
      // NOTE(review): confirm against how ArgDbgValues are inserted.
      unsigned PR = RegInfo.getLiveInPhysReg(Reg);
      if (PR)
        Reg = PR;
    }
    if (Reg)
      Op = MachineOperand::CreateReg(Reg, false);
  }

  if (!Op) {
    // Check if ValueMap has reg number.
    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
    if (VMI != FuncInfo.ValueMap.end())
      Op = MachineOperand::CreateReg(VMI->second, false);
  }

  if (!Op && N.getNode())
    // Check if frame index is available.
    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
      if (FrameIndexSDNode *FINode =
          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
        Op = MachineOperand::CreateFI(FINode->getIndex());

  // No usable location found; the caller must fall back to other means.
  if (!Op)
    return false;

  // Register locations use the dedicated DBG_VALUE register form; everything
  // else (e.g. frame indices) goes through the generic operand form.
  if (Op->isReg())
    FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(),
                                            TII->get(TargetOpcode::DBG_VALUE),
                                            IsIndirect,
                                            Op->getReg(), Offset, Variable));
  else
    FuncInfo.ArgDbgValues.push_back(
      BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE))
          .addOperand(*Op).addImm(Offset).addMetadata(Variable));

  return true;
}

// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
                         !defined(setjmp_undefined_for_msvc)
#  pragma push_macro("setjmp")
#  undef setjmp
#  define setjmp_undefined_for_msvc
#endif

/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
/// we want to emit this as a call to a named external function, return the name
/// otherwise lower it and return null.
const char *
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  SDLoc sdl = getCurSDLoc();
  DebugLoc dl = getCurDebugLoc();
  SDValue Res;

  switch (Intrinsic) {
  default:
    // By default, turn this into a target intrinsic node.
04684 visitTargetIntrinsic(I, Intrinsic); 04685 return nullptr; 04686 case Intrinsic::vastart: visitVAStart(I); return nullptr; 04687 case Intrinsic::vaend: visitVAEnd(I); return nullptr; 04688 case Intrinsic::vacopy: visitVACopy(I); return nullptr; 04689 case Intrinsic::returnaddress: 04690 setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(), 04691 getValue(I.getArgOperand(0)))); 04692 return nullptr; 04693 case Intrinsic::frameaddress: 04694 setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), 04695 getValue(I.getArgOperand(0)))); 04696 return nullptr; 04697 case Intrinsic::read_register: { 04698 Value *Reg = I.getArgOperand(0); 04699 SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg)); 04700 EVT VT = 04701 TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType()); 04702 setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); 04703 return nullptr; 04704 } 04705 case Intrinsic::write_register: { 04706 Value *Reg = I.getArgOperand(0); 04707 Value *RegValue = I.getArgOperand(1); 04708 SDValue Chain = getValue(RegValue).getOperand(0); 04709 SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg)); 04710 DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, 04711 RegName, getValue(RegValue))); 04712 return nullptr; 04713 } 04714 case Intrinsic::setjmp: 04715 return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; 04716 case Intrinsic::longjmp: 04717 return &"_longjmp"[!TLI->usesUnderscoreLongJmp()]; 04718 case Intrinsic::memcpy: { 04719 // Assert for address < 256 since we support only user defined address 04720 // spaces. 
04721 assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() 04722 < 256 && 04723 cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() 04724 < 256 && 04725 "Unknown address space"); 04726 SDValue Op1 = getValue(I.getArgOperand(0)); 04727 SDValue Op2 = getValue(I.getArgOperand(1)); 04728 SDValue Op3 = getValue(I.getArgOperand(2)); 04729 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 04730 if (!Align) 04731 Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. 04732 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 04733 DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, 04734 MachinePointerInfo(I.getArgOperand(0)), 04735 MachinePointerInfo(I.getArgOperand(1)))); 04736 return nullptr; 04737 } 04738 case Intrinsic::memset: { 04739 // Assert for address < 256 since we support only user defined address 04740 // spaces. 04741 assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() 04742 < 256 && 04743 "Unknown address space"); 04744 SDValue Op1 = getValue(I.getArgOperand(0)); 04745 SDValue Op2 = getValue(I.getArgOperand(1)); 04746 SDValue Op3 = getValue(I.getArgOperand(2)); 04747 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 04748 if (!Align) 04749 Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. 04750 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 04751 DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, 04752 MachinePointerInfo(I.getArgOperand(0)))); 04753 return nullptr; 04754 } 04755 case Intrinsic::memmove: { 04756 // Assert for address < 256 since we support only user defined address 04757 // spaces. 
04758 assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() 04759 < 256 && 04760 cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() 04761 < 256 && 04762 "Unknown address space"); 04763 SDValue Op1 = getValue(I.getArgOperand(0)); 04764 SDValue Op2 = getValue(I.getArgOperand(1)); 04765 SDValue Op3 = getValue(I.getArgOperand(2)); 04766 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 04767 if (!Align) 04768 Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. 04769 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 04770 DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, 04771 MachinePointerInfo(I.getArgOperand(0)), 04772 MachinePointerInfo(I.getArgOperand(1)))); 04773 return nullptr; 04774 } 04775 case Intrinsic::dbg_declare: { 04776 const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); 04777 MDNode *Variable = DI.getVariable(); 04778 const Value *Address = DI.getAddress(); 04779 DIVariable DIVar(Variable); 04780 assert((!DIVar || DIVar.isVariable()) && 04781 "Variable in DbgDeclareInst should be either null or a DIVariable."); 04782 if (!Address || !DIVar) { 04783 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 04784 return nullptr; 04785 } 04786 04787 // Check if address has undef value. 04788 if (isa<UndefValue>(Address) || 04789 (Address->use_empty() && !isa<Argument>(Address))) { 04790 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 04791 return nullptr; 04792 } 04793 04794 SDValue &N = NodeMap[Address]; 04795 if (!N.getNode() && isa<Argument>(Address)) 04796 // Check unused arguments map. 04797 N = UnusedArgNodeMap[Address]; 04798 SDDbgValue *SDV; 04799 if (N.getNode()) { 04800 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) 04801 Address = BCI->getOperand(0); 04802 // Parameters are handled specially. 
04803 bool isParameter = 04804 (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || 04805 isa<Argument>(Address)); 04806 04807 const AllocaInst *AI = dyn_cast<AllocaInst>(Address); 04808 04809 if (isParameter && !AI) { 04810 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); 04811 if (FINode) 04812 // Byval parameter. We have a frame index at this point. 04813 SDV = DAG.getFrameIndexDbgValue(Variable, FINode->getIndex(), 04814 0, dl, SDNodeOrder); 04815 else { 04816 // Address is an argument, so try to emit its dbg value using 04817 // virtual register info from the FuncInfo.ValueMap. 04818 EmitFuncArgumentDbgValue(Address, Variable, 0, false, N); 04819 return nullptr; 04820 } 04821 } else if (AI) 04822 SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), 04823 true, 0, dl, SDNodeOrder); 04824 else { 04825 // Can't do anything with other non-AI cases yet. 04826 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 04827 DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); 04828 DEBUG(Address->dump()); 04829 return nullptr; 04830 } 04831 DAG.AddDbgValue(SDV, N.getNode(), isParameter); 04832 } else { 04833 // If Address is an argument then try to emit its dbg value using 04834 // virtual register info from the FuncInfo.ValueMap. 04835 if (!EmitFuncArgumentDbgValue(Address, Variable, 0, false, N)) { 04836 // If variable is pinned by a alloca in dominating bb then 04837 // use StaticAllocaMap. 
04838 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { 04839 if (AI->getParent() != DI.getParent()) { 04840 DenseMap<const AllocaInst*, int>::iterator SI = 04841 FuncInfo.StaticAllocaMap.find(AI); 04842 if (SI != FuncInfo.StaticAllocaMap.end()) { 04843 SDV = DAG.getFrameIndexDbgValue(Variable, SI->second, 04844 0, dl, SDNodeOrder); 04845 DAG.AddDbgValue(SDV, nullptr, false); 04846 return nullptr; 04847 } 04848 } 04849 } 04850 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 04851 } 04852 } 04853 return nullptr; 04854 } 04855 case Intrinsic::dbg_value: { 04856 const DbgValueInst &DI = cast<DbgValueInst>(I); 04857 DIVariable DIVar(DI.getVariable()); 04858 assert((!DIVar || DIVar.isVariable()) && 04859 "Variable in DbgValueInst should be either null or a DIVariable."); 04860 if (!DIVar) 04861 return nullptr; 04862 04863 MDNode *Variable = DI.getVariable(); 04864 uint64_t Offset = DI.getOffset(); 04865 const Value *V = DI.getValue(); 04866 if (!V) 04867 return nullptr; 04868 04869 SDDbgValue *SDV; 04870 if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { 04871 SDV = DAG.getConstantDbgValue(Variable, V, Offset, dl, SDNodeOrder); 04872 DAG.AddDbgValue(SDV, nullptr, false); 04873 } else { 04874 // Do not use getValue() in here; we don't want to generate code at 04875 // this point if it hasn't been done yet. 04876 SDValue N = NodeMap[V]; 04877 if (!N.getNode() && isa<Argument>(V)) 04878 // Check unused arguments map. 04879 N = UnusedArgNodeMap[V]; 04880 if (N.getNode()) { 04881 // A dbg.value for an alloca is always indirect. 04882 bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; 04883 if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, N)) { 04884 SDV = DAG.getDbgValue(Variable, N.getNode(), 04885 N.getResNo(), IsIndirect, 04886 Offset, dl, SDNodeOrder); 04887 DAG.AddDbgValue(SDV, N.getNode(), false); 04888 } 04889 } else if (!V->use_empty() ) { 04890 // Do not call getValue(V) yet, as we don't want to generate code. 
04891 // Remember it for later. 04892 DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); 04893 DanglingDebugInfoMap[V] = DDI; 04894 } else { 04895 // We may expand this to cover more cases. One case where we have no 04896 // data available is an unreferenced parameter. 04897 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 04898 } 04899 } 04900 04901 // Build a debug info table entry. 04902 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) 04903 V = BCI->getOperand(0); 04904 const AllocaInst *AI = dyn_cast<AllocaInst>(V); 04905 // Don't handle byval struct arguments or VLAs, for example. 04906 if (!AI) { 04907 DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); 04908 DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); 04909 return nullptr; 04910 } 04911 DenseMap<const AllocaInst*, int>::iterator SI = 04912 FuncInfo.StaticAllocaMap.find(AI); 04913 if (SI == FuncInfo.StaticAllocaMap.end()) 04914 return nullptr; // VLAs. 04915 return nullptr; 04916 } 04917 04918 case Intrinsic::eh_typeid_for: { 04919 // Find the type id for the given typeinfo. 
04920 GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0)); 04921 unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); 04922 Res = DAG.getConstant(TypeID, MVT::i32); 04923 setValue(&I, Res); 04924 return nullptr; 04925 } 04926 04927 case Intrinsic::eh_return_i32: 04928 case Intrinsic::eh_return_i64: 04929 DAG.getMachineFunction().getMMI().setCallsEHReturn(true); 04930 DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, 04931 MVT::Other, 04932 getControlRoot(), 04933 getValue(I.getArgOperand(0)), 04934 getValue(I.getArgOperand(1)))); 04935 return nullptr; 04936 case Intrinsic::eh_unwind_init: 04937 DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); 04938 return nullptr; 04939 case Intrinsic::eh_dwarf_cfa: { 04940 SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, 04941 TLI->getPointerTy()); 04942 SDValue Offset = DAG.getNode(ISD::ADD, sdl, 04943 CfaArg.getValueType(), 04944 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, 04945 CfaArg.getValueType()), 04946 CfaArg); 04947 SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, 04948 TLI->getPointerTy(), 04949 DAG.getConstant(0, TLI->getPointerTy())); 04950 setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), 04951 FA, Offset)); 04952 return nullptr; 04953 } 04954 case Intrinsic::eh_sjlj_callsite: { 04955 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 04956 ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); 04957 assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); 04958 assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); 04959 04960 MMI.setCurrentCallSite(CI->getZExtValue()); 04961 return nullptr; 04962 } 04963 case Intrinsic::eh_sjlj_functioncontext: { 04964 // Get and store the index of the function context. 
04965 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 04966 AllocaInst *FnCtx = 04967 cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); 04968 int FI = FuncInfo.StaticAllocaMap[FnCtx]; 04969 MFI->setFunctionContextIndex(FI); 04970 return nullptr; 04971 } 04972 case Intrinsic::eh_sjlj_setjmp: { 04973 SDValue Ops[2]; 04974 Ops[0] = getRoot(); 04975 Ops[1] = getValue(I.getArgOperand(0)); 04976 SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, 04977 DAG.getVTList(MVT::i32, MVT::Other), Ops); 04978 setValue(&I, Op.getValue(0)); 04979 DAG.setRoot(Op.getValue(1)); 04980 return nullptr; 04981 } 04982 case Intrinsic::eh_sjlj_longjmp: { 04983 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, 04984 getRoot(), getValue(I.getArgOperand(0)))); 04985 return nullptr; 04986 } 04987 04988 case Intrinsic::x86_mmx_pslli_w: 04989 case Intrinsic::x86_mmx_pslli_d: 04990 case Intrinsic::x86_mmx_pslli_q: 04991 case Intrinsic::x86_mmx_psrli_w: 04992 case Intrinsic::x86_mmx_psrli_d: 04993 case Intrinsic::x86_mmx_psrli_q: 04994 case Intrinsic::x86_mmx_psrai_w: 04995 case Intrinsic::x86_mmx_psrai_d: { 04996 SDValue ShAmt = getValue(I.getArgOperand(1)); 04997 if (isa<ConstantSDNode>(ShAmt)) { 04998 visitTargetIntrinsic(I, Intrinsic); 04999 return nullptr; 05000 } 05001 unsigned NewIntrinsic = 0; 05002 EVT ShAmtVT = MVT::v2i32; 05003 switch (Intrinsic) { 05004 case Intrinsic::x86_mmx_pslli_w: 05005 NewIntrinsic = Intrinsic::x86_mmx_psll_w; 05006 break; 05007 case Intrinsic::x86_mmx_pslli_d: 05008 NewIntrinsic = Intrinsic::x86_mmx_psll_d; 05009 break; 05010 case Intrinsic::x86_mmx_pslli_q: 05011 NewIntrinsic = Intrinsic::x86_mmx_psll_q; 05012 break; 05013 case Intrinsic::x86_mmx_psrli_w: 05014 NewIntrinsic = Intrinsic::x86_mmx_psrl_w; 05015 break; 05016 case Intrinsic::x86_mmx_psrli_d: 05017 NewIntrinsic = Intrinsic::x86_mmx_psrl_d; 05018 break; 05019 case Intrinsic::x86_mmx_psrli_q: 05020 NewIntrinsic = Intrinsic::x86_mmx_psrl_q; 05021 break; 05022 case 
Intrinsic::x86_mmx_psrai_w: 05023 NewIntrinsic = Intrinsic::x86_mmx_psra_w; 05024 break; 05025 case Intrinsic::x86_mmx_psrai_d: 05026 NewIntrinsic = Intrinsic::x86_mmx_psra_d; 05027 break; 05028 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 05029 } 05030 05031 // The vector shift intrinsics with scalars uses 32b shift amounts but 05032 // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits 05033 // to be zero. 05034 // We must do this early because v2i32 is not a legal type. 05035 SDValue ShOps[2]; 05036 ShOps[0] = ShAmt; 05037 ShOps[1] = DAG.getConstant(0, MVT::i32); 05038 ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); 05039 EVT DestVT = TLI->getValueType(I.getType()); 05040 ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); 05041 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, 05042 DAG.getConstant(NewIntrinsic, MVT::i32), 05043 getValue(I.getArgOperand(0)), ShAmt); 05044 setValue(&I, Res); 05045 return nullptr; 05046 } 05047 case Intrinsic::x86_avx_vinsertf128_pd_256: 05048 case Intrinsic::x86_avx_vinsertf128_ps_256: 05049 case Intrinsic::x86_avx_vinsertf128_si_256: 05050 case Intrinsic::x86_avx2_vinserti128: { 05051 EVT DestVT = TLI->getValueType(I.getType()); 05052 EVT ElVT = TLI->getValueType(I.getArgOperand(1)->getType()); 05053 uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * 05054 ElVT.getVectorNumElements(); 05055 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, 05056 getValue(I.getArgOperand(0)), 05057 getValue(I.getArgOperand(1)), 05058 DAG.getConstant(Idx, TLI->getVectorIdxTy())); 05059 setValue(&I, Res); 05060 return nullptr; 05061 } 05062 case Intrinsic::x86_avx_vextractf128_pd_256: 05063 case Intrinsic::x86_avx_vextractf128_ps_256: 05064 case Intrinsic::x86_avx_vextractf128_si_256: 05065 case Intrinsic::x86_avx2_vextracti128: { 05066 EVT DestVT = TLI->getValueType(I.getType()); 05067 uint64_t Idx = 
(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * 05068 DestVT.getVectorNumElements(); 05069 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, 05070 getValue(I.getArgOperand(0)), 05071 DAG.getConstant(Idx, TLI->getVectorIdxTy())); 05072 setValue(&I, Res); 05073 return nullptr; 05074 } 05075 case Intrinsic::convertff: 05076 case Intrinsic::convertfsi: 05077 case Intrinsic::convertfui: 05078 case Intrinsic::convertsif: 05079 case Intrinsic::convertuif: 05080 case Intrinsic::convertss: 05081 case Intrinsic::convertsu: 05082 case Intrinsic::convertus: 05083 case Intrinsic::convertuu: { 05084 ISD::CvtCode Code = ISD::CVT_INVALID; 05085 switch (Intrinsic) { 05086 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 05087 case Intrinsic::convertff: Code = ISD::CVT_FF; break; 05088 case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; 05089 case Intrinsic::convertfui: Code = ISD::CVT_FU; break; 05090 case Intrinsic::convertsif: Code = ISD::CVT_SF; break; 05091 case Intrinsic::convertuif: Code = ISD::CVT_UF; break; 05092 case Intrinsic::convertss: Code = ISD::CVT_SS; break; 05093 case Intrinsic::convertsu: Code = ISD::CVT_SU; break; 05094 case Intrinsic::convertus: Code = ISD::CVT_US; break; 05095 case Intrinsic::convertuu: Code = ISD::CVT_UU; break; 05096 } 05097 EVT DestVT = TLI->getValueType(I.getType()); 05098 const Value *Op1 = I.getArgOperand(0); 05099 Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), 05100 DAG.getValueType(DestVT), 05101 DAG.getValueType(getValue(Op1).getValueType()), 05102 getValue(I.getArgOperand(1)), 05103 getValue(I.getArgOperand(2)), 05104 Code); 05105 setValue(&I, Res); 05106 return nullptr; 05107 } 05108 case Intrinsic::powi: 05109 setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), 05110 getValue(I.getArgOperand(1)), DAG)); 05111 return nullptr; 05112 case Intrinsic::log: 05113 setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); 05114 return nullptr; 05115 case 
Intrinsic::log2: 05116 setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); 05117 return nullptr; 05118 case Intrinsic::log10: 05119 setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); 05120 return nullptr; 05121 case Intrinsic::exp: 05122 setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); 05123 return nullptr; 05124 case Intrinsic::exp2: 05125 setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); 05126 return nullptr; 05127 case Intrinsic::pow: 05128 setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), 05129 getValue(I.getArgOperand(1)), DAG, *TLI)); 05130 return nullptr; 05131 case Intrinsic::sqrt: 05132 case Intrinsic::fabs: 05133 case Intrinsic::sin: 05134 case Intrinsic::cos: 05135 case Intrinsic::floor: 05136 case Intrinsic::ceil: 05137 case Intrinsic::trunc: 05138 case Intrinsic::rint: 05139 case Intrinsic::nearbyint: 05140 case Intrinsic::round: { 05141 unsigned Opcode; 05142 switch (Intrinsic) { 05143 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
05144 case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; 05145 case Intrinsic::fabs: Opcode = ISD::FABS; break; 05146 case Intrinsic::sin: Opcode = ISD::FSIN; break; 05147 case Intrinsic::cos: Opcode = ISD::FCOS; break; 05148 case Intrinsic::floor: Opcode = ISD::FFLOOR; break; 05149 case Intrinsic::ceil: Opcode = ISD::FCEIL; break; 05150 case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; 05151 case Intrinsic::rint: Opcode = ISD::FRINT; break; 05152 case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; 05153 case Intrinsic::round: Opcode = ISD::FROUND; break; 05154 } 05155 05156 setValue(&I, DAG.getNode(Opcode, sdl, 05157 getValue(I.getArgOperand(0)).getValueType(), 05158 getValue(I.getArgOperand(0)))); 05159 return nullptr; 05160 } 05161 case Intrinsic::copysign: 05162 setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, 05163 getValue(I.getArgOperand(0)).getValueType(), 05164 getValue(I.getArgOperand(0)), 05165 getValue(I.getArgOperand(1)))); 05166 return nullptr; 05167 case Intrinsic::fma: 05168 setValue(&I, DAG.getNode(ISD::FMA, sdl, 05169 getValue(I.getArgOperand(0)).getValueType(), 05170 getValue(I.getArgOperand(0)), 05171 getValue(I.getArgOperand(1)), 05172 getValue(I.getArgOperand(2)))); 05173 return nullptr; 05174 case Intrinsic::fmuladd: { 05175 EVT VT = TLI->getValueType(I.getType()); 05176 if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && 05177 TLI->isFMAFasterThanFMulAndFAdd(VT)) { 05178 setValue(&I, DAG.getNode(ISD::FMA, sdl, 05179 getValue(I.getArgOperand(0)).getValueType(), 05180 getValue(I.getArgOperand(0)), 05181 getValue(I.getArgOperand(1)), 05182 getValue(I.getArgOperand(2)))); 05183 } else { 05184 SDValue Mul = DAG.getNode(ISD::FMUL, sdl, 05185 getValue(I.getArgOperand(0)).getValueType(), 05186 getValue(I.getArgOperand(0)), 05187 getValue(I.getArgOperand(1))); 05188 SDValue Add = DAG.getNode(ISD::FADD, sdl, 05189 getValue(I.getArgOperand(0)).getValueType(), 05190 Mul, 05191 getValue(I.getArgOperand(2))); 05192 setValue(&I, Add); 05193 } 
05194 return nullptr; 05195 } 05196 case Intrinsic::convert_to_fp16: 05197 setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, 05198 DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, 05199 getValue(I.getArgOperand(0)), 05200 DAG.getTargetConstant(0, MVT::i32)))); 05201 return nullptr; 05202 case Intrinsic::convert_from_fp16: 05203 setValue(&I, 05204 DAG.getNode(ISD::FP_EXTEND, sdl, TLI->getValueType(I.getType()), 05205 DAG.getNode(ISD::BITCAST, sdl, MVT::f16, 05206 getValue(I.getArgOperand(0))))); 05207 return nullptr; 05208 case Intrinsic::pcmarker: { 05209 SDValue Tmp = getValue(I.getArgOperand(0)); 05210 DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); 05211 return nullptr; 05212 } 05213 case Intrinsic::readcyclecounter: { 05214 SDValue Op = getRoot(); 05215 Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, 05216 DAG.getVTList(MVT::i64, MVT::Other), Op); 05217 setValue(&I, Res); 05218 DAG.setRoot(Res.getValue(1)); 05219 return nullptr; 05220 } 05221 case Intrinsic::bswap: 05222 setValue(&I, DAG.getNode(ISD::BSWAP, sdl, 05223 getValue(I.getArgOperand(0)).getValueType(), 05224 getValue(I.getArgOperand(0)))); 05225 return nullptr; 05226 case Intrinsic::cttz: { 05227 SDValue Arg = getValue(I.getArgOperand(0)); 05228 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); 05229 EVT Ty = Arg.getValueType(); 05230 setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, 05231 sdl, Ty, Arg)); 05232 return nullptr; 05233 } 05234 case Intrinsic::ctlz: { 05235 SDValue Arg = getValue(I.getArgOperand(0)); 05236 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); 05237 EVT Ty = Arg.getValueType(); 05238 setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, 05239 sdl, Ty, Arg)); 05240 return nullptr; 05241 } 05242 case Intrinsic::ctpop: { 05243 SDValue Arg = getValue(I.getArgOperand(0)); 05244 EVT Ty = Arg.getValueType(); 05245 setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); 05246 return nullptr; 05247 } 05248 case Intrinsic::stacksave: { 05249 SDValue Op = getRoot(); 05250 Res = DAG.getNode(ISD::STACKSAVE, sdl, 05251 DAG.getVTList(TLI->getPointerTy(), MVT::Other), Op); 05252 setValue(&I, Res); 05253 DAG.setRoot(Res.getValue(1)); 05254 return nullptr; 05255 } 05256 case Intrinsic::stackrestore: { 05257 Res = getValue(I.getArgOperand(0)); 05258 DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); 05259 return nullptr; 05260 } 05261 case Intrinsic::stackprotector: { 05262 // Emit code into the DAG to store the stack guard onto the stack. 05263 MachineFunction &MF = DAG.getMachineFunction(); 05264 MachineFrameInfo *MFI = MF.getFrameInfo(); 05265 EVT PtrTy = TLI->getPointerTy(); 05266 SDValue Src, Chain = getRoot(); 05267 const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); 05268 const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); 05269 05270 // See if Ptr is a bitcast. If it is, look through it and see if we can get 05271 // global variable __stack_chk_guard. 05272 if (!GV) 05273 if (const Operator *BC = dyn_cast<Operator>(Ptr)) 05274 if (BC->getOpcode() == Instruction::BitCast) 05275 GV = dyn_cast<GlobalVariable>(BC->getOperand(0)); 05276 05277 if (GV && TLI->useLoadStackGuardNode()) { 05278 // Emit a LOAD_STACK_GUARD node. 
05279 MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, 05280 sdl, PtrTy, Chain); 05281 MachinePointerInfo MPInfo(GV); 05282 MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); 05283 unsigned Flags = MachineMemOperand::MOLoad | 05284 MachineMemOperand::MOInvariant; 05285 *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, 05286 PtrTy.getSizeInBits() / 8, 05287 DAG.getEVTAlignment(PtrTy)); 05288 Node->setMemRefs(MemRefs, MemRefs + 1); 05289 05290 // Copy the guard value to a virtual register so that it can be 05291 // retrieved in the epilogue. 05292 Src = SDValue(Node, 0); 05293 const TargetRegisterClass *RC = 05294 TLI->getRegClassFor(Src.getSimpleValueType()); 05295 unsigned Reg = MF.getRegInfo().createVirtualRegister(RC); 05296 05297 SPDescriptor.setGuardReg(Reg); 05298 Chain = DAG.getCopyToReg(Chain, sdl, Reg, Src); 05299 } else { 05300 Src = getValue(I.getArgOperand(0)); // The guard's value. 05301 } 05302 05303 AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); 05304 05305 int FI = FuncInfo.StaticAllocaMap[Slot]; 05306 MFI->setStackProtectorIndex(FI); 05307 05308 SDValue FIN = DAG.getFrameIndex(FI, PtrTy); 05309 05310 // Store the stack protector onto the stack. 05311 Res = DAG.getStore(Chain, sdl, Src, FIN, 05312 MachinePointerInfo::getFixedStack(FI), 05313 true, false, 0); 05314 setValue(&I, Res); 05315 DAG.setRoot(Res); 05316 return nullptr; 05317 } 05318 case Intrinsic::objectsize: { 05319 // If we don't know by now, we're never going to know. 
05320 ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); 05321 05322 assert(CI && "Non-constant type in __builtin_object_size?"); 05323 05324 SDValue Arg = getValue(I.getCalledValue()); 05325 EVT Ty = Arg.getValueType(); 05326 05327 if (CI->isZero()) 05328 Res = DAG.getConstant(-1ULL, Ty); 05329 else 05330 Res = DAG.getConstant(0, Ty); 05331 05332 setValue(&I, Res); 05333 return nullptr; 05334 } 05335 case Intrinsic::annotation: 05336 case Intrinsic::ptr_annotation: 05337 // Drop the intrinsic, but forward the value 05338 setValue(&I, getValue(I.getOperand(0))); 05339 return nullptr; 05340 case Intrinsic::assume: 05341 case Intrinsic::var_annotation: 05342 // Discard annotate attributes and assumptions 05343 return nullptr; 05344 05345 case Intrinsic::init_trampoline: { 05346 const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); 05347 05348 SDValue Ops[6]; 05349 Ops[0] = getRoot(); 05350 Ops[1] = getValue(I.getArgOperand(0)); 05351 Ops[2] = getValue(I.getArgOperand(1)); 05352 Ops[3] = getValue(I.getArgOperand(2)); 05353 Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); 05354 Ops[5] = DAG.getSrcValue(F); 05355 05356 Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); 05357 05358 DAG.setRoot(Res); 05359 return nullptr; 05360 } 05361 case Intrinsic::adjust_trampoline: { 05362 setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, 05363 TLI->getPointerTy(), 05364 getValue(I.getArgOperand(0)))); 05365 return nullptr; 05366 } 05367 case Intrinsic::gcroot: 05368 if (GFI) { 05369 const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); 05370 const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); 05371 05372 FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); 05373 GFI->addStackRoot(FI->getIndex(), TypeMap); 05374 } 05375 return nullptr; 05376 case Intrinsic::gcread: 05377 case Intrinsic::gcwrite: 05378 llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); 05379 case 
Intrinsic::flt_rounds: 05380 setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); 05381 return nullptr; 05382 05383 case Intrinsic::expect: { 05384 // Just replace __builtin_expect(exp, c) with EXP. 05385 setValue(&I, getValue(I.getArgOperand(0))); 05386 return nullptr; 05387 } 05388 05389 case Intrinsic::debugtrap: 05390 case Intrinsic::trap: { 05391 StringRef TrapFuncName = TM.Options.getTrapFunctionName(); 05392 if (TrapFuncName.empty()) { 05393 ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 05394 ISD::TRAP : ISD::DEBUGTRAP; 05395 DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); 05396 return nullptr; 05397 } 05398 TargetLowering::ArgListTy Args; 05399 05400 TargetLowering::CallLoweringInfo CLI(DAG); 05401 CLI.setDebugLoc(sdl).setChain(getRoot()) 05402 .setCallee(CallingConv::C, I.getType(), 05403 DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()), 05404 std::move(Args), 0); 05405 05406 std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); 05407 DAG.setRoot(Result.second); 05408 return nullptr; 05409 } 05410 05411 case Intrinsic::uadd_with_overflow: 05412 case Intrinsic::sadd_with_overflow: 05413 case Intrinsic::usub_with_overflow: 05414 case Intrinsic::ssub_with_overflow: 05415 case Intrinsic::umul_with_overflow: 05416 case Intrinsic::smul_with_overflow: { 05417 ISD::NodeType Op; 05418 switch (Intrinsic) { 05419 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
05420 case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; 05421 case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; 05422 case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; 05423 case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; 05424 case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; 05425 case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; 05426 } 05427 SDValue Op1 = getValue(I.getArgOperand(0)); 05428 SDValue Op2 = getValue(I.getArgOperand(1)); 05429 05430 SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); 05431 setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); 05432 return nullptr; 05433 } 05434 case Intrinsic::prefetch: { 05435 SDValue Ops[5]; 05436 unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); 05437 Ops[0] = getRoot(); 05438 Ops[1] = getValue(I.getArgOperand(0)); 05439 Ops[2] = getValue(I.getArgOperand(1)); 05440 Ops[3] = getValue(I.getArgOperand(2)); 05441 Ops[4] = getValue(I.getArgOperand(3)); 05442 DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, 05443 DAG.getVTList(MVT::Other), Ops, 05444 EVT::getIntegerVT(*Context, 8), 05445 MachinePointerInfo(I.getArgOperand(0)), 05446 0, /* align */ 05447 false, /* volatile */ 05448 rw==0, /* read */ 05449 rw==1)); /* write */ 05450 return nullptr; 05451 } 05452 case Intrinsic::lifetime_start: 05453 case Intrinsic::lifetime_end: { 05454 bool IsStart = (Intrinsic == Intrinsic::lifetime_start); 05455 // Stack coloring is not enabled in O0, discard region information. 05456 if (TM.getOptLevel() == CodeGenOpt::None) 05457 return nullptr; 05458 05459 SmallVector<Value *, 4> Allocas; 05460 GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); 05461 05462 for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), 05463 E = Allocas.end(); Object != E; ++Object) { 05464 AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); 05465 05466 // Could not find an Alloca. 
05467 if (!LifetimeObject) 05468 continue; 05469 05470 int FI = FuncInfo.StaticAllocaMap[LifetimeObject]; 05471 05472 SDValue Ops[2]; 05473 Ops[0] = getRoot(); 05474 Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); 05475 unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); 05476 05477 Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); 05478 DAG.setRoot(Res); 05479 } 05480 return nullptr; 05481 } 05482 case Intrinsic::invariant_start: 05483 // Discard region information. 05484 setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); 05485 return nullptr; 05486 case Intrinsic::invariant_end: 05487 // Discard region information. 05488 return nullptr; 05489 case Intrinsic::stackprotectorcheck: { 05490 // Do not actually emit anything for this basic block. Instead we initialize 05491 // the stack protector descriptor and export the guard variable so we can 05492 // access it in FinishBasicBlock. 05493 const BasicBlock *BB = I.getParent(); 05494 SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I); 05495 ExportFromCurrentBlock(SPDescriptor.getGuard()); 05496 05497 // Flush our exports since we are going to process a terminator. 
    // Flush PendingExports by materializing the control root before we hand
    // control to the terminator-processing code.
    (void)getControlRoot();
    return nullptr;
  }
  case Intrinsic::clear_cache:
    // Returning a non-null name tells the caller to lower this as a call to
    // the target's cache-clearing builtin function.
    return TLI->getClearCacheBuiltinName();
  case Intrinsic::donothing:
    // ignore
    return nullptr;
  case Intrinsic::experimental_stackmap: {
    visitStackmap(I);
    return nullptr;
  }
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64: {
    visitPatchpoint(I);
    return nullptr;
  }
  }
}

/// LowerCallTo - Lower an ordinary call (or the call part of an invoke) down
/// to target-specific DAG nodes via TargetLowering::LowerCallTo.
///
/// \param CS         The call/invoke being lowered.
/// \param Callee     Already-lowered SDValue for the callee.
/// \param isTailCall Caller's request for a tail call; may be cleared below if
///                   target-independent checks forbid it.
/// \param LandingPad Non-null iff CS is an invoke: EH begin/end labels are
///                   emitted around the call so MachineModuleInfo can map the
///                   try-range to the landing pad.
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                                      bool isTailCall,
                                      MachineBasicBlock *LandingPad) {
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  Type *RetTy = FTy->getReturnType();
  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
  MCSymbol *BeginLabel = nullptr;

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Args.reserve(CS.arg_size());

  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    const Value *V = *i;

    // Skip empty types
    if (V->getType()->isEmptyTy())
      continue;

    SDValue ArgNode = getValue(V);
    Entry.Node = ArgNode; Entry.Ty = V->getType();

    // Skip the first return-type Attribute to get to params.
    Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
    Args.push_back(Entry);
  }

  if (LandingPad) {
    // Insert a label before the invoke call to mark the try range. This can be
    // used to detect deletion of the invoke via the MachineModuleInfo.
    BeginLabel = MMI.getContext().CreateTempSymbol();

    // For SjLj, keep track of which landing pads go with which invokes
    // so as to maintain the ordering of pads in the LSDA.
    unsigned CallSiteIndex = MMI.getCurrentCallSite();
    if (CallSiteIndex) {
      MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
      LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);

      // Now that the call site is handled, stop tracking it.
      MMI.setCurrentCallSite(0);
    }

    // Both PendingLoads and PendingExports must be flushed here;
    // this call might not return.
    (void)getRoot();
    DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel));
  }

  // Check if target-independent constraints permit a tail call here.
  // Target-dependent constraints are checked within TLI->LowerCallTo.
  if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
    isTailCall = false;

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
    .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall);

  std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI);
  assert((isTailCall || Result.second.getNode()) &&
         "Non-null chain expected with non-tail call!");
  assert((Result.second.getNode() || !Result.first.getNode()) &&
         "Null value expected with tail call!");
  if (Result.first.getNode())
    setValue(CS.getInstruction(), Result.first);

  if (!Result.second.getNode()) {
    // As a special case, a null chain means that a tail call has been emitted
    // and the DAG root is already updated.
    HasTailCall = true;

    // Since there's no actual continuation from this block, nothing can be
    // relying on us setting vregs for them.
    PendingExports.clear();
  } else {
    DAG.setRoot(Result.second);
  }

  if (LandingPad) {
    // Insert a label at the end of the invoke call to mark the try range. This
    // can be used to detect deletion of the invoke via the MachineModuleInfo.
    MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
    DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));

    // Inform MachineModuleInfo of range.
    MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
  }
}

/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
/// value is equal or not-equal to zero.
static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
  // Every user must be an equality icmp whose second operand is the null
  // constant; any other user disqualifies V.
  for (const User *U : V->users()) {
    if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
      if (IC->isEquality())
        if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
          if (C->isNullValue())
            continue;
    // Unknown instruction.
    return false;
  }
  return true;
}

/// getMemCmpLoad - Produce the value loaded from PtrVal as type LoadVT, for
/// use when expanding a memcmp into direct loads and a compare.  Constant
/// folds the load when the pointer is a foldable constant (e.g. a string
/// literal); otherwise emits an actual load node.
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
                             Type *LoadTy,
                             SelectionDAGBuilder &Builder) {

  // Check to see if this load can be trivially constant folded, e.g. if the
  // input is from a string literal.
  if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
    // Cast pointer to the type we really want to load.
    LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
                                         PointerType::getUnqual(LoadTy));

    if (const Constant *LoadCst =
          ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
                                       Builder.DL))
      return Builder.getValue(LoadCst);
  }

  // Otherwise, we have to emit the load.  If the pointer is to unfoldable but
  // still constant memory, the input chain can be the entry node.
  SDValue Root;
  bool ConstantMemory = false;

  // Do not serialize (non-volatile) loads of constant memory with anything.
  if (Builder.AA->pointsToConstantMemory(PtrVal)) {
    Root = Builder.DAG.getEntryNode();
    ConstantMemory = true;
  } else {
    // Do not serialize non-volatile loads against each other.
05653 Root = Builder.DAG.getRoot(); 05654 } 05655 05656 SDValue Ptr = Builder.getValue(PtrVal); 05657 SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, 05658 Ptr, MachinePointerInfo(PtrVal), 05659 false /*volatile*/, 05660 false /*nontemporal*/, 05661 false /*isinvariant*/, 1 /* align=1 */); 05662 05663 if (!ConstantMemory) 05664 Builder.PendingLoads.push_back(LoadVal.getValue(1)); 05665 return LoadVal; 05666 } 05667 05668 /// processIntegerCallValue - Record the value for an instruction that 05669 /// produces an integer result, converting the type where necessary. 05670 void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, 05671 SDValue Value, 05672 bool IsSigned) { 05673 EVT VT = TM.getSubtargetImpl()->getTargetLowering()->getValueType(I.getType(), 05674 true); 05675 if (IsSigned) 05676 Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); 05677 else 05678 Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); 05679 setValue(&I, Value); 05680 } 05681 05682 /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. 05683 /// If so, return true and lower it, otherwise return false and it will be 05684 /// lowered like a normal call. 05685 bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { 05686 // Verify that the prototype makes sense. 
  // int memcmp(void*,void*,size_t)
  if (I.getNumArgOperands() != 3)
    return false;

  const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
  if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
      !I.getArgOperand(2)->getType()->isIntegerTy() ||
      !I.getType()->isIntegerTy())
    return false;

  const Value *Size = I.getArgOperand(2);
  const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
  // memcmp(x, y, 0) is 0 for any pointers: fold directly to the constant.
  if (CSize && CSize->getZExtValue() == 0) {
    EVT CallVT = TM.getSubtargetImpl()->getTargetLowering()->getValueType(
        I.getType(), true);
    setValue(&I, DAG.getConstant(0, CallVT));
    return true;
  }

  // First give the target a chance to emit its own inline memcmp sequence.
  const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res =
    TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
                                getValue(LHS), getValue(RHS), getValue(Size),
                                MachinePointerInfo(LHS),
                                MachinePointerInfo(RHS));
  if (Res.first.getNode()) {
    // memcmp returns a signed int, hence IsSigned=true.
    processIntegerCallValue(I, Res.first, true);
    PendingLoads.push_back(Res.second);
    return true;
  }

  // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
  // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
  if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) {
    bool ActuallyDoIt = true;
    MVT LoadVT;
    Type *LoadTy;
    switch (CSize->getZExtValue()) {
    default:
      LoadVT = MVT::Other;
      LoadTy = nullptr;
      ActuallyDoIt = false;
      break;
    case 2:
      LoadVT = MVT::i16;
      LoadTy = Type::getInt16Ty(CSize->getContext());
      break;
    case 4:
      LoadVT = MVT::i32;
      LoadTy = Type::getInt32Ty(CSize->getContext());
      break;
    case 8:
      LoadVT = MVT::i64;
      LoadTy = Type::getInt64Ty(CSize->getContext());
      break;
      /*
    case 16:
      LoadVT = MVT::v4i32;
      LoadTy = Type::getInt32Ty(CSize->getContext());
      LoadTy = VectorType::get(LoadTy, 4);
      break;
      */
    }

    // This turns into unaligned loads.  We only do this if the target natively
    // supports the MVT we'll be loading or if it is small enough (<= 4) that
    // we'll only produce a small number of byte loads.

    // Require that we can find a legal MVT, and only do this if the target
    // supports unaligned loads of that type.  Expanding into byte loads would
    // bloat the code.
    const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
    if (ActuallyDoIt && CSize->getZExtValue() > 4) {
      unsigned DstAS = LHS->getType()->getPointerAddressSpace();
      unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
      // TODO: Handle 5 byte compare as 4-byte + 1 byte.
      // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
      // TODO: Check alignment of src and dest ptrs.
      if (!TLI->isTypeLegal(LoadVT) ||
          !TLI->allowsMisalignedMemoryAccesses(LoadVT, SrcAS) ||
          !TLI->allowsMisalignedMemoryAccesses(LoadVT, DstAS))
        ActuallyDoIt = false;
    }

    if (ActuallyDoIt) {
      SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
      SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);

      // Only the zero/non-zero distinction is used by the callers (checked
      // above), so a plain SETNE is a valid stand-in for the full memcmp
      // result.  IsSigned=false: the i1 result is zero-extended.
      SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal,
                                 ISD::SETNE);
      processIntegerCallValue(I, Res, false);
      return true;
    }
  }


  return false;
}

/// visitMemChrCall -- See if we can lower a memchr call into an optimized
/// form.  If so, return true and lower it, otherwise return false and it
/// will be lowered like a normal call.
bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
  // Verify that the prototype makes sense.
  // void *memchr(void *, int, size_t)
  if (I.getNumArgOperands() != 3)
    return false;

  const Value *Src = I.getArgOperand(0);
  const Value *Char = I.getArgOperand(1);
  const Value *Length = I.getArgOperand(2);
  if (!Src->getType()->isPointerTy() ||
      !Char->getType()->isIntegerTy() ||
      !Length->getType()->isIntegerTy() ||
      !I.getType()->isPointerTy())
    return false;

  const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res =
    TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
                                getValue(Src), getValue(Char), getValue(Length),
                                MachinePointerInfo(Src));
  if (Res.first.getNode()) {
    // memchr returns a pointer, so no integer extension is needed; record
    // the value directly.
    setValue(&I, Res.first);
    PendingLoads.push_back(Res.second);
    return true;
  }

  return false;
}

/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an
/// optimized form.  If so, return true and lower it, otherwise return false
/// and it will be lowered like a normal call.
bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
  // Verify that the prototype makes sense.
  // char *strcpy(char *, char *)
  if (I.getNumArgOperands() != 2)
    return false;

  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
  if (!Arg0->getType()->isPointerTy() ||
      !Arg1->getType()->isPointerTy() ||
      !I.getType()->isPointerTy())
    return false;

  const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res =
    TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
                                getValue(Arg0), getValue(Arg1),
                                MachinePointerInfo(Arg0),
                                MachinePointerInfo(Arg1), isStpcpy);
  if (Res.first.getNode()) {
    setValue(&I, Res.first);
    // strcpy/stpcpy write memory, so the emitted sequence's chain becomes
    // the new root (unlike the read-only string routines below).
    DAG.setRoot(Res.second);
    return true;
  }

  return false;
}

/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form.
/// If so, return true and lower it, otherwise return false and it will be
/// lowered like a normal call.
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
  // Verify that the prototype makes sense.  int strcmp(void*,void*)
  if (I.getNumArgOperands() != 2)
    return false;

  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
  if (!Arg0->getType()->isPointerTy() ||
      !Arg1->getType()->isPointerTy() ||
      !I.getType()->isIntegerTy())
    return false;

  const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res =
    TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
                                getValue(Arg0), getValue(Arg1),
                                MachinePointerInfo(Arg0),
                                MachinePointerInfo(Arg1));
  if (Res.first.getNode()) {
    // strcmp returns a signed int, hence IsSigned=true.
    processIntegerCallValue(I, Res.first, true);
    PendingLoads.push_back(Res.second);
    return true;
  }

  return false;
}

/// visitStrLenCall -- See if we can lower a strlen call into an optimized
/// form.
/// If so, return true and lower it, otherwise return false and it
/// will be lowered like a normal call.
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
  // Verify that the prototype makes sense.  size_t strlen(char *)
  if (I.getNumArgOperands() != 1)
    return false;

  const Value *Arg0 = I.getArgOperand(0);
  if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy())
    return false;

  const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res =
    TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
                                getValue(Arg0), MachinePointerInfo(Arg0));
  if (Res.first.getNode()) {
    // strlen returns an unsigned size_t, hence IsSigned=false.
    processIntegerCallValue(I, Res.first, false);
    PendingLoads.push_back(Res.second);
    return true;
  }

  return false;
}

/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized
/// form.  If so, return true and lower it, otherwise return false and it
/// will be lowered like a normal call.
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
  // Verify that the prototype makes sense.
  // size_t strnlen(char *, size_t)
  if (I.getNumArgOperands() != 2)
    return false;

  const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
  if (!Arg0->getType()->isPointerTy() ||
      !Arg1->getType()->isIntegerTy() ||
      !I.getType()->isIntegerTy())
    return false;

  const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
  std::pair<SDValue, SDValue> Res =
    TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
                                 getValue(Arg0), getValue(Arg1),
                                 MachinePointerInfo(Arg0));
  if (Res.first.getNode()) {
    // strnlen returns an unsigned size_t, hence IsSigned=false.
    processIntegerCallValue(I, Res.first, false);
    PendingLoads.push_back(Res.second);
    return true;
  }

  return false;
}

/// visitUnaryFloatCall - If a call instruction is a unary floating-point
/// operation (as expected), translate it to an SDNode with the specified opcode
/// and return true.
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
                                              unsigned Opcode) {
  // Sanity check that it really is a unary floating-point call.  The call
  // must also be readonly (e.g. it may not set errno) to be safely replaced
  // by a side-effect-free DAG node.
  if (I.getNumArgOperands() != 1 ||
      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
      I.getType() != I.getArgOperand(0)->getType() ||
      !I.onlyReadsMemory())
    return false;

  SDValue Tmp = getValue(I.getArgOperand(0));
  setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp));
  return true;
}

void SelectionDAGBuilder::visitCall(const CallInst &I) {
  // Handle inline assembly differently.
  if (isa<InlineAsm>(I.getCalledValue())) {
    visitInlineAsm(&I);
    return;
  }

  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
  ComputeUsesVAFloatArgument(I, &MMI);

  // RenameFn semantics: visitIntrinsicCall returns null when it fully
  // lowered the call itself (we are done), or a symbol name the call should
  // be renamed to and emitted as an ordinary external call.
  const char *RenameFn = nullptr;
  if (Function *F = I.getCalledFunction()) {
    if (F->isDeclaration()) {
      // Target-specific intrinsics are tried first, then generic ones.
      if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
        if (unsigned IID = II->getIntrinsicID(F)) {
          RenameFn = visitIntrinsicCall(I, IID);
          if (!RenameFn)
            return;
        }
      }
      if (unsigned IID = F->getIntrinsicID()) {
        RenameFn = visitIntrinsicCall(I, IID);
        if (!RenameFn)
          return;
      }
    }

    // Check for well-known libc/libm calls.  If the function is internal, it
    // can't be a library call.
    LibFunc::Func Func;
    if (!F->hasLocalLinkage() && F->hasName() &&
        LibInfo->getLibFunc(F->getName(), Func) &&
        LibInfo->hasOptimizedCodeGen(Func)) {
      switch (Func) {
      default: break;
      case LibFunc::copysign:
      case LibFunc::copysignf:
      case LibFunc::copysignl:
        if (I.getNumArgOperands() == 2 &&   // Basic sanity checks.
            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
            I.getType() == I.getArgOperand(0)->getType() &&
            I.getType() == I.getArgOperand(1)->getType() &&
            I.onlyReadsMemory()) {
          SDValue LHS = getValue(I.getArgOperand(0));
          SDValue RHS = getValue(I.getArgOperand(1));
          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
                                   LHS.getValueType(), LHS, RHS));
          return;
        }
        break;
      case LibFunc::fabs:
      case LibFunc::fabsf:
      case LibFunc::fabsl:
        if (visitUnaryFloatCall(I, ISD::FABS))
          return;
        break;
      case LibFunc::sin:
      case LibFunc::sinf:
      case LibFunc::sinl:
        if (visitUnaryFloatCall(I, ISD::FSIN))
          return;
        break;
      case LibFunc::cos:
      case LibFunc::cosf:
      case LibFunc::cosl:
        if (visitUnaryFloatCall(I, ISD::FCOS))
          return;
        break;
      case LibFunc::sqrt:
      case LibFunc::sqrtf:
      case LibFunc::sqrtl:
      case LibFunc::sqrt_finite:
      case LibFunc::sqrtf_finite:
      case LibFunc::sqrtl_finite:
        if (visitUnaryFloatCall(I, ISD::FSQRT))
          return;
        break;
      case LibFunc::floor:
      case LibFunc::floorf:
      case LibFunc::floorl:
        if (visitUnaryFloatCall(I, ISD::FFLOOR))
          return;
        break;
      case LibFunc::nearbyint:
      case LibFunc::nearbyintf:
      case LibFunc::nearbyintl:
        if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
          return;
        break;
      case LibFunc::ceil:
      case LibFunc::ceilf:
      case LibFunc::ceill:
        if (visitUnaryFloatCall(I, ISD::FCEIL))
          return;
        break;
      case LibFunc::rint:
      case LibFunc::rintf:
      case LibFunc::rintl:
        if (visitUnaryFloatCall(I, ISD::FRINT))
          return;
        break;
      case LibFunc::round:
      case LibFunc::roundf:
      case LibFunc::roundl:
        if (visitUnaryFloatCall(I, ISD::FROUND))
          return;
        break;
      case LibFunc::trunc:
      case LibFunc::truncf:
      case LibFunc::truncl:
        if (visitUnaryFloatCall(I, ISD::FTRUNC))
          return;
        break;
      case LibFunc::log2:
      case LibFunc::log2f:
      case LibFunc::log2l:
        if (visitUnaryFloatCall(I, ISD::FLOG2))
          return;
        break;
      case LibFunc::exp2:
      case LibFunc::exp2f:
      case LibFunc::exp2l:
        if (visitUnaryFloatCall(I, ISD::FEXP2))
          return;
        break;
      case LibFunc::memcmp:
        if (visitMemCmpCall(I))
          return;
        break;
      case LibFunc::memchr:
        if (visitMemChrCall(I))
          return;
        break;
      case LibFunc::strcpy:
        if (visitStrCpyCall(I, false))
          return;
        break;
      case LibFunc::stpcpy:
        if (visitStrCpyCall(I, true))
          return;
        break;
      case LibFunc::strcmp:
        if (visitStrCmpCall(I))
          return;
        break;
      case LibFunc::strlen:
        if (visitStrLenCall(I))
          return;
        break;
      case LibFunc::strnlen:
        if (visitStrNLenCall(I))
          return;
        break;
      }
    }
  }

  SDValue Callee;
  if (!RenameFn)
    Callee = getValue(I.getCalledValue());
  else
    Callee = DAG.getExternalSymbol(
        RenameFn, TM.getSubtargetImpl()->getTargetLowering()->getPointerTy());

  // Check if we can potentially perform a tail call. More detailed checking
  // will be done within LowerCallTo, after more information about the call is
  // known.
  LowerCallTo(&I, Callee, I.isTailCall());
}

namespace {

/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
public:
  /// CallOperand - If this is the result output operand or a clobber
  /// this is null, otherwise it is the incoming operand to the CallInst.
  /// This gets modified as the asm is processed.
  SDValue CallOperand;

  /// AssignedRegs - If this is a register or register class operand, this
  /// contains the set of register corresponding to the operand.
  RegsForValue AssignedRegs;

  explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
    : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) {
  }

  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
  /// corresponds to.  If there is no Value* for this operand, it returns
  /// MVT::Other.
  EVT getCallOperandValEVT(LLVMContext &Context,
                           const TargetLowering &TLI,
                           const DataLayout *DL) const {
    if (!CallOperandVal) return MVT::Other;

    // BasicBlock operands (asm labels) are modeled as pointers.
    if (isa<BasicBlock>(CallOperandVal))
      return TLI.getPointerTy();

    llvm::Type *OpTy = CallOperandVal->getType();

    // FIXME: code duplicated from TargetLowering::ParseConstraints().
    // If this is an indirect operand, the operand is a pointer to the
    // accessed type.
    if (isIndirect) {
      llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
      if (!PtrTy)
        report_fatal_error("Indirect operand for inline asm not a pointer!");
      OpTy = PtrTy->getElementType();
    }

    // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
    if (StructType *STy = dyn_cast<StructType>(OpTy))
      if (STy->getNumElements() == 1)
        OpTy = STy->getElementType(0);

    // If OpTy is not a single value, it may be a struct/union that we
    // can tile with integers.  Only power-of-two sizes up to 128 bits are
    // retyped; anything else falls through with its original type.
    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
      unsigned BitSize = DL->getTypeSizeInBits(OpTy);
      switch (BitSize) {
      default: break;
      case 1:
      case 8:
      case 16:
      case 32:
      case 64:
      case 128:
        OpTy = IntegerType::get(Context, BitSize);
        break;
      }
    }

    return TLI.getValueType(OpTy, true);
  }
};

typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;

} // end anonymous namespace

/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand.  We prefer to assign virtual registers, to allow the
/// register allocator to handle the assignment process.  However, if the asm
/// uses features that we can't model on machineinstrs, we have SDISel do the
/// allocation.  This produces generally horrible, but correct, code.
///
///   OpInfo describes the operand.
///
static void GetRegistersForValue(SelectionDAG &DAG,
                                 const TargetLowering &TLI,
                                 SDLoc DL,
                                 SDISelAsmOperandInfo &OpInfo) {
  LLVMContext &Context = *DAG.getContext();

  MachineFunction &MF = DAG.getMachineFunction();
  SmallVector<unsigned, 4> Regs;

  // If this is a constraint for a single physreg, or a constraint for a
  // register class, find it.
  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
                                     OpInfo.ConstraintVT);

  unsigned NumRegs = 1;
  if (OpInfo.ConstraintVT != MVT::Other) {
    // If this is a FP input in an integer register (or vice versa) insert a
    // bit cast of the input value.  More generally, handle any case where the
    // input value disagrees with the register class we plan to stick this in.
    if (OpInfo.Type == InlineAsm::isInput &&
        PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
      // Try to convert to the first EVT that the reg class contains.  If the
      // types are identical size, use a bitcast to convert (e.g. two differing
      // vector types).
      MVT RegVT = *PhysReg.second->vt_begin();
      if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) {
        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
                                         RegVT, OpInfo.CallOperand);
        OpInfo.ConstraintVT = RegVT;
      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
        // If the input is a FP value and we want it in FP registers, do a
        // bitcast to the corresponding integer type.  This turns an f64 value
        // into i64, which can be passed with two i32 values on a 32-bit
        // machine.
        RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
                                         RegVT, OpInfo.CallOperand);
        OpInfo.ConstraintVT = RegVT;
      }
    }

    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
  }

  MVT RegVT;
  EVT ValueVT = OpInfo.ConstraintVT;

  // If this is a constraint for a specific physical register, like {r17},
  // assign it now.
  if (unsigned AssignedReg = PhysReg.first) {
    const TargetRegisterClass *RC = PhysReg.second;
    if (OpInfo.ConstraintVT == MVT::Other)
      ValueVT = *RC->vt_begin();

    // Get the actual register value type.  This is important, because the user
    // may have asked for (e.g.) the AX register in i32 type.  We need to
    // remember that AX is actually i16 to get the right extension.
    RegVT = *RC->vt_begin();

    // This is an explicit reference to a physical register.
    Regs.push_back(AssignedReg);

    // If this is an expanded reference, add the rest of the regs to Regs.
    // The extra registers are the ones that immediately follow AssignedReg
    // in the register class's ordering.
    if (NumRegs != 1) {
      TargetRegisterClass::iterator I = RC->begin();
      for (; *I != AssignedReg; ++I)
        assert(I != RC->end() && "Didn't find reg!");

      // Already added the first reg.
      --NumRegs; ++I;
      for (; NumRegs; --NumRegs, ++I) {
        assert(I != RC->end() && "Ran out of registers to allocate!");
        Regs.push_back(*I);
      }
    }

    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
    return;
  }

  // Otherwise, if this was a reference to an LLVM register class, create vregs
  // for this reference.
  if (const TargetRegisterClass *RC = PhysReg.second) {
    RegVT = *RC->vt_begin();
    if (OpInfo.ConstraintVT == MVT::Other)
      ValueVT = RegVT;

    // Create the appropriate number of virtual registers.
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    for (; NumRegs; --NumRegs)
      Regs.push_back(RegInfo.createVirtualRegister(RC));

    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
    return;
  }

  // Otherwise, we couldn't allocate enough registers for this.
}

/// visitInlineAsm - Handle a call to an InlineAsm object.
///
void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());

  /// ConstraintOperands - Information about all of the constraints.
  SDISelAsmOperandInfoVector ConstraintOperands;

  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  TargetLowering::AsmOperandInfoVector
    TargetConstraints = TLI->ParseConstraints(CS);

  bool hasMemory = false;

  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0;   // ResNo - The result number of the next output.
06309 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 06310 ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); 06311 SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); 06312 06313 MVT OpVT = MVT::Other; 06314 06315 // Compute the value type for each operand. 06316 switch (OpInfo.Type) { 06317 case InlineAsm::isOutput: 06318 // Indirect outputs just consume an argument. 06319 if (OpInfo.isIndirect) { 06320 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); 06321 break; 06322 } 06323 06324 // The return value of the call is this value. As such, there is no 06325 // corresponding argument. 06326 assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); 06327 if (StructType *STy = dyn_cast<StructType>(CS.getType())) { 06328 OpVT = TLI->getSimpleValueType(STy->getElementType(ResNo)); 06329 } else { 06330 assert(ResNo == 0 && "Asm only has one result!"); 06331 OpVT = TLI->getSimpleValueType(CS.getType()); 06332 } 06333 ++ResNo; 06334 break; 06335 case InlineAsm::isInput: 06336 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); 06337 break; 06338 case InlineAsm::isClobber: 06339 // Nothing to do. 06340 break; 06341 } 06342 06343 // If this is an input or an indirect output, process the call argument. 06344 // BasicBlocks are labels, currently appearing only in asm's. 06345 if (OpInfo.CallOperandVal) { 06346 if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { 06347 OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); 06348 } else { 06349 OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); 06350 } 06351 06352 OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL). 06353 getSimpleVT(); 06354 } 06355 06356 OpInfo.ConstraintVT = OpVT; 06357 06358 // Indirect operand accesses access memory. 
06359 if (OpInfo.isIndirect) 06360 hasMemory = true; 06361 else { 06362 for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { 06363 TargetLowering::ConstraintType 06364 CType = TLI->getConstraintType(OpInfo.Codes[j]); 06365 if (CType == TargetLowering::C_Memory) { 06366 hasMemory = true; 06367 break; 06368 } 06369 } 06370 } 06371 } 06372 06373 SDValue Chain, Flag; 06374 06375 // We won't need to flush pending loads if this asm doesn't touch 06376 // memory and is nonvolatile. 06377 if (hasMemory || IA->hasSideEffects()) 06378 Chain = getRoot(); 06379 else 06380 Chain = DAG.getRoot(); 06381 06382 // Second pass over the constraints: compute which constraint option to use 06383 // and assign registers to constraints that want a specific physreg. 06384 for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { 06385 SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; 06386 06387 // If this is an output operand with a matching input operand, look up the 06388 // matching input. If their types mismatch, e.g. one is an integer, the 06389 // other is floating point, or their sizes are different, flag it as an 06390 // error. 
06391 if (OpInfo.hasMatchingInput()) { 06392 SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; 06393 06394 if (OpInfo.ConstraintVT != Input.ConstraintVT) { 06395 std::pair<unsigned, const TargetRegisterClass*> MatchRC = 06396 TLI->getRegForInlineAsmConstraint(OpInfo.ConstraintCode, 06397 OpInfo.ConstraintVT); 06398 std::pair<unsigned, const TargetRegisterClass*> InputRC = 06399 TLI->getRegForInlineAsmConstraint(Input.ConstraintCode, 06400 Input.ConstraintVT); 06401 if ((OpInfo.ConstraintVT.isInteger() != 06402 Input.ConstraintVT.isInteger()) || 06403 (MatchRC.second != InputRC.second)) { 06404 report_fatal_error("Unsupported asm: input constraint" 06405 " with a matching output constraint of" 06406 " incompatible type!"); 06407 } 06408 Input.ConstraintVT = OpInfo.ConstraintVT; 06409 } 06410 } 06411 06412 // Compute the constraint code and ConstraintType to use. 06413 TLI->ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); 06414 06415 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 06416 OpInfo.Type == InlineAsm::isClobber) 06417 continue; 06418 06419 // If this is a memory input, and if the operand is not indirect, do what we 06420 // need to to provide an address for the memory input. 06421 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 06422 !OpInfo.isIndirect) { 06423 assert((OpInfo.isMultipleAlternative || 06424 (OpInfo.Type == InlineAsm::isInput)) && 06425 "Can only indirectify direct input operands!"); 06426 06427 // Memory operands really want the address of the value. If we don't have 06428 // an indirect input, put it in the constpool if we can, otherwise spill 06429 // it to a stack slot. 06430 // TODO: This isn't quite right. We need to handle these according to 06431 // the addressing mode that the constraint wants. Also, this may take 06432 // an additional register for the computation and we don't want that 06433 // either. 
06434 06435 // If the operand is a float, integer, or vector constant, spill to a 06436 // constant pool entry to get its address. 06437 const Value *OpVal = OpInfo.CallOperandVal; 06438 if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || 06439 isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { 06440 OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), 06441 TLI->getPointerTy()); 06442 } else { 06443 // Otherwise, create a stack slot and emit a store to it before the 06444 // asm. 06445 Type *Ty = OpVal->getType(); 06446 uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); 06447 unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(Ty); 06448 MachineFunction &MF = DAG.getMachineFunction(); 06449 int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); 06450 SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI->getPointerTy()); 06451 Chain = DAG.getStore(Chain, getCurSDLoc(), 06452 OpInfo.CallOperand, StackSlot, 06453 MachinePointerInfo::getFixedStack(SSFI), 06454 false, false, 0); 06455 OpInfo.CallOperand = StackSlot; 06456 } 06457 06458 // There is no longer a Value* corresponding to this operand. 06459 OpInfo.CallOperandVal = nullptr; 06460 06461 // It is now an indirect operand. 06462 OpInfo.isIndirect = true; 06463 } 06464 06465 // If this constraint is for a specific register, allocate it before 06466 // anything else. 06467 if (OpInfo.ConstraintType == TargetLowering::C_Register) 06468 GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); 06469 } 06470 06471 // Second pass - Loop over all of the operands, assigning virtual or physregs 06472 // to register class operands. 06473 for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { 06474 SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; 06475 06476 // C_Register operands have already been allocated, Other/Memory don't need 06477 // to be. 
06478 if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) 06479 GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); 06480 } 06481 06482 // AsmNodeOperands - The operands for the ISD::INLINEASM node. 06483 std::vector<SDValue> AsmNodeOperands; 06484 AsmNodeOperands.push_back(SDValue()); // reserve space for input chain 06485 AsmNodeOperands.push_back( 06486 DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), 06487 TLI->getPointerTy())); 06488 06489 // If we have a !srcloc metadata node associated with it, we want to attach 06490 // this to the ultimately generated inline asm machineinstr. To do this, we 06491 // pass in the third operand as this (potentially null) inline asm MDNode. 06492 const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); 06493 AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); 06494 06495 // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore 06496 // bits as operand 3. 06497 unsigned ExtraInfo = 0; 06498 if (IA->hasSideEffects()) 06499 ExtraInfo |= InlineAsm::Extra_HasSideEffects; 06500 if (IA->isAlignStack()) 06501 ExtraInfo |= InlineAsm::Extra_IsAlignStack; 06502 // Set the asm dialect. 06503 ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; 06504 06505 // Determine if this InlineAsm MayLoad or MayStore based on the constraints. 06506 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 06507 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 06508 06509 // Compute the constraint code and ConstraintType to use. 06510 TLI->ComputeConstraintToUse(OpInfo, SDValue()); 06511 06512 // Ideally, we would only check against memory constraints. However, the 06513 // meaning of an other constraint can be target-specific and we can't easily 06514 // reason about it. Therefore, be conservative and set MayLoad/MayStore 06515 // for other constriants as well. 
06516 if (OpInfo.ConstraintType == TargetLowering::C_Memory || 06517 OpInfo.ConstraintType == TargetLowering::C_Other) { 06518 if (OpInfo.Type == InlineAsm::isInput) 06519 ExtraInfo |= InlineAsm::Extra_MayLoad; 06520 else if (OpInfo.Type == InlineAsm::isOutput) 06521 ExtraInfo |= InlineAsm::Extra_MayStore; 06522 else if (OpInfo.Type == InlineAsm::isClobber) 06523 ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); 06524 } 06525 } 06526 06527 AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, 06528 TLI->getPointerTy())); 06529 06530 // Loop over all of the inputs, copying the operand values into the 06531 // appropriate registers and processing the output regs. 06532 RegsForValue RetValRegs; 06533 06534 // IndirectStoresToEmit - The set of stores to emit after the inline asm node. 06535 std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; 06536 06537 for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { 06538 SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; 06539 06540 switch (OpInfo.Type) { 06541 case InlineAsm::isOutput: { 06542 if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && 06543 OpInfo.ConstraintType != TargetLowering::C_Register) { 06544 // Memory output, or 'other' output (e.g. 'X' constraint). 06545 assert(OpInfo.isIndirect && "Memory output must be indirect operand"); 06546 06547 // Add information to the INLINEASM node to know about this output. 06548 unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); 06549 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, 06550 TLI->getPointerTy())); 06551 AsmNodeOperands.push_back(OpInfo.CallOperand); 06552 break; 06553 } 06554 06555 // Otherwise, this is a register or register class output. 06556 06557 // Copy the output from the appropriate register. Find a register that 06558 // we can use. 
06559 if (OpInfo.AssignedRegs.Regs.empty()) { 06560 LLVMContext &Ctx = *DAG.getContext(); 06561 Ctx.emitError(CS.getInstruction(), 06562 "couldn't allocate output register for constraint '" + 06563 Twine(OpInfo.ConstraintCode) + "'"); 06564 return; 06565 } 06566 06567 // If this is an indirect operand, store through the pointer after the 06568 // asm. 06569 if (OpInfo.isIndirect) { 06570 IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, 06571 OpInfo.CallOperandVal)); 06572 } else { 06573 // This is the result value of the call. 06574 assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); 06575 // Concatenate this output onto the outputs list. 06576 RetValRegs.append(OpInfo.AssignedRegs); 06577 } 06578 06579 // Add information to the INLINEASM node to know that this register is 06580 // set. 06581 OpInfo.AssignedRegs 06582 .AddInlineAsmOperands(OpInfo.isEarlyClobber 06583 ? InlineAsm::Kind_RegDefEarlyClobber 06584 : InlineAsm::Kind_RegDef, 06585 false, 0, DAG, AsmNodeOperands); 06586 break; 06587 } 06588 case InlineAsm::isInput: { 06589 SDValue InOperandVal = OpInfo.CallOperand; 06590 06591 if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? 06592 // If this is required to match an output register we have already set, 06593 // just use its register. 06594 unsigned OperandNo = OpInfo.getMatchedOperand(); 06595 06596 // Scan until we find the definition we already emitted of this operand. 06597 // When we find it, create a RegsForValue operand. 06598 unsigned CurOp = InlineAsm::Op_FirstOperand; 06599 for (; OperandNo; --OperandNo) { 06600 // Advance to the next operand. 
06601 unsigned OpFlag = 06602 cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); 06603 assert((InlineAsm::isRegDefKind(OpFlag) || 06604 InlineAsm::isRegDefEarlyClobberKind(OpFlag) || 06605 InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); 06606 CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; 06607 } 06608 06609 unsigned OpFlag = 06610 cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); 06611 if (InlineAsm::isRegDefKind(OpFlag) || 06612 InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { 06613 // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 06614 if (OpInfo.isIndirect) { 06615 // This happens on gcc/testsuite/gcc.dg/pr8788-1.c 06616 LLVMContext &Ctx = *DAG.getContext(); 06617 Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" 06618 " don't know how to handle tied " 06619 "indirect register inputs"); 06620 return; 06621 } 06622 06623 RegsForValue MatchedRegs; 06624 MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); 06625 MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); 06626 MatchedRegs.RegVTs.push_back(RegVT); 06627 MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); 06628 for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); 06629 i != e; ++i) { 06630 if (const TargetRegisterClass *RC = TLI->getRegClassFor(RegVT)) 06631 MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); 06632 else { 06633 LLVMContext &Ctx = *DAG.getContext(); 06634 Ctx.emitError(CS.getInstruction(), 06635 "inline asm error: This value" 06636 " type register class is not natively supported!"); 06637 return; 06638 } 06639 } 06640 // Use the produced MatchedRegs object to 06641 MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), 06642 Chain, &Flag, CS.getInstruction()); 06643 MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, 06644 true, OpInfo.getMatchedOperand(), 06645 DAG, AsmNodeOperands); 06646 break; 06647 } 06648 06649 assert(InlineAsm::isMemKind(OpFlag) && 
"Unknown matching constraint!"); 06650 assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && 06651 "Unexpected number of operands"); 06652 // Add information to the INLINEASM node to know about this input. 06653 // See InlineAsm.h isUseOperandTiedToDef. 06654 OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, 06655 OpInfo.getMatchedOperand()); 06656 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, 06657 TLI->getPointerTy())); 06658 AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); 06659 break; 06660 } 06661 06662 // Treat indirect 'X' constraint as memory. 06663 if (OpInfo.ConstraintType == TargetLowering::C_Other && 06664 OpInfo.isIndirect) 06665 OpInfo.ConstraintType = TargetLowering::C_Memory; 06666 06667 if (OpInfo.ConstraintType == TargetLowering::C_Other) { 06668 std::vector<SDValue> Ops; 06669 TLI->LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, 06670 Ops, DAG); 06671 if (Ops.empty()) { 06672 LLVMContext &Ctx = *DAG.getContext(); 06673 Ctx.emitError(CS.getInstruction(), 06674 "invalid operand for inline asm constraint '" + 06675 Twine(OpInfo.ConstraintCode) + "'"); 06676 return; 06677 } 06678 06679 // Add information to the INLINEASM node to know about this input. 06680 unsigned ResOpType = 06681 InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); 06682 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 06683 TLI->getPointerTy())); 06684 AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); 06685 break; 06686 } 06687 06688 if (OpInfo.ConstraintType == TargetLowering::C_Memory) { 06689 assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); 06690 assert(InOperandVal.getValueType() == TLI->getPointerTy() && 06691 "Memory operands expect pointer values"); 06692 06693 // Add information to the INLINEASM node to know about this input. 
06694 unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); 06695 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 06696 TLI->getPointerTy())); 06697 AsmNodeOperands.push_back(InOperandVal); 06698 break; 06699 } 06700 06701 assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || 06702 OpInfo.ConstraintType == TargetLowering::C_Register) && 06703 "Unknown constraint type!"); 06704 06705 // TODO: Support this. 06706 if (OpInfo.isIndirect) { 06707 LLVMContext &Ctx = *DAG.getContext(); 06708 Ctx.emitError(CS.getInstruction(), 06709 "Don't know how to handle indirect register inputs yet " 06710 "for constraint '" + 06711 Twine(OpInfo.ConstraintCode) + "'"); 06712 return; 06713 } 06714 06715 // Copy the input into the appropriate registers. 06716 if (OpInfo.AssignedRegs.Regs.empty()) { 06717 LLVMContext &Ctx = *DAG.getContext(); 06718 Ctx.emitError(CS.getInstruction(), 06719 "couldn't allocate input reg for constraint '" + 06720 Twine(OpInfo.ConstraintCode) + "'"); 06721 return; 06722 } 06723 06724 OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), 06725 Chain, &Flag, CS.getInstruction()); 06726 06727 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, 06728 DAG, AsmNodeOperands); 06729 break; 06730 } 06731 case InlineAsm::isClobber: { 06732 // Add the clobbered value to the operand list, so that the register 06733 // allocator is aware that the physreg got clobbered. 06734 if (!OpInfo.AssignedRegs.Regs.empty()) 06735 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, 06736 false, 0, DAG, 06737 AsmNodeOperands); 06738 break; 06739 } 06740 } 06741 } 06742 06743 // Finish up input operands. Set the input chain and add the flag last. 
06744 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 06745 if (Flag.getNode()) AsmNodeOperands.push_back(Flag); 06746 06747 Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), 06748 DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 06749 Flag = Chain.getValue(1); 06750 06751 // If this asm returns a register value, copy the result from that register 06752 // and set it as the value of the call. 06753 if (!RetValRegs.Regs.empty()) { 06754 SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), 06755 Chain, &Flag, CS.getInstruction()); 06756 06757 // FIXME: Why don't we do this for inline asms with MRVs? 06758 if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { 06759 EVT ResultType = TLI->getValueType(CS.getType()); 06760 06761 // If any of the results of the inline asm is a vector, it may have the 06762 // wrong width/num elts. This can happen for register classes that can 06763 // contain multiple different value types. The preg or vreg allocated may 06764 // not have the same VT as was expected. Convert it to the right type 06765 // with bit_convert. 06766 if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { 06767 Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), 06768 ResultType, Val); 06769 06770 } else if (ResultType != Val.getValueType() && 06771 ResultType.isInteger() && Val.getValueType().isInteger()) { 06772 // If a result value was tied to an input value, the computed result may 06773 // have a wider width than the expected result. Extract the relevant 06774 // portion. 06775 Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val); 06776 } 06777 06778 assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); 06779 } 06780 06781 setValue(CS.getInstruction(), Val); 06782 // Don't need to use this as a chain in this case. 
    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
      return;
  }

  std::vector<std::pair<SDValue, const Value *> > StoresToEmit;

  // Process indirect outputs, first output all of the flagged copies out of
  // physregs.
  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
    const Value *Ptr = IndirectStoresToEmit[i].second;
    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
                                             Chain, &Flag, IA);
    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
  }

  // Emit the non-flagged stores from the physregs.
  SmallVector<SDValue, 8> OutChains;
  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
    SDValue Val = DAG.getStore(Chain, getCurSDLoc(),
                               StoresToEmit[i].first,
                               getValue(StoresToEmit[i].second),
                               MachinePointerInfo(StoresToEmit[i].second),
                               false, false, 0);
    OutChains.push_back(Val);
  }

  // Merge the independent store chains into a single TokenFactor so the asm's
  // memory effects are ordered before anything that depends on the root.
  if (!OutChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);

  DAG.setRoot(Chain);
}

/// visitVAStart - Lower a call to llvm.va_start into an ISD::VASTART node
/// chained onto the current DAG root. Operand 0 is the va_list pointer.
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
  DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
                          MVT::Other, getRoot(),
                          getValue(I.getArgOperand(0)),
                          DAG.getSrcValue(I.getArgOperand(0))));
}

/// visitVAArg - Lower a va_arg instruction into an ISD::VAARG node. The node
/// produces the loaded value (result 0) and an updated chain (result 1);
/// both are recorded here.
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  const DataLayout &DL = *TLI->getDataLayout();
  SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(),
                           getRoot(), getValue(I.getOperand(0)),
                           DAG.getSrcValue(I.getOperand(0)),
                           DL.getABITypeAlignment(I.getType()));
  setValue(&I, V);
  DAG.setRoot(V.getValue(1));
}

/// visitVAEnd - Lower a call to llvm.va_end into an ISD::VAEND node.
void SelectionDAGBuilder::visitVAEnd(const CallInst &I)
{ 06835 DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(), 06836 MVT::Other, getRoot(), 06837 getValue(I.getArgOperand(0)), 06838 DAG.getSrcValue(I.getArgOperand(0)))); 06839 } 06840 06841 void SelectionDAGBuilder::visitVACopy(const CallInst &I) { 06842 DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(), 06843 MVT::Other, getRoot(), 06844 getValue(I.getArgOperand(0)), 06845 getValue(I.getArgOperand(1)), 06846 DAG.getSrcValue(I.getArgOperand(0)), 06847 DAG.getSrcValue(I.getArgOperand(1)))); 06848 } 06849 06850 /// \brief Lower an argument list according to the target calling convention. 06851 /// 06852 /// \return A tuple of <return-value, token-chain> 06853 /// 06854 /// This is a helper for lowering intrinsics that follow a target calling 06855 /// convention or require stack pointer adjustment. Only a subset of the 06856 /// intrinsic's operands need to participate in the calling convention. 06857 std::pair<SDValue, SDValue> 06858 SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, 06859 unsigned NumArgs, SDValue Callee, 06860 bool useVoidTy) { 06861 TargetLowering::ArgListTy Args; 06862 Args.reserve(NumArgs); 06863 06864 // Populate the argument list. 06865 // Attributes for args start at offset 1, after the return attribute. 06866 ImmutableCallSite CS(&CI); 06867 for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; 06868 ArgI != ArgE; ++ArgI) { 06869 const Value *V = CI.getOperand(ArgI); 06870 06871 assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); 06872 06873 TargetLowering::ArgListEntry Entry; 06874 Entry.Node = getValue(V); 06875 Entry.Ty = V->getType(); 06876 Entry.setAttributes(&CS, AttrI); 06877 Args.push_back(Entry); 06878 } 06879 06880 Type *retTy = useVoidTy ? 
Type::getVoidTy(*DAG.getContext()) : CI.getType(); 06881 TargetLowering::CallLoweringInfo CLI(DAG); 06882 CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) 06883 .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) 06884 .setDiscardResult(!CI.use_empty()); 06885 06886 const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); 06887 return TLI->LowerCallTo(CLI); 06888 } 06889 06890 /// \brief Add a stack map intrinsic call's live variable operands to a stackmap 06891 /// or patchpoint target node's operand list. 06892 /// 06893 /// Constants are converted to TargetConstants purely as an optimization to 06894 /// avoid constant materialization and register allocation. 06895 /// 06896 /// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not 06897 /// generate addess computation nodes, and so ExpandISelPseudo can convert the 06898 /// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids 06899 /// address materialization and register allocation, but may also be required 06900 /// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an 06901 /// alloca in the entry block, then the runtime may assume that the alloca's 06902 /// StackMap location can be read immediately after compilation and that the 06903 /// location is valid at any point during execution (this is similar to the 06904 /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were 06905 /// only available in a register, then the runtime would need to trap when 06906 /// execution reaches the StackMap in order to read the alloca's location. 
06907 static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx, 06908 SmallVectorImpl<SDValue> &Ops, 06909 SelectionDAGBuilder &Builder) { 06910 for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) { 06911 SDValue OpVal = Builder.getValue(CI.getArgOperand(i)); 06912 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { 06913 Ops.push_back( 06914 Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); 06915 Ops.push_back( 06916 Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); 06917 } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { 06918 const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); 06919 Ops.push_back( 06920 Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); 06921 } else 06922 Ops.push_back(OpVal); 06923 } 06924 } 06925 06926 /// \brief Lower llvm.experimental.stackmap directly to its target opcode. 06927 void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { 06928 // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, 06929 // [live variables...]) 06930 06931 assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); 06932 06933 SDValue Chain, InFlag, Callee, NullPtr; 06934 SmallVector<SDValue, 32> Ops; 06935 06936 SDLoc DL = getCurSDLoc(); 06937 Callee = getValue(CI.getCalledValue()); 06938 NullPtr = DAG.getIntPtrConstant(0, true); 06939 06940 // The stackmap intrinsic only records the live variables (the arguemnts 06941 // passed to it) and emits NOPS (if requested). Unlike the patchpoint 06942 // intrinsic, this won't be lowered to a function call. This means we don't 06943 // have to worry about calling conventions and target specific lowering code. 06944 // Instead we perform the call lowering right here. 
06945 // 06946 // chain, flag = CALLSEQ_START(chain, 0) 06947 // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) 06948 // chain, flag = CALLSEQ_END(chain, 0, 0, flag) 06949 // 06950 Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); 06951 InFlag = Chain.getValue(1); 06952 06953 // Add the <id> and <numBytes> constants. 06954 SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); 06955 Ops.push_back(DAG.getTargetConstant( 06956 cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); 06957 SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); 06958 Ops.push_back(DAG.getTargetConstant( 06959 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); 06960 06961 // Push live variables for the stack map. 06962 addStackMapLiveVars(CI, 2, Ops, *this); 06963 06964 // We are not pushing any register mask info here on the operands list, 06965 // because the stackmap doesn't clobber anything. 06966 06967 // Push the chain and the glue flag. 06968 Ops.push_back(Chain); 06969 Ops.push_back(InFlag); 06970 06971 // Create the STACKMAP node. 06972 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 06973 SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops); 06974 Chain = SDValue(SM, 0); 06975 InFlag = Chain.getValue(1); 06976 06977 Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); 06978 06979 // Stackmaps don't generate values, so nothing goes into the NodeMap. 06980 06981 // Set the root to the target-lowered call chain. 06982 DAG.setRoot(Chain); 06983 06984 // Inform the Frame Information that we have a stackmap in this function. 06985 FuncInfo.MF->getFrameInfo()->setHasStackMap(); 06986 } 06987 06988 /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. 
void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
  // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
  //                                                 i32 <numBytes>,
  //                                                 i8* <target>,
  //                                                 i32 <numArgs>,
  //                                                 [Args...],
  //                                                 [live variables...])

  CallingConv::ID CC = CI.getCallingConv();
  bool isAnyRegCC = CC == CallingConv::AnyReg;
  bool hasDef = !CI.getType()->isVoidTy();
  SDValue Callee = getValue(CI.getOperand(2)); // <target>

  // Get the real number of arguments participating in the call <numArgs>
  SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos));
  unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();

  // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
  // Intrinsics include all meta-operands up to but not including CC.
  unsigned NumMetaOpers = PatchPointOpers::CCPos;
  assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs &&
         "Not enough arguments provided to the patchpoint intrinsic");

  // For AnyRegCC the arguments are lowered later on manually.
  unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs;
  std::pair<SDValue, SDValue> Result =
    LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC);

  // Set the root to the target-lowered call chain.
  SDValue Chain = Result.second;
  DAG.setRoot(Chain);

  // Skip over the CopyFromReg inserted for a used result so we land on the
  // CALLSEQ_END. NOTE(review): only one CopyFromReg is skipped here —
  // presumably results always fit one register at this point; confirm.
  SDNode *CallEnd = Chain.getNode();
  if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
    CallEnd = CallEnd->getOperand(0).getNode();

  /// Get a call instruction from the call sequence chain.
  /// Tail calls are not allowed.
  assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
         "Expected a callseq node.");
  SDNode *Call = CallEnd->getOperand(0).getNode();
  bool hasGlue = Call->getGluedNode();

  // Replace the target specific call node with the patchable intrinsic.
  SmallVector<SDValue, 8> Ops;

  // Add the <id> and <numBytes> constants.
  SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
  Ops.push_back(DAG.getTargetConstant(
                  cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64));
  SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
  Ops.push_back(DAG.getTargetConstant(
                  cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));

  // Assume that the Callee is a constant address.
  // FIXME: handle function symbols in the future.
  Ops.push_back(
    DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(),
                          /*isTarget=*/true));

  // Adjust <numArgs> to account for any arguments that have been passed on the
  // stack instead.
  // Call Node: Chain, Target, {Args}, RegMask, [Glue]
  unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3);
  NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs;
  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));

  // Add the calling convention
  Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32));

  // Add the arguments we omitted previously. The register allocator should
  // place these in any free register.
  if (isAnyRegCC)
    for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
      Ops.push_back(getValue(CI.getArgOperand(i)));

  // Push the arguments from the call instruction up to the register mask.
  SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1;
  for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i)
    Ops.push_back(*i);

  // Push live variables for the stack map.
  addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this);

  // Push the register mask info.
  if (hasGlue)
    Ops.push_back(*(Call->op_end()-2));
  else
    Ops.push_back(*(Call->op_end()-1));

  // Push the chain (this is originally the first operand of the call, but
  // becomes now the last or second to last operand).
  Ops.push_back(*(Call->op_begin()));

  // Push the glue flag (last operand).
  if (hasGlue)
    Ops.push_back(*(Call->op_end()-1));

  SDVTList NodeTys;
  if (isAnyRegCC && hasDef) {
    // Create the return types based on the intrinsic definition
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    SmallVector<EVT, 3> ValueVTs;
    ComputeValueVTs(TLI, CI.getType(), ValueVTs);
    assert(ValueVTs.size() == 1 && "Expected only one return value type.");

    // There is always a chain and a glue type at the end
    ValueVTs.push_back(MVT::Other);
    ValueVTs.push_back(MVT::Glue);
    NodeTys = DAG.getVTList(ValueVTs);
  } else
    NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  // Replace the target specific call node with a PATCHPOINT node.
  MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
                                         getCurSDLoc(), NodeTys, Ops);

  // Update the NodeMap.
  if (hasDef) {
    if (isAnyRegCC)
      setValue(&CI, SDValue(MN, 0));
    else
      setValue(&CI, Result.first);
  }

  // Fixup the consumers of the intrinsic. The chain and glue may be used in the
  // call sequence. Furthermore the location of the chain and glue can change
  // when the AnyReg calling convention is used and the intrinsic returns a
  // value.
  if (isAnyRegCC && hasDef) {
    SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
    SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  } else
    DAG.ReplaceAllUsesWith(Call, MN);
  DAG.DeleteNode(Call);

  // Inform the Frame Information that we have a patchpoint in this function.
  FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
}

/// Returns an AttributeSet representing the attributes applied to the return
/// value of the given call.
static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
  SmallVector<Attribute::AttrKind, 2> Attrs;
  if (CLI.RetSExt)
    Attrs.push_back(Attribute::SExt);
  if (CLI.RetZExt)
    Attrs.push_back(Attribute::ZExt);
  if (CLI.IsInReg)
    Attrs.push_back(Attribute::InReg);

  return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
                           Attrs);
}

/// TargetLowering::LowerCallTo - This is the default LowerCallTo
/// implementation, which just calls LowerCall.
/// FIXME: When all targets are
/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
  // Handle the incoming return values from the call.
  CLI.Ins.clear();
  Type *OrigRetTy = CLI.RetTy;
  SmallVector<EVT, 4> RetTys;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets);

  SmallVector<ISD::OutputArg, 4> Outs;
  GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this);

  bool CanLowerReturn =
      this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
                           CLI.IsVarArg, Outs, CLI.RetTy->getContext());

  SDValue DemoteStackSlot;
  // Sentinel value; only meaningful when !CanLowerReturn below.
  int DemoteStackIdx = -100;
  if (!CanLowerReturn) {
    // Demote the return value: the caller passes a hidden sret pointer to a
    // stack slot and the result is loaded back out after the call.
    // FIXME: equivalent assert?
    // assert(!CS.hasInAllocaArgument() &&
    //        "sret demotion is incompatible with inalloca");
    uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy);
    unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy);
    MachineFunction &MF = CLI.DAG.getMachineFunction();
    DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
    Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);

    DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy());
    ArgListEntry Entry;
    Entry.Node = DemoteStackSlot;
    Entry.Ty = StackSlotPtrType;
    Entry.isSExt = false;
    Entry.isZExt = false;
    Entry.isInReg = false;
    Entry.isSRet = true;
    Entry.isNest = false;
    Entry.isByVal = false;
    Entry.isReturned = false;
    Entry.Alignment = Align;
    CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
    CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
  } else {
    // Build one InputArg per legal register of each return value part.
    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
      EVT VT = RetTys[I];
      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
      for (unsigned i = 0; i != NumRegs; ++i) {
        ISD::InputArg MyFlags;
        MyFlags.VT = RegisterVT;
        MyFlags.ArgVT = VT;
        MyFlags.Used = CLI.IsReturnValueUsed;
        if (CLI.RetSExt)
          MyFlags.Flags.setSExt();
        if (CLI.RetZExt)
          MyFlags.Flags.setZExt();
        if (CLI.IsInReg)
          MyFlags.Flags.setInReg();
        CLI.Ins.push_back(MyFlags);
      }
    }
  }

  // Handle all of the outgoing arguments.
  CLI.Outs.clear();
  CLI.OutVals.clear();
  ArgListTy &Args = CLI.getArgs();
  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
    Type *FinalType = Args[i].Ty;
    if (Args[i].isByVal)
      FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
    bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
        FinalType, CLI.CallConv, CLI.IsVarArg);
    for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
         ++Value) {
      EVT VT = ValueVTs[Value];
      Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
      SDValue Op = SDValue(Args[i].Node.getNode(),
                           Args[i].Node.getResNo() + Value);
      ISD::ArgFlagsTy Flags;
      unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy);

      if (Args[i].isZExt)
        Flags.setZExt();
      if (Args[i].isSExt)
        Flags.setSExt();
      if (Args[i].isInReg)
        Flags.setInReg();
      if (Args[i].isSRet)
        Flags.setSRet();
      if (Args[i].isByVal)
        Flags.setByVal();
      if (Args[i].isInAlloca) {
        Flags.setInAlloca();
        // Set the byval flag for CCAssignFn callbacks that don't know about
        // inalloca. This way we can know how many bytes we should've allocated
        // and how many bytes a callee cleanup function will pop. If we port
        // inalloca to more targets, we'll have to add custom inalloca handling
        // in the various CC lowering callbacks.
        Flags.setByVal();
      }
      if (Args[i].isByVal || Args[i].isInAlloca) {
        PointerType *Ty = cast<PointerType>(Args[i].Ty);
        Type *ElementTy = Ty->getElementType();
        Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
        // For ByVal, alignment should come from FE. BE will guess if this
        // info is not there but there are cases it cannot get right.
        unsigned FrameAlign;
        if (Args[i].Alignment)
          FrameAlign = Args[i].Alignment;
        else
          FrameAlign = getByValTypeAlignment(ElementTy);
        Flags.setByValAlign(FrameAlign);
      }
      if (Args[i].isNest)
        Flags.setNest();
      if (NeedsRegBlock) {
        Flags.setInConsecutiveRegs();
        if (Value == NumValues - 1)
          Flags.setInConsecutiveRegsLast();
      }
      Flags.setOrigAlign(OriginalAlignment);

      MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
      unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
      SmallVector<SDValue, 4> Parts(NumParts);
      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;

      if (Args[i].isSExt)
        ExtendKind = ISD::SIGN_EXTEND;
      else if (Args[i].isZExt)
        ExtendKind = ISD::ZERO_EXTEND;

      // Conservatively only handle 'returned' on non-vectors for now
      if (Args[i].isReturned && !Op.getValueType().isVector()) {
        assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
               "unexpected use of 'returned'");
        // Before passing 'returned' to the target lowering code, ensure that
        // either the register MVT and the actual EVT are the same size or that
        // the return value and argument are extended in the same way; in these
        // cases it's safe to pass the argument register value unchanged as the
        // return register value (although it's at the target's option whether
        // to do so)
        // TODO: allow code generation to take advantage of partially preserved
        // registers rather than clobbering the entire register when the
        // parameter extension method is not compatible with the return
        // extension method
        if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
            (ExtendKind != ISD::ANY_EXTEND &&
             CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt))
          Flags.setReturned();
      }

      getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
                     CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind);

      for (unsigned j = 0; j != NumParts; ++j) {
        // if it isn't first piece, alignment must be 1
        ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
                               i < CLI.NumFixedArgs,
                               i, j*Parts[j].getValueType().getStoreSize());
        if (NumParts > 1 && j == 0)
          MyFlags.Flags.setSplit();
        else if (j != 0)
          MyFlags.Flags.setOrigAlign(1);

        CLI.Outs.push_back(MyFlags);
        CLI.OutVals.push_back(Parts[j]);
      }
    }
  }

  SmallVector<SDValue, 4> InVals;
  CLI.Chain = LowerCall(CLI, InVals);

  // Verify that the target's LowerCall behaved as expected.
  assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
         "LowerCall didn't return a valid chain!");
  assert((!CLI.IsTailCall || InVals.empty()) &&
         "LowerCall emitted a return value for a tail call!");
  assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
         "LowerCall didn't emit the correct number of values!");

  // For a tail call, the return value is merely live-out and there aren't
  // any nodes in the DAG representing it. Return a special value to
  // indicate that a tail call has been emitted and no more Instructions
  // should be processed in the current block.
  if (CLI.IsTailCall) {
    CLI.DAG.setRoot(CLI.Chain);
    return std::make_pair(SDValue(), SDValue());
  }

  DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
          assert(InVals[i].getNode() &&
                 "LowerCall emitted a null value!");
          assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
                 "LowerCall emitted a value with the wrong type!");
        });

  SmallVector<SDValue, 4> ReturnValues;
  if (!CanLowerReturn) {
    // The instruction result is the result of loading from the
    // hidden sret parameter.
    SmallVector<EVT, 1> PVTs;
    Type *PtrRetTy = PointerType::getUnqual(OrigRetTy);

    ComputeValueVTs(*this, PtrRetTy, PVTs);
    assert(PVTs.size() == 1 && "Pointers should fit in one register");
    EVT PtrVT = PVTs[0];

    unsigned NumValues = RetTys.size();
    ReturnValues.resize(NumValues);
    SmallVector<SDValue, 4> Chains(NumValues);

    for (unsigned i = 0; i < NumValues; ++i) {
      SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
                                    CLI.DAG.getConstant(Offsets[i], PtrVT));
      SDValue L = CLI.DAG.getLoad(
          RetTys[i], CLI.DL, CLI.Chain, Add,
          MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false,
          false, false, 1);
      ReturnValues[i] = L;
      Chains[i] = L.getValue(1);
    }

    CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
  } else {
    // Collect the legal value parts into potentially illegal values
    // that correspond to the original function's return values.
    ISD::NodeType AssertOp = ISD::DELETED_NODE;
    if (CLI.RetSExt)
      AssertOp = ISD::AssertSext;
    else if (CLI.RetZExt)
      AssertOp = ISD::AssertZext;
    unsigned CurReg = 0;
    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
      EVT VT = RetTys[I];
      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);

      ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
                                              NumRegs, RegisterVT, VT, nullptr,
                                              AssertOp));
      CurReg += NumRegs;
    }

    // For a function returning void, there is no return value. We can't create
    // such a node, so we just return a null return value in that case. In
    // that case, nothing will actually look at the value.
    if (ReturnValues.empty())
      return std::make_pair(SDValue(), CLI.Chain);
  }

  SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
                                CLI.DAG.getVTList(RetTys), ReturnValues);
  return std::make_pair(Res, CLI.Chain);
}

void TargetLowering::LowerOperationWrapper(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
  if (Res.getNode())
    Results.push_back(Res);
}

SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  llvm_unreachable("LowerOperation not implemented for this target!");
}

void
SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
  SDValue Op = getNonRegisterValue(V);
  assert((Op.getOpcode() != ISD::CopyFromReg ||
          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
         "Copy from a reg to the same reg!");
  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");

  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
  // Emit the actual copies via RegsForValue, chained off the entry node, and
  // record the resulting chain in PendingExports.
  RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType());
  SDValue Chain = DAG.getEntryNode();
  RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V);
  PendingExports.push_back(Chain);
}

#include "llvm/CodeGen/SelectionDAGISel.h"

/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
  // With FastISel active, we may be splitting blocks, so force creation
  // of virtual registers for all non-dead arguments.
  if (FastISel)
    return A->use_empty();

  const BasicBlock *Entry = A->getParent()->begin();
  for (const User *U : A->users())
    if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
      return false;  // Use not in entry block.

  return true;
}

/// LowerArguments - Lower the incoming formal arguments of \p F: build the
/// ISD::InputArg descriptions, hand them to the target's
/// LowerFormalArguments, then reassemble the returned parts into values and
/// register them with the FunctionLoweringInfo / SelectionDAGBuilder.
void SelectionDAGISel::LowerArguments(const Function &F) {
  SelectionDAG &DAG = SDB->DAG;
  SDLoc dl = SDB->getCurSDLoc();
  const TargetLowering *TLI = getTargetLowering();
  const DataLayout *DL = TLI->getDataLayout();
  SmallVector<ISD::InputArg, 16> Ins;

  if (!FuncInfo->CanLowerReturn) {
    // Put in an sret pointer parameter before all the other parameters.
    SmallVector<EVT, 1> ValueVTs;
    ComputeValueVTs(*getTargetLowering(),
                    PointerType::getUnqual(F.getReturnType()), ValueVTs);

    // NOTE: Assuming that a pointer will never break down to more than one VT
    // or one register.
    ISD::ArgFlagsTy Flags;
    Flags.setSRet();
    MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
    ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0);
    Ins.push_back(RetArg);
  }

  // Set up the incoming argument description vector.
  // Idx is 1-based because attribute index 0 refers to the return value.
  unsigned Idx = 1;
  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
       I != E; ++I, ++Idx) {
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(*TLI, I->getType(), ValueVTs);
    bool isArgValueUsed = !I->use_empty();
    unsigned PartBase = 0;
    Type *FinalType = I->getType();
    if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
      FinalType = cast<PointerType>(FinalType)->getElementType();
    bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
        FinalType, F.getCallingConv(), F.isVarArg());
    for (unsigned Value = 0, NumValues = ValueVTs.size();
         Value != NumValues; ++Value) {
      EVT VT = ValueVTs[Value];
      Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
      ISD::ArgFlagsTy Flags;
      unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy);

      // Translate the IR parameter attributes into argument flags.
      if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
        Flags.setZExt();
      if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
        Flags.setSExt();
      if (F.getAttributes().hasAttribute(Idx, Attribute::InReg))
        Flags.setInReg();
      if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
        Flags.setSRet();
      if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
        Flags.setByVal();
      if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) {
        Flags.setInAlloca();
        // Set the byval flag for CCAssignFn callbacks that don't know about
        // inalloca. This way we can know how many bytes we should've allocated
        // and how many bytes a callee cleanup function will pop. If we port
        // inalloca to more targets, we'll have to add custom inalloca handling
        // in the various CC lowering callbacks.
        Flags.setByVal();
      }
      if (Flags.isByVal() || Flags.isInAlloca()) {
        PointerType *Ty = cast<PointerType>(I->getType());
        Type *ElementTy = Ty->getElementType();
        Flags.setByValSize(DL->getTypeAllocSize(ElementTy));
        // For ByVal, alignment should be passed from FE. BE will guess if
        // this info is not there but there are cases it cannot get right.
        unsigned FrameAlign;
        if (F.getParamAlignment(Idx))
          FrameAlign = F.getParamAlignment(Idx);
        else
          FrameAlign = TLI->getByValTypeAlignment(ElementTy);
        Flags.setByValAlign(FrameAlign);
      }
      if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
        Flags.setNest();
      if (NeedsRegBlock) {
        Flags.setInConsecutiveRegs();
        if (Value == NumValues - 1)
          Flags.setInConsecutiveRegsLast();
      }
      Flags.setOrigAlign(OriginalAlignment);

      // Emit one InputArg per register part this value legalizes into.
      MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
      unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
      for (unsigned i = 0; i != NumRegs; ++i) {
        ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
                              Idx-1, PartBase+i*RegisterVT.getStoreSize());
        if (NumRegs > 1 && i == 0)
          MyFlags.Flags.setSplit();
        // if it isn't first piece, alignment must be 1
        else if (i > 0)
          MyFlags.Flags.setOrigAlign(1);
        Ins.push_back(MyFlags);
      }
      PartBase += VT.getStoreSize();
    }
  }

  // Call the target to set up the argument values.
  SmallVector<SDValue, 8> InVals;
  SDValue NewRoot = TLI->LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
                                              F.isVarArg(), Ins,
                                              dl, DAG, InVals);

  // Verify that the target's LowerFormalArguments behaved as expected.
  assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
         "LowerFormalArguments didn't return a valid chain!");
  assert(InVals.size() == Ins.size() &&
         "LowerFormalArguments didn't emit the correct number of values!");
  DEBUG({
      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
        assert(InVals[i].getNode() &&
               "LowerFormalArguments emitted a null value!");
        assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
               "LowerFormalArguments emitted a value with the wrong type!");
      }
    });

  // Update the DAG with the new chain value resulting from argument lowering.
  DAG.setRoot(NewRoot);

  // Set up the argument values.
  unsigned i = 0;
  Idx = 1;
  if (!FuncInfo->CanLowerReturn) {
    // Create a virtual register for the sret pointer, and put in a copy
    // from the sret argument into it.
    SmallVector<EVT, 1> ValueVTs;
    ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
    MVT VT = ValueVTs[0].getSimpleVT();
    MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
    ISD::NodeType AssertOp = ISD::DELETED_NODE;
    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
                                        RegVT, VT, nullptr, AssertOp);

    MachineFunction& MF = SDB->DAG.getMachineFunction();
    MachineRegisterInfo& RegInfo = MF.getRegInfo();
    unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
    FuncInfo->DemoteRegister = SRetReg;
    NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(),
                                    SRetReg, ArgValue);
    DAG.setRoot(NewRoot);

    // i indexes lowered arguments. Bump it past the hidden sret argument.
    // Idx indexes LLVM arguments. Don't touch it.
    ++i;
  }

  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
       ++I, ++Idx) {
    SmallVector<SDValue, 4> ArgValues;
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(*TLI, I->getType(), ValueVTs);
    unsigned NumValues = ValueVTs.size();

    // If this argument is unused then remember its value. It is used to
    // generate debugging information.
    if (I->use_empty() && NumValues) {
      SDB->setUnusedArgValue(I, InVals[i]);

      // Also remember any frame index for use in FastISel.
      if (FrameIndexSDNode *FI =
          dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
        FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
    }

    for (unsigned Val = 0; Val != NumValues; ++Val) {
      EVT VT = ValueVTs[Val];
      MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
      unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);

      if (!I->use_empty()) {
        // Reassemble the register parts into the original value, asserting
        // the extension kind implied by the parameter attributes.
        ISD::NodeType AssertOp = ISD::DELETED_NODE;
        if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
          AssertOp = ISD::AssertSext;
        else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
          AssertOp = ISD::AssertZext;

        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
                                             NumParts, PartVT, VT,
                                             nullptr, AssertOp));
      }

      i += NumParts;
    }

    // We don't need to do anything else for unused arguments.
    if (ArgValues.empty())
      continue;

    // Note down frame index.
    if (FrameIndexSDNode *FI =
        dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
      FuncInfo->setArgumentFrameIndex(I, FI->getIndex());

    SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
                                     SDB->getCurSDLoc());

    SDB->setValue(I, Res);
    // If the merged value is a BUILD_PAIR whose first operand is a load from
    // a frame slot, record that frame index for the argument as well.
    if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
      if (LoadSDNode *LNode =
          dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
        if (FrameIndexSDNode *FI =
            dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
          FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
    }

    // If this argument is live outside of the entry block, insert a copy from
    // wherever we got it to the vreg that other BB's will reference it as.
    if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
      // If we can, though, try to skip creating an unnecessary vreg.
      // FIXME: This isn't very clean... it would be nice to make this more
      // general. It's also subtly incompatible with the hacks FastISel
      // uses with vregs.
      unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        FuncInfo->ValueMap[I] = Reg;
        continue;
      }
    }
    if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
      FuncInfo->InitializeRegForValue(I);
      SDB->CopyToExportRegsIfNeeded(I);
    }
  }

  assert(i == InVals.size() && "Argument register count mismatch!");

  // Finally, if the target has anything special to do, allow it to do so.
  // FIXME: this should insert code into the DAG!
  EmitFunctionEntryCode();
}

/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
/// ensure constants are generated when needed. Remember the virtual registers
/// that need to be added to the Machine PHI nodes as input. We cannot just
/// directly add them, because expansion might result in multiple MBB's for one
/// BB. As such, the start of the BB might correspond to a different MBB than
/// the end.
///
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
  const TerminatorInst *TI = LLVMBB->getTerminator();

  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;

  // Check successor nodes' PHI nodes that expect a constant to be available
  // from this block.
  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
    const BasicBlock *SuccBB = TI->getSuccessor(succ);
    if (!isa<PHINode>(SuccBB->begin())) continue;
    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];

    // If this terminator has multiple identical successors (common for
    // switches), only handle each succ once.
    if (!SuccsHandled.insert(SuccMBB)) continue;

    MachineBasicBlock::iterator MBBI = SuccMBB->begin();

    // At this point we know that there is a 1-1 correspondence between LLVM PHI
    // nodes and Machine PHI nodes, but the incoming operands have not been
    // emitted yet.
    for (BasicBlock::const_iterator I = SuccBB->begin();
         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
      // Ignore dead phi's.
      if (PN->use_empty()) continue;

      // Skip empty types
      if (PN->getType()->isEmptyTy())
        continue;

      unsigned Reg;
      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);

      if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
        // Materialize each constant into a vreg only once per block;
        // ConstantsOut caches the assigned register.
        unsigned &RegOut = ConstantsOut[C];
        if (RegOut == 0) {
          RegOut = FuncInfo.CreateRegs(C->getType());
          CopyValueToVirtualRegister(C, RegOut);
        }
        Reg = RegOut;
      } else {
        DenseMap<const Value *, unsigned>::iterator I =
          FuncInfo.ValueMap.find(PHIOp);
        if (I != FuncInfo.ValueMap.end())
          Reg = I->second;
        else {
          assert(isa<AllocaInst>(PHIOp) &&
                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
                 "Didn't codegen value into a register!??");
          Reg = FuncInfo.CreateRegs(PHIOp->getType());
          CopyValueToVirtualRegister(PHIOp, Reg);
        }
      }

      // Remember that this register needs to be added to the machine PHI node
      // as the input for this MBB.
      SmallVector<EVT, 4> ValueVTs;
      const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
      ComputeValueVTs(*TLI, PN->getType(), ValueVTs);
      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
        EVT VT = ValueVTs[vti];
        unsigned NumRegisters = TLI->getNumRegisters(*DAG.getContext(), VT);
        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
          FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
        Reg += NumRegisters;
      }
    }
  }

  ConstantsOut.clear();
}

/// Add a successor MBB to ParentMBB, creating a new MachineBB for BB if SuccMBB
/// is 0.
MachineBasicBlock *
SelectionDAGBuilder::StackProtectorDescriptor::
AddSuccessorMBB(const BasicBlock *BB,
                MachineBasicBlock *ParentMBB,
                MachineBasicBlock *SuccMBB) {
  // If SuccBB has not been created yet, create it.
  if (!SuccMBB) {
    MachineFunction *MF = ParentMBB->getParent();
    MachineFunction::iterator BBI = ParentMBB;
    SuccMBB = MF->CreateMachineBasicBlock(BB);
    // Insert the new block immediately after ParentMBB in the function.
    MF->insert(++BBI, SuccMBB);
  }
  // Add it as a successor of ParentMBB.
  ParentMBB->addSuccessor(SuccMBB);
  return SuccMBB;
}