clang API Documentation
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CodeGenFunction.h"
#include "CGObjCRuntime.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"

using namespace clang;
using namespace CodeGen;
using namespace llvm;

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                  unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
    Name = Context.BuiltinInfo.GetName(BuiltinID) + 10;

  llvm::FunctionType *Ty =
    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
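// For example, a call such as __sync_fetch_and_add(&i, v) on an 'int' is
// lowered by this helper to (roughly):
//   %old = atomicrmw add i32* %i.addr, i32 %v seq_cst
// with %old then converted back to the builtin's source-level type.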
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                     llvm::ConstantInt::get(IntType, -1));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// EmitFAbs - Emit a call to @llvm.fabs().
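// For a 'float' argument this produces, roughly:
//   %fabs = call float @llvm.fabs.f32(float %x)
// and the call is marked as not accessing memory (see below).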
00150 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 00151 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 00152 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 00153 Call->setDoesNotAccessMemory(); 00154 return Call; 00155 } 00156 00157 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn, 00158 const CallExpr *E, llvm::Value *calleeValue) { 00159 return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E, 00160 ReturnValueSlot(), Fn); 00161 } 00162 00163 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 00164 /// depending on IntrinsicID. 00165 /// 00166 /// \arg CGF The current codegen function. 00167 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 00168 /// \arg X The first argument to the llvm.*.with.overflow.*. 00169 /// \arg Y The second argument to the llvm.*.with.overflow.*. 00170 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 00171 /// \returns The result (i.e. sum/product) returned by the intrinsic. 00172 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 00173 const llvm::Intrinsic::ID IntrinsicID, 00174 llvm::Value *X, llvm::Value *Y, 00175 llvm::Value *&Carry) { 00176 // Make sure we have integers of the same width. 00177 assert(X->getType() == Y->getType() && 00178 "Arguments must be the same type. (Did you forget to make sure both " 00179 "arguments have the same integer width?)"); 00180 00181 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 00182 llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y); 00183 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 00184 return CGF.Builder.CreateExtractValue(Tmp, 0); 00185 } 00186 00187 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 00188 unsigned BuiltinID, const CallExpr *E) { 00189 // See if we can constant fold this builtin. If so, don't emit it at all. 00190 Expr::EvalResult Result; 00191 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 00192 !Result.hasSideEffects()) { 00193 if (Result.Val.isInt()) 00194 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 00195 Result.Val.getInt())); 00196 if (Result.Val.isFloat()) 00197 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 00198 Result.Val.getFloat())); 00199 } 00200 00201 switch (BuiltinID) { 00202 default: break; // Handle intrinsics and libm functions below. 00203 case Builtin::BI__builtin___CFStringMakeConstantString: 00204 case Builtin::BI__builtin___NSStringMakeConstantString: 00205 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr)); 00206 case Builtin::BI__builtin_stdarg_start: 00207 case Builtin::BI__builtin_va_start: 00208 case Builtin::BI__va_start: 00209 case Builtin::BI__builtin_va_end: { 00210 Value *ArgValue = (BuiltinID == Builtin::BI__va_start) 00211 ? EmitScalarExpr(E->getArg(0)) 00212 : EmitVAListRef(E->getArg(0)); 00213 llvm::Type *DestType = Int8PtrTy; 00214 if (ArgValue->getType() != DestType) 00215 ArgValue = Builder.CreateBitCast(ArgValue, DestType, 00216 ArgValue->getName().data()); 00217 00218 Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ? 
00219 Intrinsic::vaend : Intrinsic::vastart; 00220 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue)); 00221 } 00222 case Builtin::BI__builtin_va_copy: { 00223 Value *DstPtr = EmitVAListRef(E->getArg(0)); 00224 Value *SrcPtr = EmitVAListRef(E->getArg(1)); 00225 00226 llvm::Type *Type = Int8PtrTy; 00227 00228 DstPtr = Builder.CreateBitCast(DstPtr, Type); 00229 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 00230 return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy), 00231 DstPtr, SrcPtr)); 00232 } 00233 case Builtin::BI__builtin_abs: 00234 case Builtin::BI__builtin_labs: 00235 case Builtin::BI__builtin_llabs: { 00236 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 00237 00238 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 00239 Value *CmpResult = 00240 Builder.CreateICmpSGE(ArgValue, 00241 llvm::Constant::getNullValue(ArgValue->getType()), 00242 "abscond"); 00243 Value *Result = 00244 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 00245 00246 return RValue::get(Result); 00247 } 00248 case Builtin::BI__builtin_fabs: 00249 case Builtin::BI__builtin_fabsf: 00250 case Builtin::BI__builtin_fabsl: { 00251 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 00252 Value *Result = EmitFAbs(*this, Arg1); 00253 return RValue::get(Result); 00254 } 00255 case Builtin::BI__builtin_fmod: 00256 case Builtin::BI__builtin_fmodf: 00257 case Builtin::BI__builtin_fmodl: { 00258 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 00259 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 00260 Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); 00261 return RValue::get(Result); 00262 } 00263 00264 case Builtin::BI__builtin_conj: 00265 case Builtin::BI__builtin_conjf: 00266 case Builtin::BI__builtin_conjl: { 00267 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 00268 Value *Real = ComplexVal.first; 00269 Value *Imag = ComplexVal.second; 00270 Value *Zero = 00271 Imag->getType()->isFPOrFPVectorTy() 00272 ? 
llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 00273 : llvm::Constant::getNullValue(Imag->getType()); 00274 00275 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 00276 return RValue::getComplex(std::make_pair(Real, Imag)); 00277 } 00278 case Builtin::BI__builtin_creal: 00279 case Builtin::BI__builtin_crealf: 00280 case Builtin::BI__builtin_creall: 00281 case Builtin::BIcreal: 00282 case Builtin::BIcrealf: 00283 case Builtin::BIcreall: { 00284 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 00285 return RValue::get(ComplexVal.first); 00286 } 00287 00288 case Builtin::BI__builtin_cimag: 00289 case Builtin::BI__builtin_cimagf: 00290 case Builtin::BI__builtin_cimagl: 00291 case Builtin::BIcimag: 00292 case Builtin::BIcimagf: 00293 case Builtin::BIcimagl: { 00294 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 00295 return RValue::get(ComplexVal.second); 00296 } 00297 00298 case Builtin::BI__builtin_ctzs: 00299 case Builtin::BI__builtin_ctz: 00300 case Builtin::BI__builtin_ctzl: 00301 case Builtin::BI__builtin_ctzll: { 00302 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 00303 00304 llvm::Type *ArgType = ArgValue->getType(); 00305 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 00306 00307 llvm::Type *ResultType = ConvertType(E->getType()); 00308 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 00309 Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef); 00310 if (Result->getType() != ResultType) 00311 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 00312 "cast"); 00313 return RValue::get(Result); 00314 } 00315 case Builtin::BI__builtin_clzs: 00316 case Builtin::BI__builtin_clz: 00317 case Builtin::BI__builtin_clzl: 00318 case Builtin::BI__builtin_clzll: { 00319 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 00320 00321 llvm::Type *ArgType = ArgValue->getType(); 00322 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 00323 00324 llvm::Type *ResultType = ConvertType(E->getType()); 00325 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 00326 Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef); 00327 if (Result->getType() != ResultType) 00328 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 00329 "cast"); 00330 return RValue::get(Result); 00331 } 00332 case Builtin::BI__builtin_ffs: 00333 case Builtin::BI__builtin_ffsl: 00334 case Builtin::BI__builtin_ffsll: { 00335 // ffs(x) -> x ? 
cttz(x) + 1 : 0 00336 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 00337 00338 llvm::Type *ArgType = ArgValue->getType(); 00339 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 00340 00341 llvm::Type *ResultType = ConvertType(E->getType()); 00342 Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue, 00343 Builder.getTrue()), 00344 llvm::ConstantInt::get(ArgType, 1)); 00345 Value *Zero = llvm::Constant::getNullValue(ArgType); 00346 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 00347 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 00348 if (Result->getType() != ResultType) 00349 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 00350 "cast"); 00351 return RValue::get(Result); 00352 } 00353 case Builtin::BI__builtin_parity: 00354 case Builtin::BI__builtin_parityl: 00355 case Builtin::BI__builtin_parityll: { 00356 // parity(x) -> ctpop(x) & 1 00357 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 00358 00359 llvm::Type *ArgType = ArgValue->getType(); 00360 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 00361 00362 llvm::Type *ResultType = ConvertType(E->getType()); 00363 Value *Tmp = Builder.CreateCall(F, ArgValue); 00364 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 00365 if (Result->getType() != ResultType) 00366 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 00367 "cast"); 00368 return RValue::get(Result); 00369 } 00370 case Builtin::BI__builtin_popcount: 00371 case Builtin::BI__builtin_popcountl: 00372 case Builtin::BI__builtin_popcountll: { 00373 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 00374 00375 llvm::Type *ArgType = ArgValue->getType(); 00376 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 00377 00378 llvm::Type *ResultType = ConvertType(E->getType()); 00379 Value *Result = Builder.CreateCall(F, ArgValue); 00380 if (Result->getType() != ResultType) 00381 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 00382 "cast"); 00383 return RValue::get(Result); 00384 } 00385 case Builtin::BI__builtin_expect: { 00386 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 00387 llvm::Type *ArgType = ArgValue->getType(); 00388 00389 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 00390 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 00391 00392 Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue, 00393 "expval"); 00394 return RValue::get(Result); 00395 } 00396 case Builtin::BI__builtin_assume_aligned: { 00397 Value *PtrValue = EmitScalarExpr(E->getArg(0)); 00398 Value *OffsetValue = 00399 (E->getNumArgs() > 2) ? 
        EmitScalarExpr(E->getArg(2)) : nullptr;

    Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
    ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
    unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();

    EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
    return RValue::get(PtrValue);
  }
  case Builtin::BI__assume:
  case Builtin::BI__builtin_assume: {
    if (E->getArg(0)->HasSideEffects(getContext()))
      return RValue::get(nullptr);

    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
    return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
  }
  case Builtin::BI__builtin_bswap16:
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
    return RValue::get(Builder.CreateCall(F, ArgValue));
  }
  case Builtin::BI__builtin_object_size: {
    // We rely on constant folding to deal with expressions with side effects.
    assert(!E->getArg(0)->HasSideEffects(getContext()) &&
           "should have been constant folded");

    // We pass this builtin onto the optimizer so that it can
    // figure out the object size in more complex cases.
    llvm::Type *ResType = ConvertType(E->getType());

    // LLVM only supports 0 and 2, make sure that we pass along that
    // as a boolean.
    Value *Ty = EmitScalarExpr(E->getArg(1));
    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
    assert(CI);
    uint64_t val = CI->getZExtValue();
    CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);
    // FIXME: Get right address space.
    llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) };
    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
    return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI));
  }
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ?
EmitScalarExpr(E->getArg(2)) : 00452 llvm::ConstantInt::get(Int32Ty, 3); 00453 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 00454 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 00455 return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data)); 00456 } 00457 case Builtin::BI__builtin_readcyclecounter: { 00458 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 00459 return RValue::get(Builder.CreateCall(F)); 00460 } 00461 case Builtin::BI__builtin___clear_cache: { 00462 Value *Begin = EmitScalarExpr(E->getArg(0)); 00463 Value *End = EmitScalarExpr(E->getArg(1)); 00464 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 00465 return RValue::get(Builder.CreateCall2(F, Begin, End)); 00466 } 00467 case Builtin::BI__builtin_trap: { 00468 Value *F = CGM.getIntrinsic(Intrinsic::trap); 00469 return RValue::get(Builder.CreateCall(F)); 00470 } 00471 case Builtin::BI__debugbreak: { 00472 Value *F = CGM.getIntrinsic(Intrinsic::debugtrap); 00473 return RValue::get(Builder.CreateCall(F)); 00474 } 00475 case Builtin::BI__builtin_unreachable: { 00476 if (SanOpts.has(SanitizerKind::Unreachable)) { 00477 SanitizerScope SanScope(this); 00478 EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()), 00479 SanitizerKind::Unreachable), 00480 "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()), 00481 None); 00482 } else 00483 Builder.CreateUnreachable(); 00484 00485 // We do need to preserve an insertion point. 00486 EmitBlock(createBasicBlock("unreachable.cont")); 00487 00488 return RValue::get(nullptr); 00489 } 00490 00491 case Builtin::BI__builtin_powi: 00492 case Builtin::BI__builtin_powif: 00493 case Builtin::BI__builtin_powil: { 00494 Value *Base = EmitScalarExpr(E->getArg(0)); 00495 Value *Exponent = EmitScalarExpr(E->getArg(1)); 00496 llvm::Type *ArgType = Base->getType(); 00497 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 00498 return RValue::get(Builder.CreateCall2(F, Base, Exponent)); 00499 } 00500 00501 case Builtin::BI__builtin_isgreater: 00502 case Builtin::BI__builtin_isgreaterequal: 00503 case Builtin::BI__builtin_isless: 00504 case Builtin::BI__builtin_islessequal: 00505 case Builtin::BI__builtin_islessgreater: 00506 case Builtin::BI__builtin_isunordered: { 00507 // Ordered comparisons: we know the arguments to these are matching scalar 00508 // floating point values. 00509 Value *LHS = EmitScalarExpr(E->getArg(0)); 00510 Value *RHS = EmitScalarExpr(E->getArg(1)); 00511 00512 switch (BuiltinID) { 00513 default: llvm_unreachable("Unknown ordered comparison"); 00514 case Builtin::BI__builtin_isgreater: 00515 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 00516 break; 00517 case Builtin::BI__builtin_isgreaterequal: 00518 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 00519 break; 00520 case Builtin::BI__builtin_isless: 00521 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 00522 break; 00523 case Builtin::BI__builtin_islessequal: 00524 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 00525 break; 00526 case Builtin::BI__builtin_islessgreater: 00527 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 00528 break; 00529 case Builtin::BI__builtin_isunordered: 00530 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 00531 break; 00532 } 00533 // ZExt bool to int type. 
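    // E.g. __builtin_isgreater(a, b) on doubles becomes, roughly:
    //   %cmp = fcmp ogt double %a, %b
    //   %res = zext i1 %cmp to i32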
00534 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 00535 } 00536 case Builtin::BI__builtin_isnan: { 00537 Value *V = EmitScalarExpr(E->getArg(0)); 00538 V = Builder.CreateFCmpUNO(V, V, "cmp"); 00539 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 00540 } 00541 00542 case Builtin::BI__builtin_isinf: { 00543 // isinf(x) --> fabs(x) == infinity 00544 Value *V = EmitScalarExpr(E->getArg(0)); 00545 V = EmitFAbs(*this, V); 00546 00547 V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf"); 00548 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 00549 } 00550 00551 // TODO: BI__builtin_isinf_sign 00552 // isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0 00553 00554 case Builtin::BI__builtin_isnormal: { 00555 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 00556 Value *V = EmitScalarExpr(E->getArg(0)); 00557 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 00558 00559 Value *Abs = EmitFAbs(*this, V); 00560 Value *IsLessThanInf = 00561 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 00562 APFloat Smallest = APFloat::getSmallestNormalized( 00563 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 00564 Value *IsNormal = 00565 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 00566 "isnormal"); 00567 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 00568 V = Builder.CreateAnd(V, IsNormal, "and"); 00569 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 00570 } 00571 00572 case Builtin::BI__builtin_isfinite: { 00573 // isfinite(x) --> x == x && fabs(x) != infinity; 00574 Value *V = EmitScalarExpr(E->getArg(0)); 00575 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 00576 00577 Value *Abs = EmitFAbs(*this, V); 00578 Value *IsNotInf = 00579 Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 00580 00581 V = Builder.CreateAnd(Eq, IsNotInf, "and"); 00582 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 00583 } 00584 00585 case Builtin::BI__builtin_fpclassify: { 00586 Value *V = EmitScalarExpr(E->getArg(5)); 00587 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 00588 00589 // Create Result 00590 BasicBlock *Begin = Builder.GetInsertBlock(); 00591 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 00592 Builder.SetInsertPoint(End); 00593 PHINode *Result = 00594 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 00595 "fpclassify_result"); 00596 00597 // if (V==0) return FP_ZERO 00598 Builder.SetInsertPoint(Begin); 00599 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 00600 "iszero"); 00601 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 00602 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 00603 Builder.CreateCondBr(IsZero, End, NotZero); 00604 Result->addIncoming(ZeroLiteral, Begin); 00605 00606 // if (V != V) return FP_NAN 00607 Builder.SetInsertPoint(NotZero); 00608 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 00609 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 00610 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 00611 Builder.CreateCondBr(IsNan, End, NotNan); 00612 Result->addIncoming(NanLiteral, NotZero); 00613 00614 // if (fabs(V) == infinity) return FP_INFINITY 00615 Builder.SetInsertPoint(NotNan); 00616 Value *VAbs = EmitFAbs(*this, V); 00617 Value *IsInf = 00618 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 00619 "isinf"); 
00620 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 00621 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 00622 Builder.CreateCondBr(IsInf, End, NotInf); 00623 Result->addIncoming(InfLiteral, NotNan); 00624 00625 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 00626 Builder.SetInsertPoint(NotInf); 00627 APFloat Smallest = APFloat::getSmallestNormalized( 00628 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 00629 Value *IsNormal = 00630 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 00631 "isnormal"); 00632 Value *NormalResult = 00633 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 00634 EmitScalarExpr(E->getArg(3))); 00635 Builder.CreateBr(End); 00636 Result->addIncoming(NormalResult, NotInf); 00637 00638 // return Result 00639 Builder.SetInsertPoint(End); 00640 return RValue::get(Result); 00641 } 00642 00643 case Builtin::BIalloca: 00644 case Builtin::BI_alloca: 00645 case Builtin::BI__builtin_alloca: { 00646 Value *Size = EmitScalarExpr(E->getArg(0)); 00647 return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size)); 00648 } 00649 case Builtin::BIbzero: 00650 case Builtin::BI__builtin_bzero: { 00651 std::pair<llvm::Value*, unsigned> Dest = 00652 EmitPointerWithAlignment(E->getArg(0)); 00653 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 00654 Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal, 00655 Dest.second, false); 00656 return RValue::get(Dest.first); 00657 } 00658 case Builtin::BImemcpy: 00659 case Builtin::BI__builtin_memcpy: { 00660 std::pair<llvm::Value*, unsigned> Dest = 00661 EmitPointerWithAlignment(E->getArg(0)); 00662 std::pair<llvm::Value*, unsigned> Src = 00663 EmitPointerWithAlignment(E->getArg(1)); 00664 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 00665 unsigned Align = std::min(Dest.second, Src.second); 00666 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 00667 return RValue::get(Dest.first); 00668 } 00669 00670 case Builtin::BI__builtin___memcpy_chk: { 00671 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 00672 llvm::APSInt Size, DstSize; 00673 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 00674 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 00675 break; 00676 if (Size.ugt(DstSize)) 00677 break; 00678 std::pair<llvm::Value*, unsigned> Dest = 00679 EmitPointerWithAlignment(E->getArg(0)); 00680 std::pair<llvm::Value*, unsigned> Src = 00681 EmitPointerWithAlignment(E->getArg(1)); 00682 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 00683 unsigned Align = std::min(Dest.second, Src.second); 00684 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 00685 return RValue::get(Dest.first); 00686 } 00687 00688 case Builtin::BI__builtin_objc_memmove_collectable: { 00689 Value *Address = EmitScalarExpr(E->getArg(0)); 00690 Value *SrcAddr = EmitScalarExpr(E->getArg(1)); 00691 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 00692 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 00693 Address, SrcAddr, SizeVal); 00694 return RValue::get(Address); 00695 } 00696 00697 case Builtin::BI__builtin___memmove_chk: { 00698 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
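    // E.g. __builtin___memmove_chk(dst, src, 16, 32) has a constant copy size
    // (16) that is known not to exceed the object size (32), so it is emitted
    // as a plain llvm.memmove of 16 bytes; otherwise we break out of the
    // switch and the call is handled like a normal library call.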
00699 llvm::APSInt Size, DstSize; 00700 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 00701 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 00702 break; 00703 if (Size.ugt(DstSize)) 00704 break; 00705 std::pair<llvm::Value*, unsigned> Dest = 00706 EmitPointerWithAlignment(E->getArg(0)); 00707 std::pair<llvm::Value*, unsigned> Src = 00708 EmitPointerWithAlignment(E->getArg(1)); 00709 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 00710 unsigned Align = std::min(Dest.second, Src.second); 00711 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); 00712 return RValue::get(Dest.first); 00713 } 00714 00715 case Builtin::BImemmove: 00716 case Builtin::BI__builtin_memmove: { 00717 std::pair<llvm::Value*, unsigned> Dest = 00718 EmitPointerWithAlignment(E->getArg(0)); 00719 std::pair<llvm::Value*, unsigned> Src = 00720 EmitPointerWithAlignment(E->getArg(1)); 00721 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 00722 unsigned Align = std::min(Dest.second, Src.second); 00723 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); 00724 return RValue::get(Dest.first); 00725 } 00726 case Builtin::BImemset: 00727 case Builtin::BI__builtin_memset: { 00728 std::pair<llvm::Value*, unsigned> Dest = 00729 EmitPointerWithAlignment(E->getArg(0)); 00730 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 00731 Builder.getInt8Ty()); 00732 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 00733 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); 00734 return RValue::get(Dest.first); 00735 } 00736 case Builtin::BI__builtin___memset_chk: { 00737 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 00738 llvm::APSInt Size, DstSize; 00739 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 00740 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 00741 break; 00742 if (Size.ugt(DstSize)) 00743 break; 00744 std::pair<llvm::Value*, unsigned> Dest = 00745 EmitPointerWithAlignment(E->getArg(0)); 00746 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 00747 Builder.getInt8Ty()); 00748 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 00749 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); 00750 return RValue::get(Dest.first); 00751 } 00752 case Builtin::BI__builtin_dwarf_cfa: { 00753 // The offset in bytes from the first argument to the CFA. 00754 // 00755 // Why on earth is this in the frontend? Is there any reason at 00756 // all that the backend can't reasonably determine this while 00757 // lowering llvm.eh.dwarf.cfa()? 00758 // 00759 // TODO: If there's a satisfactory reason, add a target hook for 00760 // this instead of hard-coding 0, which is correct for most targets. 
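    // The call below becomes, roughly:
    //   %cfa = call i8* @llvm.eh.dwarf.cfa(i32 0)
    // with the hard-coded 0 passed as the offset argument.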
00761 int32_t Offset = 0; 00762 00763 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 00764 return RValue::get(Builder.CreateCall(F, 00765 llvm::ConstantInt::get(Int32Ty, Offset))); 00766 } 00767 case Builtin::BI__builtin_return_address: { 00768 Value *Depth = EmitScalarExpr(E->getArg(0)); 00769 Depth = Builder.CreateIntCast(Depth, Int32Ty, false); 00770 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 00771 return RValue::get(Builder.CreateCall(F, Depth)); 00772 } 00773 case Builtin::BI__builtin_frame_address: { 00774 Value *Depth = EmitScalarExpr(E->getArg(0)); 00775 Depth = Builder.CreateIntCast(Depth, Int32Ty, false); 00776 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 00777 return RValue::get(Builder.CreateCall(F, Depth)); 00778 } 00779 case Builtin::BI__builtin_extract_return_addr: { 00780 Value *Address = EmitScalarExpr(E->getArg(0)); 00781 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 00782 return RValue::get(Result); 00783 } 00784 case Builtin::BI__builtin_frob_return_addr: { 00785 Value *Address = EmitScalarExpr(E->getArg(0)); 00786 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 00787 return RValue::get(Result); 00788 } 00789 case Builtin::BI__builtin_dwarf_sp_column: { 00790 llvm::IntegerType *Ty 00791 = cast<llvm::IntegerType>(ConvertType(E->getType())); 00792 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 00793 if (Column == -1) { 00794 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 00795 return RValue::get(llvm::UndefValue::get(Ty)); 00796 } 00797 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 00798 } 00799 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 00800 Value *Address = EmitScalarExpr(E->getArg(0)); 00801 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 00802 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 00803 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 00804 } 00805 case Builtin::BI__builtin_eh_return: { 00806 Value *Int = EmitScalarExpr(E->getArg(0)); 00807 Value *Ptr = EmitScalarExpr(E->getArg(1)); 00808 00809 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 00810 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 00811 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 00812 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 00813 ? Intrinsic::eh_return_i32 00814 : Intrinsic::eh_return_i64); 00815 Builder.CreateCall2(F, Int, Ptr); 00816 Builder.CreateUnreachable(); 00817 00818 // We do need to preserve an insertion point. 00819 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 00820 00821 return RValue::get(nullptr); 00822 } 00823 case Builtin::BI__builtin_unwind_init: { 00824 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 00825 return RValue::get(Builder.CreateCall(F)); 00826 } 00827 case Builtin::BI__builtin_extend_pointer: { 00828 // Extends a pointer to the size of an _Unwind_Word, which is 00829 // uint64_t on all platforms. Generally this gets poked into a 00830 // register and eventually used as an address, so if the 00831 // addressing registers are wider than pointers and the platform 00832 // doesn't implicitly ignore high-order bits when doing 00833 // addressing, we need to make sure we zext / sext based on 00834 // the platform's expectations. 00835 // 00836 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 00837 00838 // Cast the pointer to intptr_t. 
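    // Roughly: on a 32-bit target this is a ptrtoint to i32 followed by a
    // zext (or sext, if the target prefers sign extension) to i64; on a
    // 64-bit target the ptrtoint result is already i64 and is returned as is.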
00839 Value *Ptr = EmitScalarExpr(E->getArg(0)); 00840 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 00841 00842 // If that's 64 bits, we're done. 00843 if (IntPtrTy->getBitWidth() == 64) 00844 return RValue::get(Result); 00845 00846 // Otherwise, ask the codegen data what to do. 00847 if (getTargetHooks().extendPointerWithSExt()) 00848 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 00849 else 00850 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 00851 } 00852 case Builtin::BI__builtin_setjmp: { 00853 // Buffer is a void**. 00854 Value *Buf = EmitScalarExpr(E->getArg(0)); 00855 00856 // Store the frame pointer to the setjmp buffer. 00857 Value *FrameAddr = 00858 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 00859 ConstantInt::get(Int32Ty, 0)); 00860 Builder.CreateStore(FrameAddr, Buf); 00861 00862 // Store the stack pointer to the setjmp buffer. 00863 Value *StackAddr = 00864 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 00865 Value *StackSaveSlot = 00866 Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2)); 00867 Builder.CreateStore(StackAddr, StackSaveSlot); 00868 00869 // Call LLVM's EH setjmp, which is lightweight. 00870 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 00871 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 00872 return RValue::get(Builder.CreateCall(F, Buf)); 00873 } 00874 case Builtin::BI__builtin_longjmp: { 00875 Value *Buf = EmitScalarExpr(E->getArg(0)); 00876 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 00877 00878 // Call LLVM's EH longjmp, which is lightweight. 00879 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 00880 00881 // longjmp doesn't return; mark this as unreachable. 00882 Builder.CreateUnreachable(); 00883 00884 // We do need to preserve an insertion point. 
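    // Any code emitted after the longjmp is unreachable, but later emission
    // still expects a valid insertion block, hence the "longjmp.cont" block.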
00885 EmitBlock(createBasicBlock("longjmp.cont")); 00886 00887 return RValue::get(nullptr); 00888 } 00889 case Builtin::BI__sync_fetch_and_add: 00890 case Builtin::BI__sync_fetch_and_sub: 00891 case Builtin::BI__sync_fetch_and_or: 00892 case Builtin::BI__sync_fetch_and_and: 00893 case Builtin::BI__sync_fetch_and_xor: 00894 case Builtin::BI__sync_fetch_and_nand: 00895 case Builtin::BI__sync_add_and_fetch: 00896 case Builtin::BI__sync_sub_and_fetch: 00897 case Builtin::BI__sync_and_and_fetch: 00898 case Builtin::BI__sync_or_and_fetch: 00899 case Builtin::BI__sync_xor_and_fetch: 00900 case Builtin::BI__sync_nand_and_fetch: 00901 case Builtin::BI__sync_val_compare_and_swap: 00902 case Builtin::BI__sync_bool_compare_and_swap: 00903 case Builtin::BI__sync_lock_test_and_set: 00904 case Builtin::BI__sync_lock_release: 00905 case Builtin::BI__sync_swap: 00906 llvm_unreachable("Shouldn't make it through sema"); 00907 case Builtin::BI__sync_fetch_and_add_1: 00908 case Builtin::BI__sync_fetch_and_add_2: 00909 case Builtin::BI__sync_fetch_and_add_4: 00910 case Builtin::BI__sync_fetch_and_add_8: 00911 case Builtin::BI__sync_fetch_and_add_16: 00912 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 00913 case Builtin::BI__sync_fetch_and_sub_1: 00914 case Builtin::BI__sync_fetch_and_sub_2: 00915 case Builtin::BI__sync_fetch_and_sub_4: 00916 case Builtin::BI__sync_fetch_and_sub_8: 00917 case Builtin::BI__sync_fetch_and_sub_16: 00918 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 00919 case Builtin::BI__sync_fetch_and_or_1: 00920 case Builtin::BI__sync_fetch_and_or_2: 00921 case Builtin::BI__sync_fetch_and_or_4: 00922 case Builtin::BI__sync_fetch_and_or_8: 00923 case Builtin::BI__sync_fetch_and_or_16: 00924 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 00925 case Builtin::BI__sync_fetch_and_and_1: 00926 case Builtin::BI__sync_fetch_and_and_2: 00927 case Builtin::BI__sync_fetch_and_and_4: 00928 case Builtin::BI__sync_fetch_and_and_8: 00929 case Builtin::BI__sync_fetch_and_and_16: 00930 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 00931 case Builtin::BI__sync_fetch_and_xor_1: 00932 case Builtin::BI__sync_fetch_and_xor_2: 00933 case Builtin::BI__sync_fetch_and_xor_4: 00934 case Builtin::BI__sync_fetch_and_xor_8: 00935 case Builtin::BI__sync_fetch_and_xor_16: 00936 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 00937 case Builtin::BI__sync_fetch_and_nand_1: 00938 case Builtin::BI__sync_fetch_and_nand_2: 00939 case Builtin::BI__sync_fetch_and_nand_4: 00940 case Builtin::BI__sync_fetch_and_nand_8: 00941 case Builtin::BI__sync_fetch_and_nand_16: 00942 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 00943 00944 // Clang extensions: not overloaded yet. 
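  // E.g. __sync_fetch_and_min(&i, v) on a signed 'int' maps to, roughly:
  //   %old = atomicrmw min i32* %i.addr, i32 %v seq_cst
  // and the unsigned variants use 'umin'/'umax' instead.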
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);

  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);
  case Builtin::BI__sync_nand_and_fetch_1:
  case Builtin::BI__sync_nand_and_fetch_2:
  case Builtin::BI__sync_nand_and_fetch_4:
  case Builtin::BI__sync_nand_and_fetch_8:
  case Builtin::BI__sync_nand_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
                                llvm::Instruction::And, true);

  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16: {
    QualType T = E->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitScalarExpr(E->getArg(1));
    llvm::Type *ValueType = Args[1]->getType();
    Args[1] = EmitToInt(*this, Args[1], T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                llvm::SequentiallyConsistent,
                                                llvm::SequentiallyConsistent);
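    // The cmpxchg above yields a { iN, i1 } pair; __sync_val_compare_and_swap
    // returns element 0 (the value previously in memory), converted back to
    // the original source type.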
    Result = Builder.CreateExtractValue(Result, 0);
    Result = EmitFromInt(*this, Result, T, ValueType);
    return RValue::get(Result);
  }

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16: {
    QualType T = E->getArg(1)->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *Pair = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                              llvm::SequentiallyConsistent,
                                              llvm::SequentiallyConsistent);
    Value *Result = Builder.CreateExtractValue(Pair, 1);
    // zext bool to int.
    Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
    return RValue::get(Result);
  }

  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
                                             StoreSize.getQuantity() * 8);
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
    llvm::StoreInst *Store =
      Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
    Store->setAlignment(StoreSize.getQuantity());
    Store->setAtomic(llvm::Release);
    return RValue::get(nullptr);
  }

  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
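    // In practice this is emitted simply as a sequentially consistent fence:
    //   fence seq_cst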
01094 Builder.CreateFence(llvm::SequentiallyConsistent); 01095 return RValue::get(nullptr); 01096 } 01097 01098 case Builtin::BI__c11_atomic_is_lock_free: 01099 case Builtin::BI__atomic_is_lock_free: { 01100 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 01101 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 01102 // _Atomic(T) is always properly-aligned. 01103 const char *LibCallName = "__atomic_is_lock_free"; 01104 CallArgList Args; 01105 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 01106 getContext().getSizeType()); 01107 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 01108 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 01109 getContext().VoidPtrTy); 01110 else 01111 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 01112 getContext().VoidPtrTy); 01113 const CGFunctionInfo &FuncInfo = 01114 CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args, 01115 FunctionType::ExtInfo(), 01116 RequiredArgs::All); 01117 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 01118 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 01119 return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); 01120 } 01121 01122 case Builtin::BI__atomic_test_and_set: { 01123 // Look at the argument type to determine whether this is a volatile 01124 // operation. The parameter type is always volatile. 01125 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 01126 bool Volatile = 01127 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 01128 01129 Value *Ptr = EmitScalarExpr(E->getArg(0)); 01130 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 01131 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 01132 Value *NewVal = Builder.getInt8(1); 01133 Value *Order = EmitScalarExpr(E->getArg(1)); 01134 if (isa<llvm::ConstantInt>(Order)) { 01135 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 01136 AtomicRMWInst *Result = nullptr; 01137 switch (ord) { 01138 case 0: // memory_order_relaxed 01139 default: // invalid order 01140 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 01141 Ptr, NewVal, 01142 llvm::Monotonic); 01143 break; 01144 case 1: // memory_order_consume 01145 case 2: // memory_order_acquire 01146 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 01147 Ptr, NewVal, 01148 llvm::Acquire); 01149 break; 01150 case 3: // memory_order_release 01151 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 01152 Ptr, NewVal, 01153 llvm::Release); 01154 break; 01155 case 4: // memory_order_acq_rel 01156 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 01157 Ptr, NewVal, 01158 llvm::AcquireRelease); 01159 break; 01160 case 5: // memory_order_seq_cst 01161 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 01162 Ptr, NewVal, 01163 llvm::SequentiallyConsistent); 01164 break; 01165 } 01166 Result->setVolatile(Volatile); 01167 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 01168 } 01169 01170 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 01171 01172 llvm::BasicBlock *BBs[5] = { 01173 createBasicBlock("monotonic", CurFn), 01174 createBasicBlock("acquire", CurFn), 01175 createBasicBlock("release", CurFn), 01176 createBasicBlock("acqrel", CurFn), 01177 createBasicBlock("seqcst", CurFn) 01178 }; 01179 llvm::AtomicOrdering Orders[5] = { 01180 llvm::Monotonic, llvm::Acquire, llvm::Release, 01181 llvm::AcquireRelease, llvm::SequentiallyConsistent 01182 }; 01183 01184 Order = 
Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 01185 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 01186 01187 Builder.SetInsertPoint(ContBB); 01188 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 01189 01190 for (unsigned i = 0; i < 5; ++i) { 01191 Builder.SetInsertPoint(BBs[i]); 01192 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 01193 Ptr, NewVal, Orders[i]); 01194 RMW->setVolatile(Volatile); 01195 Result->addIncoming(RMW, BBs[i]); 01196 Builder.CreateBr(ContBB); 01197 } 01198 01199 SI->addCase(Builder.getInt32(0), BBs[0]); 01200 SI->addCase(Builder.getInt32(1), BBs[1]); 01201 SI->addCase(Builder.getInt32(2), BBs[1]); 01202 SI->addCase(Builder.getInt32(3), BBs[2]); 01203 SI->addCase(Builder.getInt32(4), BBs[3]); 01204 SI->addCase(Builder.getInt32(5), BBs[4]); 01205 01206 Builder.SetInsertPoint(ContBB); 01207 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 01208 } 01209 01210 case Builtin::BI__atomic_clear: { 01211 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 01212 bool Volatile = 01213 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 01214 01215 Value *Ptr = EmitScalarExpr(E->getArg(0)); 01216 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 01217 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 01218 Value *NewVal = Builder.getInt8(0); 01219 Value *Order = EmitScalarExpr(E->getArg(1)); 01220 if (isa<llvm::ConstantInt>(Order)) { 01221 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 01222 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 01223 Store->setAlignment(1); 01224 switch (ord) { 01225 case 0: // memory_order_relaxed 01226 default: // invalid order 01227 Store->setOrdering(llvm::Monotonic); 01228 break; 01229 case 3: // memory_order_release 01230 Store->setOrdering(llvm::Release); 01231 break; 01232 case 5: // memory_order_seq_cst 01233 Store->setOrdering(llvm::SequentiallyConsistent); 01234 break; 01235 } 01236 return RValue::get(nullptr); 01237 } 01238 01239 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 01240 01241 llvm::BasicBlock *BBs[3] = { 01242 createBasicBlock("monotonic", CurFn), 01243 createBasicBlock("release", CurFn), 01244 createBasicBlock("seqcst", CurFn) 01245 }; 01246 llvm::AtomicOrdering Orders[3] = { 01247 llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent 01248 }; 01249 01250 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 01251 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 01252 01253 for (unsigned i = 0; i < 3; ++i) { 01254 Builder.SetInsertPoint(BBs[i]); 01255 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 01256 Store->setAlignment(1); 01257 Store->setOrdering(Orders[i]); 01258 Builder.CreateBr(ContBB); 01259 } 01260 01261 SI->addCase(Builder.getInt32(0), BBs[0]); 01262 SI->addCase(Builder.getInt32(3), BBs[1]); 01263 SI->addCase(Builder.getInt32(5), BBs[2]); 01264 01265 Builder.SetInsertPoint(ContBB); 01266 return RValue::get(nullptr); 01267 } 01268 01269 case Builtin::BI__atomic_thread_fence: 01270 case Builtin::BI__atomic_signal_fence: 01271 case Builtin::BI__c11_atomic_thread_fence: 01272 case Builtin::BI__c11_atomic_signal_fence: { 01273 llvm::SynchronizationScope Scope; 01274 if (BuiltinID == Builtin::BI__atomic_signal_fence || 01275 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 01276 Scope = llvm::SingleThread; 01277 else 01278 Scope = llvm::CrossThread; 01279 Value *Order = 
EmitScalarExpr(E->getArg(0)); 01280 if (isa<llvm::ConstantInt>(Order)) { 01281 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 01282 switch (ord) { 01283 case 0: // memory_order_relaxed 01284 default: // invalid order 01285 break; 01286 case 1: // memory_order_consume 01287 case 2: // memory_order_acquire 01288 Builder.CreateFence(llvm::Acquire, Scope); 01289 break; 01290 case 3: // memory_order_release 01291 Builder.CreateFence(llvm::Release, Scope); 01292 break; 01293 case 4: // memory_order_acq_rel 01294 Builder.CreateFence(llvm::AcquireRelease, Scope); 01295 break; 01296 case 5: // memory_order_seq_cst 01297 Builder.CreateFence(llvm::SequentiallyConsistent, Scope); 01298 break; 01299 } 01300 return RValue::get(nullptr); 01301 } 01302 01303 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 01304 AcquireBB = createBasicBlock("acquire", CurFn); 01305 ReleaseBB = createBasicBlock("release", CurFn); 01306 AcqRelBB = createBasicBlock("acqrel", CurFn); 01307 SeqCstBB = createBasicBlock("seqcst", CurFn); 01308 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 01309 01310 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 01311 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 01312 01313 Builder.SetInsertPoint(AcquireBB); 01314 Builder.CreateFence(llvm::Acquire, Scope); 01315 Builder.CreateBr(ContBB); 01316 SI->addCase(Builder.getInt32(1), AcquireBB); 01317 SI->addCase(Builder.getInt32(2), AcquireBB); 01318 01319 Builder.SetInsertPoint(ReleaseBB); 01320 Builder.CreateFence(llvm::Release, Scope); 01321 Builder.CreateBr(ContBB); 01322 SI->addCase(Builder.getInt32(3), ReleaseBB); 01323 01324 Builder.SetInsertPoint(AcqRelBB); 01325 Builder.CreateFence(llvm::AcquireRelease, Scope); 01326 Builder.CreateBr(ContBB); 01327 SI->addCase(Builder.getInt32(4), AcqRelBB); 01328 01329 Builder.SetInsertPoint(SeqCstBB); 01330 Builder.CreateFence(llvm::SequentiallyConsistent, Scope); 01331 Builder.CreateBr(ContBB); 01332 SI->addCase(Builder.getInt32(5), SeqCstBB); 01333 01334 Builder.SetInsertPoint(ContBB); 01335 return RValue::get(nullptr); 01336 } 01337 01338 // Library functions with special handling. 01339 case Builtin::BIsqrt: 01340 case Builtin::BIsqrtf: 01341 case Builtin::BIsqrtl: { 01342 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 01343 // in finite- or unsafe-math mode (the intrinsic has different semantics 01344 // for handling negative numbers compared to the library function, so 01345 // -fmath-errno=0 is not enough). 01346 if (!FD->hasAttr<ConstAttr>()) 01347 break; 01348 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 01349 CGM.getCodeGenOpts().NoNaNsFPMath)) 01350 break; 01351 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 01352 llvm::Type *ArgType = Arg0->getType(); 01353 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 01354 return RValue::get(Builder.CreateCall(F, Arg0)); 01355 } 01356 01357 case Builtin::BIpow: 01358 case Builtin::BIpowf: 01359 case Builtin::BIpowl: { 01360 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 
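    // E.g. pow(x, y) on doubles becomes, roughly:
    //   %r = call double @llvm.pow.f64(double %x, double %y)
    // but only when the declaration is known to be 'const' (checked below).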
    if (!FD->hasAttr<ConstAttr>())
      break;
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
  }

  case Builtin::BIfma:
  case Builtin::BIfmaf:
  case Builtin::BIfmal:
  case Builtin::BI__builtin_fma:
  case Builtin::BI__builtin_fmaf:
  case Builtin::BI__builtin_fmal: {
    // Rewrite fma to intrinsic.
    Value *FirstArg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = FirstArg->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
    return RValue::get(Builder.CreateCall3(F, FirstArg,
                                           EmitScalarExpr(E->getArg(1)),
                                           EmitScalarExpr(E->getArg(2))));
  }

  case Builtin::BI__builtin_signbit:
  case Builtin::BI__builtin_signbitf:
  case Builtin::BI__builtin_signbitl: {
    LLVMContext &C = CGM.getLLVMContext();

    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgTy = Arg->getType();
    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
    llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
    if (ArgTy->isPPC_FP128Ty()) {
      // The higher-order double comes first, and so we need to truncate the
      // pair to extract the overall sign. The order of the pair is the same
      // in both little- and big-endian modes.
      ArgWidth >>= 1;
      ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
      BCArg = Builder.CreateTrunc(BCArg, ArgIntTy);
    }
    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_annotation: {
    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
                                      AnnVal->getType());

    // Get the annotation string, go through casts. Sema requires this to be a
    // non-wide string literal, potentially cast, so the cast<> is safe.
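    // E.g. __builtin_annotation(v, "note") on an 'int' becomes, roughly, a
    // call to @llvm.annotation.i32, with the annotation string and source
    // location information passed as the extra arguments by
    // EmitAnnotationCall.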
01414 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 01415 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 01416 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 01417 } 01418 case Builtin::BI__builtin_addcb: 01419 case Builtin::BI__builtin_addcs: 01420 case Builtin::BI__builtin_addc: 01421 case Builtin::BI__builtin_addcl: 01422 case Builtin::BI__builtin_addcll: 01423 case Builtin::BI__builtin_subcb: 01424 case Builtin::BI__builtin_subcs: 01425 case Builtin::BI__builtin_subc: 01426 case Builtin::BI__builtin_subcl: 01427 case Builtin::BI__builtin_subcll: { 01428 01429 // We translate all of these builtins from expressions of the form: 01430 // int x = ..., y = ..., carryin = ..., carryout, result; 01431 // result = __builtin_addc(x, y, carryin, &carryout); 01432 // 01433 // to LLVM IR of the form: 01434 // 01435 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 01436 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 01437 // %carry1 = extractvalue {i32, i1} %tmp1, 1 01438 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 01439 // i32 %carryin) 01440 // %result = extractvalue {i32, i1} %tmp2, 0 01441 // %carry2 = extractvalue {i32, i1} %tmp2, 1 01442 // %tmp3 = or i1 %carry1, %carry2 01443 // %tmp4 = zext i1 %tmp3 to i32 01444 // store i32 %tmp4, i32* %carryout 01445 01446 // Scalarize our inputs. 01447 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 01448 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 01449 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 01450 std::pair<llvm::Value*, unsigned> CarryOutPtr = 01451 EmitPointerWithAlignment(E->getArg(3)); 01452 01453 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 01454 llvm::Intrinsic::ID IntrinsicId; 01455 switch (BuiltinID) { 01456 default: llvm_unreachable("Unknown multiprecision builtin id."); 01457 case Builtin::BI__builtin_addcb: 01458 case Builtin::BI__builtin_addcs: 01459 case Builtin::BI__builtin_addc: 01460 case Builtin::BI__builtin_addcl: 01461 case Builtin::BI__builtin_addcll: 01462 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 01463 break; 01464 case Builtin::BI__builtin_subcb: 01465 case Builtin::BI__builtin_subcs: 01466 case Builtin::BI__builtin_subc: 01467 case Builtin::BI__builtin_subcl: 01468 case Builtin::BI__builtin_subcll: 01469 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 01470 break; 01471 } 01472 01473 // Construct our resulting LLVM IR expression. 
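// Two chained calls to the selected *.with.overflow intrinsic: the first
// combines X and Y, the second folds in the incoming carry. The two carry
// bits are OR'd together to form the carry-out stored through the fourth
// argument.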
01474 llvm::Value *Carry1; 01475 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 01476 X, Y, Carry1); 01477 llvm::Value *Carry2; 01478 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 01479 Sum1, Carryin, Carry2); 01480 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 01481 X->getType()); 01482 llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut, 01483 CarryOutPtr.first); 01484 CarryOutStore->setAlignment(CarryOutPtr.second); 01485 return RValue::get(Sum2); 01486 } 01487 case Builtin::BI__builtin_uadd_overflow: 01488 case Builtin::BI__builtin_uaddl_overflow: 01489 case Builtin::BI__builtin_uaddll_overflow: 01490 case Builtin::BI__builtin_usub_overflow: 01491 case Builtin::BI__builtin_usubl_overflow: 01492 case Builtin::BI__builtin_usubll_overflow: 01493 case Builtin::BI__builtin_umul_overflow: 01494 case Builtin::BI__builtin_umull_overflow: 01495 case Builtin::BI__builtin_umulll_overflow: 01496 case Builtin::BI__builtin_sadd_overflow: 01497 case Builtin::BI__builtin_saddl_overflow: 01498 case Builtin::BI__builtin_saddll_overflow: 01499 case Builtin::BI__builtin_ssub_overflow: 01500 case Builtin::BI__builtin_ssubl_overflow: 01501 case Builtin::BI__builtin_ssubll_overflow: 01502 case Builtin::BI__builtin_smul_overflow: 01503 case Builtin::BI__builtin_smull_overflow: 01504 case Builtin::BI__builtin_smulll_overflow: { 01505 01506 // We translate all of these builtins directly to the relevant llvm IR node. 01507 01508 // Scalarize our inputs. 01509 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 01510 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 01511 std::pair<llvm::Value *, unsigned> SumOutPtr = 01512 EmitPointerWithAlignment(E->getArg(2)); 01513 01514 // Decide which of the overflow intrinsics we are lowering to: 01515 llvm::Intrinsic::ID IntrinsicId; 01516 switch (BuiltinID) { 01517 default: llvm_unreachable("Unknown security overflow builtin id."); 01518 case Builtin::BI__builtin_uadd_overflow: 01519 case Builtin::BI__builtin_uaddl_overflow: 01520 case Builtin::BI__builtin_uaddll_overflow: 01521 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 01522 break; 01523 case Builtin::BI__builtin_usub_overflow: 01524 case Builtin::BI__builtin_usubl_overflow: 01525 case Builtin::BI__builtin_usubll_overflow: 01526 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 01527 break; 01528 case Builtin::BI__builtin_umul_overflow: 01529 case Builtin::BI__builtin_umull_overflow: 01530 case Builtin::BI__builtin_umulll_overflow: 01531 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 01532 break; 01533 case Builtin::BI__builtin_sadd_overflow: 01534 case Builtin::BI__builtin_saddl_overflow: 01535 case Builtin::BI__builtin_saddll_overflow: 01536 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 01537 break; 01538 case Builtin::BI__builtin_ssub_overflow: 01539 case Builtin::BI__builtin_ssubl_overflow: 01540 case Builtin::BI__builtin_ssubll_overflow: 01541 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 01542 break; 01543 case Builtin::BI__builtin_smul_overflow: 01544 case Builtin::BI__builtin_smull_overflow: 01545 case Builtin::BI__builtin_smulll_overflow: 01546 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 01547 break; 01548 } 01549 01550 01551 llvm::Value *Carry; 01552 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 01553 llvm::StoreInst *SumOutStore = Builder.CreateStore(Sum, SumOutPtr.first); 01554 SumOutStore->setAlignment(SumOutPtr.second); 01555 01556 return RValue::get(Carry); 01557 } 01558 case 
Builtin::BI__builtin_addressof: 01559 return RValue::get(EmitLValue(E->getArg(0)).getAddress()); 01560 case Builtin::BI__builtin_operator_new: 01561 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 01562 E->getArg(0), false); 01563 case Builtin::BI__builtin_operator_delete: 01564 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 01565 E->getArg(0), true); 01566 case Builtin::BI__noop: 01567 // __noop always evaluates to an integer literal zero. 01568 return RValue::get(ConstantInt::get(IntTy, 0)); 01569 case Builtin::BI_InterlockedExchange: 01570 case Builtin::BI_InterlockedExchangePointer: 01571 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 01572 case Builtin::BI_InterlockedCompareExchangePointer: { 01573 llvm::Type *RTy; 01574 llvm::IntegerType *IntType = 01575 IntegerType::get(getLLVMContext(), 01576 getContext().getTypeSize(E->getType())); 01577 llvm::Type *IntPtrType = IntType->getPointerTo(); 01578 01579 llvm::Value *Destination = 01580 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 01581 01582 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 01583 RTy = Exchange->getType(); 01584 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 01585 01586 llvm::Value *Comparand = 01587 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 01588 01589 auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 01590 SequentiallyConsistent, 01591 SequentiallyConsistent); 01592 Result->setVolatile(true); 01593 01594 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 01595 0), 01596 RTy)); 01597 } 01598 case Builtin::BI_InterlockedCompareExchange: { 01599 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 01600 EmitScalarExpr(E->getArg(0)), 01601 EmitScalarExpr(E->getArg(2)), 01602 EmitScalarExpr(E->getArg(1)), 01603 SequentiallyConsistent, 01604 SequentiallyConsistent); 01605 CXI->setVolatile(true); 01606 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 01607 } 01608 case Builtin::BI_InterlockedIncrement: { 01609 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 01610 AtomicRMWInst::Add, 01611 EmitScalarExpr(E->getArg(0)), 01612 ConstantInt::get(Int32Ty, 1), 01613 llvm::SequentiallyConsistent); 01614 RMWI->setVolatile(true); 01615 return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1))); 01616 } 01617 case Builtin::BI_InterlockedDecrement: { 01618 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 01619 AtomicRMWInst::Sub, 01620 EmitScalarExpr(E->getArg(0)), 01621 ConstantInt::get(Int32Ty, 1), 01622 llvm::SequentiallyConsistent); 01623 RMWI->setVolatile(true); 01624 return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1))); 01625 } 01626 case Builtin::BI_InterlockedExchangeAdd: { 01627 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 01628 AtomicRMWInst::Add, 01629 EmitScalarExpr(E->getArg(0)), 01630 EmitScalarExpr(E->getArg(1)), 01631 llvm::SequentiallyConsistent); 01632 RMWI->setVolatile(true); 01633 return RValue::get(RMWI); 01634 } 01635 case Builtin::BI__readfsdword: { 01636 Value *IntToPtr = 01637 Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 01638 llvm::PointerType::get(CGM.Int32Ty, 257)); 01639 LoadInst *Load = 01640 Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true); 01641 return RValue::get(Load); 01642 } 01643 } 01644 01645 // If this is an alias for a lib function (e.g. 
__builtin_sin), emit
01646 // the call using the normal call path, but using the unmangled
01647 // version of the function name.
01648 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
01649 return emitLibraryCall(*this, FD, E,
01650 CGM.getBuiltinLibFunction(FD, BuiltinID));
01651
01652 // If this is a predefined lib function (e.g. malloc), emit the call
01653 // using exactly the normal call path.
01654 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
01655 return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
01656
01657 // See if we have a target specific intrinsic.
01658 const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
01659 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
01660 if (const char *Prefix =
01661 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
01662 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
01663 // NOTE: we don't need to perform a compatibility flag check here, since the
01664 // MS builtins are declared in Builtins*.def via LANGBUILTIN with
01665 // ALL_MS_LANGUAGES and so have already been filtered out earlier.
01666 if (IntrinsicID == Intrinsic::not_intrinsic)
01667 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
01668 }
01669
01670 if (IntrinsicID != Intrinsic::not_intrinsic) {
01671 SmallVector<Value*, 16> Args;
01672
01673 // Find out if any arguments are required to be integer constant
01674 // expressions.
01675 unsigned ICEArguments = 0;
01676 ASTContext::GetBuiltinTypeError Error;
01677 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
01678 assert(Error == ASTContext::GE_None && "Should not codegen an error");
01679
01680 Function *F = CGM.getIntrinsic(IntrinsicID);
01681 llvm::FunctionType *FTy = F->getFunctionType();
01682
01683 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
01684 Value *ArgValue;
01685 // If this is a normal argument, just emit it as a scalar.
01686 if ((ICEArguments & (1 << i)) == 0) {
01687 ArgValue = EmitScalarExpr(E->getArg(i));
01688 } else {
01689 // If this is required to be a constant, constant fold it so that we
01690 // know that the generated intrinsic gets a ConstantInt.
01691 llvm::APSInt Result;
01692 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
01693 assert(IsConst && "Constant arg isn't actually constant?");
01694 (void)IsConst;
01695 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
01696 }
01697
01698 // If the intrinsic arg type is different from the builtin arg type
01699 // we need to do a bit cast.
01700 llvm::Type *PTy = FTy->getParamType(i);
01701 if (PTy != ArgValue->getType()) {
01702 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
01703 "Must be able to losslessly bit cast to param");
01704 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
01705 }
01706
01707 Args.push_back(ArgValue);
01708 }
01709
01710 Value *V = Builder.CreateCall(F, Args);
01711 QualType BuiltinRetType = E->getType();
01712
01713 llvm::Type *RetTy = VoidTy;
01714 if (!BuiltinRetType->isVoidType())
01715 RetTy = ConvertType(BuiltinRetType);
01716
01717 if (RetTy != V->getType()) {
01718 assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
01719 "Must be able to losslessly bit cast result type");
01720 V = Builder.CreateBitCast(V, RetTy);
01721 }
01722
01723 return RValue::get(V);
01724 }
01725
01726 // See if we have a target specific builtin that needs to be lowered.
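// Anything still unhandled here is passed to the per-target hook below
// (EmitTargetBuiltinExpr), which dispatches on the triple's architecture to
// the ARM, AArch64, X86, PPC or R600 emitters.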
01727 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
01728 return RValue::get(V);
01729
01730 ErrorUnsupported(E, "builtin function");
01731
01732 // Unknown builtin, for now just dump it out and return undef.
01733 return GetUndefRValue(E->getType());
01734 }
01735
01736 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
01737 const CallExpr *E) {
01738 switch (getTarget().getTriple().getArch()) {
01739 case llvm::Triple::arm:
01740 case llvm::Triple::armeb:
01741 case llvm::Triple::thumb:
01742 case llvm::Triple::thumbeb:
01743 return EmitARMBuiltinExpr(BuiltinID, E);
01744 case llvm::Triple::aarch64:
01745 case llvm::Triple::aarch64_be:
01746 return EmitAArch64BuiltinExpr(BuiltinID, E);
01747 case llvm::Triple::x86:
01748 case llvm::Triple::x86_64:
01749 return EmitX86BuiltinExpr(BuiltinID, E);
01750 case llvm::Triple::ppc:
01751 case llvm::Triple::ppc64:
01752 case llvm::Triple::ppc64le:
01753 return EmitPPCBuiltinExpr(BuiltinID, E);
01754 case llvm::Triple::r600:
01755 return EmitR600BuiltinExpr(BuiltinID, E);
01756 default:
01757 return nullptr;
01758 }
01759 }
01760
01761 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
01762 NeonTypeFlags TypeFlags,
01763 bool V1Ty=false) {
01764 int IsQuad = TypeFlags.isQuad();
01765 switch (TypeFlags.getEltType()) {
01766 case NeonTypeFlags::Int8:
01767 case NeonTypeFlags::Poly8:
01768 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
01769 case NeonTypeFlags::Int16:
01770 case NeonTypeFlags::Poly16:
01771 case NeonTypeFlags::Float16:
01772 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
01773 case NeonTypeFlags::Int32:
01774 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
01775 case NeonTypeFlags::Int64:
01776 case NeonTypeFlags::Poly64:
01777 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
01778 case NeonTypeFlags::Poly128:
01779 // FIXME: i128 and f128 don't get full support in Clang and LLVM yet;
01780 // a lot of the i128 and f128 API is missing, so we use v16i8 to
01781 // represent poly128 and have it pattern matched.
01782 return llvm::VectorType::get(CGF->Int8Ty, 16);
01783 case NeonTypeFlags::Float32:
01784 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
01785 case NeonTypeFlags::Float64:
01786 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ?
1 : (1 << IsQuad)); 01787 } 01788 llvm_unreachable("Unknown vector element type!"); 01789 } 01790 01791 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 01792 unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); 01793 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 01794 return Builder.CreateShuffleVector(V, V, SV, "lane"); 01795 } 01796 01797 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 01798 const char *name, 01799 unsigned shift, bool rightshift) { 01800 unsigned j = 0; 01801 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 01802 ai != ae; ++ai, ++j) 01803 if (shift > 0 && shift == j) 01804 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 01805 else 01806 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 01807 01808 return Builder.CreateCall(F, Ops, name); 01809 } 01810 01811 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 01812 bool neg) { 01813 int SV = cast<ConstantInt>(V)->getSExtValue(); 01814 01815 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 01816 llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV); 01817 return llvm::ConstantVector::getSplat(VTy->getNumElements(), C); 01818 } 01819 01820 // \brief Right-shift a vector by a constant. 01821 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 01822 llvm::Type *Ty, bool usgn, 01823 const char *name) { 01824 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 01825 01826 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 01827 int EltSize = VTy->getScalarSizeInBits(); 01828 01829 Vec = Builder.CreateBitCast(Vec, Ty); 01830 01831 // lshr/ashr are undefined when the shift amount is equal to the vector 01832 // element size. 01833 if (ShiftAmt == EltSize) { 01834 if (usgn) { 01835 // Right-shifting an unsigned value by its size yields 0. 01836 llvm::Constant *Zero = ConstantInt::get(VTy->getElementType(), 0); 01837 return llvm::ConstantVector::getSplat(VTy->getNumElements(), Zero); 01838 } else { 01839 // Right-shifting a signed value by its size is equivalent 01840 // to a shift of size-1. 01841 --ShiftAmt; 01842 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 01843 } 01844 } 01845 01846 Shift = EmitNeonShiftVector(Shift, Ty, false); 01847 if (usgn) 01848 return Builder.CreateLShr(Vec, Shift, name); 01849 else 01850 return Builder.CreateAShr(Vec, Shift, name); 01851 } 01852 01853 /// GetPointeeAlignment - Given an expression with a pointer type, find the 01854 /// alignment of the type referenced by the pointer. Skip over implicit 01855 /// casts. 01856 std::pair<llvm::Value*, unsigned> 01857 CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) { 01858 assert(Addr->getType()->isPointerType()); 01859 Addr = Addr->IgnoreParens(); 01860 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) { 01861 if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) && 01862 ICE->getSubExpr()->getType()->isPointerType()) { 01863 std::pair<llvm::Value*, unsigned> Ptr = 01864 EmitPointerWithAlignment(ICE->getSubExpr()); 01865 Ptr.first = Builder.CreateBitCast(Ptr.first, 01866 ConvertType(Addr->getType())); 01867 return Ptr; 01868 } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) { 01869 LValue LV = EmitLValue(ICE->getSubExpr()); 01870 unsigned Align = LV.getAlignment().getQuantity(); 01871 if (!Align) { 01872 // FIXME: Once LValues are fixed to always set alignment, 01873 // zap this code. 
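// Until then, fall back to the ABI alignment of the decayed type, or to a
// conservative alignment of 1 if that type is incomplete.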
01874 QualType PtTy = ICE->getSubExpr()->getType(); 01875 if (!PtTy->isIncompleteType()) 01876 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 01877 else 01878 Align = 1; 01879 } 01880 return std::make_pair(LV.getAddress(), Align); 01881 } 01882 } 01883 if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) { 01884 if (UO->getOpcode() == UO_AddrOf) { 01885 LValue LV = EmitLValue(UO->getSubExpr()); 01886 unsigned Align = LV.getAlignment().getQuantity(); 01887 if (!Align) { 01888 // FIXME: Once LValues are fixed to always set alignment, 01889 // zap this code. 01890 QualType PtTy = UO->getSubExpr()->getType(); 01891 if (!PtTy->isIncompleteType()) 01892 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 01893 else 01894 Align = 1; 01895 } 01896 return std::make_pair(LV.getAddress(), Align); 01897 } 01898 } 01899 01900 unsigned Align = 1; 01901 QualType PtTy = Addr->getType()->getPointeeType(); 01902 if (!PtTy->isIncompleteType()) 01903 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 01904 01905 return std::make_pair(EmitScalarExpr(Addr), Align); 01906 } 01907 01908 enum { 01909 AddRetType = (1 << 0), 01910 Add1ArgType = (1 << 1), 01911 Add2ArgTypes = (1 << 2), 01912 01913 VectorizeRetType = (1 << 3), 01914 VectorizeArgTypes = (1 << 4), 01915 01916 InventFloatType = (1 << 5), 01917 UnsignedAlts = (1 << 6), 01918 01919 Use64BitVectors = (1 << 7), 01920 Use128BitVectors = (1 << 8), 01921 01922 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 01923 VectorRet = AddRetType | VectorizeRetType, 01924 VectorRetGetArgs01 = 01925 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 01926 FpCmpzModifiers = 01927 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 01928 }; 01929 01930 struct NeonIntrinsicInfo { 01931 unsigned BuiltinID; 01932 unsigned LLVMIntrinsic; 01933 unsigned AltLLVMIntrinsic; 01934 const char *NameHint; 01935 unsigned TypeModifier; 01936 01937 bool operator<(unsigned RHSBuiltinID) const { 01938 return BuiltinID < RHSBuiltinID; 01939 } 01940 }; 01941 01942 #define NEONMAP0(NameBase) \ 01943 { NEON::BI__builtin_neon_ ## NameBase, 0, 0, #NameBase, 0 } 01944 01945 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 01946 { NEON:: BI__builtin_neon_ ## NameBase, \ 01947 Intrinsic::LLVMIntrinsic, 0, #NameBase, TypeModifier } 01948 01949 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 01950 { NEON:: BI__builtin_neon_ ## NameBase, \ 01951 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 01952 #NameBase, TypeModifier } 01953 01954 static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { 01955 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 01956 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 01957 NEONMAP1(vabs_v, arm_neon_vabs, 0), 01958 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 01959 NEONMAP0(vaddhn_v), 01960 NEONMAP1(vaesdq_v, arm_neon_aesd, 0), 01961 NEONMAP1(vaeseq_v, arm_neon_aese, 0), 01962 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), 01963 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), 01964 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 01965 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 01966 NEONMAP1(vcage_v, arm_neon_vacge, 0), 01967 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 01968 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 01969 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 01970 NEONMAP1(vcale_v, arm_neon_vacge, 0), 01971 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 01972 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 01973 NEONMAP1(vcaltq_v, arm_neon_vacgt, 
0), 01974 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 01975 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 01976 NEONMAP1(vclz_v, ctlz, Add1ArgType), 01977 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 01978 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 01979 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 01980 NEONMAP1(vcvt_f16_v, arm_neon_vcvtfp2hf, 0), 01981 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 01982 NEONMAP0(vcvt_f32_v), 01983 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 01984 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 01985 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 01986 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 01987 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), 01988 NEONMAP0(vcvt_s32_v), 01989 NEONMAP0(vcvt_s64_v), 01990 NEONMAP0(vcvt_u32_v), 01991 NEONMAP0(vcvt_u64_v), 01992 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 01993 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 01994 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 01995 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 01996 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 01997 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 01998 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 01999 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 02000 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 02001 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 02002 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 02003 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 02004 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 02005 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 02006 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 02007 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 02008 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 02009 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 02010 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 02011 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), 02012 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 02013 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 02014 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 02015 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 02016 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 02017 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 02018 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 02019 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 02020 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 02021 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 02022 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 02023 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 02024 NEONMAP0(vcvtq_f32_v), 02025 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 02026 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 02027 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 02028 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 02029 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 02030 NEONMAP0(vcvtq_s32_v), 02031 NEONMAP0(vcvtq_s64_v), 02032 NEONMAP0(vcvtq_u32_v), 02033 NEONMAP0(vcvtq_u64_v), 02034 NEONMAP0(vext_v), 02035 NEONMAP0(vextq_v), 02036 NEONMAP0(vfma_v), 02037 NEONMAP0(vfmaq_v), 02038 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 02039 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 02040 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 02041 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 02042 NEONMAP0(vld1_dup_v), 02043 NEONMAP1(vld1_v, arm_neon_vld1, 0), 02044 NEONMAP0(vld1q_dup_v), 02045 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 02046 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 02047 NEONMAP1(vld2_v, arm_neon_vld2, 0), 02048 
NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 02049 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 02050 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 02051 NEONMAP1(vld3_v, arm_neon_vld3, 0), 02052 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 02053 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 02054 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 02055 NEONMAP1(vld4_v, arm_neon_vld4, 0), 02056 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 02057 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 02058 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 02059 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 02060 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 02061 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 02062 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 02063 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 02064 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 02065 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 02066 NEONMAP0(vmovl_v), 02067 NEONMAP0(vmovn_v), 02068 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 02069 NEONMAP0(vmull_v), 02070 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 02071 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 02072 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 02073 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 02074 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 02075 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 02076 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 02077 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 02078 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 02079 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 02080 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 02081 NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 02082 NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 02083 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), 02084 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), 02085 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 02086 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 02087 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 02088 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 02089 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 02090 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 02091 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 02092 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 02093 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 02094 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 02095 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 02096 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 02097 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 02098 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 02099 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 02100 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 02101 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), 02102 NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 02103 NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, 
Add1ArgType | UnsignedAlts), 02104 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 02105 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 02106 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 02107 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 02108 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 02109 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 02110 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 02111 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 02112 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 02113 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 02114 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 02115 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 02116 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), 02117 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), 02118 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 02119 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 02120 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 02121 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 02122 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 02123 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 02124 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 02125 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 02126 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 02127 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 02128 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 02129 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), 02130 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), 02131 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), 02132 NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0), 02133 NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0), 02134 NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0), 02135 NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0), 02136 NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0), 02137 NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0), 02138 NEONMAP0(vshl_n_v), 02139 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 02140 NEONMAP0(vshll_n_v), 02141 NEONMAP0(vshlq_n_v), 02142 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 02143 NEONMAP0(vshr_n_v), 02144 NEONMAP0(vshrn_n_v), 02145 NEONMAP0(vshrq_n_v), 02146 NEONMAP1(vst1_v, arm_neon_vst1, 0), 02147 NEONMAP1(vst1q_v, arm_neon_vst1, 0), 02148 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), 02149 NEONMAP1(vst2_v, arm_neon_vst2, 0), 02150 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), 02151 NEONMAP1(vst2q_v, arm_neon_vst2, 0), 02152 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), 02153 NEONMAP1(vst3_v, arm_neon_vst3, 0), 02154 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), 02155 NEONMAP1(vst3q_v, arm_neon_vst3, 0), 02156 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), 02157 NEONMAP1(vst4_v, arm_neon_vst4, 0), 02158 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), 02159 NEONMAP1(vst4q_v, arm_neon_vst4, 0), 02160 NEONMAP0(vsubhn_v), 02161 NEONMAP0(vtrn_v), 02162 NEONMAP0(vtrnq_v), 02163 NEONMAP0(vtst_v), 02164 NEONMAP0(vtstq_v), 02165 NEONMAP0(vuzp_v), 02166 NEONMAP0(vuzpq_v), 02167 NEONMAP0(vzip_v), 02168 NEONMAP0(vzipq_v) 02169 }; 02170 02171 static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { 02172 NEONMAP1(vabs_v, aarch64_neon_abs, 0), 02173 NEONMAP1(vabsq_v, aarch64_neon_abs, 0), 02174 NEONMAP0(vaddhn_v), 02175 
NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), 02176 NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), 02177 NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), 02178 NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), 02179 NEONMAP1(vcage_v, aarch64_neon_facge, 0), 02180 NEONMAP1(vcageq_v, aarch64_neon_facge, 0), 02181 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), 02182 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), 02183 NEONMAP1(vcale_v, aarch64_neon_facge, 0), 02184 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), 02185 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), 02186 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), 02187 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), 02188 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), 02189 NEONMAP1(vclz_v, ctlz, Add1ArgType), 02190 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 02191 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 02192 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 02193 NEONMAP1(vcvt_f16_v, aarch64_neon_vcvtfp2hf, 0), 02194 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), 02195 NEONMAP0(vcvt_f32_v), 02196 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 02197 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 02198 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 02199 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 02200 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 02201 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 02202 NEONMAP0(vcvtq_f32_v), 02203 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 02204 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 02205 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 02206 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 02207 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 02208 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 02209 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), 02210 NEONMAP0(vext_v), 02211 NEONMAP0(vextq_v), 02212 NEONMAP0(vfma_v), 02213 NEONMAP0(vfmaq_v), 02214 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 02215 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 02216 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 02217 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 02218 NEONMAP0(vmovl_v), 02219 NEONMAP0(vmovn_v), 02220 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), 02221 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), 02222 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), 02223 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 02224 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 02225 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), 02226 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), 02227 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), 02228 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 02229 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 02230 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), 02231 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), 02232 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), 02233 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), 02234 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), 02235 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | 
UnsignedAlts), 02236 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), 02237 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), 02238 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), 02239 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), 02240 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), 02241 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 02242 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 02243 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), 02244 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 02245 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), 02246 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 02247 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), 02248 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), 02249 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 02250 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 02251 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), 02252 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 02253 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 02254 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), 02255 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), 02256 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 02257 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 02258 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 02259 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 02260 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 02261 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 02262 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 02263 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 02264 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), 02265 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), 02266 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), 02267 NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0), 02268 NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0), 02269 NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0), 02270 NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0), 02271 NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0), 02272 NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0), 02273 NEONMAP0(vshl_n_v), 02274 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 02275 NEONMAP0(vshll_n_v), 02276 NEONMAP0(vshlq_n_v), 02277 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 02278 NEONMAP0(vshr_n_v), 02279 NEONMAP0(vshrn_n_v), 02280 NEONMAP0(vshrq_n_v), 02281 NEONMAP0(vsubhn_v), 02282 NEONMAP0(vtst_v), 02283 NEONMAP0(vtstq_v), 02284 }; 02285 02286 static NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { 02287 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), 02288 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), 02289 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), 02290 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 02291 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 02292 NEONMAP1(vaddlvq_s32, 
aarch64_neon_saddlv, AddRetType | Add1ArgType), 02293 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 02294 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 02295 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 02296 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 02297 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 02298 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), 02299 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 02300 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), 02301 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 02302 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 02303 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 02304 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 02305 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 02306 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 02307 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 02308 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 02309 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 02310 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 02311 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 02312 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 02313 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 02314 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 02315 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 02316 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 02317 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 02318 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 02319 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 02320 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 02321 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 02322 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 02323 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 02324 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 02325 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 02326 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 02327 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 02328 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 02329 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 02330 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 02331 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 02332 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 02333 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 02334 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 02335 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), 02336 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 02337 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 02338 NEONMAP1(vmaxnmvq_f64, 
aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 02339 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 02340 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 02341 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 02342 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 02343 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 02344 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 02345 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 02346 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 02347 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 02348 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 02349 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 02350 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 02351 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 02352 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 02353 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 02354 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 02355 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 02356 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), 02357 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), 02358 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), 02359 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 02360 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 02361 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 02362 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 02363 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 02364 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 02365 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 02366 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 02367 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 02368 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 02369 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 02370 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), 02371 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 02372 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), 02373 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 02374 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 02375 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), 02376 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), 02377 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 02378 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 02379 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), 02380 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), 02381 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), 02382 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), 02383 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), 02384 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), 02385 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), 02386 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, 
AddRetType | Add1ArgType), 02387 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 02388 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 02389 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 02390 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 02391 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), 02392 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 02393 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 02394 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 02395 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), 02396 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 02397 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), 02398 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), 02399 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), 02400 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 02401 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 02402 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), 02403 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), 02404 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 02405 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 02406 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), 02407 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), 02408 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), 02409 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), 02410 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 02411 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 02412 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 02413 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 02414 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), 02415 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 02416 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 02417 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 02418 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 02419 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 02420 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 02421 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), 02422 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), 02423 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 02424 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 02425 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 02426 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 02427 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), 02428 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), 02429 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), 02430 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), 02431 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 02432 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | 
Use64BitVectors), 02433 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), 02434 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), 02435 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), 02436 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 02437 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 02438 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 02439 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 02440 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), 02441 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 02442 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 02443 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 02444 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 02445 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), 02446 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), 02447 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 02448 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 02449 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), 02450 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), 02451 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), 02452 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), 02453 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), 02454 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), 02455 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), 02456 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), 02457 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), 02458 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), 02459 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), 02460 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), 02461 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), 02462 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), 02463 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), 02464 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), 02465 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), 02466 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), 02467 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), 02468 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), 02469 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 02470 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), 02471 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 02472 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), 02473 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), 02474 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), 02475 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 02476 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 02477 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 02478 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 02479 }; 02480 02481 #undef NEONMAP0 02482 #undef NEONMAP1 02483 #undef NEONMAP2 02484 02485 static bool NEONSIMDIntrinsicsProvenSorted = false; 02486 02487 static bool AArch64SIMDIntrinsicsProvenSorted = false; 02488 static bool AArch64SISDIntrinsicsProvenSorted = false; 02489 02490 02491 static const NeonIntrinsicInfo * 02492 
findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 02493 unsigned BuiltinID, bool &MapProvenSorted) { 02494 02495 #ifndef NDEBUG 02496 if (!MapProvenSorted) { 02497 // FIXME: use std::is_sorted once C++11 is allowed 02498 for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i) 02499 assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID); 02500 MapProvenSorted = true; 02501 } 02502 #endif 02503 02504 const NeonIntrinsicInfo *Builtin = 02505 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 02506 02507 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 02508 return Builtin; 02509 02510 return nullptr; 02511 } 02512 02513 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 02514 unsigned Modifier, 02515 llvm::Type *ArgType, 02516 const CallExpr *E) { 02517 int VectorSize = 0; 02518 if (Modifier & Use64BitVectors) 02519 VectorSize = 64; 02520 else if (Modifier & Use128BitVectors) 02521 VectorSize = 128; 02522 02523 // Return type. 02524 SmallVector<llvm::Type *, 3> Tys; 02525 if (Modifier & AddRetType) { 02526 llvm::Type *Ty = ConvertType(E->getCallReturnType()); 02527 if (Modifier & VectorizeRetType) 02528 Ty = llvm::VectorType::get( 02529 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 02530 02531 Tys.push_back(Ty); 02532 } 02533 02534 // Arguments. 02535 if (Modifier & VectorizeArgTypes) { 02536 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 02537 ArgType = llvm::VectorType::get(ArgType, Elts); 02538 } 02539 02540 if (Modifier & (Add1ArgType | Add2ArgTypes)) 02541 Tys.push_back(ArgType); 02542 02543 if (Modifier & Add2ArgTypes) 02544 Tys.push_back(ArgType); 02545 02546 if (Modifier & InventFloatType) 02547 Tys.push_back(FloatTy); 02548 02549 return CGM.getIntrinsic(IntrinsicID, Tys); 02550 } 02551 02552 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, 02553 const NeonIntrinsicInfo &SISDInfo, 02554 SmallVectorImpl<Value *> &Ops, 02555 const CallExpr *E) { 02556 unsigned BuiltinID = SISDInfo.BuiltinID; 02557 unsigned int Int = SISDInfo.LLVMIntrinsic; 02558 unsigned Modifier = SISDInfo.TypeModifier; 02559 const char *s = SISDInfo.NameHint; 02560 02561 switch (BuiltinID) { 02562 case NEON::BI__builtin_neon_vcled_s64: 02563 case NEON::BI__builtin_neon_vcled_u64: 02564 case NEON::BI__builtin_neon_vcles_f32: 02565 case NEON::BI__builtin_neon_vcled_f64: 02566 case NEON::BI__builtin_neon_vcltd_s64: 02567 case NEON::BI__builtin_neon_vcltd_u64: 02568 case NEON::BI__builtin_neon_vclts_f32: 02569 case NEON::BI__builtin_neon_vcltd_f64: 02570 case NEON::BI__builtin_neon_vcales_f32: 02571 case NEON::BI__builtin_neon_vcaled_f64: 02572 case NEON::BI__builtin_neon_vcalts_f32: 02573 case NEON::BI__builtin_neon_vcaltd_f64: 02574 // Only one direction of comparisons actually exist, cmle is actually a cmge 02575 // with swapped operands. The table gives us the right intrinsic but we 02576 // still need to do the swap. 02577 std::swap(Ops[0], Ops[1]); 02578 break; 02579 } 02580 02581 assert(Int && "Generic code assumes a valid intrinsic"); 02582 02583 // Determine the type(s) of this overloaded AArch64 intrinsic. 
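// The overload is resolved from the type of the first argument;
// LookupNeonLLVMIntrinsic then appends return and/or argument types to the
// intrinsic signature according to the TypeModifier flags of the map entry.
// For example (illustrative), vaddlv_s32 maps to aarch64_neon_saddlv with
// AddRetType | Add1ArgType, yielding @llvm.aarch64.neon.saddlv.i64.v2i32.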
02584 const Expr *Arg = E->getArg(0); 02585 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); 02586 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); 02587 02588 int j = 0; 02589 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); 02590 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 02591 ai != ae; ++ai, ++j) { 02592 llvm::Type *ArgTy = ai->getType(); 02593 if (Ops[j]->getType()->getPrimitiveSizeInBits() == 02594 ArgTy->getPrimitiveSizeInBits()) 02595 continue; 02596 02597 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); 02598 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate 02599 // it before inserting. 02600 Ops[j] = 02601 CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); 02602 Ops[j] = 02603 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); 02604 } 02605 02606 Value *Result = CGF.EmitNeonCall(F, Ops, s); 02607 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 02608 if (ResultType->getPrimitiveSizeInBits() < 02609 Result->getType()->getPrimitiveSizeInBits()) 02610 return CGF.Builder.CreateExtractElement(Result, C0); 02611 02612 return CGF.Builder.CreateBitCast(Result, ResultType, s); 02613 } 02614 02615 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 02616 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 02617 const char *NameHint, unsigned Modifier, const CallExpr *E, 02618 SmallVectorImpl<llvm::Value *> &Ops, llvm::Value *Align) { 02619 // Get the last argument, which specifies the vector type. 02620 llvm::APSInt NeonTypeConst; 02621 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 02622 if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) 02623 return nullptr; 02624 02625 // Determine the type of this overloaded NEON intrinsic. 
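EmitCommonNeonSISDBuiltinExpr above handles scalar ("SISD") builtins that are backed by vector intrinsics: operands that do not already match the intrinsic's parameter width are truncated if needed and inserted into lane 0 of an undef vector, the vector intrinsic is called, and lane 0 of the result is extracted when the result is wider than the builtin's return type. A value-level sketch of that round trip, using a hypothetical 4 x i16 saturating add; Vec4i16, SatAddI16 and ScalarViaVector are illustrative names, not clang APIs.

#include <array>
#include <cstdint>
#include <cstdio>
#include <limits>

// Stand-in for a 64-bit <4 x i16> NEON register.
using Vec4i16 = std::array<int16_t, 4>;

static int16_t SatAddI16(int16_t A, int16_t B) {
  int32_t S = int32_t(A) + int32_t(B);
  if (S > std::numeric_limits<int16_t>::max()) return std::numeric_limits<int16_t>::max();
  if (S < std::numeric_limits<int16_t>::min()) return std::numeric_limits<int16_t>::min();
  return int16_t(S);
}

// Model of the "insert scalar at lane 0, run the vector op, extract lane 0"
// pattern used when a scalar builtin maps to a vector intrinsic.
static int16_t ScalarViaVector(int16_t A, int16_t B) {
  Vec4i16 VA{}, VB{};               // lanes 1..3 are don't-care (undef in IR)
  VA[0] = A;
  VB[0] = B;
  Vec4i16 VR{};
  for (unsigned I = 0; I != 4; ++I) // the vector intrinsic
    VR[I] = SatAddI16(VA[I], VB[I]);
  return VR[0];                     // extract lane 0 of the wider result
}

int main() {
  std::printf("%d\n", ScalarViaVector(30000, 10000)); // 32767 (saturated)
  return 0;
}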
02626 NeonTypeFlags Type(NeonTypeConst.getZExtValue()); 02627 bool Usgn = Type.isUnsigned(); 02628 bool Quad = Type.isQuad(); 02629 02630 llvm::VectorType *VTy = GetNeonType(this, Type); 02631 llvm::Type *Ty = VTy; 02632 if (!Ty) 02633 return nullptr; 02634 02635 unsigned Int = LLVMIntrinsic; 02636 if ((Modifier & UnsignedAlts) && !Usgn) 02637 Int = AltLLVMIntrinsic; 02638 02639 switch (BuiltinID) { 02640 default: break; 02641 case NEON::BI__builtin_neon_vabs_v: 02642 case NEON::BI__builtin_neon_vabsq_v: 02643 if (VTy->getElementType()->isFloatingPointTy()) 02644 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); 02645 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); 02646 case NEON::BI__builtin_neon_vaddhn_v: { 02647 llvm::VectorType *SrcTy = 02648 llvm::VectorType::getExtendedElementVectorType(VTy); 02649 02650 // %sum = add <4 x i32> %lhs, %rhs 02651 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 02652 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 02653 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); 02654 02655 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 02656 Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), 02657 SrcTy->getScalarSizeInBits() / 2); 02658 ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); 02659 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); 02660 02661 // %res = trunc <4 x i32> %high to <4 x i16> 02662 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); 02663 } 02664 case NEON::BI__builtin_neon_vcale_v: 02665 case NEON::BI__builtin_neon_vcaleq_v: 02666 case NEON::BI__builtin_neon_vcalt_v: 02667 case NEON::BI__builtin_neon_vcaltq_v: 02668 std::swap(Ops[0], Ops[1]); 02669 case NEON::BI__builtin_neon_vcage_v: 02670 case NEON::BI__builtin_neon_vcageq_v: 02671 case NEON::BI__builtin_neon_vcagt_v: 02672 case NEON::BI__builtin_neon_vcagtq_v: { 02673 llvm::Type *VecFlt = llvm::VectorType::get( 02674 VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy, 02675 VTy->getNumElements()); 02676 llvm::Type *Tys[] = { VTy, VecFlt }; 02677 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 02678 return EmitNeonCall(F, Ops, NameHint); 02679 } 02680 case NEON::BI__builtin_neon_vclz_v: 02681 case NEON::BI__builtin_neon_vclzq_v: 02682 // We generate target-independent intrinsic, which needs a second argument 02683 // for whether or not clz of zero is undefined; on ARM it isn't. 02684 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); 02685 break; 02686 case NEON::BI__builtin_neon_vcvt_f32_v: 02687 case NEON::BI__builtin_neon_vcvtq_f32_v: 02688 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 02689 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad)); 02690 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 02691 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 02692 case NEON::BI__builtin_neon_vcvt_n_f32_v: 02693 case NEON::BI__builtin_neon_vcvt_n_f64_v: 02694 case NEON::BI__builtin_neon_vcvtq_n_f32_v: 02695 case NEON::BI__builtin_neon_vcvtq_n_f64_v: { 02696 bool Double = 02697 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 02698 llvm::Type *FloatTy = 02699 GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 02700 : NeonTypeFlags::Float32, 02701 false, Quad)); 02702 llvm::Type *Tys[2] = { FloatTy, Ty }; 02703 Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; 02704 Function *F = CGM.getIntrinsic(Int, Tys); 02705 return EmitNeonCall(F, Ops, "vcvt_n"); 02706 } 02707 case NEON::BI__builtin_neon_vcvt_n_s32_v: 02708 case NEON::BI__builtin_neon_vcvt_n_u32_v: 02709 case NEON::BI__builtin_neon_vcvt_n_s64_v: 02710 case NEON::BI__builtin_neon_vcvt_n_u64_v: 02711 case NEON::BI__builtin_neon_vcvtq_n_s32_v: 02712 case NEON::BI__builtin_neon_vcvtq_n_u32_v: 02713 case NEON::BI__builtin_neon_vcvtq_n_s64_v: 02714 case NEON::BI__builtin_neon_vcvtq_n_u64_v: { 02715 bool Double = 02716 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 02717 llvm::Type *FloatTy = 02718 GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 02719 : NeonTypeFlags::Float32, 02720 false, Quad)); 02721 llvm::Type *Tys[2] = { Ty, FloatTy }; 02722 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 02723 return EmitNeonCall(F, Ops, "vcvt_n"); 02724 } 02725 case NEON::BI__builtin_neon_vcvt_s32_v: 02726 case NEON::BI__builtin_neon_vcvt_u32_v: 02727 case NEON::BI__builtin_neon_vcvt_s64_v: 02728 case NEON::BI__builtin_neon_vcvt_u64_v: 02729 case NEON::BI__builtin_neon_vcvtq_s32_v: 02730 case NEON::BI__builtin_neon_vcvtq_u32_v: 02731 case NEON::BI__builtin_neon_vcvtq_s64_v: 02732 case NEON::BI__builtin_neon_vcvtq_u64_v: { 02733 bool Double = 02734 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 02735 llvm::Type *FloatTy = 02736 GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 02737 : NeonTypeFlags::Float32, 02738 false, Quad)); 02739 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 02740 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 02741 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); 02742 } 02743 case NEON::BI__builtin_neon_vcvta_s32_v: 02744 case NEON::BI__builtin_neon_vcvta_s64_v: 02745 case NEON::BI__builtin_neon_vcvta_u32_v: 02746 case NEON::BI__builtin_neon_vcvta_u64_v: 02747 case NEON::BI__builtin_neon_vcvtaq_s32_v: 02748 case NEON::BI__builtin_neon_vcvtaq_s64_v: 02749 case NEON::BI__builtin_neon_vcvtaq_u32_v: 02750 case NEON::BI__builtin_neon_vcvtaq_u64_v: 02751 case NEON::BI__builtin_neon_vcvtn_s32_v: 02752 case NEON::BI__builtin_neon_vcvtn_s64_v: 02753 case NEON::BI__builtin_neon_vcvtn_u32_v: 02754 case NEON::BI__builtin_neon_vcvtn_u64_v: 02755 case NEON::BI__builtin_neon_vcvtnq_s32_v: 02756 case NEON::BI__builtin_neon_vcvtnq_s64_v: 02757 case NEON::BI__builtin_neon_vcvtnq_u32_v: 02758 case NEON::BI__builtin_neon_vcvtnq_u64_v: 02759 case NEON::BI__builtin_neon_vcvtp_s32_v: 02760 case NEON::BI__builtin_neon_vcvtp_s64_v: 02761 case NEON::BI__builtin_neon_vcvtp_u32_v: 02762 case NEON::BI__builtin_neon_vcvtp_u64_v: 02763 case NEON::BI__builtin_neon_vcvtpq_s32_v: 02764 case NEON::BI__builtin_neon_vcvtpq_s64_v: 02765 case NEON::BI__builtin_neon_vcvtpq_u32_v: 02766 case NEON::BI__builtin_neon_vcvtpq_u64_v: 02767 case NEON::BI__builtin_neon_vcvtm_s32_v: 02768 case NEON::BI__builtin_neon_vcvtm_s64_v: 02769 case NEON::BI__builtin_neon_vcvtm_u32_v: 02770 case NEON::BI__builtin_neon_vcvtm_u64_v: 02771 case NEON::BI__builtin_neon_vcvtmq_s32_v: 02772 case NEON::BI__builtin_neon_vcvtmq_s64_v: 02773 case NEON::BI__builtin_neon_vcvtmq_u32_v: 02774 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 02775 bool Double = 02776 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 02777 llvm::Type *InTy = 02778 GetNeonType(this, 02779 NeonTypeFlags(Double ? 
NeonTypeFlags::Float64 02780 : NeonTypeFlags::Float32, false, Quad)); 02781 llvm::Type *Tys[2] = { Ty, InTy }; 02782 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); 02783 } 02784 case NEON::BI__builtin_neon_vext_v: 02785 case NEON::BI__builtin_neon_vextq_v: { 02786 int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); 02787 SmallVector<Constant*, 16> Indices; 02788 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 02789 Indices.push_back(ConstantInt::get(Int32Ty, i+CV)); 02790 02791 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 02792 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 02793 Value *SV = llvm::ConstantVector::get(Indices); 02794 return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext"); 02795 } 02796 case NEON::BI__builtin_neon_vfma_v: 02797 case NEON::BI__builtin_neon_vfmaq_v: { 02798 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 02799 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 02800 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 02801 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 02802 02803 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 02804 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 02805 } 02806 case NEON::BI__builtin_neon_vld1_v: 02807 case NEON::BI__builtin_neon_vld1q_v: 02808 Ops.push_back(Align); 02809 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1"); 02810 case NEON::BI__builtin_neon_vld2_v: 02811 case NEON::BI__builtin_neon_vld2q_v: 02812 case NEON::BI__builtin_neon_vld3_v: 02813 case NEON::BI__builtin_neon_vld3q_v: 02814 case NEON::BI__builtin_neon_vld4_v: 02815 case NEON::BI__builtin_neon_vld4q_v: { 02816 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty); 02817 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, NameHint); 02818 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 02819 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 02820 return Builder.CreateStore(Ops[1], Ops[0]); 02821 } 02822 case NEON::BI__builtin_neon_vld1_dup_v: 02823 case NEON::BI__builtin_neon_vld1q_dup_v: { 02824 Value *V = UndefValue::get(Ty); 02825 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 02826 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 02827 LoadInst *Ld = Builder.CreateLoad(Ops[0]); 02828 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 02829 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 02830 Ops[0] = Builder.CreateInsertElement(V, Ld, CI); 02831 return EmitNeonSplat(Ops[0], CI); 02832 } 02833 case NEON::BI__builtin_neon_vld2_lane_v: 02834 case NEON::BI__builtin_neon_vld2q_lane_v: 02835 case NEON::BI__builtin_neon_vld3_lane_v: 02836 case NEON::BI__builtin_neon_vld3q_lane_v: 02837 case NEON::BI__builtin_neon_vld4_lane_v: 02838 case NEON::BI__builtin_neon_vld4q_lane_v: { 02839 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty); 02840 for (unsigned I = 2; I < Ops.size() - 1; ++I) 02841 Ops[I] = Builder.CreateBitCast(Ops[I], Ty); 02842 Ops.push_back(Align); 02843 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint); 02844 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 02845 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 02846 return Builder.CreateStore(Ops[1], Ops[0]); 02847 } 02848 case NEON::BI__builtin_neon_vmovl_v: { 02849 llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); 02850 Ops[0] = Builder.CreateBitCast(Ops[0], DTy); 02851 if (Usgn) 02852 return Builder.CreateZExt(Ops[0], Ty, "vmovl"); 02853 return Builder.CreateSExt(Ops[0], Ty, "vmovl"); 02854 } 02855 case NEON::BI__builtin_neon_vmovn_v: { 02856 llvm::Type *QTy = 
llvm::VectorType::getExtendedElementVectorType(VTy); 02857 Ops[0] = Builder.CreateBitCast(Ops[0], QTy); 02858 return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); 02859 } 02860 case NEON::BI__builtin_neon_vmull_v: 02861 // FIXME: the integer vmull operations could be emitted in terms of pure 02862 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of 02863 // hoisting the exts outside loops. Until global ISel comes along that can 02864 // see through such movement this leads to bad CodeGen. So we need an 02865 // intrinsic for now. 02866 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; 02867 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int; 02868 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 02869 case NEON::BI__builtin_neon_vpadal_v: 02870 case NEON::BI__builtin_neon_vpadalq_v: { 02871 // The source operand type has twice as many elements of half the size. 02872 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 02873 llvm::Type *EltTy = 02874 llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 02875 llvm::Type *NarrowTy = 02876 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 02877 llvm::Type *Tys[2] = { Ty, NarrowTy }; 02878 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); 02879 } 02880 case NEON::BI__builtin_neon_vpaddl_v: 02881 case NEON::BI__builtin_neon_vpaddlq_v: { 02882 // The source operand type has twice as many elements of half the size. 02883 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 02884 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 02885 llvm::Type *NarrowTy = 02886 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 02887 llvm::Type *Tys[2] = { Ty, NarrowTy }; 02888 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); 02889 } 02890 case NEON::BI__builtin_neon_vqdmlal_v: 02891 case NEON::BI__builtin_neon_vqdmlsl_v: { 02892 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); 02893 Value *Mul = EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), 02894 MulOps, "vqdmlal"); 02895 02896 SmallVector<Value *, 2> AccumOps; 02897 AccumOps.push_back(Ops[0]); 02898 AccumOps.push_back(Mul); 02899 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), 02900 AccumOps, NameHint); 02901 } 02902 case NEON::BI__builtin_neon_vqshl_n_v: 02903 case NEON::BI__builtin_neon_vqshlq_n_v: 02904 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 02905 1, false); 02906 case NEON::BI__builtin_neon_vqshlu_n_v: 02907 case NEON::BI__builtin_neon_vqshluq_n_v: 02908 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 02909 1, false); 02910 case NEON::BI__builtin_neon_vrecpe_v: 02911 case NEON::BI__builtin_neon_vrecpeq_v: 02912 case NEON::BI__builtin_neon_vrsqrte_v: 02913 case NEON::BI__builtin_neon_vrsqrteq_v: 02914 Int = Ty->isFPOrFPVectorTy() ? 
LLVMIntrinsic : AltLLVMIntrinsic; 02915 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); 02916 02917 case NEON::BI__builtin_neon_vrshr_n_v: 02918 case NEON::BI__builtin_neon_vrshrq_n_v: 02919 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 02920 1, true); 02921 case NEON::BI__builtin_neon_vshl_n_v: 02922 case NEON::BI__builtin_neon_vshlq_n_v: 02923 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); 02924 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], 02925 "vshl_n"); 02926 case NEON::BI__builtin_neon_vshll_n_v: { 02927 llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); 02928 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 02929 if (Usgn) 02930 Ops[0] = Builder.CreateZExt(Ops[0], VTy); 02931 else 02932 Ops[0] = Builder.CreateSExt(Ops[0], VTy); 02933 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); 02934 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); 02935 } 02936 case NEON::BI__builtin_neon_vshrn_n_v: { 02937 llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); 02938 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 02939 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); 02940 if (Usgn) 02941 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); 02942 else 02943 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); 02944 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); 02945 } 02946 case NEON::BI__builtin_neon_vshr_n_v: 02947 case NEON::BI__builtin_neon_vshrq_n_v: 02948 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n"); 02949 case NEON::BI__builtin_neon_vst1_v: 02950 case NEON::BI__builtin_neon_vst1q_v: 02951 case NEON::BI__builtin_neon_vst2_v: 02952 case NEON::BI__builtin_neon_vst2q_v: 02953 case NEON::BI__builtin_neon_vst3_v: 02954 case NEON::BI__builtin_neon_vst3q_v: 02955 case NEON::BI__builtin_neon_vst4_v: 02956 case NEON::BI__builtin_neon_vst4q_v: 02957 case NEON::BI__builtin_neon_vst2_lane_v: 02958 case NEON::BI__builtin_neon_vst2q_lane_v: 02959 case NEON::BI__builtin_neon_vst3_lane_v: 02960 case NEON::BI__builtin_neon_vst3q_lane_v: 02961 case NEON::BI__builtin_neon_vst4_lane_v: 02962 case NEON::BI__builtin_neon_vst4q_lane_v: 02963 Ops.push_back(Align); 02964 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); 02965 case NEON::BI__builtin_neon_vsubhn_v: { 02966 llvm::VectorType *SrcTy = 02967 llvm::VectorType::getExtendedElementVectorType(VTy); 02968 02969 // %sum = add <4 x i32> %lhs, %rhs 02970 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 02971 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 02972 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); 02973 02974 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 02975 Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), 02976 SrcTy->getScalarSizeInBits() / 2); 02977 ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); 02978 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); 02979 02980 // %res = trunc <4 x i32> %high to <4 x i16> 02981 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); 02982 } 02983 case NEON::BI__builtin_neon_vtrn_v: 02984 case NEON::BI__builtin_neon_vtrnq_v: { 02985 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 02986 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 02987 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 02988 Value *SV = nullptr; 02989 02990 for (unsigned vi = 0; vi != 2; ++vi) { 02991 SmallVector<Constant*, 16> Indices; 02992 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 02993 Indices.push_back(Builder.getInt32(i+vi)); 
02994 Indices.push_back(Builder.getInt32(i+e+vi)); 02995 } 02996 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 02997 SV = llvm::ConstantVector::get(Indices); 02998 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); 02999 SV = Builder.CreateStore(SV, Addr); 03000 } 03001 return SV; 03002 } 03003 case NEON::BI__builtin_neon_vtst_v: 03004 case NEON::BI__builtin_neon_vtstq_v: { 03005 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 03006 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 03007 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 03008 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 03009 ConstantAggregateZero::get(Ty)); 03010 return Builder.CreateSExt(Ops[0], Ty, "vtst"); 03011 } 03012 case NEON::BI__builtin_neon_vuzp_v: 03013 case NEON::BI__builtin_neon_vuzpq_v: { 03014 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 03015 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 03016 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 03017 Value *SV = nullptr; 03018 03019 for (unsigned vi = 0; vi != 2; ++vi) { 03020 SmallVector<Constant*, 16> Indices; 03021 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 03022 Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); 03023 03024 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 03025 SV = llvm::ConstantVector::get(Indices); 03026 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); 03027 SV = Builder.CreateStore(SV, Addr); 03028 } 03029 return SV; 03030 } 03031 case NEON::BI__builtin_neon_vzip_v: 03032 case NEON::BI__builtin_neon_vzipq_v: { 03033 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 03034 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 03035 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 03036 Value *SV = nullptr; 03037 03038 for (unsigned vi = 0; vi != 2; ++vi) { 03039 SmallVector<Constant*, 16> Indices; 03040 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 03041 Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); 03042 Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); 03043 } 03044 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 03045 SV = llvm::ConstantVector::get(Indices); 03046 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); 03047 SV = Builder.CreateStore(SV, Addr); 03048 } 03049 return SV; 03050 } 03051 } 03052 03053 assert(Int && "Expected valid intrinsic number"); 03054 03055 // Determine the type(s) of this overloaded AArch64 intrinsic. 03056 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 03057 03058 Value *Result = EmitNeonCall(F, Ops, NameHint); 03059 llvm::Type *ResultType = ConvertType(E->getType()); 03060 // AArch64 intrinsic one-element vector type cast to 03061 // scalar type expected by the builtin 03062 return Builder.CreateBitCast(Result, ResultType, NameHint); 03063 } 03064 03065 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 03066 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 03067 const CmpInst::Predicate Ip, const Twine &Name) { 03068 llvm::Type *OTy = Op->getType(); 03069 03070 // FIXME: this is utterly horrific. We should not be looking at previous 03071 // codegen context to find out what needs doing. Unfortunately TableGen 03072 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 03073 // (etc). 
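For the vtrn/vuzp/vzip cases a few lines up, the shufflevector masks are built from small index formulas. The standalone program below only prints those masks for an assumed 8-lane vector so the arithmetic can be checked by eye; it is illustration, not part of the lowering code, and uses no clang APIs.

#include <cstdio>
#include <vector>

int main() {
  const unsigned e = 8;  // arbitrary example element count
  for (unsigned vi = 0; vi != 2; ++vi) {
    std::vector<unsigned> Trn, Uzp, Zip;
    for (unsigned i = 0; i != e; i += 2) {
      Trn.push_back(i + vi);                 // vtrn: i+vi, i+e+vi
      Trn.push_back(i + e + vi);
      Zip.push_back((i + vi * e) >> 1);      // vzip: (i+vi*e)>>1, +e
      Zip.push_back(((i + vi * e) >> 1) + e);
    }
    for (unsigned i = 0; i != e; ++i)
      Uzp.push_back(2 * i + vi);             // vuzp: 2*i+vi

    std::printf("vtrn%u:", vi + 1);
    for (unsigned Idx : Trn) std::printf(" %u", Idx);
    std::printf("\nvuzp%u:", vi + 1);
    for (unsigned Idx : Uzp) std::printf(" %u", Idx);
    std::printf("\nvzip%u:", vi + 1);
    for (unsigned Idx : Zip) std::printf(" %u", Idx);
    std::printf("\n");
  }
  return 0;
}

vi == 0 yields the first result vector of each pair and vi == 1 the second, matching the two stores through the GEP on Ops[0] in the cases above.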
03074 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 03075 OTy = BI->getOperand(0)->getType(); 03076 03077 Op = Builder.CreateBitCast(Op, OTy); 03078 if (OTy->getScalarType()->isFloatingPointTy()) { 03079 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 03080 } else { 03081 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 03082 } 03083 return Builder.CreateSExt(Op, Ty, Name); 03084 } 03085 03086 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 03087 Value *ExtOp, Value *IndexOp, 03088 llvm::Type *ResTy, unsigned IntID, 03089 const char *Name) { 03090 SmallVector<Value *, 2> TblOps; 03091 if (ExtOp) 03092 TblOps.push_back(ExtOp); 03093 03094 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 03095 SmallVector<Constant*, 16> Indices; 03096 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 03097 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 03098 Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i)); 03099 Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1)); 03100 } 03101 Value *SV = llvm::ConstantVector::get(Indices); 03102 03103 int PairPos = 0, End = Ops.size() - 1; 03104 while (PairPos < End) { 03105 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 03106 Ops[PairPos+1], SV, Name)); 03107 PairPos += 2; 03108 } 03109 03110 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 03111 // of the 128-bit lookup table with zero. 03112 if (PairPos == End) { 03113 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 03114 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 03115 ZeroTbl, SV, Name)); 03116 } 03117 03118 Function *TblF; 03119 TblOps.push_back(IndexOp); 03120 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 03121 03122 return CGF.EmitNeonCall(TblF, TblOps, Name); 03123 } 03124 03125 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 03126 const CallExpr *E) { 03127 unsigned HintID = static_cast<unsigned>(-1); 03128 switch (BuiltinID) { 03129 default: break; 03130 case ARM::BI__builtin_arm_nop: 03131 HintID = 0; 03132 break; 03133 case ARM::BI__builtin_arm_yield: 03134 case ARM::BI__yield: 03135 HintID = 1; 03136 break; 03137 case ARM::BI__builtin_arm_wfe: 03138 case ARM::BI__wfe: 03139 HintID = 2; 03140 break; 03141 case ARM::BI__builtin_arm_wfi: 03142 case ARM::BI__wfi: 03143 HintID = 3; 03144 break; 03145 case ARM::BI__builtin_arm_sev: 03146 case ARM::BI__sev: 03147 HintID = 4; 03148 break; 03149 case ARM::BI__builtin_arm_sevl: 03150 case ARM::BI__sevl: 03151 HintID = 5; 03152 break; 03153 } 03154 03155 if (HintID != static_cast<unsigned>(-1)) { 03156 Function *F = CGM.getIntrinsic(Intrinsic::arm_hint); 03157 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 03158 } 03159 03160 if (BuiltinID == ARM::BI__builtin_arm_dbg) { 03161 Value *Option = EmitScalarExpr(E->getArg(0)); 03162 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); 03163 } 03164 03165 if (BuiltinID == ARM::BI__builtin_arm_prefetch) { 03166 Value *Address = EmitScalarExpr(E->getArg(0)); 03167 Value *RW = EmitScalarExpr(E->getArg(1)); 03168 Value *IsData = EmitScalarExpr(E->getArg(2)); 03169 03170 // Locality is not supported on ARM target 03171 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); 03172 03173 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 03174 return Builder.CreateCall4(F, Address, RW, Locality, IsData); 03175 } 03176 03177 if (BuiltinID == ARM::BI__builtin_arm_rbit) { 03178 return 
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit), 03179 EmitScalarExpr(E->getArg(0)), 03180 "rbit"); 03181 } 03182 03183 if (BuiltinID == ARM::BI__clear_cache) { 03184 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 03185 const FunctionDecl *FD = E->getDirectCallee(); 03186 SmallVector<Value*, 2> Ops; 03187 for (unsigned i = 0; i < 2; i++) 03188 Ops.push_back(EmitScalarExpr(E->getArg(i))); 03189 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 03190 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 03191 StringRef Name = FD->getName(); 03192 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 03193 } 03194 03195 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 03196 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 03197 BuiltinID == ARM::BI__builtin_arm_ldaex) && 03198 getContext().getTypeSize(E->getType()) == 64) || 03199 BuiltinID == ARM::BI__ldrexd) { 03200 Function *F; 03201 03202 switch (BuiltinID) { 03203 default: llvm_unreachable("unexpected builtin"); 03204 case ARM::BI__builtin_arm_ldaex: 03205 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 03206 break; 03207 case ARM::BI__builtin_arm_ldrexd: 03208 case ARM::BI__builtin_arm_ldrex: 03209 case ARM::BI__ldrexd: 03210 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 03211 break; 03212 } 03213 03214 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 03215 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 03216 "ldrexd"); 03217 03218 Value *Val0 = Builder.CreateExtractValue(Val, 1); 03219 Value *Val1 = Builder.CreateExtractValue(Val, 0); 03220 Val0 = Builder.CreateZExt(Val0, Int64Ty); 03221 Val1 = Builder.CreateZExt(Val1, Int64Ty); 03222 03223 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 03224 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 03225 Val = Builder.CreateOr(Val, Val1); 03226 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 03227 } 03228 03229 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 03230 BuiltinID == ARM::BI__builtin_arm_ldaex) { 03231 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 03232 03233 QualType Ty = E->getType(); 03234 llvm::Type *RealResTy = ConvertType(Ty); 03235 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 03236 getContext().getTypeSize(Ty)); 03237 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 03238 03239 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 03240 ? Intrinsic::arm_ldaex 03241 : Intrinsic::arm_ldrex, 03242 LoadAddr->getType()); 03243 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 03244 03245 if (RealResTy->isPointerTy()) 03246 return Builder.CreateIntToPtr(Val, RealResTy); 03247 else { 03248 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 03249 return Builder.CreateBitCast(Val, RealResTy); 03250 } 03251 } 03252 03253 if (BuiltinID == ARM::BI__builtin_arm_strexd || 03254 ((BuiltinID == ARM::BI__builtin_arm_stlex || 03255 BuiltinID == ARM::BI__builtin_arm_strex) && 03256 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 03257 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 03258 ? 
Intrinsic::arm_stlexd 03259 : Intrinsic::arm_strexd); 03260 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL); 03261 03262 Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); 03263 Value *Val = EmitScalarExpr(E->getArg(0)); 03264 Builder.CreateStore(Val, Tmp); 03265 03266 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 03267 Val = Builder.CreateLoad(LdPtr); 03268 03269 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 03270 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 03271 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 03272 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd"); 03273 } 03274 03275 if (BuiltinID == ARM::BI__builtin_arm_strex || 03276 BuiltinID == ARM::BI__builtin_arm_stlex) { 03277 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 03278 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 03279 03280 QualType Ty = E->getArg(0)->getType(); 03281 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 03282 getContext().getTypeSize(Ty)); 03283 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 03284 03285 if (StoreVal->getType()->isPointerTy()) 03286 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 03287 else { 03288 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); 03289 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 03290 } 03291 03292 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 03293 ? Intrinsic::arm_stlex 03294 : Intrinsic::arm_strex, 03295 StoreAddr->getType()); 03296 return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex"); 03297 } 03298 03299 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 03300 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 03301 return Builder.CreateCall(F); 03302 } 03303 03304 // CRC32 03305 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 03306 switch (BuiltinID) { 03307 case ARM::BI__builtin_arm_crc32b: 03308 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 03309 case ARM::BI__builtin_arm_crc32cb: 03310 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 03311 case ARM::BI__builtin_arm_crc32h: 03312 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 03313 case ARM::BI__builtin_arm_crc32ch: 03314 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 03315 case ARM::BI__builtin_arm_crc32w: 03316 case ARM::BI__builtin_arm_crc32d: 03317 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 03318 case ARM::BI__builtin_arm_crc32cw: 03319 case ARM::BI__builtin_arm_crc32cd: 03320 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 03321 } 03322 03323 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 03324 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 03325 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 03326 03327 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w 03328 // intrinsics, hence we need different codegen for these cases.
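As the comment above says, the 64-bit crc32d/crc32cd builtins are lowered to two 32-bit CRC steps: the data operand is truncated for the low half, shifted right by 32 and truncated for the high half, and the two intrinsic calls are chained. A value-level sketch follows; Crc32wStep is a deliberately fake mixing function (it is not the real CRC polynomial and not an LLVM intrinsic), used only so the operand splitting is visible.

#include <cstdint>
#include <cstdio>

// Placeholder for the 32-bit CRC step; the real lowering calls the
// llvm.arm.crc32w / crc32cw intrinsic here. The arithmetic is illustration
// only, NOT the actual CRC computation.
static uint32_t Crc32wStep(uint32_t Acc, uint32_t Data) {
  return (Acc ^ Data) * 0x01000193u;
}

// Mirrors the crc32d/crc32cd lowering: one 64-bit data operand becomes two
// 32-bit steps, low half first, then high half.
static uint32_t Crc32dViaTwoWords(uint32_t Acc, uint64_t Data) {
  uint32_t Lo = static_cast<uint32_t>(Data);        // trunc
  uint32_t Hi = static_cast<uint32_t>(Data >> 32);  // lshr 32 + trunc
  return Crc32wStep(Crc32wStep(Acc, Lo), Hi);
}

int main() {
  std::printf("0x%08x\n", Crc32dViaTwoWords(0xffffffffu, 0x0123456789abcdefULL));
  return 0;
}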
03329 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 03330 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 03331 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 03332 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 03333 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 03334 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 03335 03336 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 03337 Value *Res = Builder.CreateCall2(F, Arg0, Arg1a); 03338 return Builder.CreateCall2(F, Res, Arg1b); 03339 } else { 03340 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 03341 03342 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 03343 return Builder.CreateCall2(F, Arg0, Arg1); 03344 } 03345 } 03346 03347 SmallVector<Value*, 4> Ops; 03348 llvm::Value *Align = nullptr; 03349 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 03350 if (i == 0) { 03351 switch (BuiltinID) { 03352 case NEON::BI__builtin_neon_vld1_v: 03353 case NEON::BI__builtin_neon_vld1q_v: 03354 case NEON::BI__builtin_neon_vld1q_lane_v: 03355 case NEON::BI__builtin_neon_vld1_lane_v: 03356 case NEON::BI__builtin_neon_vld1_dup_v: 03357 case NEON::BI__builtin_neon_vld1q_dup_v: 03358 case NEON::BI__builtin_neon_vst1_v: 03359 case NEON::BI__builtin_neon_vst1q_v: 03360 case NEON::BI__builtin_neon_vst1q_lane_v: 03361 case NEON::BI__builtin_neon_vst1_lane_v: 03362 case NEON::BI__builtin_neon_vst2_v: 03363 case NEON::BI__builtin_neon_vst2q_v: 03364 case NEON::BI__builtin_neon_vst2_lane_v: 03365 case NEON::BI__builtin_neon_vst2q_lane_v: 03366 case NEON::BI__builtin_neon_vst3_v: 03367 case NEON::BI__builtin_neon_vst3q_v: 03368 case NEON::BI__builtin_neon_vst3_lane_v: 03369 case NEON::BI__builtin_neon_vst3q_lane_v: 03370 case NEON::BI__builtin_neon_vst4_v: 03371 case NEON::BI__builtin_neon_vst4q_v: 03372 case NEON::BI__builtin_neon_vst4_lane_v: 03373 case NEON::BI__builtin_neon_vst4q_lane_v: 03374 // Get the alignment for the argument in addition to the value; 03375 // we'll use it later. 03376 std::pair<llvm::Value*, unsigned> Src = 03377 EmitPointerWithAlignment(E->getArg(0)); 03378 Ops.push_back(Src.first); 03379 Align = Builder.getInt32(Src.second); 03380 continue; 03381 } 03382 } 03383 if (i == 1) { 03384 switch (BuiltinID) { 03385 case NEON::BI__builtin_neon_vld2_v: 03386 case NEON::BI__builtin_neon_vld2q_v: 03387 case NEON::BI__builtin_neon_vld3_v: 03388 case NEON::BI__builtin_neon_vld3q_v: 03389 case NEON::BI__builtin_neon_vld4_v: 03390 case NEON::BI__builtin_neon_vld4q_v: 03391 case NEON::BI__builtin_neon_vld2_lane_v: 03392 case NEON::BI__builtin_neon_vld2q_lane_v: 03393 case NEON::BI__builtin_neon_vld3_lane_v: 03394 case NEON::BI__builtin_neon_vld3q_lane_v: 03395 case NEON::BI__builtin_neon_vld4_lane_v: 03396 case NEON::BI__builtin_neon_vld4q_lane_v: 03397 case NEON::BI__builtin_neon_vld2_dup_v: 03398 case NEON::BI__builtin_neon_vld3_dup_v: 03399 case NEON::BI__builtin_neon_vld4_dup_v: 03400 // Get the alignment for the argument in addition to the value; 03401 // we'll use it later. 03402 std::pair<llvm::Value*, unsigned> Src = 03403 EmitPointerWithAlignment(E->getArg(1)); 03404 Ops.push_back(Src.first); 03405 Align = Builder.getInt32(Src.second); 03406 continue; 03407 } 03408 } 03409 Ops.push_back(EmitScalarExpr(E->getArg(i))); 03410 } 03411 03412 switch (BuiltinID) { 03413 default: break; 03414 // vget_lane and vset_lane are not overloaded and do not have an extra 03415 // argument that specifies the vector type. 
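The comment above notes that vget_lane/vset_lane carry no trailing type argument; the cases that follow lower them to a plain extractelement/insertelement, with the scalar value as the first vset_lane operand, then the vector, then the lane. A scalar model of those semantics; Lane4x32, getLane and setLane are illustrative names only.

#include <cstdio>

struct Lane4x32 { int v[4]; };

// vget_lane: read one element of the vector.
static int getLane(const Lane4x32 &V, int Lane) { return V.v[Lane]; }

// vset_lane: value first, then vector, then lane, returning the new vector.
static Lane4x32 setLane(int X, Lane4x32 V, int Lane) {
  V.v[Lane] = X;
  return V;
}

int main() {
  Lane4x32 V = {{10, 20, 30, 40}};
  std::printf("%d\n", getLane(V, 2));                  // 30
  std::printf("%d\n", getLane(setLane(7, V, 1), 1));   // 7
  return 0;
}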
03416 case NEON::BI__builtin_neon_vget_lane_i8: 03417 case NEON::BI__builtin_neon_vget_lane_i16: 03418 case NEON::BI__builtin_neon_vget_lane_i32: 03419 case NEON::BI__builtin_neon_vget_lane_i64: 03420 case NEON::BI__builtin_neon_vget_lane_f32: 03421 case NEON::BI__builtin_neon_vgetq_lane_i8: 03422 case NEON::BI__builtin_neon_vgetq_lane_i16: 03423 case NEON::BI__builtin_neon_vgetq_lane_i32: 03424 case NEON::BI__builtin_neon_vgetq_lane_i64: 03425 case NEON::BI__builtin_neon_vgetq_lane_f32: 03426 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 03427 "vget_lane"); 03428 case NEON::BI__builtin_neon_vset_lane_i8: 03429 case NEON::BI__builtin_neon_vset_lane_i16: 03430 case NEON::BI__builtin_neon_vset_lane_i32: 03431 case NEON::BI__builtin_neon_vset_lane_i64: 03432 case NEON::BI__builtin_neon_vset_lane_f32: 03433 case NEON::BI__builtin_neon_vsetq_lane_i8: 03434 case NEON::BI__builtin_neon_vsetq_lane_i16: 03435 case NEON::BI__builtin_neon_vsetq_lane_i32: 03436 case NEON::BI__builtin_neon_vsetq_lane_i64: 03437 case NEON::BI__builtin_neon_vsetq_lane_f32: 03438 Ops.push_back(EmitScalarExpr(E->getArg(2))); 03439 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 03440 03441 // Non-polymorphic crypto instructions also not overloaded 03442 case NEON::BI__builtin_neon_vsha1h_u32: 03443 Ops.push_back(EmitScalarExpr(E->getArg(0))); 03444 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 03445 "vsha1h"); 03446 case NEON::BI__builtin_neon_vsha1cq_u32: 03447 Ops.push_back(EmitScalarExpr(E->getArg(2))); 03448 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 03449 "vsha1h"); 03450 case NEON::BI__builtin_neon_vsha1pq_u32: 03451 Ops.push_back(EmitScalarExpr(E->getArg(2))); 03452 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 03453 "vsha1h"); 03454 case NEON::BI__builtin_neon_vsha1mq_u32: 03455 Ops.push_back(EmitScalarExpr(E->getArg(2))); 03456 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 03457 "vsha1h"); 03458 } 03459 03460 // Get the last argument, which specifies the vector type. 03461 llvm::APSInt Result; 03462 const Expr *Arg = E->getArg(E->getNumArgs()-1); 03463 if (!Arg->isIntegerConstantExpr(Result, getContext())) 03464 return nullptr; 03465 03466 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 03467 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 03468 // Determine the overloaded type of this builtin. 03469 llvm::Type *Ty; 03470 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 03471 Ty = FloatTy; 03472 else 03473 Ty = DoubleTy; 03474 03475 // Determine whether this is an unsigned conversion or not. 03476 bool usgn = Result.getZExtValue() == 1; 03477 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 03478 03479 // Call the appropriate intrinsic. 03480 Function *F = CGM.getIntrinsic(Int, Ty); 03481 return Builder.CreateCall(F, Ops, "vcvtr"); 03482 } 03483 03484 // Determine the type of this overloaded NEON intrinsic. 03485 NeonTypeFlags Type(Result.getZExtValue()); 03486 bool usgn = Type.isUnsigned(); 03487 bool rightShift = false; 03488 03489 llvm::VectorType *VTy = GetNeonType(this, Type); 03490 llvm::Type *Ty = VTy; 03491 if (!Ty) 03492 return nullptr; 03493 03494 // Many NEON builtins have identical semantics and uses in ARM and 03495 // AArch64. Emit these in a single function. 
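Most of the remaining cases are driven by the trailing type-discriminator constant decoded into NeonTypeFlags just above. The sketch below decodes such a value; the masks assume the NeonTypeFlags layout from clang/Basic/TargetBuiltins.h (low nibble element type, 0x10 unsigned, 0x20 quad/128-bit) and should be verified against that header rather than taken as definitive.

#include <cstdio>

// Illustrative decode of the trailing "Result"/"NeonTypeConst" argument.
struct FlagsView {
  unsigned EltType;
  bool IsUnsigned;
  bool IsQuad;
};

static FlagsView decodeNeonTypeFlags(unsigned Flags) {
  FlagsView V;
  V.EltType = Flags & 0xf;           // assumed element-type field
  V.IsUnsigned = (Flags & 0x10) != 0; // assumed unsigned flag
  V.IsQuad = (Flags & 0x20) != 0;     // assumed 128-bit "quad" flag
  return V;
}

int main() {
  FlagsView V = decodeNeonTypeFlags(0x32);  // hypothetical encoded value
  std::printf("elt=%u unsigned=%d quad=%d\n", V.EltType, V.IsUnsigned, V.IsQuad);
  return 0;
}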
03496 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); 03497 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 03498 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 03499 if (Builtin) 03500 return EmitCommonNeonBuiltinExpr( 03501 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 03502 Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align); 03503 03504 unsigned Int; 03505 switch (BuiltinID) { 03506 default: return nullptr; 03507 case NEON::BI__builtin_neon_vld1q_lane_v: 03508 // Handle 64-bit integer elements as a special case. Use shuffles of 03509 // one-element vectors to avoid poor code for i64 in the backend. 03510 if (VTy->getElementType()->isIntegerTy(64)) { 03511 // Extract the other lane. 03512 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 03513 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 03514 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 03515 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 03516 // Load the value as a one-element vector. 03517 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 03518 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty); 03519 Value *Ld = Builder.CreateCall2(F, Ops[0], Align); 03520 // Combine them. 03521 SmallVector<Constant*, 2> Indices; 03522 Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane)); 03523 Indices.push_back(ConstantInt::get(Int32Ty, Lane)); 03524 SV = llvm::ConstantVector::get(Indices); 03525 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 03526 } 03527 // fall through 03528 case NEON::BI__builtin_neon_vld1_lane_v: { 03529 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 03530 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 03531 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 03532 LoadInst *Ld = Builder.CreateLoad(Ops[0]); 03533 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 03534 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 03535 } 03536 case NEON::BI__builtin_neon_vld2_dup_v: 03537 case NEON::BI__builtin_neon_vld3_dup_v: 03538 case NEON::BI__builtin_neon_vld4_dup_v: { 03539 // Handle 64-bit elements as a special-case. There is no "dup" needed. 
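The vld1q_lane case above avoids an i64 insertelement by shuffling the untouched lane aside, loading the requested lane as a one-element vector, and shuffling the two back together. At the value level the whole dance reduces to: keep lane (1 - Lane) from the old vector and take lane Lane from memory. A small model; V2x64 and loadLane are illustrative names only.

#include <cstdint>
#include <cstdio>

struct V2x64 { uint64_t Lane0, Lane1; };

static V2x64 loadLane(const uint64_t *Ptr, V2x64 Old, int Lane) {
  V2x64 Res = Old;                             // keeps lane (1 - Lane)
  (Lane == 0 ? Res.Lane0 : Res.Lane1) = *Ptr;  // one-element load into Lane
  return Res;
}

int main() {
  uint64_t Mem = 0xdeadbeefULL;
  V2x64 Old = {1, 2};
  V2x64 R = loadLane(&Mem, Old, 1);
  std::printf("%llu %llu\n", (unsigned long long)R.Lane0,
              (unsigned long long)R.Lane1);    // 1 3735928559
  return 0;
}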
03540 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 03541 switch (BuiltinID) { 03542 case NEON::BI__builtin_neon_vld2_dup_v: 03543 Int = Intrinsic::arm_neon_vld2; 03544 break; 03545 case NEON::BI__builtin_neon_vld3_dup_v: 03546 Int = Intrinsic::arm_neon_vld3; 03547 break; 03548 case NEON::BI__builtin_neon_vld4_dup_v: 03549 Int = Intrinsic::arm_neon_vld4; 03550 break; 03551 default: llvm_unreachable("unknown vld_dup intrinsic?"); 03552 } 03553 Function *F = CGM.getIntrinsic(Int, Ty); 03554 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); 03555 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 03556 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 03557 return Builder.CreateStore(Ops[1], Ops[0]); 03558 } 03559 switch (BuiltinID) { 03560 case NEON::BI__builtin_neon_vld2_dup_v: 03561 Int = Intrinsic::arm_neon_vld2lane; 03562 break; 03563 case NEON::BI__builtin_neon_vld3_dup_v: 03564 Int = Intrinsic::arm_neon_vld3lane; 03565 break; 03566 case NEON::BI__builtin_neon_vld4_dup_v: 03567 Int = Intrinsic::arm_neon_vld4lane; 03568 break; 03569 default: llvm_unreachable("unknown vld_dup intrinsic?"); 03570 } 03571 Function *F = CGM.getIntrinsic(Int, Ty); 03572 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 03573 03574 SmallVector<Value*, 6> Args; 03575 Args.push_back(Ops[1]); 03576 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 03577 03578 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 03579 Args.push_back(CI); 03580 Args.push_back(Align); 03581 03582 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 03583 // splat lane 0 to all elts in each vector of the result. 03584 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 03585 Value *Val = Builder.CreateExtractValue(Ops[1], i); 03586 Value *Elt = Builder.CreateBitCast(Val, Ty); 03587 Elt = EmitNeonSplat(Elt, CI); 03588 Elt = Builder.CreateBitCast(Elt, Val->getType()); 03589 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 03590 } 03591 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 03592 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 03593 return Builder.CreateStore(Ops[1], Ops[0]); 03594 } 03595 case NEON::BI__builtin_neon_vqrshrn_n_v: 03596 Int = 03597 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 03598 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 03599 1, true); 03600 case NEON::BI__builtin_neon_vqrshrun_n_v: 03601 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 03602 Ops, "vqrshrun_n", 1, true); 03603 case NEON::BI__builtin_neon_vqshrn_n_v: 03604 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 03605 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 03606 1, true); 03607 case NEON::BI__builtin_neon_vqshrun_n_v: 03608 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 03609 Ops, "vqshrun_n", 1, true); 03610 case NEON::BI__builtin_neon_vrecpe_v: 03611 case NEON::BI__builtin_neon_vrecpeq_v: 03612 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 03613 Ops, "vrecpe"); 03614 case NEON::BI__builtin_neon_vrshrn_n_v: 03615 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 03616 Ops, "vrshrn_n", 1, true); 03617 case NEON::BI__builtin_neon_vrsra_n_v: 03618 case NEON::BI__builtin_neon_vrsraq_n_v: 03619 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 03620 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 03621 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 03622 Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 03623 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); 03624 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 03625 case NEON::BI__builtin_neon_vsri_n_v: 03626 case NEON::BI__builtin_neon_vsriq_n_v: 03627 rightShift = true; 03628 case NEON::BI__builtin_neon_vsli_n_v: 03629 case NEON::BI__builtin_neon_vsliq_n_v: 03630 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 03631 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 03632 Ops, "vsli_n"); 03633 case NEON::BI__builtin_neon_vsra_n_v: 03634 case NEON::BI__builtin_neon_vsraq_n_v: 03635 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 03636 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 03637 return Builder.CreateAdd(Ops[0], Ops[1]); 03638 case NEON::BI__builtin_neon_vst1q_lane_v: 03639 // Handle 64-bit integer elements as a special case. Use a shuffle to get 03640 // a one-element vector and avoid poor code for i64 in the backend. 03641 if (VTy->getElementType()->isIntegerTy(64)) { 03642 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 03643 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 03644 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 03645 Ops[2] = Align; 03646 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 03647 Ops[1]->getType()), Ops); 03648 } 03649 // fall through 03650 case NEON::BI__builtin_neon_vst1_lane_v: { 03651 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 03652 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 03653 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 03654 StoreInst *St = Builder.CreateStore(Ops[1], 03655 Builder.CreateBitCast(Ops[0], Ty)); 03656 St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 03657 return St; 03658 } 03659 case NEON::BI__builtin_neon_vtbl1_v: 03660 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 03661 Ops, "vtbl1"); 03662 case NEON::BI__builtin_neon_vtbl2_v: 03663 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 03664 Ops, "vtbl2"); 03665 case NEON::BI__builtin_neon_vtbl3_v: 03666 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 03667 Ops, "vtbl3"); 03668 case NEON::BI__builtin_neon_vtbl4_v: 03669 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 03670 Ops, "vtbl4"); 03671 case NEON::BI__builtin_neon_vtbx1_v: 03672 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 03673 Ops, "vtbx1"); 03674 case NEON::BI__builtin_neon_vtbx2_v: 03675 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 03676 Ops, "vtbx2"); 03677 case NEON::BI__builtin_neon_vtbx3_v: 03678 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 03679 Ops, "vtbx3"); 03680 case NEON::BI__builtin_neon_vtbx4_v: 03681 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 03682 Ops, "vtbx4"); 03683 } 03684 } 03685 03686 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 03687 const CallExpr *E, 03688 SmallVectorImpl<Value *> &Ops) { 03689 unsigned int Int = 0; 03690 const char *s = nullptr; 03691 03692 switch (BuiltinID) { 03693 default: 03694 return nullptr; 03695 case NEON::BI__builtin_neon_vtbl1_v: 03696 case NEON::BI__builtin_neon_vqtbl1_v: 03697 case NEON::BI__builtin_neon_vqtbl1q_v: 03698 case NEON::BI__builtin_neon_vtbl2_v: 03699 case NEON::BI__builtin_neon_vqtbl2_v: 03700 case NEON::BI__builtin_neon_vqtbl2q_v: 03701 case NEON::BI__builtin_neon_vtbl3_v: 03702 case 
NEON::BI__builtin_neon_vqtbl3_v: 03703 case NEON::BI__builtin_neon_vqtbl3q_v: 03704 case NEON::BI__builtin_neon_vtbl4_v: 03705 case NEON::BI__builtin_neon_vqtbl4_v: 03706 case NEON::BI__builtin_neon_vqtbl4q_v: 03707 break; 03708 case NEON::BI__builtin_neon_vtbx1_v: 03709 case NEON::BI__builtin_neon_vqtbx1_v: 03710 case NEON::BI__builtin_neon_vqtbx1q_v: 03711 case NEON::BI__builtin_neon_vtbx2_v: 03712 case NEON::BI__builtin_neon_vqtbx2_v: 03713 case NEON::BI__builtin_neon_vqtbx2q_v: 03714 case NEON::BI__builtin_neon_vtbx3_v: 03715 case NEON::BI__builtin_neon_vqtbx3_v: 03716 case NEON::BI__builtin_neon_vqtbx3q_v: 03717 case NEON::BI__builtin_neon_vtbx4_v: 03718 case NEON::BI__builtin_neon_vqtbx4_v: 03719 case NEON::BI__builtin_neon_vqtbx4q_v: 03720 break; 03721 } 03722 03723 assert(E->getNumArgs() >= 3); 03724 03725 // Get the last argument, which specifies the vector type. 03726 llvm::APSInt Result; 03727 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 03728 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 03729 return nullptr; 03730 03731 // Determine the type of this overloaded NEON intrinsic. 03732 NeonTypeFlags Type(Result.getZExtValue()); 03733 llvm::VectorType *VTy = GetNeonType(&CGF, Type); 03734 llvm::Type *Ty = VTy; 03735 if (!Ty) 03736 return nullptr; 03737 03738 unsigned nElts = VTy->getNumElements(); 03739 03740 CodeGen::CGBuilderTy &Builder = CGF.Builder; 03741 03742 // AArch64 scalar builtins are not overloaded, they do not have an extra 03743 // argument that specifies the vector type, need to handle each case. 03744 SmallVector<Value *, 2> TblOps; 03745 switch (BuiltinID) { 03746 case NEON::BI__builtin_neon_vtbl1_v: { 03747 TblOps.push_back(Ops[0]); 03748 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty, 03749 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 03750 } 03751 case NEON::BI__builtin_neon_vtbl2_v: { 03752 TblOps.push_back(Ops[0]); 03753 TblOps.push_back(Ops[1]); 03754 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, 03755 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 03756 } 03757 case NEON::BI__builtin_neon_vtbl3_v: { 03758 TblOps.push_back(Ops[0]); 03759 TblOps.push_back(Ops[1]); 03760 TblOps.push_back(Ops[2]); 03761 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty, 03762 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 03763 } 03764 case NEON::BI__builtin_neon_vtbl4_v: { 03765 TblOps.push_back(Ops[0]); 03766 TblOps.push_back(Ops[1]); 03767 TblOps.push_back(Ops[2]); 03768 TblOps.push_back(Ops[3]); 03769 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, 03770 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 03771 } 03772 case NEON::BI__builtin_neon_vtbx1_v: { 03773 TblOps.push_back(Ops[1]); 03774 Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, 03775 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 03776 03777 llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); 03778 Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); 03779 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 03780 CmpRes = Builder.CreateSExt(CmpRes, Ty); 03781 03782 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 03783 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 03784 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 03785 } 03786 case NEON::BI__builtin_neon_vtbx2_v: { 03787 TblOps.push_back(Ops[1]); 03788 TblOps.push_back(Ops[2]); 03789 return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, 03790 Intrinsic::aarch64_neon_tbx1, "vtbx1"); 03791 } 03792 case 
NEON::BI__builtin_neon_vtbx3_v: { 03793 TblOps.push_back(Ops[1]); 03794 TblOps.push_back(Ops[2]); 03795 TblOps.push_back(Ops[3]); 03796 Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, 03797 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 03798 03799 llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); 03800 Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); 03801 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 03802 TwentyFourV); 03803 CmpRes = Builder.CreateSExt(CmpRes, Ty); 03804 03805 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 03806 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 03807 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 03808 } 03809 case NEON::BI__builtin_neon_vtbx4_v: { 03810 TblOps.push_back(Ops[1]); 03811 TblOps.push_back(Ops[2]); 03812 TblOps.push_back(Ops[3]); 03813 TblOps.push_back(Ops[4]); 03814 return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, 03815 Intrinsic::aarch64_neon_tbx2, "vtbx2"); 03816 } 03817 case NEON::BI__builtin_neon_vqtbl1_v: 03818 case NEON::BI__builtin_neon_vqtbl1q_v: 03819 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 03820 case NEON::BI__builtin_neon_vqtbl2_v: 03821 case NEON::BI__builtin_neon_vqtbl2q_v: { 03822 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 03823 case NEON::BI__builtin_neon_vqtbl3_v: 03824 case NEON::BI__builtin_neon_vqtbl3q_v: 03825 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 03826 case NEON::BI__builtin_neon_vqtbl4_v: 03827 case NEON::BI__builtin_neon_vqtbl4q_v: 03828 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 03829 case NEON::BI__builtin_neon_vqtbx1_v: 03830 case NEON::BI__builtin_neon_vqtbx1q_v: 03831 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 03832 case NEON::BI__builtin_neon_vqtbx2_v: 03833 case NEON::BI__builtin_neon_vqtbx2q_v: 03834 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 03835 case NEON::BI__builtin_neon_vqtbx3_v: 03836 case NEON::BI__builtin_neon_vqtbx3q_v: 03837 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 03838 case NEON::BI__builtin_neon_vqtbx4_v: 03839 case NEON::BI__builtin_neon_vqtbx4q_v: 03840 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 03841 } 03842 } 03843 03844 if (!Int) 03845 return nullptr; 03846 03847 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 03848 return CGF.EmitNeonCall(F, Ops, s); 03849 } 03850 03851 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 03852 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 03853 Op = Builder.CreateBitCast(Op, Int16Ty); 03854 Value *V = UndefValue::get(VTy); 03855 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 03856 Op = Builder.CreateInsertElement(V, Op, CI); 03857 return Op; 03858 } 03859 03860 Value *CodeGenFunction::vectorWrapScalar8(Value *Op) { 03861 llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); 03862 Op = Builder.CreateBitCast(Op, Int8Ty); 03863 Value *V = UndefValue::get(VTy); 03864 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 03865 Op = Builder.CreateInsertElement(V, Op, CI); 03866 return Op; 03867 } 03868 03869 Value *CodeGenFunction:: 03870 emitVectorWrappedScalar8Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops, 03871 const char *Name) { 03872 // i8 is not a legal types for AArch64, so we can't just use 03873 // a normal overloaded intrinsic call for these scalar types. Instead 03874 // we'll build 64-bit vectors w/ lane zero being our input values and 03875 // perform the operation on that. 
The back end can pattern match directly 03876 // to the scalar instruction. 03877 Ops[0] = vectorWrapScalar8(Ops[0]); 03878 Ops[1] = vectorWrapScalar8(Ops[1]); 03879 llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); 03880 Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name); 03881 Constant *CI = ConstantInt::get(SizeTy, 0); 03882 return Builder.CreateExtractElement(V, CI, "lane0"); 03883 } 03884 03885 Value *CodeGenFunction:: 03886 emitVectorWrappedScalar16Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops, 03887 const char *Name) { 03888 // i16 is not a legal types for AArch64, so we can't just use 03889 // a normal overloaded intrinsic call for these scalar types. Instead 03890 // we'll build 64-bit vectors w/ lane zero being our input values and 03891 // perform the operation on that. The back end can pattern match directly 03892 // to the scalar instruction. 03893 Ops[0] = vectorWrapScalar16(Ops[0]); 03894 Ops[1] = vectorWrapScalar16(Ops[1]); 03895 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 03896 Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name); 03897 Constant *CI = ConstantInt::get(SizeTy, 0); 03898 return Builder.CreateExtractElement(V, CI, "lane0"); 03899 } 03900 03901 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 03902 const CallExpr *E) { 03903 unsigned HintID = static_cast<unsigned>(-1); 03904 switch (BuiltinID) { 03905 default: break; 03906 case AArch64::BI__builtin_arm_nop: 03907 HintID = 0; 03908 break; 03909 case AArch64::BI__builtin_arm_yield: 03910 HintID = 1; 03911 break; 03912 case AArch64::BI__builtin_arm_wfe: 03913 HintID = 2; 03914 break; 03915 case AArch64::BI__builtin_arm_wfi: 03916 HintID = 3; 03917 break; 03918 case AArch64::BI__builtin_arm_sev: 03919 HintID = 4; 03920 break; 03921 case AArch64::BI__builtin_arm_sevl: 03922 HintID = 5; 03923 break; 03924 } 03925 03926 if (HintID != static_cast<unsigned>(-1)) { 03927 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 03928 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 03929 } 03930 03931 if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { 03932 Value *Address = EmitScalarExpr(E->getArg(0)); 03933 Value *RW = EmitScalarExpr(E->getArg(1)); 03934 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 03935 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 03936 Value *IsData = EmitScalarExpr(E->getArg(4)); 03937 03938 Value *Locality = nullptr; 03939 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 03940 // Temporal fetch, needs to convert cache level to locality. 03941 Locality = llvm::ConstantInt::get(Int32Ty, 03942 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 03943 } else { 03944 // Streaming fetch. 03945 Locality = llvm::ConstantInt::get(Int32Ty, 0); 03946 } 03947 03948 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 03949 // PLDL3STRM or PLDL2STRM. 
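For __builtin_arm_prefetch above, the cache-level and retention-policy operands are folded into the single locality operand of llvm.prefetch: a temporal ("keep") policy maps the level to 3 - level, while a streaming policy always yields locality 0. A sketch of just that mapping; prefetchLocality is an illustrative helper, and the meaning of each level value is whatever the builtin defines, not something asserted here.

#include <cstdio>

static int prefetchLocality(int CacheLevel, int RetentionPolicy) {
  // Temporal fetch: locality = 3 - cache level. Streaming fetch: locality 0.
  return RetentionPolicy == 0 ? 3 - CacheLevel : 0;
}

int main() {
  for (int Level = 0; Level <= 2; ++Level)
    std::printf("temporal level %d -> locality %d\n", Level,
                prefetchLocality(Level, /*RetentionPolicy=*/0));
  std::printf("streaming -> locality %d\n", prefetchLocality(0, 1));
  return 0;
}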
03950 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 03951 return Builder.CreateCall4(F, Address, RW, Locality, IsData); 03952 } 03953 03954 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 03955 assert((getContext().getTypeSize(E->getType()) == 32) && 03956 "rbit of unusual size!"); 03957 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 03958 return Builder.CreateCall( 03959 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 03960 } 03961 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 03962 assert((getContext().getTypeSize(E->getType()) == 64) && 03963 "rbit of unusual size!"); 03964 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 03965 return Builder.CreateCall( 03966 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 03967 } 03968 03969 if (BuiltinID == AArch64::BI__clear_cache) { 03970 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 03971 const FunctionDecl *FD = E->getDirectCallee(); 03972 SmallVector<Value*, 2> Ops; 03973 for (unsigned i = 0; i < 2; i++) 03974 Ops.push_back(EmitScalarExpr(E->getArg(i))); 03975 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 03976 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 03977 StringRef Name = FD->getName(); 03978 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 03979 } 03980 03981 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 03982 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 03983 getContext().getTypeSize(E->getType()) == 128) { 03984 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 03985 ? Intrinsic::aarch64_ldaxp 03986 : Intrinsic::aarch64_ldxp); 03987 03988 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 03989 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 03990 "ldxp"); 03991 03992 Value *Val0 = Builder.CreateExtractValue(Val, 1); 03993 Value *Val1 = Builder.CreateExtractValue(Val, 0); 03994 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 03995 Val0 = Builder.CreateZExt(Val0, Int128Ty); 03996 Val1 = Builder.CreateZExt(Val1, Int128Ty); 03997 03998 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 03999 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 04000 Val = Builder.CreateOr(Val, Val1); 04001 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 04002 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 04003 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 04004 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 04005 04006 QualType Ty = E->getType(); 04007 llvm::Type *RealResTy = ConvertType(Ty); 04008 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 04009 getContext().getTypeSize(Ty)); 04010 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 04011 04012 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 04013 ? Intrinsic::aarch64_ldaxr 04014 : Intrinsic::aarch64_ldxr, 04015 LoadAddr->getType()); 04016 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 04017 04018 if (RealResTy->isPointerTy()) 04019 return Builder.CreateIntToPtr(Val, RealResTy); 04020 04021 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 04022 return Builder.CreateBitCast(Val, RealResTy); 04023 } 04024 04025 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 04026 BuiltinID == AArch64::BI__builtin_arm_stlex) && 04027 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 04028 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 04029 ? 
Intrinsic::aarch64_stlxp 04030 : Intrinsic::aarch64_stxp); 04031 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, NULL); 04032 04033 Value *One = llvm::ConstantInt::get(Int32Ty, 1); 04034 Value *Tmp = Builder.CreateAlloca(ConvertType(E->getArg(0)->getType()), 04035 One); 04036 Value *Val = EmitScalarExpr(E->getArg(0)); 04037 Builder.CreateStore(Val, Tmp); 04038 04039 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 04040 Val = Builder.CreateLoad(LdPtr); 04041 04042 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 04043 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 04044 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 04045 Int8PtrTy); 04046 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "stxp"); 04047 } else if (BuiltinID == AArch64::BI__builtin_arm_strex || 04048 BuiltinID == AArch64::BI__builtin_arm_stlex) { 04049 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 04050 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 04051 04052 QualType Ty = E->getArg(0)->getType(); 04053 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 04054 getContext().getTypeSize(Ty)); 04055 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 04056 04057 if (StoreVal->getType()->isPointerTy()) 04058 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 04059 else { 04060 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); 04061 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 04062 } 04063 04064 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 04065 ? Intrinsic::aarch64_stlxr 04066 : Intrinsic::aarch64_stxr, 04067 StoreAddr->getType()); 04068 return Builder.CreateCall2(F, StoreVal, StoreAddr, "stxr"); 04069 } 04070 04071 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 04072 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 04073 return Builder.CreateCall(F); 04074 } 04075 04076 // CRC32 04077 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 04078 switch (BuiltinID) { 04079 case AArch64::BI__builtin_arm_crc32b: 04080 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 04081 case AArch64::BI__builtin_arm_crc32cb: 04082 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 04083 case AArch64::BI__builtin_arm_crc32h: 04084 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 04085 case AArch64::BI__builtin_arm_crc32ch: 04086 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 04087 case AArch64::BI__builtin_arm_crc32w: 04088 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 04089 case AArch64::BI__builtin_arm_crc32cw: 04090 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 04091 case AArch64::BI__builtin_arm_crc32d: 04092 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 04093 case AArch64::BI__builtin_arm_crc32cd: 04094 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 04095 } 04096 04097 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 04098 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 04099 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 04100 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 04101 04102 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 04103 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 04104 04105 return Builder.CreateCall2(F, Arg0, Arg1); 04106 } 04107 04108 llvm::SmallVector<Value*, 4> Ops; 04109 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) 04110 Ops.push_back(EmitScalarExpr(E->getArg(i))); 04111 04112 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 04113 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 04114 
SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 04115 04116 if (Builtin) { 04117 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 04118 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 04119 assert(Result && "SISD intrinsic should have been handled"); 04120 return Result; 04121 } 04122 04123 llvm::APSInt Result; 04124 const Expr *Arg = E->getArg(E->getNumArgs()-1); 04125 NeonTypeFlags Type(0); 04126 if (Arg->isIntegerConstantExpr(Result, getContext())) 04127 // Determine the type of this overloaded NEON intrinsic. 04128 Type = NeonTypeFlags(Result.getZExtValue()); 04129 04130 bool usgn = Type.isUnsigned(); 04131 bool quad = Type.isQuad(); 04132 04133 // Handle non-overloaded intrinsics first. 04134 switch (BuiltinID) { 04135 default: break; 04136 case NEON::BI__builtin_neon_vldrq_p128: { 04137 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 04138 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 04139 return Builder.CreateLoad(Ptr); 04140 } 04141 case NEON::BI__builtin_neon_vstrq_p128: { 04142 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 04143 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 04144 return Builder.CreateStore(EmitScalarExpr(E->getArg(1)), Ptr); 04145 } 04146 case NEON::BI__builtin_neon_vcvts_u32_f32: 04147 case NEON::BI__builtin_neon_vcvtd_u64_f64: 04148 usgn = true; 04149 // FALL THROUGH 04150 case NEON::BI__builtin_neon_vcvts_s32_f32: 04151 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 04152 Ops.push_back(EmitScalarExpr(E->getArg(0))); 04153 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 04154 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 04155 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 04156 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 04157 if (usgn) 04158 return Builder.CreateFPToUI(Ops[0], InTy); 04159 return Builder.CreateFPToSI(Ops[0], InTy); 04160 } 04161 case NEON::BI__builtin_neon_vcvts_f32_u32: 04162 case NEON::BI__builtin_neon_vcvtd_f64_u64: 04163 usgn = true; 04164 // FALL THROUGH 04165 case NEON::BI__builtin_neon_vcvts_f32_s32: 04166 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 04167 Ops.push_back(EmitScalarExpr(E->getArg(0))); 04168 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 04169 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 04170 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 04171 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 04172 if (usgn) 04173 return Builder.CreateUIToFP(Ops[0], FTy); 04174 return Builder.CreateSIToFP(Ops[0], FTy); 04175 } 04176 case NEON::BI__builtin_neon_vpaddd_s64: { 04177 llvm::Type *Ty = 04178 llvm::VectorType::get(llvm::Type::getInt64Ty(getLLVMContext()), 2); 04179 Value *Vec = EmitScalarExpr(E->getArg(0)); 04180 // The vector is v2f64, so make sure it's bitcast to that. 04181 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 04182 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 04183 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 04184 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 04185 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 04186 // Pairwise addition of a v2f64 into a scalar f64. 04187 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 04188 } 04189 case NEON::BI__builtin_neon_vpaddd_f64: { 04190 llvm::Type *Ty = 04191 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2); 04192 Value *Vec = EmitScalarExpr(E->getArg(0)); 04193 // The vector is v2f64, so make sure it's bitcast to that. 
04194 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 04195 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 04196 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 04197 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 04198 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 04199 // Pairwise addition of a v2f64 into a scalar f64. 04200 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 04201 } 04202 case NEON::BI__builtin_neon_vpadds_f32: { 04203 llvm::Type *Ty = 04204 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2); 04205 Value *Vec = EmitScalarExpr(E->getArg(0)); 04206 // The vector is v2f32, so make sure it's bitcast to that. 04207 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 04208 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 04209 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 04210 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 04211 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 04212 // Pairwise addition of a v2f32 into a scalar f32. 04213 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 04214 } 04215 case NEON::BI__builtin_neon_vceqzd_s64: 04216 case NEON::BI__builtin_neon_vceqzd_f64: 04217 case NEON::BI__builtin_neon_vceqzs_f32: 04218 Ops.push_back(EmitScalarExpr(E->getArg(0))); 04219 return EmitAArch64CompareBuiltinExpr( 04220 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OEQ, 04221 ICmpInst::ICMP_EQ, "vceqz"); 04222 case NEON::BI__builtin_neon_vcgezd_s64: 04223 case NEON::BI__builtin_neon_vcgezd_f64: 04224 case NEON::BI__builtin_neon_vcgezs_f32: 04225 Ops.push_back(EmitScalarExpr(E->getArg(0))); 04226 return EmitAArch64CompareBuiltinExpr( 04227 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGE, 04228 ICmpInst::ICMP_SGE, "vcgez"); 04229 case NEON::BI__builtin_neon_vclezd_s64: 04230 case NEON::BI__builtin_neon_vclezd_f64: 04231 case NEON::BI__builtin_neon_vclezs_f32: 04232 Ops.push_back(EmitScalarExpr(E->getArg(0))); 04233 return EmitAArch64CompareBuiltinExpr( 04234 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLE, 04235 ICmpInst::ICMP_SLE, "vclez"); 04236 case NEON::BI__builtin_neon_vcgtzd_s64: 04237 case NEON::BI__builtin_neon_vcgtzd_f64: 04238 case NEON::BI__builtin_neon_vcgtzs_f32: 04239 Ops.push_back(EmitScalarExpr(E->getArg(0))); 04240 return EmitAArch64CompareBuiltinExpr( 04241 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGT, 04242 ICmpInst::ICMP_SGT, "vcgtz"); 04243 case NEON::BI__builtin_neon_vcltzd_s64: 04244 case NEON::BI__builtin_neon_vcltzd_f64: 04245 case NEON::BI__builtin_neon_vcltzs_f32: 04246 Ops.push_back(EmitScalarExpr(E->getArg(0))); 04247 return EmitAArch64CompareBuiltinExpr( 04248 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLT, 04249 ICmpInst::ICMP_SLT, "vcltz"); 04250 04251 case NEON::BI__builtin_neon_vceqzd_u64: { 04252 llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); 04253 Ops.push_back(EmitScalarExpr(E->getArg(0))); 04254 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 04255 Ops[0] = Builder.CreateICmp(llvm::ICmpInst::ICMP_EQ, Ops[0], 04256 llvm::Constant::getNullValue(Ty)); 04257 return Builder.CreateSExt(Ops[0], Ty, "vceqzd"); 04258 } 04259 case NEON::BI__builtin_neon_vceqd_f64: 04260 case NEON::BI__builtin_neon_vcled_f64: 04261 case NEON::BI__builtin_neon_vcltd_f64: 04262 case NEON::BI__builtin_neon_vcged_f64: 04263 case NEON::BI__builtin_neon_vcgtd_f64: { 04264 llvm::CmpInst::Predicate P; 04265 switch (BuiltinID) { 04266 default: llvm_unreachable("missing builtin ID in 
switch!"); 04267 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 04268 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 04269 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 04270 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 04271 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 04272 } 04273 Ops.push_back(EmitScalarExpr(E->getArg(1))); 04274 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 04275 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 04276 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 04277 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 04278 } 04279 case NEON::BI__builtin_neon_vceqs_f32: 04280 case NEON::BI__builtin_neon_vcles_f32: 04281 case NEON::BI__builtin_neon_vclts_f32: 04282 case NEON::BI__builtin_neon_vcges_f32: 04283 case NEON::BI__builtin_neon_vcgts_f32: { 04284 llvm::CmpInst::Predicate P; 04285 switch (BuiltinID) { 04286 default: llvm_unreachable("missing builtin ID in switch!"); 04287 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 04288 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break; 04289 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 04290 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 04291 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 04292 } 04293 Ops.push_back(EmitScalarExpr(E->getArg(1))); 04294 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 04295 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 04296 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 04297 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 04298 } 04299 case NEON::BI__builtin_neon_vceqd_s64: 04300 case NEON::BI__builtin_neon_vceqd_u64: 04301 case NEON::BI__builtin_neon_vcgtd_s64: 04302 case NEON::BI__builtin_neon_vcgtd_u64: 04303 case NEON::BI__builtin_neon_vcltd_s64: 04304 case NEON::BI__builtin_neon_vcltd_u64: 04305 case NEON::BI__builtin_neon_vcged_u64: 04306 case NEON::BI__builtin_neon_vcged_s64: 04307 case NEON::BI__builtin_neon_vcled_u64: 04308 case NEON::BI__builtin_neon_vcled_s64: { 04309 llvm::CmpInst::Predicate P; 04310 switch (BuiltinID) { 04311 default: llvm_unreachable("missing builtin ID in switch!"); 04312 case NEON::BI__builtin_neon_vceqd_s64: 04313 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 04314 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 04315 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 04316 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 04317 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 04318 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 04319 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 04320 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 04321 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 04322 } 04323 Ops.push_back(EmitScalarExpr(E->getArg(1))); 04324 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 04325 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 04326 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 04327 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 04328 } 04329 case NEON::BI__builtin_neon_vtstd_s64: 04330 case NEON::BI__builtin_neon_vtstd_u64: { 04331 llvm::Type *Ty = 
llvm::Type::getInt64Ty(getLLVMContext()); 04332 Ops.push_back(EmitScalarExpr(E->getArg(1))); 04333 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 04334 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 04335 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 04336 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 04337 llvm::Constant::getNullValue(Ty)); 04338 return Builder.CreateSExt(Ops[0], Ty, "vtstd"); 04339 } 04340 case NEON::BI__builtin_neon_vset_lane_i8: 04341 case NEON::BI__builtin_neon_vset_lane_i16: 04342 case NEON::BI__builtin_neon_vset_lane_i32: 04343 case NEON::BI__builtin_neon_vset_lane_i64: 04344 case NEON::BI__builtin_neon_vset_lane_f32: 04345 case NEON::BI__builtin_neon_vsetq_lane_i8: 04346 case NEON::BI__builtin_neon_vsetq_lane_i16: 04347 case NEON::BI__builtin_neon_vsetq_lane_i32: 04348 case NEON::BI__builtin_neon_vsetq_lane_i64: 04349 case NEON::BI__builtin_neon_vsetq_lane_f32: 04350 Ops.push_back(EmitScalarExpr(E->getArg(2))); 04351 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 04352 case NEON::BI__builtin_neon_vset_lane_f64: 04353 // The vector type needs a cast for the v1f64 variant. 04354 Ops[1] = Builder.CreateBitCast(Ops[1], 04355 llvm::VectorType::get(DoubleTy, 1)); 04356 Ops.push_back(EmitScalarExpr(E->getArg(2))); 04357 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 04358 case NEON::BI__builtin_neon_vsetq_lane_f64: 04359 // The vector type needs a cast for the v2f64 variant. 04360 Ops[1] = Builder.CreateBitCast(Ops[1], 04361 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); 04362 Ops.push_back(EmitScalarExpr(E->getArg(2))); 04363 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 04364 04365 case NEON::BI__builtin_neon_vget_lane_i8: 04366 case NEON::BI__builtin_neon_vdupb_lane_i8: 04367 Ops[0] = Builder.CreateBitCast(Ops[0], 04368 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8)); 04369 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04370 "vget_lane"); 04371 case NEON::BI__builtin_neon_vgetq_lane_i8: 04372 case NEON::BI__builtin_neon_vdupb_laneq_i8: 04373 Ops[0] = Builder.CreateBitCast(Ops[0], 04374 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16)); 04375 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04376 "vgetq_lane"); 04377 case NEON::BI__builtin_neon_vget_lane_i16: 04378 case NEON::BI__builtin_neon_vduph_lane_i16: 04379 Ops[0] = Builder.CreateBitCast(Ops[0], 04380 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4)); 04381 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04382 "vget_lane"); 04383 case NEON::BI__builtin_neon_vgetq_lane_i16: 04384 case NEON::BI__builtin_neon_vduph_laneq_i16: 04385 Ops[0] = Builder.CreateBitCast(Ops[0], 04386 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8)); 04387 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04388 "vgetq_lane"); 04389 case NEON::BI__builtin_neon_vget_lane_i32: 04390 case NEON::BI__builtin_neon_vdups_lane_i32: 04391 Ops[0] = Builder.CreateBitCast( 04392 Ops[0], 04393 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 2)); 04394 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04395 "vget_lane"); 04396 case NEON::BI__builtin_neon_vdups_lane_f32: 04397 Ops[0] = Builder.CreateBitCast(Ops[0], 04398 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); 04399 return 
Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04400 "vdups_lane"); 04401 case NEON::BI__builtin_neon_vgetq_lane_i32: 04402 case NEON::BI__builtin_neon_vdups_laneq_i32: 04403 Ops[0] = Builder.CreateBitCast(Ops[0], 04404 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 4)); 04405 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04406 "vgetq_lane"); 04407 case NEON::BI__builtin_neon_vget_lane_i64: 04408 case NEON::BI__builtin_neon_vdupd_lane_i64: 04409 Ops[0] = Builder.CreateBitCast(Ops[0], 04410 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 1)); 04411 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04412 "vget_lane"); 04413 case NEON::BI__builtin_neon_vdupd_lane_f64: 04414 Ops[0] = Builder.CreateBitCast(Ops[0], 04415 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); 04416 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04417 "vdupd_lane"); 04418 case NEON::BI__builtin_neon_vgetq_lane_i64: 04419 case NEON::BI__builtin_neon_vdupd_laneq_i64: 04420 Ops[0] = Builder.CreateBitCast(Ops[0], 04421 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 2)); 04422 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04423 "vgetq_lane"); 04424 case NEON::BI__builtin_neon_vget_lane_f32: 04425 Ops[0] = Builder.CreateBitCast(Ops[0], 04426 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); 04427 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04428 "vget_lane"); 04429 case NEON::BI__builtin_neon_vget_lane_f64: 04430 Ops[0] = Builder.CreateBitCast(Ops[0], 04431 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); 04432 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04433 "vget_lane"); 04434 case NEON::BI__builtin_neon_vgetq_lane_f32: 04435 case NEON::BI__builtin_neon_vdups_laneq_f32: 04436 Ops[0] = Builder.CreateBitCast(Ops[0], 04437 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 4)); 04438 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04439 "vgetq_lane"); 04440 case NEON::BI__builtin_neon_vgetq_lane_f64: 04441 case NEON::BI__builtin_neon_vdupd_laneq_f64: 04442 Ops[0] = Builder.CreateBitCast(Ops[0], 04443 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); 04444 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 04445 "vgetq_lane"); 04446 case NEON::BI__builtin_neon_vaddd_s64: 04447 case NEON::BI__builtin_neon_vaddd_u64: 04448 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 04449 case NEON::BI__builtin_neon_vsubd_s64: 04450 case NEON::BI__builtin_neon_vsubd_u64: 04451 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 04452 case NEON::BI__builtin_neon_vqdmlalh_s16: 04453 case NEON::BI__builtin_neon_vqdmlslh_s16: { 04454 SmallVector<Value *, 2> ProductOps; 04455 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 04456 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 04457 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 04458 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 04459 ProductOps, "vqdmlXl"); 04460 Constant *CI = ConstantInt::get(SizeTy, 0); 04461 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 04462 04463 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 04464 ? 
Intrinsic::aarch64_neon_sqadd 04465 : Intrinsic::aarch64_neon_sqsub; 04466 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 04467 } 04468 case NEON::BI__builtin_neon_vqshlud_n_s64: { 04469 Ops.push_back(EmitScalarExpr(E->getArg(1))); 04470 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 04471 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 04472 Ops, "vqshlu_n"); 04473 } 04474 case NEON::BI__builtin_neon_vqshld_n_u64: 04475 case NEON::BI__builtin_neon_vqshld_n_s64: { 04476 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 04477 ? Intrinsic::aarch64_neon_uqshl 04478 : Intrinsic::aarch64_neon_sqshl; 04479 Ops.push_back(EmitScalarExpr(E->getArg(1))); 04480 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 04481 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 04482 } 04483 case NEON::BI__builtin_neon_vrshrd_n_u64: 04484 case NEON::BI__builtin_neon_vrshrd_n_s64: { 04485 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 04486 ? Intrinsic::aarch64_neon_urshl 04487 : Intrinsic::aarch64_neon_srshl; 04488 Ops.push_back(EmitScalarExpr(E->getArg(1))); 04489 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 04490 Ops[1] = ConstantInt::get(Int64Ty, -SV); 04491 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 04492 } 04493 case NEON::BI__builtin_neon_vrsrad_n_u64: 04494 case NEON::BI__builtin_neon_vrsrad_n_s64: { 04495 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 04496 ? Intrinsic::aarch64_neon_urshl 04497 : Intrinsic::aarch64_neon_srshl; 04498 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 04499 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 04500 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Int64Ty), Ops[1], 04501 Builder.CreateSExt(Ops[2], Int64Ty)); 04502 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 04503 } 04504 case NEON::BI__builtin_neon_vshld_n_s64: 04505 case NEON::BI__builtin_neon_vshld_n_u64: { 04506 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 04507 return Builder.CreateShl( 04508 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 04509 } 04510 case NEON::BI__builtin_neon_vshrd_n_s64: { 04511 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 04512 return Builder.CreateAShr( 04513 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 04514 Amt->getZExtValue())), 04515 "shrd_n"); 04516 } 04517 case NEON::BI__builtin_neon_vshrd_n_u64: { 04518 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 04519 uint64_t ShiftAmt = Amt->getZExtValue(); 04520 // Right-shifting an unsigned value by its size yields 0. 04521 if (ShiftAmt == 64) 04522 return ConstantInt::get(Int64Ty, 0); 04523 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 04524 "shrd_n"); 04525 } 04526 case NEON::BI__builtin_neon_vsrad_n_s64: { 04527 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 04528 Ops[1] = Builder.CreateAShr( 04529 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 04530 Amt->getZExtValue())), 04531 "shrd_n"); 04532 return Builder.CreateAdd(Ops[0], Ops[1]); 04533 } 04534 case NEON::BI__builtin_neon_vsrad_n_u64: { 04535 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 04536 uint64_t ShiftAmt = Amt->getZExtValue(); 04537 // Right-shifting an unsigned value by its size yields 0. 04538 // As Op + 0 = Op, return Ops[0] directly. 
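// (This fold is not just a micro-optimisation: an i64 lshr by 64 is a poison
// value in LLVM IR, whereas the NEON semantics described above define the
// full-width unsigned shift, so the ShiftAmt == 64 case has to be resolved
// here rather than handed to the IR builder. The signed variants sidestep
// the same issue by clamping the shift amount to 63.)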
04539 if (ShiftAmt == 64) 04540 return Ops[0]; 04541 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 04542 "shrd_n"); 04543 return Builder.CreateAdd(Ops[0], Ops[1]); 04544 } 04545 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 04546 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 04547 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 04548 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 04549 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 04550 "lane"); 04551 SmallVector<Value *, 2> ProductOps; 04552 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 04553 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 04554 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 04555 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 04556 ProductOps, "vqdmlXl"); 04557 Constant *CI = ConstantInt::get(SizeTy, 0); 04558 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 04559 Ops.pop_back(); 04560 04561 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 04562 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 04563 ? Intrinsic::aarch64_neon_sqadd 04564 : Intrinsic::aarch64_neon_sqsub; 04565 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 04566 } 04567 case NEON::BI__builtin_neon_vqdmlals_s32: 04568 case NEON::BI__builtin_neon_vqdmlsls_s32: { 04569 SmallVector<Value *, 2> ProductOps; 04570 ProductOps.push_back(Ops[1]); 04571 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 04572 Ops[1] = 04573 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 04574 ProductOps, "vqdmlXl"); 04575 04576 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 04577 ? Intrinsic::aarch64_neon_sqadd 04578 : Intrinsic::aarch64_neon_sqsub; 04579 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 04580 } 04581 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 04582 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 04583 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 04584 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 04585 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 04586 "lane"); 04587 SmallVector<Value *, 2> ProductOps; 04588 ProductOps.push_back(Ops[1]); 04589 ProductOps.push_back(Ops[2]); 04590 Ops[1] = 04591 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 04592 ProductOps, "vqdmlXl"); 04593 Ops.pop_back(); 04594 04595 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 04596 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 04597 ? Intrinsic::aarch64_neon_sqadd 04598 : Intrinsic::aarch64_neon_sqsub; 04599 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 04600 } 04601 } 04602 04603 llvm::VectorType *VTy = GetNeonType(this, Type); 04604 llvm::Type *Ty = VTy; 04605 if (!Ty) 04606 return nullptr; 04607 04608 // Not all intrinsics handled by the common case work for AArch64 yet, so only 04609 // defer to common code if it's been added to our special map. 
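// A hit in AArch64SIMDIntrinsicMap supplies everything the shared NEON path
// needs: the LLVM intrinsic (plus an alternate), a name hint for the emitted
// value, and a type-modifier mask, which EmitCommonNeonBuiltinExpr consumes
// directly below. Builtins without a map entry fall through to the TBL/TBX
// helper and then to the AArch64-specific switch that follows.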
04610 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 04611 AArch64SIMDIntrinsicsProvenSorted); 04612 04613 if (Builtin) 04614 return EmitCommonNeonBuiltinExpr( 04615 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 04616 Builtin->NameHint, Builtin->TypeModifier, E, Ops, nullptr); 04617 04618 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 04619 return V; 04620 04621 unsigned Int; 04622 switch (BuiltinID) { 04623 default: return nullptr; 04624 case NEON::BI__builtin_neon_vbsl_v: 04625 case NEON::BI__builtin_neon_vbslq_v: { 04626 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 04627 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 04628 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 04629 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 04630 04631 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 04632 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 04633 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 04634 return Builder.CreateBitCast(Ops[0], Ty); 04635 } 04636 case NEON::BI__builtin_neon_vfma_lane_v: 04637 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 04638 // The ARM builtins (and instructions) have the addend as the first 04639 // operand, but the 'fma' intrinsics have it last. Swap it around here. 04640 Value *Addend = Ops[0]; 04641 Value *Multiplicand = Ops[1]; 04642 Value *LaneSource = Ops[2]; 04643 Ops[0] = Multiplicand; 04644 Ops[1] = LaneSource; 04645 Ops[2] = Addend; 04646 04647 // Now adjust things to handle the lane access. 04648 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 04649 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 04650 VTy; 04651 llvm::Constant *cst = cast<Constant>(Ops[3]); 04652 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 04653 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 04654 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 04655 04656 Ops.pop_back(); 04657 Int = Intrinsic::fma; 04658 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 04659 } 04660 case NEON::BI__builtin_neon_vfma_laneq_v: { 04661 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 04662 // v1f64 fma should be mapped to Neon scalar f64 fma 04663 if (VTy && VTy->getElementType() == DoubleTy) { 04664 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 04665 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 04666 llvm::Type *VTy = GetNeonType(this, 04667 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 04668 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 04669 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 04670 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 04671 Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 04672 return Builder.CreateBitCast(Result, Ty); 04673 } 04674 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 04675 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 04676 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 04677 04678 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 04679 VTy->getNumElements() * 2); 04680 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 04681 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 04682 cast<ConstantInt>(Ops[3])); 04683 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 04684 04685 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 04686 } 04687 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 04688 Value *F = 
CGM.getIntrinsic(Intrinsic::fma, Ty); 04689 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 04690 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 04691 04692 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 04693 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 04694 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 04695 } 04696 case NEON::BI__builtin_neon_vfmas_lane_f32: 04697 case NEON::BI__builtin_neon_vfmas_laneq_f32: 04698 case NEON::BI__builtin_neon_vfmad_lane_f64: 04699 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 04700 Ops.push_back(EmitScalarExpr(E->getArg(3))); 04701 llvm::Type *Ty = ConvertType(E->getCallReturnType()); 04702 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 04703 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 04704 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 04705 } 04706 case NEON::BI__builtin_neon_vfms_v: 04707 case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types 04708 // FIXME: probably remove when we no longer support aarch64_simd.h 04709 // (arm_neon.h delegates to vfma). 04710 04711 // The ARM builtins (and instructions) have the addend as the first 04712 // operand, but the 'fma' intrinsics have it last. Swap it around here. 04713 Value *Subtrahend = Ops[0]; 04714 Value *Multiplicand = Ops[2]; 04715 Ops[0] = Multiplicand; 04716 Ops[2] = Subtrahend; 04717 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 04718 Ops[1] = Builder.CreateFNeg(Ops[1]); 04719 Int = Intrinsic::fma; 04720 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls"); 04721 } 04722 case NEON::BI__builtin_neon_vmull_v: 04723 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 04724 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 04725 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 04726 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 04727 case NEON::BI__builtin_neon_vmax_v: 04728 case NEON::BI__builtin_neon_vmaxq_v: 04729 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 04730 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 04731 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 04732 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 04733 case NEON::BI__builtin_neon_vmin_v: 04734 case NEON::BI__builtin_neon_vminq_v: 04735 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 04736 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 04737 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 04738 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 04739 case NEON::BI__builtin_neon_vabd_v: 04740 case NEON::BI__builtin_neon_vabdq_v: 04741 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 04742 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 04743 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 04744 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 04745 case NEON::BI__builtin_neon_vpadal_v: 04746 case NEON::BI__builtin_neon_vpadalq_v: { 04747 unsigned ArgElts = VTy->getNumElements(); 04748 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 04749 unsigned BitWidth = EltTy->getBitWidth(); 04750 llvm::Type *ArgTy = llvm::VectorType::get( 04751 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 04752 llvm::Type* Tys[2] = { VTy, ArgTy }; 04753 Int = usgn ? 
Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 04754 SmallVector<llvm::Value*, 1> TmpOps; 04755 TmpOps.push_back(Ops[1]); 04756 Function *F = CGM.getIntrinsic(Int, Tys); 04757 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 04758 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 04759 return Builder.CreateAdd(tmp, addend); 04760 } 04761 case NEON::BI__builtin_neon_vpmin_v: 04762 case NEON::BI__builtin_neon_vpminq_v: 04763 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 04764 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 04765 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 04766 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 04767 case NEON::BI__builtin_neon_vpmax_v: 04768 case NEON::BI__builtin_neon_vpmaxq_v: 04769 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 04770 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 04771 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 04772 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 04773 case NEON::BI__builtin_neon_vminnm_v: 04774 case NEON::BI__builtin_neon_vminnmq_v: 04775 Int = Intrinsic::aarch64_neon_fminnm; 04776 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 04777 case NEON::BI__builtin_neon_vmaxnm_v: 04778 case NEON::BI__builtin_neon_vmaxnmq_v: 04779 Int = Intrinsic::aarch64_neon_fmaxnm; 04780 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 04781 case NEON::BI__builtin_neon_vrecpss_f32: { 04782 llvm::Type *f32Type = llvm::Type::getFloatTy(getLLVMContext()); 04783 Ops.push_back(EmitScalarExpr(E->getArg(1))); 04784 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f32Type), 04785 Ops, "vrecps"); 04786 } 04787 case NEON::BI__builtin_neon_vrecpsd_f64: { 04788 llvm::Type *f64Type = llvm::Type::getDoubleTy(getLLVMContext()); 04789 Ops.push_back(EmitScalarExpr(E->getArg(1))); 04790 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f64Type), 04791 Ops, "vrecps"); 04792 } 04793 case NEON::BI__builtin_neon_vqshrun_n_v: 04794 Int = Intrinsic::aarch64_neon_sqshrun; 04795 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 04796 case NEON::BI__builtin_neon_vqrshrun_n_v: 04797 Int = Intrinsic::aarch64_neon_sqrshrun; 04798 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 04799 case NEON::BI__builtin_neon_vqshrn_n_v: 04800 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 04801 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 04802 case NEON::BI__builtin_neon_vrshrn_n_v: 04803 Int = Intrinsic::aarch64_neon_rshrn; 04804 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 04805 case NEON::BI__builtin_neon_vqrshrn_n_v: 04806 Int = usgn ? 
Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 04807 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 04808 case NEON::BI__builtin_neon_vrnda_v: 04809 case NEON::BI__builtin_neon_vrndaq_v: { 04810 Int = Intrinsic::round; 04811 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 04812 } 04813 case NEON::BI__builtin_neon_vrndi_v: 04814 case NEON::BI__builtin_neon_vrndiq_v: { 04815 Int = Intrinsic::nearbyint; 04816 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 04817 } 04818 case NEON::BI__builtin_neon_vrndm_v: 04819 case NEON::BI__builtin_neon_vrndmq_v: { 04820 Int = Intrinsic::floor; 04821 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 04822 } 04823 case NEON::BI__builtin_neon_vrndn_v: 04824 case NEON::BI__builtin_neon_vrndnq_v: { 04825 Int = Intrinsic::aarch64_neon_frintn; 04826 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 04827 } 04828 case NEON::BI__builtin_neon_vrndp_v: 04829 case NEON::BI__builtin_neon_vrndpq_v: { 04830 Int = Intrinsic::ceil; 04831 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 04832 } 04833 case NEON::BI__builtin_neon_vrndx_v: 04834 case NEON::BI__builtin_neon_vrndxq_v: { 04835 Int = Intrinsic::rint; 04836 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 04837 } 04838 case NEON::BI__builtin_neon_vrnd_v: 04839 case NEON::BI__builtin_neon_vrndq_v: { 04840 Int = Intrinsic::trunc; 04841 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 04842 } 04843 case NEON::BI__builtin_neon_vceqz_v: 04844 case NEON::BI__builtin_neon_vceqzq_v: 04845 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 04846 ICmpInst::ICMP_EQ, "vceqz"); 04847 case NEON::BI__builtin_neon_vcgez_v: 04848 case NEON::BI__builtin_neon_vcgezq_v: 04849 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 04850 ICmpInst::ICMP_SGE, "vcgez"); 04851 case NEON::BI__builtin_neon_vclez_v: 04852 case NEON::BI__builtin_neon_vclezq_v: 04853 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 04854 ICmpInst::ICMP_SLE, "vclez"); 04855 case NEON::BI__builtin_neon_vcgtz_v: 04856 case NEON::BI__builtin_neon_vcgtzq_v: 04857 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 04858 ICmpInst::ICMP_SGT, "vcgtz"); 04859 case NEON::BI__builtin_neon_vcltz_v: 04860 case NEON::BI__builtin_neon_vcltzq_v: 04861 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 04862 ICmpInst::ICMP_SLT, "vcltz"); 04863 case NEON::BI__builtin_neon_vcvt_f64_v: 04864 case NEON::BI__builtin_neon_vcvtq_f64_v: 04865 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 04866 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 04867 return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 04868 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 04869 case NEON::BI__builtin_neon_vcvt_f64_f32: { 04870 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 04871 "unexpected vcvt_f64_f32 builtin"); 04872 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 04873 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 04874 04875 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 04876 } 04877 case NEON::BI__builtin_neon_vcvt_f32_f64: { 04878 assert(Type.getEltType() == NeonTypeFlags::Float32 && 04879 "unexpected vcvt_f32_f64 builtin"); 04880 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 04881 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 04882 04883 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 04884 } 04885 case NEON::BI__builtin_neon_vcvt_s32_v: 04886 case NEON::BI__builtin_neon_vcvt_u32_v: 04887 case NEON::BI__builtin_neon_vcvt_s64_v: 04888 case NEON::BI__builtin_neon_vcvt_u64_v: 04889 case NEON::BI__builtin_neon_vcvtq_s32_v: 04890 case NEON::BI__builtin_neon_vcvtq_u32_v: 04891 case NEON::BI__builtin_neon_vcvtq_s64_v: 04892 case NEON::BI__builtin_neon_vcvtq_u64_v: { 04893 bool Double = 04894 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 04895 llvm::Type *InTy = 04896 GetNeonType(this, 04897 NeonTypeFlags(Double ? NeonTypeFlags::Float64 04898 : NeonTypeFlags::Float32, false, quad)); 04899 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 04900 if (usgn) 04901 return Builder.CreateFPToUI(Ops[0], Ty); 04902 return Builder.CreateFPToSI(Ops[0], Ty); 04903 } 04904 case NEON::BI__builtin_neon_vcvta_s32_v: 04905 case NEON::BI__builtin_neon_vcvtaq_s32_v: 04906 case NEON::BI__builtin_neon_vcvta_u32_v: 04907 case NEON::BI__builtin_neon_vcvtaq_u32_v: 04908 case NEON::BI__builtin_neon_vcvta_s64_v: 04909 case NEON::BI__builtin_neon_vcvtaq_s64_v: 04910 case NEON::BI__builtin_neon_vcvta_u64_v: 04911 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 04912 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 04913 bool Double = 04914 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 04915 llvm::Type *InTy = 04916 GetNeonType(this, 04917 NeonTypeFlags(Double ? NeonTypeFlags::Float64 04918 : NeonTypeFlags::Float32, false, quad)); 04919 llvm::Type *Tys[2] = { Ty, InTy }; 04920 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 04921 } 04922 case NEON::BI__builtin_neon_vcvtm_s32_v: 04923 case NEON::BI__builtin_neon_vcvtmq_s32_v: 04924 case NEON::BI__builtin_neon_vcvtm_u32_v: 04925 case NEON::BI__builtin_neon_vcvtmq_u32_v: 04926 case NEON::BI__builtin_neon_vcvtm_s64_v: 04927 case NEON::BI__builtin_neon_vcvtmq_s64_v: 04928 case NEON::BI__builtin_neon_vcvtm_u64_v: 04929 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 04930 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 04931 bool Double = 04932 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 04933 llvm::Type *InTy = 04934 GetNeonType(this, 04935 NeonTypeFlags(Double ? 
NeonTypeFlags::Float64 04936 : NeonTypeFlags::Float32, false, quad)); 04937 llvm::Type *Tys[2] = { Ty, InTy }; 04938 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 04939 } 04940 case NEON::BI__builtin_neon_vcvtn_s32_v: 04941 case NEON::BI__builtin_neon_vcvtnq_s32_v: 04942 case NEON::BI__builtin_neon_vcvtn_u32_v: 04943 case NEON::BI__builtin_neon_vcvtnq_u32_v: 04944 case NEON::BI__builtin_neon_vcvtn_s64_v: 04945 case NEON::BI__builtin_neon_vcvtnq_s64_v: 04946 case NEON::BI__builtin_neon_vcvtn_u64_v: 04947 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 04948 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 04949 bool Double = 04950 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 04951 llvm::Type *InTy = 04952 GetNeonType(this, 04953 NeonTypeFlags(Double ? NeonTypeFlags::Float64 04954 : NeonTypeFlags::Float32, false, quad)); 04955 llvm::Type *Tys[2] = { Ty, InTy }; 04956 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 04957 } 04958 case NEON::BI__builtin_neon_vcvtp_s32_v: 04959 case NEON::BI__builtin_neon_vcvtpq_s32_v: 04960 case NEON::BI__builtin_neon_vcvtp_u32_v: 04961 case NEON::BI__builtin_neon_vcvtpq_u32_v: 04962 case NEON::BI__builtin_neon_vcvtp_s64_v: 04963 case NEON::BI__builtin_neon_vcvtpq_s64_v: 04964 case NEON::BI__builtin_neon_vcvtp_u64_v: 04965 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 04966 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 04967 bool Double = 04968 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 04969 llvm::Type *InTy = 04970 GetNeonType(this, 04971 NeonTypeFlags(Double ? NeonTypeFlags::Float64 04972 : NeonTypeFlags::Float32, false, quad)); 04973 llvm::Type *Tys[2] = { Ty, InTy }; 04974 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 04975 } 04976 case NEON::BI__builtin_neon_vmulx_v: 04977 case NEON::BI__builtin_neon_vmulxq_v: { 04978 Int = Intrinsic::aarch64_neon_fmulx; 04979 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 04980 } 04981 case NEON::BI__builtin_neon_vmul_lane_v: 04982 case NEON::BI__builtin_neon_vmul_laneq_v: { 04983 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 04984 bool Quad = false; 04985 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 04986 Quad = true; 04987 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 04988 llvm::Type *VTy = GetNeonType(this, 04989 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 04990 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 04991 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 04992 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 04993 return Builder.CreateBitCast(Result, Ty); 04994 } 04995 case NEON::BI__builtin_neon_vnegd_s64: 04996 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 04997 case NEON::BI__builtin_neon_vpmaxnm_v: 04998 case NEON::BI__builtin_neon_vpmaxnmq_v: { 04999 Int = Intrinsic::aarch64_neon_fmaxnmp; 05000 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 05001 } 05002 case NEON::BI__builtin_neon_vpminnm_v: 05003 case NEON::BI__builtin_neon_vpminnmq_v: { 05004 Int = Intrinsic::aarch64_neon_fminnmp; 05005 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 05006 } 05007 case NEON::BI__builtin_neon_vsqrt_v: 05008 case NEON::BI__builtin_neon_vsqrtq_v: { 05009 Int = Intrinsic::sqrt; 05010 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 05011 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 05012 } 05013 case NEON::BI__builtin_neon_vrbit_v: 
05014 case NEON::BI__builtin_neon_vrbitq_v: { 05015 Int = Intrinsic::aarch64_neon_rbit; 05016 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 05017 } 05018 case NEON::BI__builtin_neon_vaddv_u8: 05019 // FIXME: These are handled by the AArch64 scalar code. 05020 usgn = true; 05021 // FALLTHROUGH 05022 case NEON::BI__builtin_neon_vaddv_s8: { 05023 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 05024 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05025 VTy = 05026 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 05027 llvm::Type *Tys[2] = { Ty, VTy }; 05028 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05029 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 05030 return Builder.CreateTrunc(Ops[0], 05031 llvm::IntegerType::get(getLLVMContext(), 8)); 05032 } 05033 case NEON::BI__builtin_neon_vaddv_u16: 05034 usgn = true; 05035 // FALLTHROUGH 05036 case NEON::BI__builtin_neon_vaddv_s16: { 05037 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 05038 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05039 VTy = 05040 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 05041 llvm::Type *Tys[2] = { Ty, VTy }; 05042 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05043 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 05044 return Builder.CreateTrunc(Ops[0], 05045 llvm::IntegerType::get(getLLVMContext(), 16)); 05046 } 05047 case NEON::BI__builtin_neon_vaddvq_u8: 05048 usgn = true; 05049 // FALLTHROUGH 05050 case NEON::BI__builtin_neon_vaddvq_s8: { 05051 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 05052 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05053 VTy = 05054 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 05055 llvm::Type *Tys[2] = { Ty, VTy }; 05056 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05057 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 05058 return Builder.CreateTrunc(Ops[0], 05059 llvm::IntegerType::get(getLLVMContext(), 8)); 05060 } 05061 case NEON::BI__builtin_neon_vaddvq_u16: 05062 usgn = true; 05063 // FALLTHROUGH 05064 case NEON::BI__builtin_neon_vaddvq_s16: { 05065 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 05066 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05067 VTy = 05068 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 05069 llvm::Type *Tys[2] = { Ty, VTy }; 05070 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05071 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 05072 return Builder.CreateTrunc(Ops[0], 05073 llvm::IntegerType::get(getLLVMContext(), 16)); 05074 } 05075 case NEON::BI__builtin_neon_vmaxv_u8: { 05076 Int = Intrinsic::aarch64_neon_umaxv; 05077 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05078 VTy = 05079 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 05080 llvm::Type *Tys[2] = { Ty, VTy }; 05081 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05082 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 05083 return Builder.CreateTrunc(Ops[0], 05084 llvm::IntegerType::get(getLLVMContext(), 8)); 05085 } 05086 case NEON::BI__builtin_neon_vmaxv_u16: { 05087 Int = Intrinsic::aarch64_neon_umaxv; 05088 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05089 VTy = 05090 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 05091 llvm::Type *Tys[2] = { Ty, VTy }; 05092 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05093 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 05094 return Builder.CreateTrunc(Ops[0], 05095 llvm::IntegerType::get(getLLVMContext(), 16)); 05096 } 05097 case NEON::BI__builtin_neon_vmaxvq_u8: { 05098 Int = Intrinsic::aarch64_neon_umaxv; 05099 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05100 VTy = 05101 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 05102 llvm::Type *Tys[2] = { Ty, VTy }; 05103 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05104 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 05105 return Builder.CreateTrunc(Ops[0], 05106 llvm::IntegerType::get(getLLVMContext(), 8)); 05107 } 05108 case NEON::BI__builtin_neon_vmaxvq_u16: { 05109 Int = Intrinsic::aarch64_neon_umaxv; 05110 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05111 VTy = 05112 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 05113 llvm::Type *Tys[2] = { Ty, VTy }; 05114 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05115 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 05116 return Builder.CreateTrunc(Ops[0], 05117 llvm::IntegerType::get(getLLVMContext(), 16)); 05118 } 05119 case NEON::BI__builtin_neon_vmaxv_s8: { 05120 Int = Intrinsic::aarch64_neon_smaxv; 05121 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05122 VTy = 05123 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 05124 llvm::Type *Tys[2] = { Ty, VTy }; 05125 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05126 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 05127 return Builder.CreateTrunc(Ops[0], 05128 llvm::IntegerType::get(getLLVMContext(), 8)); 05129 } 05130 case NEON::BI__builtin_neon_vmaxv_s16: { 05131 Int = Intrinsic::aarch64_neon_smaxv; 05132 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05133 VTy = 05134 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 05135 llvm::Type *Tys[2] = { Ty, VTy }; 05136 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05137 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 05138 return Builder.CreateTrunc(Ops[0], 05139 llvm::IntegerType::get(getLLVMContext(), 16)); 05140 } 05141 case NEON::BI__builtin_neon_vmaxvq_s8: { 05142 Int = 
Intrinsic::aarch64_neon_smaxv; 05143 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05144 VTy = 05145 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 05146 llvm::Type *Tys[2] = { Ty, VTy }; 05147 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05148 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 05149 return Builder.CreateTrunc(Ops[0], 05150 llvm::IntegerType::get(getLLVMContext(), 8)); 05151 } 05152 case NEON::BI__builtin_neon_vmaxvq_s16: { 05153 Int = Intrinsic::aarch64_neon_smaxv; 05154 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05155 VTy = 05156 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 05157 llvm::Type *Tys[2] = { Ty, VTy }; 05158 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05159 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 05160 return Builder.CreateTrunc(Ops[0], 05161 llvm::IntegerType::get(getLLVMContext(), 16)); 05162 } 05163 case NEON::BI__builtin_neon_vminv_u8: { 05164 Int = Intrinsic::aarch64_neon_uminv; 05165 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05166 VTy = 05167 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 05168 llvm::Type *Tys[2] = { Ty, VTy }; 05169 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05170 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 05171 return Builder.CreateTrunc(Ops[0], 05172 llvm::IntegerType::get(getLLVMContext(), 8)); 05173 } 05174 case NEON::BI__builtin_neon_vminv_u16: { 05175 Int = Intrinsic::aarch64_neon_uminv; 05176 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05177 VTy = 05178 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 05179 llvm::Type *Tys[2] = { Ty, VTy }; 05180 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05181 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 05182 return Builder.CreateTrunc(Ops[0], 05183 llvm::IntegerType::get(getLLVMContext(), 16)); 05184 } 05185 case NEON::BI__builtin_neon_vminvq_u8: { 05186 Int = Intrinsic::aarch64_neon_uminv; 05187 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05188 VTy = 05189 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 05190 llvm::Type *Tys[2] = { Ty, VTy }; 05191 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05192 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 05193 return Builder.CreateTrunc(Ops[0], 05194 llvm::IntegerType::get(getLLVMContext(), 8)); 05195 } 05196 case NEON::BI__builtin_neon_vminvq_u16: { 05197 Int = Intrinsic::aarch64_neon_uminv; 05198 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05199 VTy = 05200 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 05201 llvm::Type *Tys[2] = { Ty, VTy }; 05202 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05203 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 05204 return Builder.CreateTrunc(Ops[0], 05205 llvm::IntegerType::get(getLLVMContext(), 16)); 05206 } 05207 case NEON::BI__builtin_neon_vminv_s8: { 05208 Int = Intrinsic::aarch64_neon_sminv; 05209 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05210 VTy = 05211 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 05212 llvm::Type *Tys[2] = { Ty, VTy }; 05213 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05214 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 05215 return Builder.CreateTrunc(Ops[0], 05216 llvm::IntegerType::get(getLLVMContext(), 8)); 05217 } 05218 case NEON::BI__builtin_neon_vminv_s16: { 05219 Int = Intrinsic::aarch64_neon_sminv; 05220 Ty = 
llvm::IntegerType::get(getLLVMContext(), 32); 05221 VTy = 05222 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 05223 llvm::Type *Tys[2] = { Ty, VTy }; 05224 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05225 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 05226 return Builder.CreateTrunc(Ops[0], 05227 llvm::IntegerType::get(getLLVMContext(), 16)); 05228 } 05229 case NEON::BI__builtin_neon_vminvq_s8: { 05230 Int = Intrinsic::aarch64_neon_sminv; 05231 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05232 VTy = 05233 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 05234 llvm::Type *Tys[2] = { Ty, VTy }; 05235 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05236 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 05237 return Builder.CreateTrunc(Ops[0], 05238 llvm::IntegerType::get(getLLVMContext(), 8)); 05239 } 05240 case NEON::BI__builtin_neon_vminvq_s16: { 05241 Int = Intrinsic::aarch64_neon_sminv; 05242 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05243 VTy = 05244 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 05245 llvm::Type *Tys[2] = { Ty, VTy }; 05246 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05247 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 05248 return Builder.CreateTrunc(Ops[0], 05249 llvm::IntegerType::get(getLLVMContext(), 16)); 05250 } 05251 case NEON::BI__builtin_neon_vmul_n_f64: { 05252 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 05253 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 05254 return Builder.CreateFMul(Ops[0], RHS); 05255 } 05256 case NEON::BI__builtin_neon_vaddlv_u8: { 05257 Int = Intrinsic::aarch64_neon_uaddlv; 05258 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05259 VTy = 05260 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 05261 llvm::Type *Tys[2] = { Ty, VTy }; 05262 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05263 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 05264 return Builder.CreateTrunc(Ops[0], 05265 llvm::IntegerType::get(getLLVMContext(), 16)); 05266 } 05267 case NEON::BI__builtin_neon_vaddlv_u16: { 05268 Int = Intrinsic::aarch64_neon_uaddlv; 05269 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05270 VTy = 05271 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 05272 llvm::Type *Tys[2] = { Ty, VTy }; 05273 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05274 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 05275 } 05276 case NEON::BI__builtin_neon_vaddlvq_u8: { 05277 Int = Intrinsic::aarch64_neon_uaddlv; 05278 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05279 VTy = 05280 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 05281 llvm::Type *Tys[2] = { Ty, VTy }; 05282 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05283 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 05284 return Builder.CreateTrunc(Ops[0], 05285 llvm::IntegerType::get(getLLVMContext(), 16)); 05286 } 05287 case NEON::BI__builtin_neon_vaddlvq_u16: { 05288 Int = Intrinsic::aarch64_neon_uaddlv; 05289 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05290 VTy = 05291 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 05292 llvm::Type *Tys[2] = { Ty, VTy }; 05293 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05294 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 05295 } 05296 case NEON::BI__builtin_neon_vaddlv_s8: { 05297 Int = 
Intrinsic::aarch64_neon_saddlv; 05298 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05299 VTy = 05300 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 05301 llvm::Type *Tys[2] = { Ty, VTy }; 05302 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05303 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 05304 return Builder.CreateTrunc(Ops[0], 05305 llvm::IntegerType::get(getLLVMContext(), 16)); 05306 } 05307 case NEON::BI__builtin_neon_vaddlv_s16: { 05308 Int = Intrinsic::aarch64_neon_saddlv; 05309 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05310 VTy = 05311 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 05312 llvm::Type *Tys[2] = { Ty, VTy }; 05313 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05314 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 05315 } 05316 case NEON::BI__builtin_neon_vaddlvq_s8: { 05317 Int = Intrinsic::aarch64_neon_saddlv; 05318 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05319 VTy = 05320 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 05321 llvm::Type *Tys[2] = { Ty, VTy }; 05322 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05323 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 05324 return Builder.CreateTrunc(Ops[0], 05325 llvm::IntegerType::get(getLLVMContext(), 16)); 05326 } 05327 case NEON::BI__builtin_neon_vaddlvq_s16: { 05328 Int = Intrinsic::aarch64_neon_saddlv; 05329 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 05330 VTy = 05331 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 05332 llvm::Type *Tys[2] = { Ty, VTy }; 05333 Ops.push_back(EmitScalarExpr(E->getArg(0))); 05334 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 05335 } 05336 case NEON::BI__builtin_neon_vsri_n_v: 05337 case NEON::BI__builtin_neon_vsriq_n_v: { 05338 Int = Intrinsic::aarch64_neon_vsri; 05339 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 05340 return EmitNeonCall(Intrin, Ops, "vsri_n"); 05341 } 05342 case NEON::BI__builtin_neon_vsli_n_v: 05343 case NEON::BI__builtin_neon_vsliq_n_v: { 05344 Int = Intrinsic::aarch64_neon_vsli; 05345 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 05346 return EmitNeonCall(Intrin, Ops, "vsli_n"); 05347 } 05348 case NEON::BI__builtin_neon_vsra_n_v: 05349 case NEON::BI__builtin_neon_vsraq_n_v: 05350 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 05351 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 05352 return Builder.CreateAdd(Ops[0], Ops[1]); 05353 case NEON::BI__builtin_neon_vrsra_n_v: 05354 case NEON::BI__builtin_neon_vrsraq_n_v: { 05355 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 05356 SmallVector<llvm::Value*,2> TmpOps; 05357 TmpOps.push_back(Ops[1]); 05358 TmpOps.push_back(Ops[2]); 05359 Function* F = CGM.getIntrinsic(Int, Ty); 05360 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 05361 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 05362 return Builder.CreateAdd(Ops[0], tmp); 05363 } 05364 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 05365 // of an Align parameter here. 
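// Illustrative sketch, not taken from this file: every across-vector reduction case above (vaddv, vmaxv, vminv, vaddlv and their q-forms) follows the same recipe -- call the target intrinsic overloaded on an i32 result and the source vector type, then truncate when the ACLE builtin returns a narrower element. For vmaxv_u8 on a <8 x i8> input the emitted IR is roughly: %r = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %v) followed by %t = trunc i32 %r to i8. A minimal C-level use, assuming <arm_neon.h> is available:
//
//   #include <arm_neon.h>
//   uint8_t max_lane(uint8x8_t v) { return vmaxv_u8(v); }
//
// The widening vaddlv_u16 / vaddlvq_u16 cases skip the trunc because their result already fills the full i32.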
05366 case NEON::BI__builtin_neon_vld1_x2_v: 05367 case NEON::BI__builtin_neon_vld1q_x2_v: 05368 case NEON::BI__builtin_neon_vld1_x3_v: 05369 case NEON::BI__builtin_neon_vld1q_x3_v: 05370 case NEON::BI__builtin_neon_vld1_x4_v: 05371 case NEON::BI__builtin_neon_vld1q_x4_v: { 05372 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 05373 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 05374 llvm::Type *Tys[2] = { VTy, PTy }; 05375 unsigned Int; 05376 switch (BuiltinID) { 05377 case NEON::BI__builtin_neon_vld1_x2_v: 05378 case NEON::BI__builtin_neon_vld1q_x2_v: 05379 Int = Intrinsic::aarch64_neon_ld1x2; 05380 break; 05381 case NEON::BI__builtin_neon_vld1_x3_v: 05382 case NEON::BI__builtin_neon_vld1q_x3_v: 05383 Int = Intrinsic::aarch64_neon_ld1x3; 05384 break; 05385 case NEON::BI__builtin_neon_vld1_x4_v: 05386 case NEON::BI__builtin_neon_vld1q_x4_v: 05387 Int = Intrinsic::aarch64_neon_ld1x4; 05388 break; 05389 } 05390 Function *F = CGM.getIntrinsic(Int, Tys); 05391 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 05392 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 05393 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 05394 return Builder.CreateStore(Ops[1], Ops[0]); 05395 } 05396 case NEON::BI__builtin_neon_vst1_x2_v: 05397 case NEON::BI__builtin_neon_vst1q_x2_v: 05398 case NEON::BI__builtin_neon_vst1_x3_v: 05399 case NEON::BI__builtin_neon_vst1q_x3_v: 05400 case NEON::BI__builtin_neon_vst1_x4_v: 05401 case NEON::BI__builtin_neon_vst1q_x4_v: { 05402 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 05403 llvm::Type *Tys[2] = { VTy, PTy }; 05404 unsigned Int; 05405 switch (BuiltinID) { 05406 case NEON::BI__builtin_neon_vst1_x2_v: 05407 case NEON::BI__builtin_neon_vst1q_x2_v: 05408 Int = Intrinsic::aarch64_neon_st1x2; 05409 break; 05410 case NEON::BI__builtin_neon_vst1_x3_v: 05411 case NEON::BI__builtin_neon_vst1q_x3_v: 05412 Int = Intrinsic::aarch64_neon_st1x3; 05413 break; 05414 case NEON::BI__builtin_neon_vst1_x4_v: 05415 case NEON::BI__builtin_neon_vst1q_x4_v: 05416 Int = Intrinsic::aarch64_neon_st1x4; 05417 break; 05418 } 05419 SmallVector<Value *, 4> IntOps(Ops.begin()+1, Ops.end()); 05420 IntOps.push_back(Ops[0]); 05421 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), IntOps, ""); 05422 } 05423 case NEON::BI__builtin_neon_vld1_v: 05424 case NEON::BI__builtin_neon_vld1q_v: 05425 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 05426 return Builder.CreateLoad(Ops[0]); 05427 case NEON::BI__builtin_neon_vst1_v: 05428 case NEON::BI__builtin_neon_vst1q_v: 05429 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 05430 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 05431 return Builder.CreateStore(Ops[1], Ops[0]); 05432 case NEON::BI__builtin_neon_vld1_lane_v: 05433 case NEON::BI__builtin_neon_vld1q_lane_v: 05434 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 05435 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 05436 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 05437 Ops[0] = Builder.CreateLoad(Ops[0]); 05438 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 05439 case NEON::BI__builtin_neon_vld1_dup_v: 05440 case NEON::BI__builtin_neon_vld1q_dup_v: { 05441 Value *V = UndefValue::get(Ty); 05442 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 05443 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 05444 Ops[0] = Builder.CreateLoad(Ops[0]); 05445 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 05446 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 05447 
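// Illustrative aside, not from the original file: at this point the loaded scalar sits in lane 0 of an otherwise-undef vector, and EmitNeonSplat (defined earlier in this file) broadcasts it with a shufflevector whose mask is all zeros. A minimal C-level sketch of what vld1_dup lowers to, assuming <arm_neon.h>:
//
//   #include <arm_neon.h>
//   uint32x4_t dup_load(const uint32_t *p) { return vld1q_dup_u32(p); }
//
// i.e. *p is loaded once and replicated into every lane of the result.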
return EmitNeonSplat(Ops[0], CI); 05448 } 05449 case NEON::BI__builtin_neon_vst1_lane_v: 05450 case NEON::BI__builtin_neon_vst1q_lane_v: 05451 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 05452 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 05453 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 05454 return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); 05455 case NEON::BI__builtin_neon_vld2_v: 05456 case NEON::BI__builtin_neon_vld2q_v: { 05457 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 05458 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 05459 llvm::Type *Tys[2] = { VTy, PTy }; 05460 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 05461 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 05462 Ops[0] = Builder.CreateBitCast(Ops[0], 05463 llvm::PointerType::getUnqual(Ops[1]->getType())); 05464 return Builder.CreateStore(Ops[1], Ops[0]); 05465 } 05466 case NEON::BI__builtin_neon_vld3_v: 05467 case NEON::BI__builtin_neon_vld3q_v: { 05468 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 05469 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 05470 llvm::Type *Tys[2] = { VTy, PTy }; 05471 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 05472 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 05473 Ops[0] = Builder.CreateBitCast(Ops[0], 05474 llvm::PointerType::getUnqual(Ops[1]->getType())); 05475 return Builder.CreateStore(Ops[1], Ops[0]); 05476 } 05477 case NEON::BI__builtin_neon_vld4_v: 05478 case NEON::BI__builtin_neon_vld4q_v: { 05479 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 05480 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 05481 llvm::Type *Tys[2] = { VTy, PTy }; 05482 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 05483 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 05484 Ops[0] = Builder.CreateBitCast(Ops[0], 05485 llvm::PointerType::getUnqual(Ops[1]->getType())); 05486 return Builder.CreateStore(Ops[1], Ops[0]); 05487 } 05488 case NEON::BI__builtin_neon_vld2_dup_v: 05489 case NEON::BI__builtin_neon_vld2q_dup_v: { 05490 llvm::Type *PTy = 05491 llvm::PointerType::getUnqual(VTy->getElementType()); 05492 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 05493 llvm::Type *Tys[2] = { VTy, PTy }; 05494 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 05495 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 05496 Ops[0] = Builder.CreateBitCast(Ops[0], 05497 llvm::PointerType::getUnqual(Ops[1]->getType())); 05498 return Builder.CreateStore(Ops[1], Ops[0]); 05499 } 05500 case NEON::BI__builtin_neon_vld3_dup_v: 05501 case NEON::BI__builtin_neon_vld3q_dup_v: { 05502 llvm::Type *PTy = 05503 llvm::PointerType::getUnqual(VTy->getElementType()); 05504 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 05505 llvm::Type *Tys[2] = { VTy, PTy }; 05506 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 05507 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 05508 Ops[0] = Builder.CreateBitCast(Ops[0], 05509 llvm::PointerType::getUnqual(Ops[1]->getType())); 05510 return Builder.CreateStore(Ops[1], Ops[0]); 05511 } 05512 case NEON::BI__builtin_neon_vld4_dup_v: 05513 case NEON::BI__builtin_neon_vld4q_dup_v: { 05514 llvm::Type *PTy = 05515 llvm::PointerType::getUnqual(VTy->getElementType()); 05516 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 05517 llvm::Type *Tys[2] = { VTy, PTy }; 05518 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 05519 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 05520 Ops[0] = Builder.CreateBitCast(Ops[0], 05521 
llvm::PointerType::getUnqual(Ops[1]->getType())); 05522 return Builder.CreateStore(Ops[1], Ops[0]); 05523 } 05524 case NEON::BI__builtin_neon_vld2_lane_v: 05525 case NEON::BI__builtin_neon_vld2q_lane_v: { 05526 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 05527 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 05528 Ops.push_back(Ops[1]); 05529 Ops.erase(Ops.begin()+1); 05530 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 05531 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 05532 Ops[3] = Builder.CreateZExt(Ops[3], 05533 llvm::IntegerType::get(getLLVMContext(), 64)); 05534 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 05535 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 05536 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 05537 return Builder.CreateStore(Ops[1], Ops[0]); 05538 } 05539 case NEON::BI__builtin_neon_vld3_lane_v: 05540 case NEON::BI__builtin_neon_vld3q_lane_v: { 05541 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 05542 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 05543 Ops.push_back(Ops[1]); 05544 Ops.erase(Ops.begin()+1); 05545 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 05546 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 05547 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 05548 Ops[4] = Builder.CreateZExt(Ops[4], 05549 llvm::IntegerType::get(getLLVMContext(), 64)); 05550 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 05551 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 05552 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 05553 return Builder.CreateStore(Ops[1], Ops[0]); 05554 } 05555 case NEON::BI__builtin_neon_vld4_lane_v: 05556 case NEON::BI__builtin_neon_vld4q_lane_v: { 05557 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 05558 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 05559 Ops.push_back(Ops[1]); 05560 Ops.erase(Ops.begin()+1); 05561 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 05562 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 05563 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 05564 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 05565 Ops[5] = Builder.CreateZExt(Ops[5], 05566 llvm::IntegerType::get(getLLVMContext(), 64)); 05567 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 05568 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 05569 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 05570 return Builder.CreateStore(Ops[1], Ops[0]); 05571 } 05572 case NEON::BI__builtin_neon_vst2_v: 05573 case NEON::BI__builtin_neon_vst2q_v: { 05574 Ops.push_back(Ops[0]); 05575 Ops.erase(Ops.begin()); 05576 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 05577 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 05578 Ops, ""); 05579 } 05580 case NEON::BI__builtin_neon_vst2_lane_v: 05581 case NEON::BI__builtin_neon_vst2q_lane_v: { 05582 Ops.push_back(Ops[0]); 05583 Ops.erase(Ops.begin()); 05584 Ops[2] = Builder.CreateZExt(Ops[2], 05585 llvm::IntegerType::get(getLLVMContext(), 64)); 05586 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 05587 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 05588 Ops, ""); 05589 } 05590 case NEON::BI__builtin_neon_vst3_v: 05591 case NEON::BI__builtin_neon_vst3q_v: { 05592 Ops.push_back(Ops[0]); 05593 Ops.erase(Ops.begin()); 05594 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 05595 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 05596 Ops, ""); 05597 } 05598 case NEON::BI__builtin_neon_vst3_lane_v: 05599 case 
NEON::BI__builtin_neon_vst3q_lane_v: { 05600 Ops.push_back(Ops[0]); 05601 Ops.erase(Ops.begin()); 05602 Ops[3] = Builder.CreateZExt(Ops[3], 05603 llvm::IntegerType::get(getLLVMContext(), 64)); 05604 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 05605 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 05606 Ops, ""); 05607 } 05608 case NEON::BI__builtin_neon_vst4_v: 05609 case NEON::BI__builtin_neon_vst4q_v: { 05610 Ops.push_back(Ops[0]); 05611 Ops.erase(Ops.begin()); 05612 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 05613 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 05614 Ops, ""); 05615 } 05616 case NEON::BI__builtin_neon_vst4_lane_v: 05617 case NEON::BI__builtin_neon_vst4q_lane_v: { 05618 Ops.push_back(Ops[0]); 05619 Ops.erase(Ops.begin()); 05620 Ops[4] = Builder.CreateZExt(Ops[4], 05621 llvm::IntegerType::get(getLLVMContext(), 64)); 05622 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 05623 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 05624 Ops, ""); 05625 } 05626 case NEON::BI__builtin_neon_vtrn_v: 05627 case NEON::BI__builtin_neon_vtrnq_v: { 05628 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 05629 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 05630 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 05631 Value *SV = nullptr; 05632 05633 for (unsigned vi = 0; vi != 2; ++vi) { 05634 SmallVector<Constant*, 16> Indices; 05635 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 05636 Indices.push_back(ConstantInt::get(Int32Ty, i+vi)); 05637 Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi)); 05638 } 05639 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 05640 SV = llvm::ConstantVector::get(Indices); 05641 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); 05642 SV = Builder.CreateStore(SV, Addr); 05643 } 05644 return SV; 05645 } 05646 case NEON::BI__builtin_neon_vuzp_v: 05647 case NEON::BI__builtin_neon_vuzpq_v: { 05648 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 05649 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 05650 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 05651 Value *SV = nullptr; 05652 05653 for (unsigned vi = 0; vi != 2; ++vi) { 05654 SmallVector<Constant*, 16> Indices; 05655 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 05656 Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); 05657 05658 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 05659 SV = llvm::ConstantVector::get(Indices); 05660 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); 05661 SV = Builder.CreateStore(SV, Addr); 05662 } 05663 return SV; 05664 } 05665 case NEON::BI__builtin_neon_vzip_v: 05666 case NEON::BI__builtin_neon_vzipq_v: { 05667 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 05668 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 05669 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 05670 Value *SV = nullptr; 05671 05672 for (unsigned vi = 0; vi != 2; ++vi) { 05673 SmallVector<Constant*, 16> Indices; 05674 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 05675 Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); 05676 Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); 05677 } 05678 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 05679 SV = llvm::ConstantVector::get(Indices); 05680 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); 05681 SV = Builder.CreateStore(SV, Addr); 05682 } 05683 return SV; 05684 
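// Worked example (illustrative, not from the original file): for the vzip case above with e = 8 elements, the first pass (vi = 0) builds the shuffle mask (0, 8, 1, 9, 2, 10, 3, 11) and the second pass (vi = 1) builds (4, 12, 5, 13, 6, 14, 7, 15), interleaving the low and then the high halves of the two inputs. vtrn pushes i + vi and i + e + vi, and vuzp pushes 2*i + vi, to get the transpose and unzip patterns the same way. Each pass stores its shuffle into result slot vi through the inbounds GEP on Ops[0].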
} 05685 case NEON::BI__builtin_neon_vqtbl1q_v: { 05686 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 05687 Ops, "vtbl1"); 05688 } 05689 case NEON::BI__builtin_neon_vqtbl2q_v: { 05690 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 05691 Ops, "vtbl2"); 05692 } 05693 case NEON::BI__builtin_neon_vqtbl3q_v: { 05694 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 05695 Ops, "vtbl3"); 05696 } 05697 case NEON::BI__builtin_neon_vqtbl4q_v: { 05698 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 05699 Ops, "vtbl4"); 05700 } 05701 case NEON::BI__builtin_neon_vqtbx1q_v: { 05702 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 05703 Ops, "vtbx1"); 05704 } 05705 case NEON::BI__builtin_neon_vqtbx2q_v: { 05706 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 05707 Ops, "vtbx2"); 05708 } 05709 case NEON::BI__builtin_neon_vqtbx3q_v: { 05710 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 05711 Ops, "vtbx3"); 05712 } 05713 case NEON::BI__builtin_neon_vqtbx4q_v: { 05714 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 05715 Ops, "vtbx4"); 05716 } 05717 case NEON::BI__builtin_neon_vsqadd_v: 05718 case NEON::BI__builtin_neon_vsqaddq_v: { 05719 Int = Intrinsic::aarch64_neon_usqadd; 05720 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 05721 } 05722 case NEON::BI__builtin_neon_vuqadd_v: 05723 case NEON::BI__builtin_neon_vuqaddq_v: { 05724 Int = Intrinsic::aarch64_neon_suqadd; 05725 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 05726 } 05727 } 05728 } 05729 05730 llvm::Value *CodeGenFunction:: 05731 BuildVector(ArrayRef<llvm::Value*> Ops) { 05732 assert((Ops.size() & (Ops.size() - 1)) == 0 && 05733 "Not a power-of-two sized vector!"); 05734 bool AllConstants = true; 05735 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 05736 AllConstants &= isa<Constant>(Ops[i]); 05737 05738 // If this is a constant vector, create a ConstantVector. 05739 if (AllConstants) { 05740 SmallVector<llvm::Constant*, 16> CstOps; 05741 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 05742 CstOps.push_back(cast<Constant>(Ops[i])); 05743 return llvm::ConstantVector::get(CstOps); 05744 } 05745 05746 // Otherwise, insertelement the values to build the vector. 05747 Value *Result = 05748 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 05749 05750 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 05751 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 05752 05753 return Result; 05754 } 05755 05756 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 05757 const CallExpr *E) { 05758 SmallVector<Value*, 4> Ops; 05759 05760 // Find out if any arguments are required to be integer constant expressions. 05761 unsigned ICEArguments = 0; 05762 ASTContext::GetBuiltinTypeError Error; 05763 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 05764 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 05765 05766 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 05767 // If this is a normal argument, just emit it as a scalar. 05768 if ((ICEArguments & (1 << i)) == 0) { 05769 Ops.push_back(EmitScalarExpr(E->getArg(i))); 05770 continue; 05771 } 05772 05773 // If this is required to be a constant, constant fold it so that we know 05774 // that the generated intrinsic gets a ConstantInt. 
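// Illustrative note, not from the original file: ICEArguments is a bitmask filled in by GetBuiltinType; bit i set means argument i of the builtin is declared with the 'I' (integer-constant-expression) marker, so it is folded here into a ConstantInt instead of being emitted as a runtime value. A hedged C-level example -- in headers of this vintage _mm_alignr_epi8 expands to __builtin_ia32_palignr128, whose byte-count operand is such an argument:
//
//   #include <tmmintrin.h>
//   __m128i align5(__m128i a, __m128i b) { return _mm_alignr_epi8(a, b, 5); }
//
// The literal 5 reaches the palignr128 case below as a ConstantInt.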
05775 llvm::APSInt Result; 05776 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 05777 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 05778 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 05779 } 05780 05781 switch (BuiltinID) { 05782 default: return nullptr; 05783 case X86::BI_mm_prefetch: { 05784 Value *Address = EmitScalarExpr(E->getArg(0)); 05785 Value *RW = ConstantInt::get(Int32Ty, 0); 05786 Value *Locality = EmitScalarExpr(E->getArg(1)); 05787 Value *Data = ConstantInt::get(Int32Ty, 1); 05788 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 05789 return Builder.CreateCall4(F, Address, RW, Locality, Data); 05790 } 05791 case X86::BI__builtin_ia32_vec_init_v8qi: 05792 case X86::BI__builtin_ia32_vec_init_v4hi: 05793 case X86::BI__builtin_ia32_vec_init_v2si: 05794 return Builder.CreateBitCast(BuildVector(Ops), 05795 llvm::Type::getX86_MMXTy(getLLVMContext())); 05796 case X86::BI__builtin_ia32_vec_ext_v2si: 05797 return Builder.CreateExtractElement(Ops[0], 05798 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 05799 case X86::BI__builtin_ia32_ldmxcsr: { 05800 Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); 05801 Builder.CreateStore(Ops[0], Tmp); 05802 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 05803 Builder.CreateBitCast(Tmp, Int8PtrTy)); 05804 } 05805 case X86::BI__builtin_ia32_stmxcsr: { 05806 Value *Tmp = CreateMemTemp(E->getType()); 05807 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 05808 Builder.CreateBitCast(Tmp, Int8PtrTy)); 05809 return Builder.CreateLoad(Tmp, "stmxcsr"); 05810 } 05811 case X86::BI__builtin_ia32_storehps: 05812 case X86::BI__builtin_ia32_storelps: { 05813 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 05814 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 05815 05816 // cast val v2i64 05817 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 05818 05819 // extract (0, 1) 05820 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; 05821 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 05822 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 05823 05824 // cast pointer to i64 & store 05825 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 05826 return Builder.CreateStore(Ops[1], Ops[0]); 05827 } 05828 case X86::BI__builtin_ia32_palignr: { 05829 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 05830 05831 // If palignr is shifting the pair of input vectors less than 9 bytes, 05832 // emit a shuffle instruction. 05833 if (shiftVal <= 8) { 05834 SmallVector<llvm::Constant*, 8> Indices; 05835 for (unsigned i = 0; i != 8; ++i) 05836 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i)); 05837 05838 Value* SV = llvm::ConstantVector::get(Indices); 05839 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 05840 } 05841 05842 // If palignr is shifting the pair of input vectors more than 8 but less 05843 // than 16 bytes, emit a logical right shift of the destination. 05844 if (shiftVal < 16) { 05845 // MMX has these as 1 x i64 vectors for some odd optimization reasons. 
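// Worked example (illustrative, not from the original file): with 8-byte MMX operands, a shift of 9..15 bytes means every result byte comes from the first operand alone, so the pair concatenation degenerates into a logical right shift of Ops[0] by (shiftVal - 8) * 8 bits via x86.mmx.psrl.q; e.g. shiftVal == 10 shifts by 16 bits. Shifts of 8 bytes or less were handled above with a plain shufflevector of both operands, and 16 bytes or more falls through to the all-zero result at the end of this case.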
05846 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1); 05847 05848 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 05849 Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8); 05850 05851 // create i32 constant 05852 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q); 05853 return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr"); 05854 } 05855 05856 // If palignr is shifting the pair of vectors more than 16 bytes, emit zero. 05857 return llvm::Constant::getNullValue(ConvertType(E->getType())); 05858 } 05859 case X86::BI__builtin_ia32_palignr128: { 05860 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 05861 05862 // If palignr is shifting the pair of input vectors less than 17 bytes, 05863 // emit a shuffle instruction. 05864 if (shiftVal <= 16) { 05865 SmallVector<llvm::Constant*, 16> Indices; 05866 for (unsigned i = 0; i != 16; ++i) 05867 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i)); 05868 05869 Value* SV = llvm::ConstantVector::get(Indices); 05870 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 05871 } 05872 05873 // If palignr is shifting the pair of input vectors more than 16 but less 05874 // than 32 bytes, emit a logical right shift of the destination. 05875 if (shiftVal < 32) { 05876 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 05877 05878 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 05879 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8); 05880 05881 // create i32 constant 05882 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq); 05883 return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr"); 05884 } 05885 05886 // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. 05887 return llvm::Constant::getNullValue(ConvertType(E->getType())); 05888 } 05889 case X86::BI__builtin_ia32_palignr256: { 05890 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 05891 05892 // If palignr is shifting the pair of input vectors less than 17 bytes, 05893 // emit a shuffle instruction. 05894 if (shiftVal <= 16) { 05895 SmallVector<llvm::Constant*, 32> Indices; 05896 // 256-bit palignr operates on 128-bit lanes so we need to handle that 05897 for (unsigned l = 0; l != 2; ++l) { 05898 unsigned LaneStart = l * 16; 05899 unsigned LaneEnd = (l+1) * 16; 05900 for (unsigned i = 0; i != 16; ++i) { 05901 unsigned Idx = shiftVal + i + LaneStart; 05902 if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand 05903 Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx)); 05904 } 05905 } 05906 05907 Value* SV = llvm::ConstantVector::get(Indices); 05908 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 05909 } 05910 05911 // If palignr is shifting the pair of input vectors more than 16 but less 05912 // than 32 bytes, emit a logical right shift of the destination. 05913 if (shiftVal < 32) { 05914 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4); 05915 05916 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 05917 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8); 05918 05919 // create i32 constant 05920 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq); 05921 return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr"); 05922 } 05923 05924 // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. 
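// Worked example (illustrative, not from the original file): the 256-bit palignr above operates on two independent 128-bit lanes, so for shiftVal == 4 the index loop produces 4..15 followed by 32..35 for lane 0 (bytes 4..15 of the first shuffle operand, Ops[1], then bytes 0..3 of the second, Ops[0]) and the analogous 20..31, 48..51 pattern for lane 1. Indices that run past the end of a lane are bumped by 16 so they select from the second shuffle operand rather than crossing into the neighbouring lane. As with the narrower forms, shift counts of 32 bytes or more produce the zero vector returned below.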
05925 return llvm::Constant::getNullValue(ConvertType(E->getType())); 05926 } 05927 case X86::BI__builtin_ia32_movntps: 05928 case X86::BI__builtin_ia32_movntps256: 05929 case X86::BI__builtin_ia32_movntpd: 05930 case X86::BI__builtin_ia32_movntpd256: 05931 case X86::BI__builtin_ia32_movntdq: 05932 case X86::BI__builtin_ia32_movntdq256: 05933 case X86::BI__builtin_ia32_movnti: 05934 case X86::BI__builtin_ia32_movnti64: { 05935 llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(), 05936 Builder.getInt32(1)); 05937 05938 // Convert the type of the pointer to a pointer to the stored type. 05939 Value *BC = Builder.CreateBitCast(Ops[0], 05940 llvm::PointerType::getUnqual(Ops[1]->getType()), 05941 "cast"); 05942 StoreInst *SI = Builder.CreateStore(Ops[1], BC); 05943 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 05944 05945 // If the operand is an integer, we can't assume alignment. Otherwise, 05946 // assume natural alignment. 05947 QualType ArgTy = E->getArg(1)->getType(); 05948 unsigned Align; 05949 if (ArgTy->isIntegerType()) 05950 Align = 1; 05951 else 05952 Align = getContext().getTypeSizeInChars(ArgTy).getQuantity(); 05953 SI->setAlignment(Align); 05954 return SI; 05955 } 05956 // 3DNow! 05957 case X86::BI__builtin_ia32_pswapdsf: 05958 case X86::BI__builtin_ia32_pswapdsi: { 05959 const char *name = nullptr; 05960 Intrinsic::ID ID = Intrinsic::not_intrinsic; 05961 switch(BuiltinID) { 05962 default: llvm_unreachable("Unsupported intrinsic!"); 05963 case X86::BI__builtin_ia32_pswapdsf: 05964 case X86::BI__builtin_ia32_pswapdsi: 05965 name = "pswapd"; 05966 ID = Intrinsic::x86_3dnowa_pswapd; 05967 break; 05968 } 05969 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 05970 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 05971 llvm::Function *F = CGM.getIntrinsic(ID); 05972 return Builder.CreateCall(F, Ops, name); 05973 } 05974 case X86::BI__builtin_ia32_rdrand16_step: 05975 case X86::BI__builtin_ia32_rdrand32_step: 05976 case X86::BI__builtin_ia32_rdrand64_step: 05977 case X86::BI__builtin_ia32_rdseed16_step: 05978 case X86::BI__builtin_ia32_rdseed32_step: 05979 case X86::BI__builtin_ia32_rdseed64_step: { 05980 Intrinsic::ID ID; 05981 switch (BuiltinID) { 05982 default: llvm_unreachable("Unsupported intrinsic!"); 05983 case X86::BI__builtin_ia32_rdrand16_step: 05984 ID = Intrinsic::x86_rdrand_16; 05985 break; 05986 case X86::BI__builtin_ia32_rdrand32_step: 05987 ID = Intrinsic::x86_rdrand_32; 05988 break; 05989 case X86::BI__builtin_ia32_rdrand64_step: 05990 ID = Intrinsic::x86_rdrand_64; 05991 break; 05992 case X86::BI__builtin_ia32_rdseed16_step: 05993 ID = Intrinsic::x86_rdseed_16; 05994 break; 05995 case X86::BI__builtin_ia32_rdseed32_step: 05996 ID = Intrinsic::x86_rdseed_32; 05997 break; 05998 case X86::BI__builtin_ia32_rdseed64_step: 05999 ID = Intrinsic::x86_rdseed_64; 06000 break; 06001 } 06002 06003 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 06004 Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]); 06005 return Builder.CreateExtractValue(Call, 1); 06006 } 06007 // AVX2 broadcast 06008 case X86::BI__builtin_ia32_vbroadcastsi256: { 06009 Value *VecTmp = CreateMemTemp(E->getArg(0)->getType()); 06010 Builder.CreateStore(Ops[0], VecTmp); 06011 Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128); 06012 return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy)); 06013 } 06014 } 06015 } 06016 06017 06018 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 06019 const CallExpr *E) { 
06020 SmallVector<Value*, 4> Ops; 06021 06022 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 06023 Ops.push_back(EmitScalarExpr(E->getArg(i))); 06024 06025 Intrinsic::ID ID = Intrinsic::not_intrinsic; 06026 06027 switch (BuiltinID) { 06028 default: return nullptr; 06029 06030 // vec_ld, vec_lvsl, vec_lvsr 06031 case PPC::BI__builtin_altivec_lvx: 06032 case PPC::BI__builtin_altivec_lvxl: 06033 case PPC::BI__builtin_altivec_lvebx: 06034 case PPC::BI__builtin_altivec_lvehx: 06035 case PPC::BI__builtin_altivec_lvewx: 06036 case PPC::BI__builtin_altivec_lvsl: 06037 case PPC::BI__builtin_altivec_lvsr: 06038 case PPC::BI__builtin_vsx_lxvd2x: 06039 case PPC::BI__builtin_vsx_lxvw4x: 06040 { 06041 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 06042 06043 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 06044 Ops.pop_back(); 06045 06046 switch (BuiltinID) { 06047 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 06048 case PPC::BI__builtin_altivec_lvx: 06049 ID = Intrinsic::ppc_altivec_lvx; 06050 break; 06051 case PPC::BI__builtin_altivec_lvxl: 06052 ID = Intrinsic::ppc_altivec_lvxl; 06053 break; 06054 case PPC::BI__builtin_altivec_lvebx: 06055 ID = Intrinsic::ppc_altivec_lvebx; 06056 break; 06057 case PPC::BI__builtin_altivec_lvehx: 06058 ID = Intrinsic::ppc_altivec_lvehx; 06059 break; 06060 case PPC::BI__builtin_altivec_lvewx: 06061 ID = Intrinsic::ppc_altivec_lvewx; 06062 break; 06063 case PPC::BI__builtin_altivec_lvsl: 06064 ID = Intrinsic::ppc_altivec_lvsl; 06065 break; 06066 case PPC::BI__builtin_altivec_lvsr: 06067 ID = Intrinsic::ppc_altivec_lvsr; 06068 break; 06069 case PPC::BI__builtin_vsx_lxvd2x: 06070 ID = Intrinsic::ppc_vsx_lxvd2x; 06071 break; 06072 case PPC::BI__builtin_vsx_lxvw4x: 06073 ID = Intrinsic::ppc_vsx_lxvw4x; 06074 break; 06075 } 06076 llvm::Function *F = CGM.getIntrinsic(ID); 06077 return Builder.CreateCall(F, Ops, ""); 06078 } 06079 06080 // vec_st 06081 case PPC::BI__builtin_altivec_stvx: 06082 case PPC::BI__builtin_altivec_stvxl: 06083 case PPC::BI__builtin_altivec_stvebx: 06084 case PPC::BI__builtin_altivec_stvehx: 06085 case PPC::BI__builtin_altivec_stvewx: 06086 case PPC::BI__builtin_vsx_stxvd2x: 06087 case PPC::BI__builtin_vsx_stxvw4x: 06088 { 06089 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 06090 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 06091 Ops.pop_back(); 06092 06093 switch (BuiltinID) { 06094 default: llvm_unreachable("Unsupported st intrinsic!"); 06095 case PPC::BI__builtin_altivec_stvx: 06096 ID = Intrinsic::ppc_altivec_stvx; 06097 break; 06098 case PPC::BI__builtin_altivec_stvxl: 06099 ID = Intrinsic::ppc_altivec_stvxl; 06100 break; 06101 case PPC::BI__builtin_altivec_stvebx: 06102 ID = Intrinsic::ppc_altivec_stvebx; 06103 break; 06104 case PPC::BI__builtin_altivec_stvehx: 06105 ID = Intrinsic::ppc_altivec_stvehx; 06106 break; 06107 case PPC::BI__builtin_altivec_stvewx: 06108 ID = Intrinsic::ppc_altivec_stvewx; 06109 break; 06110 case PPC::BI__builtin_vsx_stxvd2x: 06111 ID = Intrinsic::ppc_vsx_stxvd2x; 06112 break; 06113 case PPC::BI__builtin_vsx_stxvw4x: 06114 ID = Intrinsic::ppc_vsx_stxvw4x; 06115 break; 06116 } 06117 llvm::Function *F = CGM.getIntrinsic(ID); 06118 return Builder.CreateCall(F, Ops, ""); 06119 } 06120 } 06121 } 06122 06123 // Emit an intrinsic that has 1 float or double. 
06124 static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF, 06125 const CallExpr *E, 06126 unsigned IntrinsicID) { 06127 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 06128 06129 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 06130 return CGF.Builder.CreateCall(F, Src0); 06131 } 06132 06133 // Emit an intrinsic that has 3 float or double operands. 06134 static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF, 06135 const CallExpr *E, 06136 unsigned IntrinsicID) { 06137 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 06138 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 06139 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 06140 06141 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 06142 return CGF.Builder.CreateCall3(F, Src0, Src1, Src2); 06143 } 06144 06145 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 06146 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 06147 const CallExpr *E, 06148 unsigned IntrinsicID) { 06149 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 06150 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 06151 06152 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 06153 return CGF.Builder.CreateCall2(F, Src0, Src1); 06154 } 06155 06156 Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID, 06157 const CallExpr *E) { 06158 switch (BuiltinID) { 06159 case R600::BI__builtin_amdgpu_div_scale: 06160 case R600::BI__builtin_amdgpu_div_scalef: { 06161 // Translate from the intrinsics's struct return to the builtin's out 06162 // argument. 06163 06164 std::pair<llvm::Value *, unsigned> FlagOutPtr 06165 = EmitPointerWithAlignment(E->getArg(3)); 06166 06167 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 06168 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 06169 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 06170 06171 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale, 06172 X->getType()); 06173 06174 llvm::Value *Tmp = Builder.CreateCall3(Callee, X, Y, Z); 06175 06176 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 06177 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 06178 06179 llvm::Type *RealFlagType 06180 = FlagOutPtr.first->getType()->getPointerElementType(); 06181 06182 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 06183 llvm::StoreInst *FlagStore = Builder.CreateStore(FlagExt, FlagOutPtr.first); 06184 FlagStore->setAlignment(FlagOutPtr.second); 06185 return Result; 06186 } 06187 case R600::BI__builtin_amdgpu_div_fmas: 06188 case R600::BI__builtin_amdgpu_div_fmasf: { 06189 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 06190 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 06191 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 06192 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 06193 06194 llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas, 06195 Src0->getType()); 06196 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 06197 return Builder.CreateCall4(F, Src0, Src1, Src2, Src3ToBool); 06198 } 06199 case R600::BI__builtin_amdgpu_div_fixup: 06200 case R600::BI__builtin_amdgpu_div_fixupf: 06201 return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup); 06202 case R600::BI__builtin_amdgpu_trig_preop: 06203 case R600::BI__builtin_amdgpu_trig_preopf: 06204 return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop); 06205 case R600::BI__builtin_amdgpu_rcp: 06206 case R600::BI__builtin_amdgpu_rcpf: 06207 return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp); 06208 case 
R600::BI__builtin_amdgpu_rsq: 06209 case R600::BI__builtin_amdgpu_rsqf: 06210 return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq); 06211 case R600::BI__builtin_amdgpu_rsq_clamped: 06212 case R600::BI__builtin_amdgpu_rsq_clampedf: 06213 return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped); 06214 case R600::BI__builtin_amdgpu_ldexp: 06215 case R600::BI__builtin_amdgpu_ldexpf: 06216 return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp); 06217 default: 06218 return nullptr; 06219 } 06220 }
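// Illustrative closing note, not part of the original file: the rdrand / rdseed "step" builtins handled in EmitX86BuiltinExpr and the AMDGPU div_scale builtins above share one lowering shape -- the LLVM intrinsic returns a two-element struct, one element is stored through the caller's out-pointer and the other becomes the builtin's return value. A hedged user-level sketch of the x86 side, assuming <immintrin.h> and -mrdrnd:
//
//   #include <immintrin.h>
//   int get_random(unsigned int *out) {
//     // Returns 1 and writes *out on success, 0 if no entropy was available.
//     return _rdrand32_step(out);
//   }
//
// For div_scale the roles are flipped: the scaled value is returned and the i1 flag is zero-extended and stored through the fourth argument.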