LLVM API Documentation
00001 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines an instruction selector for the AArch64 target. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "AArch64TargetMachine.h" 00015 #include "MCTargetDesc/AArch64AddressingModes.h" 00016 #include "llvm/ADT/APSInt.h" 00017 #include "llvm/CodeGen/SelectionDAGISel.h" 00018 #include "llvm/IR/Function.h" // To access function attributes. 00019 #include "llvm/IR/GlobalValue.h" 00020 #include "llvm/IR/Intrinsics.h" 00021 #include "llvm/Support/Debug.h" 00022 #include "llvm/Support/ErrorHandling.h" 00023 #include "llvm/Support/MathExtras.h" 00024 #include "llvm/Support/raw_ostream.h" 00025 00026 using namespace llvm; 00027 00028 #define DEBUG_TYPE "aarch64-isel" 00029 00030 //===--------------------------------------------------------------------===// 00031 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine 00032 /// instructions for SelectionDAG operations. 00033 /// 00034 namespace { 00035 00036 class AArch64DAGToDAGISel : public SelectionDAGISel { 00037 AArch64TargetMachine &TM; 00038 00039 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 00040 /// make the right decision when generating code for different targets. 00041 const AArch64Subtarget *Subtarget; 00042 00043 bool ForCodeSize; 00044 00045 public: 00046 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, 00047 CodeGenOpt::Level OptLevel) 00048 : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr), 00049 ForCodeSize(false) {} 00050 00051 const char *getPassName() const override { 00052 return "AArch64 Instruction Selection"; 00053 } 00054 00055 bool runOnMachineFunction(MachineFunction &MF) override { 00056 AttributeSet FnAttrs = MF.getFunction()->getAttributes(); 00057 ForCodeSize = 00058 FnAttrs.hasAttribute(AttributeSet::FunctionIndex, 00059 Attribute::OptimizeForSize) || 00060 FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); 00061 Subtarget = &TM.getSubtarget<AArch64Subtarget>(); 00062 return SelectionDAGISel::runOnMachineFunction(MF); 00063 } 00064 00065 SDNode *Select(SDNode *Node) override; 00066 00067 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 00068 /// inline asm expressions. 
00069 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 00070 char ConstraintCode, 00071 std::vector<SDValue> &OutOps) override; 00072 00073 SDNode *SelectMLAV64LaneV128(SDNode *N); 00074 SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N); 00075 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); 00076 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 00077 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); 00078 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 00079 return SelectShiftedRegister(N, false, Reg, Shift); 00080 } 00081 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { 00082 return SelectShiftedRegister(N, true, Reg, Shift); 00083 } 00084 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { 00085 return SelectAddrModeIndexed(N, 1, Base, OffImm); 00086 } 00087 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { 00088 return SelectAddrModeIndexed(N, 2, Base, OffImm); 00089 } 00090 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { 00091 return SelectAddrModeIndexed(N, 4, Base, OffImm); 00092 } 00093 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { 00094 return SelectAddrModeIndexed(N, 8, Base, OffImm); 00095 } 00096 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { 00097 return SelectAddrModeIndexed(N, 16, Base, OffImm); 00098 } 00099 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { 00100 return SelectAddrModeUnscaled(N, 1, Base, OffImm); 00101 } 00102 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { 00103 return SelectAddrModeUnscaled(N, 2, Base, OffImm); 00104 } 00105 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { 00106 return SelectAddrModeUnscaled(N, 4, Base, OffImm); 00107 } 00108 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { 00109 return SelectAddrModeUnscaled(N, 8, Base, OffImm); 00110 } 00111 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { 00112 return SelectAddrModeUnscaled(N, 16, Base, OffImm); 00113 } 00114 00115 template<int Width> 00116 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, 00117 SDValue &SignExtend, SDValue &DoShift) { 00118 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 00119 } 00120 00121 template<int Width> 00122 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, 00123 SDValue &SignExtend, SDValue &DoShift) { 00124 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); 00125 } 00126 00127 00128 /// Form sequences of consecutive 64/128-bit registers for use in NEON 00129 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have 00130 /// between 1 and 4 elements. If it contains a single element that is returned 00131 /// unchanged; otherwise a REG_SEQUENCE value is returned. 00132 SDValue createDTuple(ArrayRef<SDValue> Vecs); 00133 SDValue createQTuple(ArrayRef<SDValue> Vecs); 00134 00135 /// Generic helper for the createDTuple/createQTuple 00136 /// functions. Those should almost always be called instead. 
00137 SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[], 00138 unsigned SubRegs[]); 00139 00140 SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); 00141 00142 SDNode *SelectIndexedLoad(SDNode *N, bool &Done); 00143 00144 SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 00145 unsigned SubRegIdx); 00146 SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 00147 unsigned SubRegIdx); 00148 SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 00149 SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); 00150 00151 SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); 00152 SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); 00153 SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 00154 SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); 00155 00156 SDNode *SelectBitfieldExtractOp(SDNode *N); 00157 SDNode *SelectBitfieldInsertOp(SDNode *N); 00158 00159 SDNode *SelectLIBM(SDNode *N); 00160 00161 // Include the pieces autogenerated from the target description. 00162 #include "AArch64GenDAGISel.inc" 00163 00164 private: 00165 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, 00166 SDValue &Shift); 00167 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, 00168 SDValue &OffImm); 00169 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, 00170 SDValue &OffImm); 00171 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, 00172 SDValue &Offset, SDValue &SignExtend, 00173 SDValue &DoShift); 00174 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, 00175 SDValue &Offset, SDValue &SignExtend, 00176 SDValue &DoShift); 00177 bool isWorthFolding(SDValue V) const; 00178 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, 00179 SDValue &Offset, SDValue &SignExtend); 00180 00181 template<unsigned RegWidth> 00182 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { 00183 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); 00184 } 00185 00186 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); 00187 }; 00188 } // end anonymous namespace 00189 00190 /// isIntImmediate - This method tests to see if the node is a constant 00191 /// operand. If so Imm will receive the 32-bit value. 00192 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { 00193 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { 00194 Imm = C->getZExtValue(); 00195 return true; 00196 } 00197 return false; 00198 } 00199 00200 // isIntImmediate - This method tests to see if a constant operand. 00201 // If so Imm will receive the value. 00202 static bool isIntImmediate(SDValue N, uint64_t &Imm) { 00203 return isIntImmediate(N.getNode(), Imm); 00204 } 00205 00206 // isOpcWithIntImmediate - This method tests to see if the node is a specific 00207 // opcode and that it has a immediate integer right operand. 00208 // If so Imm will receive the 32 bit value. 00209 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, 00210 uint64_t &Imm) { 00211 return N->getOpcode() == Opc && 00212 isIntImmediate(N->getOperand(1).getNode(), Imm); 00213 } 00214 00215 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( 00216 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { 00217 assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); 00218 // Require the address to be in a register. 
That is safe for all AArch64 00219 // variants and it is hard to do anything much smarter without knowing 00220 // how the operand is used. 00221 OutOps.push_back(Op); 00222 return false; 00223 } 00224 00225 /// SelectArithImmed - Select an immediate value that can be represented as 00226 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 00227 /// Val set to the 12-bit value and Shift set to the shifter operand. 00228 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, 00229 SDValue &Shift) { 00230 // This function is called from the addsub_shifted_imm ComplexPattern, 00231 // which lists [imm] as the list of opcode it's interested in, however 00232 // we still need to check whether the operand is actually an immediate 00233 // here because the ComplexPattern opcode list is only used in 00234 // root-level opcode matching. 00235 if (!isa<ConstantSDNode>(N.getNode())) 00236 return false; 00237 00238 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 00239 unsigned ShiftAmt; 00240 00241 if (Immed >> 12 == 0) { 00242 ShiftAmt = 0; 00243 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 00244 ShiftAmt = 12; 00245 Immed = Immed >> 12; 00246 } else 00247 return false; 00248 00249 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 00250 Val = CurDAG->getTargetConstant(Immed, MVT::i32); 00251 Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); 00252 return true; 00253 } 00254 00255 /// SelectNegArithImmed - As above, but negates the value before trying to 00256 /// select it. 00257 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, 00258 SDValue &Shift) { 00259 // This function is called from the addsub_shifted_imm ComplexPattern, 00260 // which lists [imm] as the list of opcode it's interested in, however 00261 // we still need to check whether the operand is actually an immediate 00262 // here because the ComplexPattern opcode list is only used in 00263 // root-level opcode matching. 00264 if (!isa<ConstantSDNode>(N.getNode())) 00265 return false; 00266 00267 // The immediate operand must be a 24-bit zero-extended immediate. 00268 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 00269 00270 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 00271 // have the opposite effect on the C flag, so this pattern mustn't match under 00272 // those circumstances. 00273 if (Immed == 0) 00274 return false; 00275 00276 if (N.getValueType() == MVT::i32) 00277 Immed = ~((uint32_t)Immed) + 1; 00278 else 00279 Immed = ~Immed + 1ULL; 00280 if (Immed & 0xFFFFFFFFFF000000ULL) 00281 return false; 00282 00283 Immed &= 0xFFFFFFULL; 00284 return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift); 00285 } 00286 00287 /// getShiftTypeForNode - Translate a shift node to the corresponding 00288 /// ShiftType value. 00289 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { 00290 switch (N.getOpcode()) { 00291 default: 00292 return AArch64_AM::InvalidShiftExtend; 00293 case ISD::SHL: 00294 return AArch64_AM::LSL; 00295 case ISD::SRL: 00296 return AArch64_AM::LSR; 00297 case ISD::SRA: 00298 return AArch64_AM::ASR; 00299 case ISD::ROTR: 00300 return AArch64_AM::ROR; 00301 } 00302 } 00303 00304 /// \brief Determine wether it is worth to fold V into an extended register. 00305 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { 00306 // it hurts if the value is used at least twice, unless we are optimizing 00307 // for code size. 
00308 if (ForCodeSize || V.hasOneUse()) 00309 return true; 00310 return false; 00311 } 00312 00313 /// SelectShiftedRegister - Select a "shifted register" operand. If the value 00314 /// is not shifted, set the Shift operand to default of "LSL 0". The logical 00315 /// instructions allow the shifted register to be rotated, but the arithmetic 00316 /// instructions do not. The AllowROR parameter specifies whether ROR is 00317 /// supported. 00318 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, 00319 SDValue &Reg, SDValue &Shift) { 00320 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); 00321 if (ShType == AArch64_AM::InvalidShiftExtend) 00322 return false; 00323 if (!AllowROR && ShType == AArch64_AM::ROR) 00324 return false; 00325 00326 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 00327 unsigned BitSize = N.getValueType().getSizeInBits(); 00328 unsigned Val = RHS->getZExtValue() & (BitSize - 1); 00329 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); 00330 00331 Reg = N.getOperand(0); 00332 Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); 00333 return isWorthFolding(N); 00334 } 00335 00336 return false; 00337 } 00338 00339 /// getExtendTypeForNode - Translate an extend node to the corresponding 00340 /// ExtendType value. 00341 static AArch64_AM::ShiftExtendType 00342 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { 00343 if (N.getOpcode() == ISD::SIGN_EXTEND || 00344 N.getOpcode() == ISD::SIGN_EXTEND_INREG) { 00345 EVT SrcVT; 00346 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) 00347 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); 00348 else 00349 SrcVT = N.getOperand(0).getValueType(); 00350 00351 if (!IsLoadStore && SrcVT == MVT::i8) 00352 return AArch64_AM::SXTB; 00353 else if (!IsLoadStore && SrcVT == MVT::i16) 00354 return AArch64_AM::SXTH; 00355 else if (SrcVT == MVT::i32) 00356 return AArch64_AM::SXTW; 00357 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 00358 00359 return AArch64_AM::InvalidShiftExtend; 00360 } else if (N.getOpcode() == ISD::ZERO_EXTEND || 00361 N.getOpcode() == ISD::ANY_EXTEND) { 00362 EVT SrcVT = N.getOperand(0).getValueType(); 00363 if (!IsLoadStore && SrcVT == MVT::i8) 00364 return AArch64_AM::UXTB; 00365 else if (!IsLoadStore && SrcVT == MVT::i16) 00366 return AArch64_AM::UXTH; 00367 else if (SrcVT == MVT::i32) 00368 return AArch64_AM::UXTW; 00369 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 00370 00371 return AArch64_AM::InvalidShiftExtend; 00372 } else if (N.getOpcode() == ISD::AND) { 00373 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 00374 if (!CSD) 00375 return AArch64_AM::InvalidShiftExtend; 00376 uint64_t AndMask = CSD->getZExtValue(); 00377 00378 switch (AndMask) { 00379 default: 00380 return AArch64_AM::InvalidShiftExtend; 00381 case 0xFF: 00382 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 00383 case 0xFFFF: 00384 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 00385 case 0xFFFFFFFF: 00386 return AArch64_AM::UXTW; 00387 } 00388 } 00389 00390 return AArch64_AM::InvalidShiftExtend; 00391 } 00392 00393 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. 
00394 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { 00395 if (DL->getOpcode() != AArch64ISD::DUPLANE16 && 00396 DL->getOpcode() != AArch64ISD::DUPLANE32) 00397 return false; 00398 00399 SDValue SV = DL->getOperand(0); 00400 if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) 00401 return false; 00402 00403 SDValue EV = SV.getOperand(1); 00404 if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) 00405 return false; 00406 00407 ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); 00408 ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); 00409 LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); 00410 LaneOp = EV.getOperand(0); 00411 00412 return true; 00413 } 00414 00415 // Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a 00416 // high lane extract. 00417 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, 00418 SDValue &LaneOp, int &LaneIdx) { 00419 00420 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { 00421 std::swap(Op0, Op1); 00422 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) 00423 return false; 00424 } 00425 StdOp = Op1; 00426 return true; 00427 } 00428 00429 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand 00430 /// is a lane in the upper half of a 128-bit vector. Recognize and select this 00431 /// so that we don't emit unnecessary lane extracts. 00432 SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { 00433 SDValue Op0 = N->getOperand(0); 00434 SDValue Op1 = N->getOperand(1); 00435 SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. 00436 SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. 00437 int LaneIdx = -1; // Will hold the lane index. 00438 00439 if (Op1.getOpcode() != ISD::MUL || 00440 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 00441 LaneIdx)) { 00442 std::swap(Op0, Op1); 00443 if (Op1.getOpcode() != ISD::MUL || 00444 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 00445 LaneIdx)) 00446 return nullptr; 00447 } 00448 00449 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); 00450 00451 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; 00452 00453 unsigned MLAOpc = ~0U; 00454 00455 switch (N->getSimpleValueType(0).SimpleTy) { 00456 default: 00457 llvm_unreachable("Unrecognized MLA."); 00458 case MVT::v4i16: 00459 MLAOpc = AArch64::MLAv4i16_indexed; 00460 break; 00461 case MVT::v8i16: 00462 MLAOpc = AArch64::MLAv8i16_indexed; 00463 break; 00464 case MVT::v2i32: 00465 MLAOpc = AArch64::MLAv2i32_indexed; 00466 break; 00467 case MVT::v4i32: 00468 MLAOpc = AArch64::MLAv4i32_indexed; 00469 break; 00470 } 00471 00472 return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops); 00473 } 00474 00475 SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { 00476 SDValue SMULLOp0; 00477 SDValue SMULLOp1; 00478 int LaneIdx; 00479 00480 if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, 00481 LaneIdx)) 00482 return nullptr; 00483 00484 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); 00485 00486 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; 00487 00488 unsigned SMULLOpc = ~0U; 00489 00490 if (IntNo == Intrinsic::aarch64_neon_smull) { 00491 switch (N->getSimpleValueType(0).SimpleTy) { 00492 default: 00493 llvm_unreachable("Unrecognized SMULL."); 00494 case MVT::v4i32: 00495 SMULLOpc = AArch64::SMULLv4i16_indexed; 00496 break; 00497 case 
MVT::v2i64: 00498 SMULLOpc = AArch64::SMULLv2i32_indexed; 00499 break; 00500 } 00501 } else if (IntNo == Intrinsic::aarch64_neon_umull) { 00502 switch (N->getSimpleValueType(0).SimpleTy) { 00503 default: 00504 llvm_unreachable("Unrecognized SMULL."); 00505 case MVT::v4i32: 00506 SMULLOpc = AArch64::UMULLv4i16_indexed; 00507 break; 00508 case MVT::v2i64: 00509 SMULLOpc = AArch64::UMULLv2i32_indexed; 00510 break; 00511 } 00512 } else 00513 llvm_unreachable("Unrecognized intrinsic."); 00514 00515 return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops); 00516 } 00517 00518 /// Instructions that accept extend modifiers like UXTW expect the register 00519 /// being extended to be a GPR32, but the incoming DAG might be acting on a 00520 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if 00521 /// this is the case. 00522 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { 00523 if (N.getValueType() == MVT::i32) 00524 return N; 00525 00526 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); 00527 MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 00528 SDLoc(N), MVT::i32, N, SubReg); 00529 return SDValue(Node, 0); 00530 } 00531 00532 00533 /// SelectArithExtendedRegister - Select a "extended register" operand. This 00534 /// operand folds in an extend followed by an optional left shift. 00535 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, 00536 SDValue &Shift) { 00537 unsigned ShiftVal = 0; 00538 AArch64_AM::ShiftExtendType Ext; 00539 00540 if (N.getOpcode() == ISD::SHL) { 00541 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 00542 if (!CSD) 00543 return false; 00544 ShiftVal = CSD->getZExtValue(); 00545 if (ShiftVal > 4) 00546 return false; 00547 00548 Ext = getExtendTypeForNode(N.getOperand(0)); 00549 if (Ext == AArch64_AM::InvalidShiftExtend) 00550 return false; 00551 00552 Reg = N.getOperand(0).getOperand(0); 00553 } else { 00554 Ext = getExtendTypeForNode(N); 00555 if (Ext == AArch64_AM::InvalidShiftExtend) 00556 return false; 00557 00558 Reg = N.getOperand(0); 00559 } 00560 00561 // AArch64 mandates that the RHS of the operation must use the smallest 00562 // register classs that could contain the size being extended from. Thus, 00563 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though 00564 // there might not be an actual 32-bit value in the program. We can 00565 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. 00566 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); 00567 Reg = narrowIfNeeded(CurDAG, Reg); 00568 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); 00569 return isWorthFolding(N); 00570 } 00571 00572 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit 00573 /// immediate" address. The "Size" argument is the size in bytes of the memory 00574 /// reference, which determines the scale. 
00575 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, 00576 SDValue &Base, SDValue &OffImm) { 00577 const TargetLowering *TLI = getTargetLowering(); 00578 if (N.getOpcode() == ISD::FrameIndex) { 00579 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 00580 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 00581 OffImm = CurDAG->getTargetConstant(0, MVT::i64); 00582 return true; 00583 } 00584 00585 if (N.getOpcode() == AArch64ISD::ADDlow) { 00586 GlobalAddressSDNode *GAN = 00587 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); 00588 Base = N.getOperand(0); 00589 OffImm = N.getOperand(1); 00590 if (!GAN) 00591 return true; 00592 00593 const GlobalValue *GV = GAN->getGlobal(); 00594 unsigned Alignment = GV->getAlignment(); 00595 const DataLayout *DL = TLI->getDataLayout(); 00596 Type *Ty = GV->getType()->getElementType(); 00597 if (Alignment == 0 && Ty->isSized() && !Subtarget->isTargetDarwin()) 00598 Alignment = DL->getABITypeAlignment(Ty); 00599 00600 if (Alignment >= Size) 00601 return true; 00602 } 00603 00604 if (CurDAG->isBaseWithConstantOffset(N)) { 00605 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 00606 int64_t RHSC = (int64_t)RHS->getZExtValue(); 00607 unsigned Scale = Log2_32(Size); 00608 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 00609 Base = N.getOperand(0); 00610 if (Base.getOpcode() == ISD::FrameIndex) { 00611 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 00612 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 00613 } 00614 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64); 00615 return true; 00616 } 00617 } 00618 } 00619 00620 // Before falling back to our general case, check if the unscaled 00621 // instructions can handle this. If so, that's preferable. 00622 if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) 00623 return false; 00624 00625 // Base only. The address will be materialized into a register before 00626 // the memory is accessed. 00627 // add x0, Xbase, #offset 00628 // ldr x0, [x0] 00629 Base = N; 00630 OffImm = CurDAG->getTargetConstant(0, MVT::i64); 00631 return true; 00632 } 00633 00634 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit 00635 /// immediate" address. This should only match when there is an offset that 00636 /// is not valid for a scaled immediate addressing mode. The "Size" argument 00637 /// is the size in bytes of the memory reference, which is needed here to know 00638 /// what is valid for a scaled immediate. 00639 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, 00640 SDValue &Base, 00641 SDValue &OffImm) { 00642 if (!CurDAG->isBaseWithConstantOffset(N)) 00643 return false; 00644 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 00645 int64_t RHSC = RHS->getSExtValue(); 00646 // If the offset is valid as a scaled immediate, don't match here. 
00647 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && 00648 RHSC < (0x1000 << Log2_32(Size))) 00649 return false; 00650 if (RHSC >= -256 && RHSC < 256) { 00651 Base = N.getOperand(0); 00652 if (Base.getOpcode() == ISD::FrameIndex) { 00653 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 00654 const TargetLowering *TLI = getTargetLowering(); 00655 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 00656 } 00657 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64); 00658 return true; 00659 } 00660 } 00661 return false; 00662 } 00663 00664 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { 00665 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); 00666 SDValue ImpDef = SDValue( 00667 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64), 00668 0); 00669 MachineSDNode *Node = CurDAG->getMachineNode( 00670 TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg); 00671 return SDValue(Node, 0); 00672 } 00673 00674 /// \brief Check if the given SHL node (\p N), can be used to form an 00675 /// extended register for an addressing mode. 00676 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, 00677 bool WantExtend, SDValue &Offset, 00678 SDValue &SignExtend) { 00679 assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); 00680 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 00681 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) 00682 return false; 00683 00684 if (WantExtend) { 00685 AArch64_AM::ShiftExtendType Ext = 00686 getExtendTypeForNode(N.getOperand(0), true); 00687 if (Ext == AArch64_AM::InvalidShiftExtend) 00688 return false; 00689 00690 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); 00691 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); 00692 } else { 00693 Offset = N.getOperand(0); 00694 SignExtend = CurDAG->getTargetConstant(0, MVT::i32); 00695 } 00696 00697 unsigned LegalShiftVal = Log2_32(Size); 00698 unsigned ShiftVal = CSD->getZExtValue(); 00699 00700 if (ShiftVal != 0 && ShiftVal != LegalShiftVal) 00701 return false; 00702 00703 if (isWorthFolding(N)) 00704 return true; 00705 00706 return false; 00707 } 00708 00709 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, 00710 SDValue &Base, SDValue &Offset, 00711 SDValue &SignExtend, 00712 SDValue &DoShift) { 00713 if (N.getOpcode() != ISD::ADD) 00714 return false; 00715 SDValue LHS = N.getOperand(0); 00716 SDValue RHS = N.getOperand(1); 00717 00718 // We don't want to match immediate adds here, because they are better lowered 00719 // to the register-immediate addressing modes. 00720 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) 00721 return false; 00722 00723 // Check if this particular node is reused in any non-memory related 00724 // operation. If yes, do not try to fold this node into the address 00725 // computation, since the computation will be kept. 00726 const SDNode *Node = N.getNode(); 00727 for (SDNode *UI : Node->uses()) { 00728 if (!isa<MemSDNode>(*UI)) 00729 return false; 00730 } 00731 00732 // Remember if it is worth folding N when it produces extended register. 00733 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 00734 00735 // Try to match a shifted extend on the RHS. 
00736 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 00737 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { 00738 Base = LHS; 00739 DoShift = CurDAG->getTargetConstant(true, MVT::i32); 00740 return true; 00741 } 00742 00743 // Try to match a shifted extend on the LHS. 00744 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 00745 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { 00746 Base = RHS; 00747 DoShift = CurDAG->getTargetConstant(true, MVT::i32); 00748 return true; 00749 } 00750 00751 // There was no shift, whatever else we find. 00752 DoShift = CurDAG->getTargetConstant(false, MVT::i32); 00753 00754 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; 00755 // Try to match an unshifted extend on the LHS. 00756 if (IsExtendedRegisterWorthFolding && 00757 (Ext = getExtendTypeForNode(LHS, true)) != 00758 AArch64_AM::InvalidShiftExtend) { 00759 Base = RHS; 00760 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); 00761 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); 00762 if (isWorthFolding(LHS)) 00763 return true; 00764 } 00765 00766 // Try to match an unshifted extend on the RHS. 00767 if (IsExtendedRegisterWorthFolding && 00768 (Ext = getExtendTypeForNode(RHS, true)) != 00769 AArch64_AM::InvalidShiftExtend) { 00770 Base = LHS; 00771 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); 00772 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); 00773 if (isWorthFolding(RHS)) 00774 return true; 00775 } 00776 00777 return false; 00778 } 00779 00780 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, 00781 SDValue &Base, SDValue &Offset, 00782 SDValue &SignExtend, 00783 SDValue &DoShift) { 00784 if (N.getOpcode() != ISD::ADD) 00785 return false; 00786 SDValue LHS = N.getOperand(0); 00787 SDValue RHS = N.getOperand(1); 00788 00789 // We don't want to match immediate adds here, because they are better lowered 00790 // to the register-immediate addressing modes. 00791 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) 00792 return false; 00793 00794 // Check if this particular node is reused in any non-memory related 00795 // operation. If yes, do not try to fold this node into the address 00796 // computation, since the computation will be kept. 00797 const SDNode *Node = N.getNode(); 00798 for (SDNode *UI : Node->uses()) { 00799 if (!isa<MemSDNode>(*UI)) 00800 return false; 00801 } 00802 00803 // Remember if it is worth folding N when it produces extended register. 00804 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 00805 00806 // Try to match a shifted extend on the RHS. 00807 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 00808 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { 00809 Base = LHS; 00810 DoShift = CurDAG->getTargetConstant(true, MVT::i32); 00811 return true; 00812 } 00813 00814 // Try to match a shifted extend on the LHS. 00815 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 00816 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { 00817 Base = RHS; 00818 DoShift = CurDAG->getTargetConstant(true, MVT::i32); 00819 return true; 00820 } 00821 00822 // Match any non-shifted, non-extend, non-immediate add expression. 00823 Base = LHS; 00824 Offset = RHS; 00825 SignExtend = CurDAG->getTargetConstant(false, MVT::i32); 00826 DoShift = CurDAG->getTargetConstant(false, MVT::i32); 00827 // Reg1 + Reg2 is free: no check needed. 
00828 return true; 00829 } 00830 00831 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { 00832 static unsigned RegClassIDs[] = { 00833 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; 00834 static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1, 00835 AArch64::dsub2, AArch64::dsub3 }; 00836 00837 return createTuple(Regs, RegClassIDs, SubRegs); 00838 } 00839 00840 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { 00841 static unsigned RegClassIDs[] = { 00842 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; 00843 static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1, 00844 AArch64::qsub2, AArch64::qsub3 }; 00845 00846 return createTuple(Regs, RegClassIDs, SubRegs); 00847 } 00848 00849 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, 00850 unsigned RegClassIDs[], 00851 unsigned SubRegs[]) { 00852 // There's no special register-class for a vector-list of 1 element: it's just 00853 // a vector. 00854 if (Regs.size() == 1) 00855 return Regs[0]; 00856 00857 assert(Regs.size() >= 2 && Regs.size() <= 4); 00858 00859 SDLoc DL(Regs[0].getNode()); 00860 00861 SmallVector<SDValue, 4> Ops; 00862 00863 // First operand of REG_SEQUENCE is the desired RegClass. 00864 Ops.push_back( 00865 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32)); 00866 00867 // Then we get pairs of source & subregister-position for the components. 00868 for (unsigned i = 0; i < Regs.size(); ++i) { 00869 Ops.push_back(Regs[i]); 00870 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32)); 00871 } 00872 00873 SDNode *N = 00874 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 00875 return SDValue(N, 0); 00876 } 00877 00878 SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, 00879 unsigned Opc, bool isExt) { 00880 SDLoc dl(N); 00881 EVT VT = N->getValueType(0); 00882 00883 unsigned ExtOff = isExt; 00884 00885 // Form a REG_SEQUENCE to force register allocation. 00886 unsigned Vec0Off = ExtOff + 1; 00887 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, 00888 N->op_begin() + Vec0Off + NumVecs); 00889 SDValue RegSeq = createQTuple(Regs); 00890 00891 SmallVector<SDValue, 6> Ops; 00892 if (isExt) 00893 Ops.push_back(N->getOperand(1)); 00894 Ops.push_back(RegSeq); 00895 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); 00896 return CurDAG->getMachineNode(Opc, dl, VT, Ops); 00897 } 00898 00899 SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { 00900 LoadSDNode *LD = cast<LoadSDNode>(N); 00901 if (LD->isUnindexed()) 00902 return nullptr; 00903 EVT VT = LD->getMemoryVT(); 00904 EVT DstVT = N->getValueType(0); 00905 ISD::MemIndexedMode AM = LD->getAddressingMode(); 00906 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; 00907 00908 // We're not doing validity checking here. That was done when checking 00909 // if we should mark the load as indexed or not. We're just selecting 00910 // the right instruction. 00911 unsigned Opcode = 0; 00912 00913 ISD::LoadExtType ExtType = LD->getExtensionType(); 00914 bool InsertTo64 = false; 00915 if (VT == MVT::i64) 00916 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; 00917 else if (VT == MVT::i32) { 00918 if (ExtType == ISD::NON_EXTLOAD) 00919 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 00920 else if (ExtType == ISD::SEXTLOAD) 00921 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; 00922 else { 00923 Opcode = IsPre ? 
AArch64::LDRWpre : AArch64::LDRWpost; 00924 InsertTo64 = true; 00925 // The result of the load is only i32. It's the subreg_to_reg that makes 00926 // it into an i64. 00927 DstVT = MVT::i32; 00928 } 00929 } else if (VT == MVT::i16) { 00930 if (ExtType == ISD::SEXTLOAD) { 00931 if (DstVT == MVT::i64) 00932 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; 00933 else 00934 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; 00935 } else { 00936 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; 00937 InsertTo64 = DstVT == MVT::i64; 00938 // The result of the load is only i32. It's the subreg_to_reg that makes 00939 // it into an i64. 00940 DstVT = MVT::i32; 00941 } 00942 } else if (VT == MVT::i8) { 00943 if (ExtType == ISD::SEXTLOAD) { 00944 if (DstVT == MVT::i64) 00945 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; 00946 else 00947 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; 00948 } else { 00949 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; 00950 InsertTo64 = DstVT == MVT::i64; 00951 // The result of the load is only i32. It's the subreg_to_reg that makes 00952 // it into an i64. 00953 DstVT = MVT::i32; 00954 } 00955 } else if (VT == MVT::f32) { 00956 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; 00957 } else if (VT == MVT::f64 || VT.is64BitVector()) { 00958 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; 00959 } else if (VT.is128BitVector()) { 00960 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; 00961 } else 00962 return nullptr; 00963 SDValue Chain = LD->getChain(); 00964 SDValue Base = LD->getBasePtr(); 00965 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); 00966 int OffsetVal = (int)OffsetOp->getZExtValue(); 00967 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64); 00968 SDValue Ops[] = { Base, Offset, Chain }; 00969 SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT, 00970 MVT::Other, Ops); 00971 // Either way, we're replacing the node, so tell the caller that. 
00972 Done = true; 00973 SDValue LoadedVal = SDValue(Res, 1); 00974 if (InsertTo64) { 00975 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); 00976 LoadedVal = 00977 SDValue(CurDAG->getMachineNode( 00978 AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64, 00979 CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg), 00980 0); 00981 } 00982 00983 ReplaceUses(SDValue(N, 0), LoadedVal); 00984 ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); 00985 ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); 00986 00987 return nullptr; 00988 } 00989 00990 SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, 00991 unsigned Opc, unsigned SubRegIdx) { 00992 SDLoc dl(N); 00993 EVT VT = N->getValueType(0); 00994 SDValue Chain = N->getOperand(0); 00995 00996 SmallVector<SDValue, 6> Ops; 00997 Ops.push_back(N->getOperand(2)); // Mem operand; 00998 Ops.push_back(Chain); 00999 01000 std::vector<EVT> ResTys; 01001 ResTys.push_back(MVT::Untyped); 01002 ResTys.push_back(MVT::Other); 01003 01004 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 01005 SDValue SuperReg = SDValue(Ld, 0); 01006 for (unsigned i = 0; i < NumVecs; ++i) 01007 ReplaceUses(SDValue(N, i), 01008 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 01009 01010 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 01011 return nullptr; 01012 } 01013 01014 SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, 01015 unsigned Opc, unsigned SubRegIdx) { 01016 SDLoc dl(N); 01017 EVT VT = N->getValueType(0); 01018 SDValue Chain = N->getOperand(0); 01019 01020 SmallVector<SDValue, 6> Ops; 01021 Ops.push_back(N->getOperand(1)); // Mem operand 01022 Ops.push_back(N->getOperand(2)); // Incremental 01023 Ops.push_back(Chain); 01024 01025 std::vector<EVT> ResTys; 01026 ResTys.push_back(MVT::i64); // Type of the write back register 01027 ResTys.push_back(MVT::Untyped); 01028 ResTys.push_back(MVT::Other); 01029 01030 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 01031 01032 // Update uses of write back register 01033 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 01034 01035 // Update uses of vector list 01036 SDValue SuperReg = SDValue(Ld, 1); 01037 if (NumVecs == 1) 01038 ReplaceUses(SDValue(N, 0), SuperReg); 01039 else 01040 for (unsigned i = 0; i < NumVecs; ++i) 01041 ReplaceUses(SDValue(N, i), 01042 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 01043 01044 // Update the chain 01045 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 01046 return nullptr; 01047 } 01048 01049 SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, 01050 unsigned Opc) { 01051 SDLoc dl(N); 01052 EVT VT = N->getOperand(2)->getValueType(0); 01053 01054 // Form a REG_SEQUENCE to force register allocation. 01055 bool Is128Bit = VT.getSizeInBits() == 128; 01056 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 01057 SDValue RegSeq = Is128Bit ? 
createQTuple(Regs) : createDTuple(Regs); 01058 01059 SmallVector<SDValue, 6> Ops; 01060 Ops.push_back(RegSeq); 01061 Ops.push_back(N->getOperand(NumVecs + 2)); 01062 Ops.push_back(N->getOperand(0)); 01063 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 01064 01065 return St; 01066 } 01067 01068 SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 01069 unsigned Opc) { 01070 SDLoc dl(N); 01071 EVT VT = N->getOperand(2)->getValueType(0); 01072 SmallVector<EVT, 2> ResTys; 01073 ResTys.push_back(MVT::i64); // Type of the write back register 01074 ResTys.push_back(MVT::Other); // Type for the Chain 01075 01076 // Form a REG_SEQUENCE to force register allocation. 01077 bool Is128Bit = VT.getSizeInBits() == 128; 01078 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 01079 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 01080 01081 SmallVector<SDValue, 6> Ops; 01082 Ops.push_back(RegSeq); 01083 Ops.push_back(N->getOperand(NumVecs + 1)); // base register 01084 Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental 01085 Ops.push_back(N->getOperand(0)); // Chain 01086 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 01087 01088 return St; 01089 } 01090 01091 /// WidenVector - Given a value in the V64 register class, produce the 01092 /// equivalent value in the V128 register class. 01093 class WidenVector { 01094 SelectionDAG &DAG; 01095 01096 public: 01097 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 01098 01099 SDValue operator()(SDValue V64Reg) { 01100 EVT VT = V64Reg.getValueType(); 01101 unsigned NarrowSize = VT.getVectorNumElements(); 01102 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 01103 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 01104 SDLoc DL(V64Reg); 01105 01106 SDValue Undef = 01107 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 01108 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 01109 } 01110 }; 01111 01112 /// NarrowVector - Given a value in the V128 register class, produce the 01113 /// equivalent value in the V64 register class. 01114 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 01115 EVT VT = V128Reg.getValueType(); 01116 unsigned WideSize = VT.getVectorNumElements(); 01117 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 01118 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 01119 01120 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 01121 V128Reg); 01122 } 01123 01124 SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 01125 unsigned Opc) { 01126 SDLoc dl(N); 01127 EVT VT = N->getValueType(0); 01128 bool Narrow = VT.getSizeInBits() == 64; 01129 01130 // Form a REG_SEQUENCE to force register allocation. 
01131 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 01132 01133 if (Narrow) 01134 std::transform(Regs.begin(), Regs.end(), Regs.begin(), 01135 WidenVector(*CurDAG)); 01136 01137 SDValue RegSeq = createQTuple(Regs); 01138 01139 std::vector<EVT> ResTys; 01140 ResTys.push_back(MVT::Untyped); 01141 ResTys.push_back(MVT::Other); 01142 01143 unsigned LaneNo = 01144 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 01145 01146 SmallVector<SDValue, 6> Ops; 01147 Ops.push_back(RegSeq); 01148 Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); 01149 Ops.push_back(N->getOperand(NumVecs + 3)); 01150 Ops.push_back(N->getOperand(0)); 01151 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 01152 SDValue SuperReg = SDValue(Ld, 0); 01153 01154 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 01155 static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, 01156 AArch64::qsub3 }; 01157 for (unsigned i = 0; i < NumVecs; ++i) { 01158 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 01159 if (Narrow) 01160 NV = NarrowVector(NV, *CurDAG); 01161 ReplaceUses(SDValue(N, i), NV); 01162 } 01163 01164 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 01165 01166 return Ld; 01167 } 01168 01169 SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 01170 unsigned Opc) { 01171 SDLoc dl(N); 01172 EVT VT = N->getValueType(0); 01173 bool Narrow = VT.getSizeInBits() == 64; 01174 01175 // Form a REG_SEQUENCE to force register allocation. 01176 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 01177 01178 if (Narrow) 01179 std::transform(Regs.begin(), Regs.end(), Regs.begin(), 01180 WidenVector(*CurDAG)); 01181 01182 SDValue RegSeq = createQTuple(Regs); 01183 01184 std::vector<EVT> ResTys; 01185 ResTys.push_back(MVT::i64); // Type of the write back register 01186 ResTys.push_back(MVT::Untyped); 01187 ResTys.push_back(MVT::Other); 01188 01189 unsigned LaneNo = 01190 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 01191 01192 SmallVector<SDValue, 6> Ops; 01193 Ops.push_back(RegSeq); 01194 Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number 01195 Ops.push_back(N->getOperand(NumVecs + 2)); // Base register 01196 Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental 01197 Ops.push_back(N->getOperand(0)); 01198 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 01199 01200 // Update uses of the write back register 01201 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 01202 01203 // Update uses of the vector list 01204 SDValue SuperReg = SDValue(Ld, 1); 01205 if (NumVecs == 1) { 01206 ReplaceUses(SDValue(N, 0), 01207 Narrow ? 
NarrowVector(SuperReg, *CurDAG) : SuperReg); 01208 } else { 01209 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 01210 static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, 01211 AArch64::qsub3 }; 01212 for (unsigned i = 0; i < NumVecs; ++i) { 01213 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 01214 SuperReg); 01215 if (Narrow) 01216 NV = NarrowVector(NV, *CurDAG); 01217 ReplaceUses(SDValue(N, i), NV); 01218 } 01219 } 01220 01221 // Update the Chain 01222 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 01223 01224 return Ld; 01225 } 01226 01227 SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 01228 unsigned Opc) { 01229 SDLoc dl(N); 01230 EVT VT = N->getOperand(2)->getValueType(0); 01231 bool Narrow = VT.getSizeInBits() == 64; 01232 01233 // Form a REG_SEQUENCE to force register allocation. 01234 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 01235 01236 if (Narrow) 01237 std::transform(Regs.begin(), Regs.end(), Regs.begin(), 01238 WidenVector(*CurDAG)); 01239 01240 SDValue RegSeq = createQTuple(Regs); 01241 01242 unsigned LaneNo = 01243 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 01244 01245 SmallVector<SDValue, 6> Ops; 01246 Ops.push_back(RegSeq); 01247 Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); 01248 Ops.push_back(N->getOperand(NumVecs + 3)); 01249 Ops.push_back(N->getOperand(0)); 01250 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 01251 01252 // Transfer memoperands. 01253 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 01254 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 01255 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); 01256 01257 return St; 01258 } 01259 01260 SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 01261 unsigned Opc) { 01262 SDLoc dl(N); 01263 EVT VT = N->getOperand(2)->getValueType(0); 01264 bool Narrow = VT.getSizeInBits() == 64; 01265 01266 // Form a REG_SEQUENCE to force register allocation. 01267 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 01268 01269 if (Narrow) 01270 std::transform(Regs.begin(), Regs.end(), Regs.begin(), 01271 WidenVector(*CurDAG)); 01272 01273 SDValue RegSeq = createQTuple(Regs); 01274 01275 SmallVector<EVT, 2> ResTys; 01276 ResTys.push_back(MVT::i64); // Type of the write back register 01277 ResTys.push_back(MVT::Other); 01278 01279 unsigned LaneNo = 01280 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 01281 01282 SmallVector<SDValue, 6> Ops; 01283 Ops.push_back(RegSeq); 01284 Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); 01285 Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register 01286 Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental 01287 Ops.push_back(N->getOperand(0)); 01288 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 01289 01290 // Transfer memoperands. 
01291 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 01292 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 01293 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); 01294 01295 return St; 01296 } 01297 01298 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 01299 unsigned &Opc, SDValue &Opd0, 01300 unsigned &LSB, unsigned &MSB, 01301 unsigned NumberOfIgnoredLowBits, 01302 bool BiggerPattern) { 01303 assert(N->getOpcode() == ISD::AND && 01304 "N must be a AND operation to call this function"); 01305 01306 EVT VT = N->getValueType(0); 01307 01308 // Here we can test the type of VT and return false when the type does not 01309 // match, but since it is done prior to that call in the current context 01310 // we turned that into an assert to avoid redundant code. 01311 assert((VT == MVT::i32 || VT == MVT::i64) && 01312 "Type checking must have been done before calling this function"); 01313 01314 // FIXME: simplify-demanded-bits in DAGCombine will probably have 01315 // changed the AND node to a 32-bit mask operation. We'll have to 01316 // undo that as part of the transform here if we want to catch all 01317 // the opportunities. 01318 // Currently the NumberOfIgnoredLowBits argument helps to recover 01319 // form these situations when matching bigger pattern (bitfield insert). 01320 01321 // For unsigned extracts, check for a shift right and mask 01322 uint64_t And_imm = 0; 01323 if (!isOpcWithIntImmediate(N, ISD::AND, And_imm)) 01324 return false; 01325 01326 const SDNode *Op0 = N->getOperand(0).getNode(); 01327 01328 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 01329 // simplified. Try to undo that 01330 And_imm |= (1 << NumberOfIgnoredLowBits) - 1; 01331 01332 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 01333 if (And_imm & (And_imm + 1)) 01334 return false; 01335 01336 bool ClampMSB = false; 01337 uint64_t Srl_imm = 0; 01338 // Handle the SRL + ANY_EXTEND case. 01339 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 01340 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) { 01341 // Extend the incoming operand of the SRL to 64-bit. 01342 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 01343 // Make sure to clamp the MSB so that we preserve the semantics of the 01344 // original operations. 01345 ClampMSB = true; 01346 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 01347 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 01348 Srl_imm)) { 01349 // If the shift result was truncated, we can still combine them. 01350 Opd0 = Op0->getOperand(0).getOperand(0); 01351 01352 // Use the type of SRL node. 01353 VT = Opd0->getValueType(0); 01354 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) { 01355 Opd0 = Op0->getOperand(0); 01356 } else if (BiggerPattern) { 01357 // Let's pretend a 0 shift right has been performed. 01358 // The resulting code will be at least as good as the original one 01359 // plus it may expose more opportunities for bitfield insert pattern. 01360 // FIXME: Currently we limit this to the bigger pattern, because 01361 // some optimizations expect AND and not UBFM 01362 Opd0 = N->getOperand(0); 01363 } else 01364 return false; 01365 01366 assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) && 01367 "bad amount in shift node!"); 01368 01369 LSB = Srl_imm; 01370 MSB = Srl_imm + (VT == MVT::i32 ? 
CountTrailingOnes_32(And_imm) 01371 : CountTrailingOnes_64(And_imm)) - 01372 1; 01373 if (ClampMSB) 01374 // Since we're moving the extend before the right shift operation, we need 01375 // to clamp the MSB to make sure we don't shift in undefined bits instead of 01376 // the zeros which would get shifted in with the original right shift 01377 // operation. 01378 MSB = MSB > 31 ? 31 : MSB; 01379 01380 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 01381 return true; 01382 } 01383 01384 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 01385 SDValue &Opd0, unsigned &LSB, 01386 unsigned &MSB) { 01387 // We are looking for the following pattern which basically extracts several 01388 // continuous bits from the source value and places it from the LSB of the 01389 // destination value, all other bits of the destination value or set to zero: 01390 // 01391 // Value2 = AND Value, MaskImm 01392 // SRL Value2, ShiftImm 01393 // 01394 // with MaskImm >> ShiftImm to search for the bit width. 01395 // 01396 // This gets selected into a single UBFM: 01397 // 01398 // UBFM Value, ShiftImm, BitWide + Srl_imm -1 01399 // 01400 01401 if (N->getOpcode() != ISD::SRL) 01402 return false; 01403 01404 uint64_t And_mask = 0; 01405 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask)) 01406 return false; 01407 01408 Opd0 = N->getOperand(0).getOperand(0); 01409 01410 uint64_t Srl_imm = 0; 01411 if (!isIntImmediate(N->getOperand(1), Srl_imm)) 01412 return false; 01413 01414 // Check whether we really have several bits extract here. 01415 unsigned BitWide = 64 - CountLeadingOnes_64(~(And_mask >> Srl_imm)); 01416 if (BitWide && isMask_64(And_mask >> Srl_imm)) { 01417 if (N->getValueType(0) == MVT::i32) 01418 Opc = AArch64::UBFMWri; 01419 else 01420 Opc = AArch64::UBFMXri; 01421 01422 LSB = Srl_imm; 01423 MSB = BitWide + Srl_imm - 1; 01424 return true; 01425 } 01426 01427 return false; 01428 } 01429 01430 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 01431 unsigned &LSB, unsigned &MSB, 01432 bool BiggerPattern) { 01433 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 01434 "N must be a SHR/SRA operation to call this function"); 01435 01436 EVT VT = N->getValueType(0); 01437 01438 // Here we can test the type of VT and return false when the type does not 01439 // match, but since it is done prior to that call in the current context 01440 // we turned that into an assert to avoid redundant code. 01441 assert((VT == MVT::i32 || VT == MVT::i64) && 01442 "Type checking must have been done before calling this function"); 01443 01444 // Check for AND + SRL doing several bits extract. 01445 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB, MSB)) 01446 return true; 01447 01448 // we're looking for a shift of a shift 01449 uint64_t Shl_imm = 0; 01450 uint64_t Trunc_bits = 0; 01451 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 01452 Opd0 = N->getOperand(0).getOperand(0); 01453 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 01454 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 01455 // We are looking for a shift of truncate. Truncate from i64 to i32 could 01456 // be considered as setting high 32 bits as zero. Our strategy here is to 01457 // always generate 64bit UBFM. This consistency will help the CSE pass 01458 // later find more redundancy. 
01459 Opd0 = N->getOperand(0).getOperand(0); 01460 Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 01461 VT = Opd0->getValueType(0); 01462 assert(VT == MVT::i64 && "the promoted type should be i64"); 01463 } else if (BiggerPattern) { 01464 // Let's pretend a 0 shift left has been performed. 01465 // FIXME: Currently we limit this to the bigger pattern case, 01466 // because some optimizations expect AND and not UBFM 01467 Opd0 = N->getOperand(0); 01468 } else 01469 return false; 01470 01471 assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!"); 01472 uint64_t Srl_imm = 0; 01473 if (!isIntImmediate(N->getOperand(1), Srl_imm)) 01474 return false; 01475 01476 assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && 01477 "bad amount in shift node!"); 01478 // Note: The width operand is encoded as width-1. 01479 unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1; 01480 int sLSB = Srl_imm - Shl_imm; 01481 if (sLSB < 0) 01482 return false; 01483 LSB = sLSB; 01484 MSB = LSB + Width; 01485 // SRA requires a signed extraction 01486 if (VT == MVT::i32) 01487 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 01488 else 01489 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; 01490 return true; 01491 } 01492 01493 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 01494 SDValue &Opd0, unsigned &LSB, unsigned &MSB, 01495 unsigned NumberOfIgnoredLowBits = 0, 01496 bool BiggerPattern = false) { 01497 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 01498 return false; 01499 01500 switch (N->getOpcode()) { 01501 default: 01502 if (!N->isMachineOpcode()) 01503 return false; 01504 break; 01505 case ISD::AND: 01506 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB, 01507 NumberOfIgnoredLowBits, BiggerPattern); 01508 case ISD::SRL: 01509 case ISD::SRA: 01510 return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern); 01511 } 01512 01513 unsigned NOpc = N->getMachineOpcode(); 01514 switch (NOpc) { 01515 default: 01516 return false; 01517 case AArch64::SBFMWri: 01518 case AArch64::UBFMWri: 01519 case AArch64::SBFMXri: 01520 case AArch64::UBFMXri: 01521 Opc = NOpc; 01522 Opd0 = N->getOperand(0); 01523 LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 01524 MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 01525 return true; 01526 } 01527 // Unreachable 01528 return false; 01529 } 01530 01531 SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { 01532 unsigned Opc, LSB, MSB; 01533 SDValue Opd0; 01534 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB)) 01535 return nullptr; 01536 01537 EVT VT = N->getValueType(0); 01538 01539 // If the bit extract operation is 64bit but the original type is 32bit, we 01540 // need to add one EXTRACT_SUBREG. 
01541 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { 01542 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64), 01543 CurDAG->getTargetConstant(MSB, MVT::i64)}; 01544 01545 SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64); 01546 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); 01547 MachineSDNode *Node = 01548 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, 01549 SDValue(BFM, 0), SubReg); 01550 return Node; 01551 } 01552 01553 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT), 01554 CurDAG->getTargetConstant(MSB, VT)}; 01555 return CurDAG->SelectNodeTo(N, Opc, VT, Ops); 01556 } 01557 01558 /// Does DstMask form a complementary pair with the mask provided by 01559 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking, 01560 /// this asks whether DstMask zeroes precisely those bits that will be set by 01561 /// the other half. 01562 static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted, 01563 unsigned NumberOfIgnoredHighBits, EVT VT) { 01564 assert((VT == MVT::i32 || VT == MVT::i64) && 01565 "i32 or i64 mask type expected!"); 01566 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; 01567 01568 APInt SignificantDstMask = APInt(BitWidth, DstMask); 01569 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); 01570 01571 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && 01572 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue(); 01573 } 01574 01575 // Look for bits that will be useful for later uses. 01576 // A bit is considered useless as soon as it is dropped, if it was never used 01577 // before being dropped. 01578 // E.g., looking for the useful bits of x: 01579 // 1. y = x & 0x7 01580 // 2. z = y >> 2 01581 // After #1, the useful bits of x are 0x7; those useful bits then live through 01582 // y. 01583 // After #2, the useful bits of x are 0x4. 01584 // However, if x is used by an unpredictable instruction, then all its bits 01585 // are useful. 01586 // E.g. 01587 // 1. y = x & 0x7 01588 // 2. z = y >> 2 01589 // 3.
str x, [@x] 01590 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); 01591 01592 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, 01593 unsigned Depth) { 01594 uint64_t Imm = 01595 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 01596 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); 01597 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); 01598 getUsefulBits(Op, UsefulBits, Depth + 1); 01599 } 01600 01601 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, 01602 uint64_t Imm, uint64_t MSB, 01603 unsigned Depth) { 01604 // inherit the bitwidth value 01605 APInt OpUsefulBits(UsefulBits); 01606 OpUsefulBits = 1; 01607 01608 if (MSB >= Imm) { 01609 OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); 01610 --OpUsefulBits; 01611 // The interesting part will be in the lower part of the result 01612 getUsefulBits(Op, OpUsefulBits, Depth + 1); 01613 // The interesting part was starting at Imm in the argument 01614 OpUsefulBits = OpUsefulBits.shl(Imm); 01615 } else { 01616 OpUsefulBits = OpUsefulBits.shl(MSB + 1); 01617 --OpUsefulBits; 01618 // The interesting part will be shifted in the result 01619 OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); 01620 getUsefulBits(Op, OpUsefulBits, Depth + 1); 01621 // The interesting part was at zero in the argument 01622 OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); 01623 } 01624 01625 UsefulBits &= OpUsefulBits; 01626 } 01627 01628 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, 01629 unsigned Depth) { 01630 uint64_t Imm = 01631 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 01632 uint64_t MSB = 01633 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 01634 01635 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 01636 } 01637 01638 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, 01639 unsigned Depth) { 01640 uint64_t ShiftTypeAndValue = 01641 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 01642 APInt Mask(UsefulBits); 01643 Mask.clearAllBits(); 01644 Mask.flipAllBits(); 01645 01646 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { 01647 // Shift Left 01648 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 01649 Mask = Mask.shl(ShiftAmt); 01650 getUsefulBits(Op, Mask, Depth + 1); 01651 Mask = Mask.lshr(ShiftAmt); 01652 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 01653 // Shift Right 01654 // We do not handle AArch64_AM::ASR, because the sign will change the 01655 // number of useful bits 01656 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 01657 Mask = Mask.lshr(ShiftAmt); 01658 getUsefulBits(Op, Mask, Depth + 1); 01659 Mask = Mask.shl(ShiftAmt); 01660 } else 01661 return; 01662 01663 UsefulBits &= Mask; 01664 } 01665 01666 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 01667 unsigned Depth) { 01668 uint64_t Imm = 01669 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 01670 uint64_t MSB = 01671 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 01672 01673 if (Op.getOperand(1) == Orig) 01674 return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 01675 01676 APInt OpUsefulBits(UsefulBits); 01677 OpUsefulBits = 1; 01678 01679 if (MSB >= Imm) { 01680 OpUsefulBits = OpUsefulBits.shl(MSB - Imm 
+ 1); 01681 --OpUsefulBits; 01682 UsefulBits &= ~OpUsefulBits; 01683 getUsefulBits(Op, UsefulBits, Depth + 1); 01684 } else { 01685 OpUsefulBits = OpUsefulBits.shl(MSB + 1); 01686 --OpUsefulBits; 01687 UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm)); 01688 getUsefulBits(Op, UsefulBits, Depth + 1); 01689 } 01690 } 01691 01692 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 01693 SDValue Orig, unsigned Depth) { 01694 01695 // Users of this node should have already been instruction selected 01696 // FIXME: Can we turn that into an assert? 01697 if (!UserNode->isMachineOpcode()) 01698 return; 01699 01700 switch (UserNode->getMachineOpcode()) { 01701 default: 01702 return; 01703 case AArch64::ANDSWri: 01704 case AArch64::ANDSXri: 01705 case AArch64::ANDWri: 01706 case AArch64::ANDXri: 01707 // We increment Depth only when we call the getUsefulBits 01708 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 01709 Depth); 01710 case AArch64::UBFMWri: 01711 case AArch64::UBFMXri: 01712 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 01713 01714 case AArch64::ORRWrs: 01715 case AArch64::ORRXrs: 01716 if (UserNode->getOperand(1) != Orig) 01717 return; 01718 return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 01719 Depth); 01720 case AArch64::BFMWri: 01721 case AArch64::BFMXri: 01722 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 01723 } 01724 } 01725 01726 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 01727 if (Depth >= 6) 01728 return; 01729 // Initialize UsefulBits 01730 if (!Depth) { 01731 unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits(); 01732 // At the beginning, assume every produced bits is useful 01733 UsefulBits = APInt(Bitwidth, 0); 01734 UsefulBits.flipAllBits(); 01735 } 01736 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 01737 01738 for (SDNode *Node : Op.getNode()->uses()) { 01739 // A use cannot produce useful bits 01740 APInt UsefulBitsForUse = APInt(UsefulBits); 01741 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 01742 UsersUsefulBits |= UsefulBitsForUse; 01743 } 01744 // UsefulBits contains the produced bits that are meaningful for the 01745 // current definition, thus a user cannot make a bit meaningful at 01746 // this point 01747 UsefulBits &= UsersUsefulBits; 01748 } 01749 01750 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 01751 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 01752 /// 0, return Op unchanged. 01753 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 01754 if (ShlAmount == 0) 01755 return Op; 01756 01757 EVT VT = Op.getValueType(); 01758 unsigned BitWidth = VT.getSizeInBits(); 01759 unsigned UBFMOpc = BitWidth == 32 ? 
AArch64::UBFMWri : AArch64::UBFMXri; 01760 01761 SDNode *ShiftNode; 01762 if (ShlAmount > 0) { 01763 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 01764 ShiftNode = CurDAG->getMachineNode( 01765 UBFMOpc, SDLoc(Op), VT, Op, 01766 CurDAG->getTargetConstant(BitWidth - ShlAmount, VT), 01767 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT)); 01768 } else { 01769 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 01770 assert(ShlAmount < 0 && "expected right shift"); 01771 int ShrAmount = -ShlAmount; 01772 ShiftNode = CurDAG->getMachineNode( 01773 UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT), 01774 CurDAG->getTargetConstant(BitWidth - 1, VT)); 01775 } 01776 01777 return SDValue(ShiftNode, 0); 01778 } 01779 01780 /// Does this tree qualify as an attempt to move a bitfield into position, 01781 /// essentially "(and (shl VAL, N), Mask)"? 01782 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 01783 SDValue &Src, int &ShiftAmount, 01784 int &MaskWidth) { 01785 EVT VT = Op.getValueType(); 01786 unsigned BitWidth = VT.getSizeInBits(); 01787 (void)BitWidth; 01788 assert(BitWidth == 32 || BitWidth == 64); 01789 01790 APInt KnownZero, KnownOne; 01791 CurDAG->computeKnownBits(Op, KnownZero, KnownOne); 01792 01793 // Non-zero in the sense that they're not provably zero, which is the key 01794 // point if we want to use this value. 01795 uint64_t NonZeroBits = (~KnownZero).getZExtValue(); 01796 01797 // Discard a constant AND mask if present. It's safe because the node will 01798 // already have been factored into the computeKnownBits calculation above. 01799 uint64_t AndImm; 01800 if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { 01801 assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0); 01802 Op = Op.getOperand(0); 01803 } 01804 01805 uint64_t ShlImm; 01806 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 01807 return false; 01808 Op = Op.getOperand(0); 01809 01810 if (!isShiftedMask_64(NonZeroBits)) 01811 return false; 01812 01813 ShiftAmount = countTrailingZeros(NonZeroBits); 01814 MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount); 01815 01816 // BFI encompasses sufficiently many nodes that it's worth inserting an extra 01817 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL 01818 // amount. 01819 Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); 01820 01821 return true; 01822 } 01823 01824 // Given an OR operation, check whether we have the following pattern: 01825 // ubfm c, b, imm, imm2 (or something that does the same job, see 01826 // isBitfieldExtractOp) 01827 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and 01828 // countTrailingZeros(mask2) == imm2 - imm + 1 01829 // f = d | c 01830 // If so, the given reference arguments will be updated so that one can replace 01831 // the OR instruction with: 01832 // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2 01833 static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, 01834 SDValue &Src, unsigned &ImmR, 01835 unsigned &ImmS, SelectionDAG *CurDAG) { 01836 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 01837 01838 // Set Opc 01839 EVT VT = N->getValueType(0); 01840 if (VT == MVT::i32) 01841 Opc = AArch64::BFMWri; 01842 else if (VT == MVT::i64) 01843 Opc = AArch64::BFMXri; 01844 else 01845 return false; 01846 01847 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 01848 // have the expected shape. Try to undo that.
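// [Editorial note, hypothetical example with made-up constants: a DAG of the
// form
//   t1: i32 = and t0, 0xFFFFFF0F      ; insertee, bits [4,7] cleared
//   t2: i32 = shl tSrc, 4
//   t3: i32 = and t2, 0xF0            ; 4 source bits moved to position [4,7]
//   t4: i32 = or t1, t3
// is the kind of pattern this routine tries to rewrite as a single
//   BFMWri t0, tSrc, 28, 3            ; i.e. BFI w0, wSrc, #4, #4
// once the masks have been recovered via getUsefulBits/computeKnownBits.]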
01849 APInt UsefulBits; 01850 getUsefulBits(SDValue(N, 0), UsefulBits); 01851 01852 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); 01853 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); 01854 01855 // OR is commutative; check both possibilities (does llvm provide a 01856 // way to do that directly, e.g., via a code matcher?) 01857 SDValue OrOpd1Val = N->getOperand(1); 01858 SDNode *OrOpd0 = N->getOperand(0).getNode(); 01859 SDNode *OrOpd1 = N->getOperand(1).getNode(); 01860 for (int i = 0; i < 2; 01861 ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) { 01862 unsigned BFXOpc; 01863 int DstLSB, Width; 01864 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, 01865 NumberOfIgnoredLowBits, true)) { 01866 // Check that the returned opcode is compatible with the pattern, 01867 // i.e., same type and zero extended (U and not S). 01868 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || 01869 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) 01870 continue; 01871 01872 // Compute the width of the bitfield insertion. 01873 DstLSB = 0; 01874 Width = ImmS - ImmR + 1; 01875 // FIXME: This constraint is to catch bitfield insertion; we may 01876 // want to widen the pattern if we want to grab the general bitfield 01877 // move case. 01878 if (Width <= 0) 01879 continue; 01880 01881 // If the mask on the insertee is correct, we have a BFXIL operation. We 01882 // can share the ImmR and ImmS values from the already-computed UBFM. 01883 } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src, 01884 DstLSB, Width)) { 01885 ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 01886 ImmS = Width - 1; 01887 } else 01888 continue; 01889 01890 // Check the second part of the pattern. 01891 EVT VT = OrOpd1->getValueType(0); 01892 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); 01893 01894 // Compute the known-zero bits for the other operand of the OR (the insertee). 01895 // This allows us to catch more general cases than just looking for 01896 // an AND with an immediate. Indeed, simplify-demanded-bits may have removed 01897 // the AND instruction because it proved it was useless.
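// [Editorial note, continuing the hypothetical example above: with DstLSB = 4
// and Width = 4, BitsToBeInserted is the mask 0x000000F0. The insertion is only
// valid if computeKnownBits proves those four bits to be zero in the other OR
// operand, whether or not an explicit AND mask is still present in the DAG.]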
01898 APInt KnownZero, KnownOne; 01899 CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne); 01900 01901 // Check if there is enough room for the second operand to appear 01902 // in the first one 01903 APInt BitsToBeInserted = 01904 APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width); 01905 01906 if ((BitsToBeInserted & ~KnownZero) != 0) 01907 continue; 01908 01909 // Set the first operand 01910 uint64_t Imm; 01911 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 01912 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 01913 // In that case, we can eliminate the AND 01914 Dst = OrOpd1->getOperand(0); 01915 else 01916 // Maybe the AND has been removed by simplify-demanded-bits 01917 // or is useful because it discards more bits 01918 Dst = OrOpd1Val; 01919 01920 // both parts match 01921 return true; 01922 } 01923 01924 return false; 01925 } 01926 01927 SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { 01928 if (N->getOpcode() != ISD::OR) 01929 return nullptr; 01930 01931 unsigned Opc; 01932 unsigned LSB, MSB; 01933 SDValue Opd0, Opd1; 01934 01935 if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG)) 01936 return nullptr; 01937 01938 EVT VT = N->getValueType(0); 01939 SDValue Ops[] = { Opd0, 01940 Opd1, 01941 CurDAG->getTargetConstant(LSB, VT), 01942 CurDAG->getTargetConstant(MSB, VT) }; 01943 return CurDAG->SelectNodeTo(N, Opc, VT, Ops); 01944 } 01945 01946 SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) { 01947 EVT VT = N->getValueType(0); 01948 unsigned Variant; 01949 unsigned Opc; 01950 unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr }; 01951 01952 if (VT == MVT::f32) { 01953 Variant = 0; 01954 } else if (VT == MVT::f64) { 01955 Variant = 1; 01956 } else 01957 return nullptr; // Unrecognized argument type. Fall back on default codegen. 01958 01959 // Pick the FRINTX variant needed to set the flags. 01960 unsigned FRINTXOpc = FRINTXOpcs[Variant]; 01961 01962 switch (N->getOpcode()) { 01963 default: 01964 return nullptr; // Unrecognized libm ISD node. Fall back on default codegen. 01965 case ISD::FCEIL: { 01966 unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr }; 01967 Opc = FRINTPOpcs[Variant]; 01968 break; 01969 } 01970 case ISD::FFLOOR: { 01971 unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr }; 01972 Opc = FRINTMOpcs[Variant]; 01973 break; 01974 } 01975 case ISD::FTRUNC: { 01976 unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr }; 01977 Opc = FRINTZOpcs[Variant]; 01978 break; 01979 } 01980 case ISD::FROUND: { 01981 unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr }; 01982 Opc = FRINTAOpcs[Variant]; 01983 break; 01984 } 01985 } 01986 01987 SDLoc dl(N); 01988 SDValue In = N->getOperand(0); 01989 SmallVector<SDValue, 2> Ops; 01990 Ops.push_back(In); 01991 01992 if (!TM.Options.UnsafeFPMath) { 01993 SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In); 01994 Ops.push_back(SDValue(FRINTX, 1)); 01995 } 01996 01997 return CurDAG->getMachineNode(Opc, dl, VT, Ops); 01998 } 01999 02000 bool 02001 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 02002 unsigned RegWidth) { 02003 APFloat FVal(0.0); 02004 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 02005 FVal = CN->getValueAPF(); 02006 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 02007 // Some otherwise illegal constants are allowed in this case. 
02008 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || 02009 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) 02010 return false; 02011 02012 ConstantPoolSDNode *CN = 02013 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); 02014 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); 02015 } else 02016 return false; 02017 02018 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits 02019 // is between 1 and 32 for a destination w-register, or 1 and 64 for an 02020 // x-register. 02021 // 02022 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we 02023 // want THIS_NODE to be 2^fbits. This is much easier to deal with using 02024 // integers. 02025 bool IsExact; 02026 02027 // fbits is between 1 and 64 in the worst case, which means the fmul 02028 // could have 2^64 as an actual operand. Need 65 bits of precision. 02029 APSInt IntVal(65, true); 02030 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); 02031 02032 // N.b. isPowerOf2 also checks for > 0. 02033 if (!IsExact || !IntVal.isPowerOf2()) return false; 02034 unsigned FBits = IntVal.logBase2(); 02035 02036 // The checks above should have guaranteed that we haven't lost information in 02037 // finding FBits, but it must still be in range. 02038 if (FBits == 0 || FBits > RegWidth) return false; 02039 02040 FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32); 02041 return true; 02042 } 02043 02044 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { 02045 // Dump information about the Node being selected. 02046 DEBUG(errs() << "Selecting: "); 02047 DEBUG(Node->dump(CurDAG)); 02048 DEBUG(errs() << "\n"); 02049 02050 // If we have a machine node, it has already been selected! 02051 if (Node->isMachineOpcode()) { 02052 DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 02053 Node->setNodeId(-1); 02054 return nullptr; 02055 } 02056 02057 // A few cases need custom selection. 02058 SDNode *ResNode = nullptr; 02059 EVT VT = Node->getValueType(0); 02060 02061 switch (Node->getOpcode()) { 02062 default: 02063 break; 02064 02065 case ISD::ADD: 02066 if (SDNode *I = SelectMLAV64LaneV128(Node)) 02067 return I; 02068 break; 02069 02070 case ISD::LOAD: { 02071 // Try to select as an indexed load. Fall through to normal processing 02072 // if we can't. 02073 bool Done = false; 02074 SDNode *I = SelectIndexedLoad(Node, Done); 02075 if (Done) 02076 return I; 02077 break; 02078 } 02079 02080 case ISD::SRL: 02081 case ISD::AND: 02082 case ISD::SRA: 02083 if (SDNode *I = SelectBitfieldExtractOp(Node)) 02084 return I; 02085 break; 02086 02087 case ISD::OR: 02088 if (SDNode *I = SelectBitfieldInsertOp(Node)) 02089 return I; 02090 break; 02091 02092 case ISD::EXTRACT_VECTOR_ELT: { 02093 // Extracting lane zero is a special case where we can just use a plain 02094 // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for 02095 // the rest of the compiler, especially the register allocator and copy 02096 // propagation, to reason about, so it is preferred when it's possible to 02097 // use it. 02098 ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1)); 02099 // Bail and use the default Select() for non-zero lanes. 02100 if (LaneNode->getZExtValue() != 0) 02101 break; 02102 // If the element type is not the same as the result type, likewise 02103 // bail and use the default Select(), as there's more to do than just 02104 // a cross-class COPY. This catches extracts of i8 and i16 elements 02105 // since they will need an explicit zext.
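// [Editorial note, illustrative sketch: extracting lane zero of, say, a v2f64
// value
//   t1: f64 = extract_vector_elt t0:v2f64, 0
// falls through to the code below and becomes a plain
//   EXTRACT_SUBREG t0, dsub
// which later materializes as a subregister copy (or FMOV) that the register
// allocator and copy propagation can reason about easily.]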
02106 if (VT != Node->getOperand(0).getValueType().getVectorElementType()) 02107 break; 02108 unsigned SubReg; 02109 switch (Node->getOperand(0) 02110 .getValueType() 02111 .getVectorElementType() 02112 .getSizeInBits()) { 02113 default: 02114 llvm_unreachable("Unexpected vector element type!"); 02115 case 64: 02116 SubReg = AArch64::dsub; 02117 break; 02118 case 32: 02119 SubReg = AArch64::ssub; 02120 break; 02121 case 16: 02122 SubReg = AArch64::hsub; 02123 break; 02124 case 8: 02125 llvm_unreachable("unexpected zext-requiring extract element!"); 02126 } 02127 SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, 02128 Node->getOperand(0)); 02129 DEBUG(dbgs() << "ISEL: Custom selection!\n=> "); 02130 DEBUG(Extract->dumpr(CurDAG)); 02131 DEBUG(dbgs() << "\n"); 02132 return Extract.getNode(); 02133 } 02134 case ISD::Constant: { 02135 // Materialize zero constants as copies from WZR/XZR. This allows 02136 // the coalescer to propagate these into other instructions. 02137 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 02138 if (ConstNode->isNullValue()) { 02139 if (VT == MVT::i32) 02140 return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), 02141 AArch64::WZR, MVT::i32).getNode(); 02142 else if (VT == MVT::i64) 02143 return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), 02144 AArch64::XZR, MVT::i64).getNode(); 02145 } 02146 break; 02147 } 02148 02149 case ISD::FrameIndex: { 02150 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 02151 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 02152 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 02153 const TargetLowering *TLI = getTargetLowering(); 02154 SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); 02155 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), 02156 CurDAG->getTargetConstant(Shifter, MVT::i32) }; 02157 return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 02158 } 02159 case ISD::INTRINSIC_W_CHAIN: { 02160 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 02161 switch (IntNo) { 02162 default: 02163 break; 02164 case Intrinsic::aarch64_ldaxp: 02165 case Intrinsic::aarch64_ldxp: { 02166 unsigned Op = 02167 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 02168 SDValue MemAddr = Node->getOperand(2); 02169 SDLoc DL(Node); 02170 SDValue Chain = Node->getOperand(0); 02171 02172 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 02173 MVT::Other, MemAddr, Chain); 02174 02175 // Transfer memoperands. 02176 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 02177 MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 02178 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); 02179 return Ld; 02180 } 02181 case Intrinsic::aarch64_stlxp: 02182 case Intrinsic::aarch64_stxp: { 02183 unsigned Op = 02184 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; 02185 SDLoc DL(Node); 02186 SDValue Chain = Node->getOperand(0); 02187 SDValue ValLo = Node->getOperand(2); 02188 SDValue ValHi = Node->getOperand(3); 02189 SDValue MemAddr = Node->getOperand(4); 02190 02191 // Place arguments in the right order. 02192 SmallVector<SDValue, 7> Ops; 02193 Ops.push_back(ValLo); 02194 Ops.push_back(ValHi); 02195 Ops.push_back(MemAddr); 02196 Ops.push_back(Chain); 02197 02198 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 02199 // Transfer memoperands. 
02200 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 02201 MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 02202 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); 02203 02204 return St; 02205 } 02206 case Intrinsic::aarch64_neon_ld1x2: 02207 if (VT == MVT::v8i8) 02208 return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 02209 else if (VT == MVT::v16i8) 02210 return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 02211 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02212 return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 02213 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02214 return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 02215 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02216 return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 02217 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02218 return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 02219 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02220 return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 02221 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02222 return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 02223 break; 02224 case Intrinsic::aarch64_neon_ld1x3: 02225 if (VT == MVT::v8i8) 02226 return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 02227 else if (VT == MVT::v16i8) 02228 return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 02229 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02230 return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 02231 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02232 return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 02233 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02234 return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 02235 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02236 return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 02237 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02238 return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 02239 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02240 return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 02241 break; 02242 case Intrinsic::aarch64_neon_ld1x4: 02243 if (VT == MVT::v8i8) 02244 return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 02245 else if (VT == MVT::v16i8) 02246 return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 02247 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02248 return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 02249 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02250 return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 02251 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02252 return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 02253 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02254 return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); 02255 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02256 return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 02257 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02258 return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 02259 break; 02260 case Intrinsic::aarch64_neon_ld2: 02261 if (VT == MVT::v8i8) 02262 return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 02263 else if (VT == MVT::v16i8) 02264 return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 02265 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02266 return 
SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 02267 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02268 return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 02269 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02270 return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 02271 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02272 return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 02273 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02274 return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 02275 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02276 return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 02277 break; 02278 case Intrinsic::aarch64_neon_ld3: 02279 if (VT == MVT::v8i8) 02280 return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 02281 else if (VT == MVT::v16i8) 02282 return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 02283 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02284 return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 02285 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02286 return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 02287 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02288 return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 02289 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02290 return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 02291 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02292 return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 02293 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02294 return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 02295 break; 02296 case Intrinsic::aarch64_neon_ld4: 02297 if (VT == MVT::v8i8) 02298 return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 02299 else if (VT == MVT::v16i8) 02300 return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 02301 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02302 return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 02303 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02304 return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 02305 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02306 return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 02307 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02308 return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 02309 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02310 return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 02311 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02312 return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 02313 break; 02314 case Intrinsic::aarch64_neon_ld2r: 02315 if (VT == MVT::v8i8) 02316 return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 02317 else if (VT == MVT::v16i8) 02318 return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 02319 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02320 return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 02321 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02322 return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 02323 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02324 return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 02325 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02326 return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 02327 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02328 return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 02329 else if (VT 
== MVT::v2i64 || VT == MVT::v2f64) 02330 return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 02331 break; 02332 case Intrinsic::aarch64_neon_ld3r: 02333 if (VT == MVT::v8i8) 02334 return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 02335 else if (VT == MVT::v16i8) 02336 return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 02337 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02338 return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 02339 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02340 return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 02341 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02342 return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 02343 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02344 return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 02345 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02346 return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 02347 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02348 return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 02349 break; 02350 case Intrinsic::aarch64_neon_ld4r: 02351 if (VT == MVT::v8i8) 02352 return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 02353 else if (VT == MVT::v16i8) 02354 return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 02355 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02356 return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 02357 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02358 return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 02359 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02360 return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 02361 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02362 return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 02363 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02364 return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 02365 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02366 return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 02367 break; 02368 case Intrinsic::aarch64_neon_ld2lane: 02369 if (VT == MVT::v16i8 || VT == MVT::v8i8) 02370 return SelectLoadLane(Node, 2, AArch64::LD2i8); 02371 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 02372 VT == MVT::v8f16) 02373 return SelectLoadLane(Node, 2, AArch64::LD2i16); 02374 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 02375 VT == MVT::v2f32) 02376 return SelectLoadLane(Node, 2, AArch64::LD2i32); 02377 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 02378 VT == MVT::v1f64) 02379 return SelectLoadLane(Node, 2, AArch64::LD2i64); 02380 break; 02381 case Intrinsic::aarch64_neon_ld3lane: 02382 if (VT == MVT::v16i8 || VT == MVT::v8i8) 02383 return SelectLoadLane(Node, 3, AArch64::LD3i8); 02384 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 02385 VT == MVT::v8f16) 02386 return SelectLoadLane(Node, 3, AArch64::LD3i16); 02387 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 02388 VT == MVT::v2f32) 02389 return SelectLoadLane(Node, 3, AArch64::LD3i32); 02390 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 02391 VT == MVT::v1f64) 02392 return SelectLoadLane(Node, 3, AArch64::LD3i64); 02393 break; 02394 case Intrinsic::aarch64_neon_ld4lane: 02395 if (VT == MVT::v16i8 || VT == MVT::v8i8) 02396 return SelectLoadLane(Node, 4, AArch64::LD4i8); 02397 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 02398 VT == MVT::v8f16) 
02399 return SelectLoadLane(Node, 4, AArch64::LD4i16); 02400 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 02401 VT == MVT::v2f32) 02402 return SelectLoadLane(Node, 4, AArch64::LD4i32); 02403 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 02404 VT == MVT::v1f64) 02405 return SelectLoadLane(Node, 4, AArch64::LD4i64); 02406 break; 02407 } 02408 } break; 02409 case ISD::INTRINSIC_WO_CHAIN: { 02410 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 02411 switch (IntNo) { 02412 default: 02413 break; 02414 case Intrinsic::aarch64_neon_tbl2: 02415 return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two 02416 : AArch64::TBLv16i8Two, 02417 false); 02418 case Intrinsic::aarch64_neon_tbl3: 02419 return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 02420 : AArch64::TBLv16i8Three, 02421 false); 02422 case Intrinsic::aarch64_neon_tbl4: 02423 return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 02424 : AArch64::TBLv16i8Four, 02425 false); 02426 case Intrinsic::aarch64_neon_tbx2: 02427 return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two 02428 : AArch64::TBXv16i8Two, 02429 true); 02430 case Intrinsic::aarch64_neon_tbx3: 02431 return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 02432 : AArch64::TBXv16i8Three, 02433 true); 02434 case Intrinsic::aarch64_neon_tbx4: 02435 return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four 02436 : AArch64::TBXv16i8Four, 02437 true); 02438 case Intrinsic::aarch64_neon_smull: 02439 case Intrinsic::aarch64_neon_umull: 02440 if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node)) 02441 return N; 02442 break; 02443 } 02444 break; 02445 } 02446 case ISD::INTRINSIC_VOID: { 02447 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 02448 if (Node->getNumOperands() >= 3) 02449 VT = Node->getOperand(2)->getValueType(0); 02450 switch (IntNo) { 02451 default: 02452 break; 02453 case Intrinsic::aarch64_neon_st1x2: { 02454 if (VT == MVT::v8i8) 02455 return SelectStore(Node, 2, AArch64::ST1Twov8b); 02456 else if (VT == MVT::v16i8) 02457 return SelectStore(Node, 2, AArch64::ST1Twov16b); 02458 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02459 return SelectStore(Node, 2, AArch64::ST1Twov4h); 02460 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02461 return SelectStore(Node, 2, AArch64::ST1Twov8h); 02462 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02463 return SelectStore(Node, 2, AArch64::ST1Twov2s); 02464 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02465 return SelectStore(Node, 2, AArch64::ST1Twov4s); 02466 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02467 return SelectStore(Node, 2, AArch64::ST1Twov2d); 02468 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02469 return SelectStore(Node, 2, AArch64::ST1Twov1d); 02470 break; 02471 } 02472 case Intrinsic::aarch64_neon_st1x3: { 02473 if (VT == MVT::v8i8) 02474 return SelectStore(Node, 3, AArch64::ST1Threev8b); 02475 else if (VT == MVT::v16i8) 02476 return SelectStore(Node, 3, AArch64::ST1Threev16b); 02477 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02478 return SelectStore(Node, 3, AArch64::ST1Threev4h); 02479 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02480 return SelectStore(Node, 3, AArch64::ST1Threev8h); 02481 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02482 return SelectStore(Node, 3, AArch64::ST1Threev2s); 02483 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02484 return SelectStore(Node, 3, AArch64::ST1Threev4s); 02485 else if (VT == MVT::v2i64 || 
VT == MVT::v2f64) 02486 return SelectStore(Node, 3, AArch64::ST1Threev2d); 02487 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02488 return SelectStore(Node, 3, AArch64::ST1Threev1d); 02489 break; 02490 } 02491 case Intrinsic::aarch64_neon_st1x4: { 02492 if (VT == MVT::v8i8) 02493 return SelectStore(Node, 4, AArch64::ST1Fourv8b); 02494 else if (VT == MVT::v16i8) 02495 return SelectStore(Node, 4, AArch64::ST1Fourv16b); 02496 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02497 return SelectStore(Node, 4, AArch64::ST1Fourv4h); 02498 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02499 return SelectStore(Node, 4, AArch64::ST1Fourv8h); 02500 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02501 return SelectStore(Node, 4, AArch64::ST1Fourv2s); 02502 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02503 return SelectStore(Node, 4, AArch64::ST1Fourv4s); 02504 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02505 return SelectStore(Node, 4, AArch64::ST1Fourv2d); 02506 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02507 return SelectStore(Node, 4, AArch64::ST1Fourv1d); 02508 break; 02509 } 02510 case Intrinsic::aarch64_neon_st2: { 02511 if (VT == MVT::v8i8) 02512 return SelectStore(Node, 2, AArch64::ST2Twov8b); 02513 else if (VT == MVT::v16i8) 02514 return SelectStore(Node, 2, AArch64::ST2Twov16b); 02515 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02516 return SelectStore(Node, 2, AArch64::ST2Twov4h); 02517 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02518 return SelectStore(Node, 2, AArch64::ST2Twov8h); 02519 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02520 return SelectStore(Node, 2, AArch64::ST2Twov2s); 02521 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02522 return SelectStore(Node, 2, AArch64::ST2Twov4s); 02523 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02524 return SelectStore(Node, 2, AArch64::ST2Twov2d); 02525 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02526 return SelectStore(Node, 2, AArch64::ST1Twov1d); 02527 break; 02528 } 02529 case Intrinsic::aarch64_neon_st3: { 02530 if (VT == MVT::v8i8) 02531 return SelectStore(Node, 3, AArch64::ST3Threev8b); 02532 else if (VT == MVT::v16i8) 02533 return SelectStore(Node, 3, AArch64::ST3Threev16b); 02534 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02535 return SelectStore(Node, 3, AArch64::ST3Threev4h); 02536 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02537 return SelectStore(Node, 3, AArch64::ST3Threev8h); 02538 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02539 return SelectStore(Node, 3, AArch64::ST3Threev2s); 02540 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02541 return SelectStore(Node, 3, AArch64::ST3Threev4s); 02542 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02543 return SelectStore(Node, 3, AArch64::ST3Threev2d); 02544 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02545 return SelectStore(Node, 3, AArch64::ST1Threev1d); 02546 break; 02547 } 02548 case Intrinsic::aarch64_neon_st4: { 02549 if (VT == MVT::v8i8) 02550 return SelectStore(Node, 4, AArch64::ST4Fourv8b); 02551 else if (VT == MVT::v16i8) 02552 return SelectStore(Node, 4, AArch64::ST4Fourv16b); 02553 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02554 return SelectStore(Node, 4, AArch64::ST4Fourv4h); 02555 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02556 return SelectStore(Node, 4, AArch64::ST4Fourv8h); 02557 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02558 return SelectStore(Node, 4, AArch64::ST4Fourv2s); 02559 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02560 return SelectStore(Node, 4, AArch64::ST4Fourv4s); 02561 else if (VT == 
MVT::v2i64 || VT == MVT::v2f64) 02562 return SelectStore(Node, 4, AArch64::ST4Fourv2d); 02563 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02564 return SelectStore(Node, 4, AArch64::ST1Fourv1d); 02565 break; 02566 } 02567 case Intrinsic::aarch64_neon_st2lane: { 02568 if (VT == MVT::v16i8 || VT == MVT::v8i8) 02569 return SelectStoreLane(Node, 2, AArch64::ST2i8); 02570 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 02571 VT == MVT::v8f16) 02572 return SelectStoreLane(Node, 2, AArch64::ST2i16); 02573 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 02574 VT == MVT::v2f32) 02575 return SelectStoreLane(Node, 2, AArch64::ST2i32); 02576 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 02577 VT == MVT::v1f64) 02578 return SelectStoreLane(Node, 2, AArch64::ST2i64); 02579 break; 02580 } 02581 case Intrinsic::aarch64_neon_st3lane: { 02582 if (VT == MVT::v16i8 || VT == MVT::v8i8) 02583 return SelectStoreLane(Node, 3, AArch64::ST3i8); 02584 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 02585 VT == MVT::v8f16) 02586 return SelectStoreLane(Node, 3, AArch64::ST3i16); 02587 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 02588 VT == MVT::v2f32) 02589 return SelectStoreLane(Node, 3, AArch64::ST3i32); 02590 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 02591 VT == MVT::v1f64) 02592 return SelectStoreLane(Node, 3, AArch64::ST3i64); 02593 break; 02594 } 02595 case Intrinsic::aarch64_neon_st4lane: { 02596 if (VT == MVT::v16i8 || VT == MVT::v8i8) 02597 return SelectStoreLane(Node, 4, AArch64::ST4i8); 02598 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 02599 VT == MVT::v8f16) 02600 return SelectStoreLane(Node, 4, AArch64::ST4i16); 02601 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 02602 VT == MVT::v2f32) 02603 return SelectStoreLane(Node, 4, AArch64::ST4i32); 02604 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 02605 VT == MVT::v1f64) 02606 return SelectStoreLane(Node, 4, AArch64::ST4i64); 02607 break; 02608 } 02609 } 02610 } 02611 case AArch64ISD::LD2post: { 02612 if (VT == MVT::v8i8) 02613 return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 02614 else if (VT == MVT::v16i8) 02615 return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 02616 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02617 return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 02618 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02619 return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 02620 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02621 return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 02622 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02623 return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 02624 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02625 return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 02626 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02627 return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 02628 break; 02629 } 02630 case AArch64ISD::LD3post: { 02631 if (VT == MVT::v8i8) 02632 return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 02633 else if (VT == MVT::v16i8) 02634 return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 02635 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02636 return SelectPostLoad(Node, 3, 
AArch64::LD3Threev4h_POST, AArch64::dsub0); 02637 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02638 return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 02639 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02640 return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 02641 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02642 return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 02643 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02644 return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 02645 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02646 return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 02647 break; 02648 } 02649 case AArch64ISD::LD4post: { 02650 if (VT == MVT::v8i8) 02651 return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 02652 else if (VT == MVT::v16i8) 02653 return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 02654 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02655 return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 02656 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02657 return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 02658 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02659 return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 02660 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02661 return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 02662 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02663 return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 02664 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02665 return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 02666 break; 02667 } 02668 case AArch64ISD::LD1x2post: { 02669 if (VT == MVT::v8i8) 02670 return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 02671 else if (VT == MVT::v16i8) 02672 return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 02673 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02674 return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 02675 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02676 return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 02677 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02678 return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 02679 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02680 return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 02681 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02682 return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 02683 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02684 return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 02685 break; 02686 } 02687 case AArch64ISD::LD1x3post: { 02688 if (VT == MVT::v8i8) 02689 return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 02690 else if (VT == MVT::v16i8) 02691 return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 02692 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02693 return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 02694 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02695 return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 02696 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02697 return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 02698 else if 
(VT == MVT::v4i32 || VT == MVT::v4f32) 02699 return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 02700 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02701 return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 02702 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02703 return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 02704 break; 02705 } 02706 case AArch64ISD::LD1x4post: { 02707 if (VT == MVT::v8i8) 02708 return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 02709 else if (VT == MVT::v16i8) 02710 return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); 02711 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02712 return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 02713 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02714 return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 02715 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02716 return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 02717 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02718 return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 02719 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02720 return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 02721 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02722 return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 02723 break; 02724 } 02725 case AArch64ISD::LD1DUPpost: { 02726 if (VT == MVT::v8i8) 02727 return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 02728 else if (VT == MVT::v16i8) 02729 return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 02730 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02731 return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 02732 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02733 return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 02734 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02735 return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 02736 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02737 return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 02738 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02739 return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 02740 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02741 return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 02742 break; 02743 } 02744 case AArch64ISD::LD2DUPpost: { 02745 if (VT == MVT::v8i8) 02746 return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 02747 else if (VT == MVT::v16i8) 02748 return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 02749 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02750 return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 02751 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02752 return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 02753 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02754 return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 02755 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02756 return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 02757 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02758 return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 02759 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02760 return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, 
AArch64::qsub0); 02761 break; 02762 } 02763 case AArch64ISD::LD3DUPpost: { 02764 if (VT == MVT::v8i8) 02765 return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 02766 else if (VT == MVT::v16i8) 02767 return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 02768 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02769 return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); 02770 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02771 return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 02772 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02773 return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 02774 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02775 return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 02776 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02777 return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 02778 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02779 return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 02780 break; 02781 } 02782 case AArch64ISD::LD4DUPpost: { 02783 if (VT == MVT::v8i8) 02784 return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 02785 else if (VT == MVT::v16i8) 02786 return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 02787 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02788 return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 02789 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02790 return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 02791 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02792 return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 02793 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02794 return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 02795 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02796 return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 02797 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02798 return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 02799 break; 02800 } 02801 case AArch64ISD::LD1LANEpost: { 02802 if (VT == MVT::v16i8 || VT == MVT::v8i8) 02803 return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 02804 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 02805 VT == MVT::v8f16) 02806 return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 02807 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 02808 VT == MVT::v2f32) 02809 return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 02810 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 02811 VT == MVT::v1f64) 02812 return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); 02813 break; 02814 } 02815 case AArch64ISD::LD2LANEpost: { 02816 if (VT == MVT::v16i8 || VT == MVT::v8i8) 02817 return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); 02818 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 02819 VT == MVT::v8f16) 02820 return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); 02821 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 02822 VT == MVT::v2f32) 02823 return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); 02824 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 02825 VT == MVT::v1f64) 02826 return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); 02827 break; 02828 } 02829 case AArch64ISD::LD3LANEpost: { 02830 if (VT == MVT::v16i8 || VT == MVT::v8i8) 02831 
return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); 02832 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 02833 VT == MVT::v8f16) 02834 return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); 02835 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 02836 VT == MVT::v2f32) 02837 return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); 02838 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 02839 VT == MVT::v1f64) 02840 return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); 02841 break; 02842 } 02843 case AArch64ISD::LD4LANEpost: { 02844 if (VT == MVT::v16i8 || VT == MVT::v8i8) 02845 return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); 02846 else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 02847 VT == MVT::v8f16) 02848 return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); 02849 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 02850 VT == MVT::v2f32) 02851 return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); 02852 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 02853 VT == MVT::v1f64) 02854 return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); 02855 break; 02856 } 02857 case AArch64ISD::ST2post: { 02858 VT = Node->getOperand(1).getValueType(); 02859 if (VT == MVT::v8i8) 02860 return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); 02861 else if (VT == MVT::v16i8) 02862 return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); 02863 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02864 return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); 02865 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02866 return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); 02867 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02868 return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); 02869 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02870 return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); 02871 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02872 return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); 02873 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02874 return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 02875 break; 02876 } 02877 case AArch64ISD::ST3post: { 02878 VT = Node->getOperand(1).getValueType(); 02879 if (VT == MVT::v8i8) 02880 return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); 02881 else if (VT == MVT::v16i8) 02882 return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); 02883 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 02884 return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); 02885 else if (VT == MVT::v8i16 || VT == MVT::v8f16) 02886 return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); 02887 else if (VT == MVT::v2i32 || VT == MVT::v2f32) 02888 return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); 02889 else if (VT == MVT::v4i32 || VT == MVT::v4f32) 02890 return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); 02891 else if (VT == MVT::v2i64 || VT == MVT::v2f64) 02892 return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); 02893 else if (VT == MVT::v1i64 || VT == MVT::v1f64) 02894 return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 02895 break; 02896 } 02897 case AArch64ISD::ST4post: { 02898 VT = Node->getOperand(1).getValueType(); 02899 if (VT == MVT::v8i8) 02900 return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); 02901 else if (VT == MVT::v16i8) 02902 return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); 02903 else if (VT == MVT::v4i16 || VT == MVT::v4f16) 
      return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
    break;
  }

  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FTRUNC:
  case ISD::FROUND:
    if (SDNode *I = SelectLIBM(Node))
      return I;
    break;
  }

  // Select the default instruction
  ResNode = SelectCode(Node);

  DEBUG(errs() << "=> ");
  if (ResNode == nullptr || ResNode == Node)
    DEBUG(Node->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DEBUG(errs() << "\n");

  return ResNode;
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}
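
The factory function above is the backend's public entry point for this selector: a TargetPassConfig subclass creates the pass when the codegen pipeline reaches instruction selection. The sketch below illustrates that hook under the conventional LLVM backend pattern; the class name AArch64PassConfigSketch and its members are assumptions for illustration, not a verbatim excerpt from AArch64TargetMachine.cpp.

// Illustrative sketch only: how a backend's TargetPassConfig subclass
// typically registers the DAG-to-DAG selector created by
// createAArch64ISelDag.  Names below are assumed for the example.
#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/Passes.h"   // TargetPassConfig

using namespace llvm;

namespace {
class AArch64PassConfigSketch : public TargetPassConfig {
public:
  AArch64PassConfigSketch(AArch64TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  AArch64TargetMachine &getAArch64TargetMachine() const {
    return getTM<AArch64TargetMachine>();
  }

  // Hook point where the SelectionDAG instruction selector defined in
  // this file is inserted into the pipeline.
  bool addInstSelector() override {
    addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
    return false;
  }
};
} // end anonymous namespace

The real backend defines an analogous pass-config class and calls createAArch64ISelDag with the target machine and the optimization level chosen on the command line; everything else (ForCodeSize, subtarget lookup) is derived per-function inside runOnMachineFunction.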