LLVM API Documentation
//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the PowerPC-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// PPCGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCISelLowering.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"

//===----------------------------------------------------------------------===//
//
// TBD:
//   fastLowerArguments: Handle simple cases.
//   PPCMaterializeGV: Handle TLS.
//   SelectCall: Handle function pointers.
//   SelectCall: Handle multi-register return values.
//   SelectCall: Optimize away nops for local calls.
//   processCallArgs: Handle bit-converted arguments.
//   finishCall: Handle multi-register return values.
//   PPCComputeAddress: Handle parameter references as FrameIndex's.
//   PPCEmitCmp: Handle immediate as operand 1.
//   SelectCall: Handle small byval arguments.
//   SelectIntrinsicCall: Implement.
//   SelectSelect: Implement.
//   Consider factoring isTypeLegal into the base class.
//   Implement switches and jump tables.
//
//===----------------------------------------------------------------------===//
using namespace llvm;

#define DEBUG_TYPE "ppcfastisel"

namespace {

// Describes a memory operand for fast-isel load/store emission: the base is
// either a virtual register or a frame index, plus a byte offset.  Exactly
// one member of the union is meaningful, selected by BaseType.
typedef struct Address {
  enum {
    RegBase,        // Base.Reg holds the base register.
    FrameIndexBase  // Base.FI holds a stack frame index.
  } BaseType;

  union {
    unsigned Reg;
    int FI;
  } Base;

  // Byte offset added to the base.
  // NOTE(review): 'long' is only 32 bits on some hosts (e.g. LLP64); the
  // code below casts through (int64_t) when materializing -- confirm this
  // width is sufficient for all offsets that can reach here.
  long Offset;

  // Innocuous defaults for our address.
  Address()
    : BaseType(RegBase), Offset(0) {
    Base.Reg = 0;
  }
} Address;

// Fast instruction selector for PowerPC.  Note that the addressing code
// below constrains base registers to 64-bit classes (G8RC), so this
// implementation targets 64-bit PowerPC subtargets.
class PPCFastISel final : public FastISel {

  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  const PPCSubtarget *PPCSubTarget;
  LLVMContext *Context;

  public:
    explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
                         const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
        TII(*TM.getSubtargetImpl()->getInstrInfo()),
        TLI(*TM.getSubtargetImpl()->getTargetLowering()),
        PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
        Context(&FuncInfo.Fn->getContext()) {}

  // Backend specific FastISel code.
  private:
    bool fastSelectInstruction(const Instruction *I) override;
    unsigned fastMaterializeConstant(const Constant *C) override;
    unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI) override;
    bool fastLowerArguments() override;
    unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
    unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             uint64_t Imm);
    unsigned fastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            unsigned Op0, bool Op0IsKill);
    unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             unsigned Op1, bool Op1IsKill);

  // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectIToFP(const Instruction *I, bool IsSigned);
    bool SelectFPToI(const Instruction *I, bool IsSigned);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectCall(const Instruction *I);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);

  // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt, unsigned DestReg);
    bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                     const TargetRegisterClass *RC, bool IsZExt = true,
                     unsigned FP64LoadOpc = PPC::LFD);
    bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
    bool PPCComputeAddress(const Value *Obj, Address &Addr);
    void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
                            unsigned &IndexReg);
    bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                       unsigned DestReg, bool IsZExt);
    unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
    unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
    unsigned PPCMaterializeInt(const Constant *C, MVT VT, bool UseSExt = true);
    unsigned PPCMaterialize32BitInt(int64_t Imm,
                                    const TargetRegisterClass *RC);
    unsigned PPCMaterialize64BitInt(int64_t Imm,
                                    const TargetRegisterClass *RC);
    unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
                             unsigned SrcReg, bool IsSigned);
    unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);

  // Call handling routines.
  private:
    bool processCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes,
                         bool IsVarArg);
    void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes, bool IsVarArg);
    CCAssignFn *usePPC32CCs(unsigned Flag);

  private:
  #include "PPCGenFastISel.inc"

};

} // end anonymous namespace

#include "PPCGenCallingConv.inc"

// Function whose sole purpose is to kill compiler warnings
// stemming from unused functions included from PPCGenCallingConv.inc.
// Flag selects among the 32-bit SVR4 calling-convention assigners:
// 1 -> CC_PPC32_SVR4, 2 -> CC_PPC32_SVR4_ByVal, 3 -> CC_PPC32_SVR4_VarArg,
// anything else -> RetCC_PPC.
CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
  if (Flag == 1)
    return CC_PPC32_SVR4;
  else if (Flag == 2)
    return CC_PPC32_SVR4_ByVal;
  else if (Flag == 3)
    return CC_PPC32_SVR4_VarArg;
  else
    return RetCC_PPC;
}

// Map an IR comparison predicate to the single PPC condition-register
// predicate that implements it, or None when no single CR bit test
// suffices (all the unordered FP predicates except UNO).
static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // These are not representable with any single compare.
    case CmpInst::FCMP_FALSE:
    case CmpInst::FCMP_UEQ:
    case CmpInst::FCMP_UGT:
    case CmpInst::FCMP_UGE:
    case CmpInst::FCMP_ULT:
    case CmpInst::FCMP_ULE:
    case CmpInst::FCMP_UNE:
    case CmpInst::FCMP_TRUE:
    default:
      return Optional<PPC::Predicate>();

    case CmpInst::FCMP_OEQ:
    case CmpInst::ICMP_EQ:
      return PPC::PRED_EQ;

    case CmpInst::FCMP_OGT:
    case CmpInst::ICMP_UGT:
    case CmpInst::ICMP_SGT:
      return PPC::PRED_GT;

    case CmpInst::FCMP_OGE:
    case CmpInst::ICMP_UGE:
    case CmpInst::ICMP_SGE:
      return PPC::PRED_GE;

    case CmpInst::FCMP_OLT:
    case CmpInst::ICMP_ULT:
    case CmpInst::ICMP_SLT:
      return PPC::PRED_LT;

    case CmpInst::FCMP_OLE:
    case CmpInst::ICMP_ULE:
    case CmpInst::ICMP_SLE:
      return PPC::PRED_LE;

    case CmpInst::FCMP_ONE:
    case CmpInst::ICMP_NE:
      return PPC::PRED_NE;

    case CmpInst::FCMP_ORD:
      return PPC::PRED_NU;

    case CmpInst::FCMP_UNO:
      return PPC::PRED_UN;
  }
}

// Determine whether the type Ty is simple enough to be handled by
// fast-isel, and return its equivalent machine type in VT.
// FIXME: Copied directly from ARM -- factor into base class?
bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT Evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (Evt == MVT::Other || !Evt.isSimple()) return false;
  VT = Evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

// Determine whether the type Ty is simple enough to be handled by
// fast-isel as a load target, and return its equivalent machine type in VT.
// Determine whether Ty is legal for fast-isel as a load target: either a
// fully legal type, or an integer narrow enough (i8/i16/i32) that a
// sign/zero-extending load handles it.  VT receives the machine type
// either way (isTypeLegal sets it even on failure for simple types).
bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
    return true;
  }

  return false;
}

// Given a value Obj, create an Address object Addr that represents its
// address.  Return false if we can't handle it.  Recurses through
// bitcasts, no-op int<->ptr casts, and GEPs with foldable constant
// indices; static allocas become frame-index addresses, and anything
// else falls back to materializing the pointer into a base register.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    // NOTE(review): the static_cast below is only used as a map-lookup key;
    // Obj may not actually be an AllocaInst -- the count() is what filters.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
    default:
      break;
    case Instruction::BitCast:
      // Look through bitcasts.
      return PPCComputeAddress(U->getOperand(0), Addr);
    case Instruction::IntToPtr:
      // Look past no-op inttoptrs.
      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
        return PPCComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::PtrToInt:
      // Look past no-op ptrtoints.
      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
        return PPCComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::GetElementPtr: {
      // Save the address so it can be restored if the GEP turns out to
      // contain an index we cannot fold.
      Address SavedAddr = Addr;
      long TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
           II != IE; ++II, ++GTI) {
        const Value *Op = *II;
        if (StructType *STy = dyn_cast<StructType>(*GTI)) {
          // Struct index: add the field's layout offset.
          const StructLayout *SL = DL.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          // Array/pointer index: scale by the element size.
          uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (canFoldAddIntoGEP(U, Op)) {
              // A compatible add with a constant operand.  Fold the constant.
              ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (PPCComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        // Static alloca: address is simply its frame index.
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // FIXME: References to parameters fall through to the behavior
  // below.  They should be able to reference a frame index since
  // they are stored to the stack, so we can get "ld rx, offset(r1)"
  // instead of "addi ry, r1, offset / ld rx, 0(ry)".  Obj will
  // just contain the parameter.  Try to handle this with a FI.

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0)
    Addr.Base.Reg = getRegForValue(Obj);

  // Prevent assignment of base register to X0, which is inappropriate
  // for loads and stores alike.
  if (Addr.Base.Reg != 0)
    MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);

  return Addr.Base.Reg != 0;
}

// Fix up some addresses that can't be used directly.  For example, if
// an offset won't fit in an instruction field, we may need to move it
// into an index register.  On exit: if UseOffset was cleared (offset
// doesn't fit in the 16-bit displacement field), IndexReg holds the
// materialized offset and any frame index has been converted to a
// register base; otherwise Addr is untouched.
void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
                                     unsigned &IndexReg) {

  // Check whether the offset fits in the instruction field.
  if (!isInt<16>(Addr.Offset))
    UseOffset = false;

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
    unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
            ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  if (!UseOffset) {
    // Materialize the (sign-extended) offset so the caller can use an
    // indexed (register+register) form instead.
    IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context)
                                              : Type::getInt64Ty(*Context));
    const ConstantInt *Offset =
      ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
    IndexReg = PPCMaterializeInt(Offset, MVT::i64);
    assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
  }
}

// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false.  See commentary below for how the register class of
// the load is determined.
bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                              const TargetRegisterClass *RC,
                              bool IsZExt, unsigned FP64LoadOpc) {
  unsigned Opc;
  bool UseOffset = true;

  // If ResultReg is given, it determines the register class of the load.
  // Otherwise, RC is the register class to use.  If the result of the
  // load isn't anticipated in this block, both may be zero, in which
  // case we must make a conservative guess.  In particular, don't assign
  // R0 or X0 to the result register, as the result may be used in a load,
  // store, add-immediate, or isel that won't permit this.  (Though
  // perhaps the spill and reload of live-exit values would handle this?)
  const TargetRegisterClass *UseRC =
    (ResultReg ? MRI.getRegClass(ResultReg) :
     (RC ? RC :
      (VT == MVT::f64 ? &PPC::F8RCRegClass :
       (VT == MVT::f32 ? &PPC::F4RCRegClass :
        (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
         &PPC::GPRC_and_GPRC_NOR0RegClass)))));

  bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
      break;
    case MVT::i16:
      Opc = (IsZExt ?
             (Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
             (Is32BitInt ? PPC::LHA : PPC::LHA8));
      break;
    case MVT::i32:
      Opc = (IsZExt ?
             (Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
             (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
      // LWA is a DS-form instruction: its displacement must be a multiple
      // of 4, so fall back to the indexed form otherwise.
      if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
        UseOffset = false;
      break;
    case MVT::i64:
      Opc = PPC::LD;
      assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
             "64-bit load with 32-bit target??");
      // LD is DS-form as well: displacement must be 4-byte aligned.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = PPC::LFS;
      break;
    case MVT::f64:
      Opc = FP64LoadOpc;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form.  Also handle stack pointers with special needs.
  unsigned IndexReg = 0;
  PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
  if (ResultReg == 0)
    ResultReg = createResultReg(UseRC);

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {

    MachineMemOperand *MMO =
      FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
        MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlignment(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one.  FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::LBZ:    Opc = PPC::LBZX;    break;
      case PPC::LBZ8:   Opc = PPC::LBZX8;   break;
      case PPC::LHZ:    Opc = PPC::LHZX;    break;
      case PPC::LHZ8:   Opc = PPC::LHZX8;   break;
      case PPC::LHA:    Opc = PPC::LHAX;    break;
      case PPC::LHA8:   Opc = PPC::LHAX8;   break;
      case PPC::LWZ:    Opc = PPC::LWZX;    break;
      case PPC::LWZ8:   Opc = PPC::LWZX8;   break;
      case PPC::LWA:    Opc = PPC::LWAX;    break;
      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
      case PPC::LD:     Opc = PPC::LDX;     break;
      case PPC::LFS:    Opc = PPC::LFSX;    break;
      case PPC::LFD:    Opc = PPC::LFDX;    break;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(Addr.Base.Reg).addReg(IndexReg);
  }

  return true;
}

// Attempt to fast-select a load instruction.
bool PPCFastISel::SelectLoad(const Instruction *I) {
  // FIXME: No atomic loads are supported.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(I->getOperand(0), Addr))
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class.  This is necessary
  // to constrain RA from using R0/X0 when this is not legal.
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  unsigned ResultReg = 0;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
    return false;
  updateValueMap(I, ResultReg);
  return true;
}

// Emit a store instruction to store SrcReg at Addr.
// Emit a store of SrcReg (whose register class determines 32- vs 64-bit
// integer opcodes) to the memory location described by Addr.  Returns
// false for unhandled value types (e.g. vectors).
bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
      break;
    case MVT::i16:
      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
      break;
    case MVT::i32:
      assert(Is32BitInt && "Not GPRC for i32??");
      Opc = PPC::STW;
      break;
    case MVT::i64:
      Opc = PPC::STD;
      // STD is a DS-form instruction: its displacement must be a multiple
      // of 4, so fall back to the indexed form otherwise.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = PPC::STFS;
      break;
    case MVT::f64:
      Opc = PPC::STFD;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form.  Also handle stack pointers with special needs.
  unsigned IndexReg = 0;
  PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    MachineMemOperand *MMO =
      FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlignment(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(SrcReg)
        .addImm(Addr.Offset)
        .addFrameIndex(Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  else {
    // Get the RR opcode corresponding to the RI one.  FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::STB:  Opc = PPC::STBX;  break;
      case PPC::STH : Opc = PPC::STHX;  break;
      case PPC::STW : Opc = PPC::STWX;  break;
      case PPC::STB8: Opc = PPC::STBX8; break;
      case PPC::STH8: Opc = PPC::STHX8; break;
      case PPC::STW8: Opc = PPC::STWX8; break;
      case PPC::STD:  Opc = PPC::STDX;  break;
      case PPC::STFS: Opc = PPC::STFSX; break;
      case PPC::STFD: Opc = PPC::STFDX; break;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
  }

  return true;
}

// Attempt to fast-select a store instruction.
bool PPCFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // FIXME: No atomic stores are supported.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(Op0->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0)
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!PPCEmitStore(VT, SrcReg, Addr))
    return false;

  return true;
}

// Attempt to fast-select a branch instruction.
// Attempt to fast-select a conditional branch.  Handles two cases:
// a condition fed directly by a compare representable as a single CR
// predicate, and a constant condition (emitted as an unconditional
// branch to the taken successor).
bool PPCFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *BrBB = FuncInfo.MBB;
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // For now, just try the simplest case where it's fed by a compare.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
    if (!OptPPCPred)
      return false;

    PPC::Predicate PPCPred = OptPPCPred.getValue();

    // Take advantage of fall-through opportunities: if the true block is
    // next in layout, invert the predicate and branch to the false block
    // so the true block is reached by falling through.
    if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
      std::swap(TBB, FBB);
      PPCPred = PPC::InvertPredicate(PPCPred);
    }

    unsigned CondReg = createResultReg(&PPC::CRRCRegClass);

    if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
                    CondReg))
      return false;

    BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
      .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
    // fastEmitBranch handles the fall-through/unconditional edge to FBB.
    fastEmitBranch(FBB, DbgLoc);
    FuncInfo.MBB->addSuccessor(TBB);
    return true;

  } else if (const ConstantInt *CI =
                 dyn_cast<ConstantInt>(BI->getCondition())) {
    // Constant condition: branch straight to the statically-known target.
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    fastEmitBranch(Target, DbgLoc);
    return true;
  }

  // FIXME: ARM looks for a case where the block containing the compare
  // has been split from the block containing the branch.  If this happens,
  // there is a vreg available containing the result of the compare.  I'm
  // not sure we can do much, as we've lost the predicate information with
  // the compare instruction -- we have a 4-bit CR but don't know which bit
  // to test here.
  return false;
}

// Attempt to emit a compare of the two source values.  Signed and unsigned
// comparisons are supported.  Return false if we can't handle it.  The
// result is written into the CR register DestReg.  Sub-word integer
// operands are first extended to i32.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, unsigned DestReg) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // i1 values live in CR bits when the subtarget uses CR bits; that case
  // is not handled here.
  if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now.  We are
  // similar to ARM in this regard.
  long Imm = 0;
  bool UseImm = false;

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC.  Others will be materialized into a register.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
      // Unsigned compares take a 16-bit unsigned immediate; signed
      // compares take a 16-bit signed immediate.
      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
        UseImm = true;
    }
  }

  unsigned CmpOpc;
  bool NeedsExt = false;
  switch (SrcVT.SimpleTy) {
    default: return false;
    case MVT::f32:
      CmpOpc = PPC::FCMPUS;
      break;
    case MVT::f64:
      CmpOpc = PPC::FCMPUD;
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      NeedsExt = true;
      // Intentional fall-through.
    case MVT::i32:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
      else
        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
      break;
    case MVT::i64:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
      else
        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
      break;
  }

  unsigned SrcReg1 = getRegForValue(SrcValue1);
  if (SrcReg1 == 0)
    return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(SrcValue2);
    if (SrcReg2 == 0)
      return false;
  }

  // Widen sub-word operands to i32 before comparing, matching the
  // signedness of the comparison.
  if (NeedsExt) {
    unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addReg(SrcReg2);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addImm(Imm);

  return true;
}

// Attempt to fast-select a floating-point extend instruction.
// f32 -> f64 requires no code on PPC (F4RC is a subclass of F8RC), so
// simply reuse the source register.
bool PPCFastISel::SelectFPExt(const Instruction *I) {
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(Src->getType(), true);
  EVT DestVT = TLI.getValueType(I->getType(), true);

  if (SrcVT != MVT::f32 || DestVT != MVT::f64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // No code is generated for a FP extend.
  updateValueMap(I, SrcReg);
  return true;
}

// Attempt to fast-select a floating-point truncate instruction.
// Attempt to fast-select a floating-point truncate instruction.
// f64 -> f32 is a single FRSP (round to single precision).
bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(Src->getType(), true);
  EVT DestVT = TLI.getValueType(I->getType(), true);

  if (SrcVT != MVT::f64 || DestVT != MVT::f32)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Round the result to single precision.
  unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
    .addReg(SrcReg);

  updateValueMap(I, DestReg);
  return true;
}

// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// FIXME: When direct register moves are implemented (see PowerISA 2.08),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case.  Can use a 4-byte
// stack slot and 4-byte store/load sequence.  Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit.
  if (SrcVT == MVT::i32) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return 0;
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return 0;

  // Load the integer value into an FPR.  The kind of load used depends
  // on a number of conditions.  Note SrcVT still reflects the original
  // width even after the extension above, so i32 sources can use the
  // 4-byte integer FP loads when appropriate.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
      // Offset 4 addresses the low word of the 8-byte slot.
      // NOTE(review): this assumes big-endian layout -- confirm if this
      // path is ever enabled for little-endian subtargets.
      Addr.Offset = 4;
    } else if (PPCSubTarget->hasLFIWAX()) {
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned ResultReg = 0;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return 0;

  return ResultReg;
}

// Attempt to fast-select an integer-to-floating-point conversion.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  if (SrcVT != MVT::i8  && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !PPCSubTarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float.  Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding.  If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
    return false;

  // Extend the input if necessary; sub-word sources go straight to i64
  // so PPCMoveToFPReg sees a full doubleword.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR.
  unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (FPReg == 0)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}

// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// FIXME: When direct register moves are implemented (see PowerISA 2.08),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      unsigned SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return 0;

  // Reload it into a GPR.
If we want an i32, modify the address 01005 // to have a 4-byte offset so we load from the right place. 01006 if (VT == MVT::i32) 01007 Addr.Offset = 4; 01008 01009 // Look at the currently assigned register for this instruction 01010 // to determine the required register class. 01011 unsigned AssignedReg = FuncInfo.ValueMap[I]; 01012 const TargetRegisterClass *RC = 01013 AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; 01014 01015 unsigned ResultReg = 0; 01016 if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned)) 01017 return 0; 01018 01019 return ResultReg; 01020 } 01021 01022 // Attempt to fast-select a floating-point-to-integer conversion. 01023 bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { 01024 MVT DstVT, SrcVT; 01025 Type *DstTy = I->getType(); 01026 if (!isTypeLegal(DstTy, DstVT)) 01027 return false; 01028 01029 if (DstVT != MVT::i32 && DstVT != MVT::i64) 01030 return false; 01031 01032 // If we don't have FCTIDUZ and we need it, punt to SelectionDAG. 01033 if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT()) 01034 return false; 01035 01036 Value *Src = I->getOperand(0); 01037 Type *SrcTy = Src->getType(); 01038 if (!isTypeLegal(SrcTy, SrcVT)) 01039 return false; 01040 01041 if (SrcVT != MVT::f32 && SrcVT != MVT::f64) 01042 return false; 01043 01044 unsigned SrcReg = getRegForValue(Src); 01045 if (SrcReg == 0) 01046 return false; 01047 01048 // Convert f32 to f64 if necessary. This is just a meaningless copy 01049 // to get the register class right. COPY_TO_REGCLASS is needed since 01050 // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream. 
01051 const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg); 01052 if (InRC == &PPC::F4RCRegClass) { 01053 unsigned TmpReg = createResultReg(&PPC::F8RCRegClass); 01054 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01055 TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg) 01056 .addReg(SrcReg).addImm(PPC::F8RCRegClassID); 01057 SrcReg = TmpReg; 01058 } 01059 01060 // Determine the opcode for the conversion, which takes place 01061 // entirely within FPRs. 01062 unsigned DestReg = createResultReg(&PPC::F8RCRegClass); 01063 unsigned Opc; 01064 01065 if (DstVT == MVT::i32) 01066 if (IsSigned) 01067 Opc = PPC::FCTIWZ; 01068 else 01069 Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; 01070 else 01071 Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; 01072 01073 // Generate the convert. 01074 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) 01075 .addReg(SrcReg); 01076 01077 // Now move the integer value from a float register to an integer register. 01078 unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned); 01079 if (IntReg == 0) 01080 return false; 01081 01082 updateValueMap(I, IntReg); 01083 return true; 01084 } 01085 01086 // Attempt to fast-select a binary integer operation that isn't already 01087 // handled automatically. 01088 bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { 01089 EVT DestVT = TLI.getValueType(I->getType(), true); 01090 01091 // We can get here in the case when we have a binary operation on a non-legal 01092 // type and the target independent selector doesn't know how to handle it. 01093 if (DestVT != MVT::i16 && DestVT != MVT::i8) 01094 return false; 01095 01096 // Look at the currently assigned register for this instruction 01097 // to determine the required register class. If there is no register, 01098 // make a conservative choice (don't assign R0). 01099 unsigned AssignedReg = FuncInfo.ValueMap[I]; 01100 const TargetRegisterClass *RC = 01101 (AssignedReg ? 
MRI.getRegClass(AssignedReg) : 01102 &PPC::GPRC_and_GPRC_NOR0RegClass); 01103 bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass); 01104 01105 unsigned Opc; 01106 switch (ISDOpcode) { 01107 default: return false; 01108 case ISD::ADD: 01109 Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8; 01110 break; 01111 case ISD::OR: 01112 Opc = IsGPRC ? PPC::OR : PPC::OR8; 01113 break; 01114 case ISD::SUB: 01115 Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8; 01116 break; 01117 } 01118 01119 unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass); 01120 unsigned SrcReg1 = getRegForValue(I->getOperand(0)); 01121 if (SrcReg1 == 0) return false; 01122 01123 // Handle case of small immediate operand. 01124 if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) { 01125 const APInt &CIVal = ConstInt->getValue(); 01126 int Imm = (int)CIVal.getSExtValue(); 01127 bool UseImm = true; 01128 if (isInt<16>(Imm)) { 01129 switch (Opc) { 01130 default: 01131 llvm_unreachable("Missing case!"); 01132 case PPC::ADD4: 01133 Opc = PPC::ADDI; 01134 MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass); 01135 break; 01136 case PPC::ADD8: 01137 Opc = PPC::ADDI8; 01138 MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass); 01139 break; 01140 case PPC::OR: 01141 Opc = PPC::ORI; 01142 break; 01143 case PPC::OR8: 01144 Opc = PPC::ORI8; 01145 break; 01146 case PPC::SUBF: 01147 if (Imm == -32768) 01148 UseImm = false; 01149 else { 01150 Opc = PPC::ADDI; 01151 MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass); 01152 Imm = -Imm; 01153 } 01154 break; 01155 case PPC::SUBF8: 01156 if (Imm == -32768) 01157 UseImm = false; 01158 else { 01159 Opc = PPC::ADDI8; 01160 MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass); 01161 Imm = -Imm; 01162 } 01163 break; 01164 } 01165 01166 if (UseImm) { 01167 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), 01168 ResultReg) 01169 .addReg(SrcReg1) 01170 .addImm(Imm); 01171 updateValueMap(I, ResultReg); 01172 return true; 01173 
} 01174 } 01175 } 01176 01177 // Reg-reg case. 01178 unsigned SrcReg2 = getRegForValue(I->getOperand(1)); 01179 if (SrcReg2 == 0) return false; 01180 01181 // Reverse operands for subtract-from. 01182 if (ISDOpcode == ISD::SUB) 01183 std::swap(SrcReg1, SrcReg2); 01184 01185 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 01186 .addReg(SrcReg1).addReg(SrcReg2); 01187 updateValueMap(I, ResultReg); 01188 return true; 01189 } 01190 01191 // Handle arguments to a call that we're attempting to fast-select. 01192 // Return false if the arguments are too complex for us at the moment. 01193 bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, 01194 SmallVectorImpl<unsigned> &ArgRegs, 01195 SmallVectorImpl<MVT> &ArgVTs, 01196 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, 01197 SmallVectorImpl<unsigned> &RegArgs, 01198 CallingConv::ID CC, 01199 unsigned &NumBytes, 01200 bool IsVarArg) { 01201 SmallVector<CCValAssign, 16> ArgLocs; 01202 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context); 01203 01204 // Reserve space for the linkage area on the stack. 01205 bool isELFv2ABI = PPCSubTarget->isELFv2ABI(); 01206 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, 01207 isELFv2ABI); 01208 CCInfo.AllocateStack(LinkageSize, 8); 01209 01210 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); 01211 01212 // Bail out if we can't handle any of the arguments. 01213 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 01214 CCValAssign &VA = ArgLocs[I]; 01215 MVT ArgVT = ArgVTs[VA.getValNo()]; 01216 01217 // Skip vector arguments for now, as well as long double and 01218 // uint128_t, and anything that isn't passed in a register. 01219 if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 || 01220 !VA.isRegLoc() || VA.needsCustom()) 01221 return false; 01222 01223 // Skip bit-converted arguments for now. 
01224 if (VA.getLocInfo() == CCValAssign::BCvt) 01225 return false; 01226 } 01227 01228 // Get a count of how many bytes are to be pushed onto the stack. 01229 NumBytes = CCInfo.getNextStackOffset(); 01230 01231 // The prolog code of the callee may store up to 8 GPR argument registers to 01232 // the stack, allowing va_start to index over them in memory if its varargs. 01233 // Because we cannot tell if this is needed on the caller side, we have to 01234 // conservatively assume that it is needed. As such, make sure we have at 01235 // least enough stack space for the caller to store the 8 GPRs. 01236 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. 01237 NumBytes = std::max(NumBytes, LinkageSize + 64); 01238 01239 // Issue CALLSEQ_START. 01240 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01241 TII.get(TII.getCallFrameSetupOpcode())) 01242 .addImm(NumBytes); 01243 01244 // Prepare to assign register arguments. Every argument uses up a 01245 // GPR protocol register even if it's passed in a floating-point 01246 // register. 01247 unsigned NextGPR = PPC::X3; 01248 unsigned NextFPR = PPC::F1; 01249 01250 // Process arguments. 01251 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 01252 CCValAssign &VA = ArgLocs[I]; 01253 unsigned Arg = ArgRegs[VA.getValNo()]; 01254 MVT ArgVT = ArgVTs[VA.getValNo()]; 01255 01256 // Handle argument promotion and bitcasts. 01257 switch (VA.getLocInfo()) { 01258 default: 01259 llvm_unreachable("Unknown loc info!"); 01260 case CCValAssign::Full: 01261 break; 01262 case CCValAssign::SExt: { 01263 MVT DestVT = VA.getLocVT(); 01264 const TargetRegisterClass *RC = 01265 (DestVT == MVT::i64) ? 
&PPC::G8RCRegClass : &PPC::GPRCRegClass; 01266 unsigned TmpReg = createResultReg(RC); 01267 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false)) 01268 llvm_unreachable("Failed to emit a sext!"); 01269 ArgVT = DestVT; 01270 Arg = TmpReg; 01271 break; 01272 } 01273 case CCValAssign::AExt: 01274 case CCValAssign::ZExt: { 01275 MVT DestVT = VA.getLocVT(); 01276 const TargetRegisterClass *RC = 01277 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 01278 unsigned TmpReg = createResultReg(RC); 01279 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true)) 01280 llvm_unreachable("Failed to emit a zext!"); 01281 ArgVT = DestVT; 01282 Arg = TmpReg; 01283 break; 01284 } 01285 case CCValAssign::BCvt: { 01286 // FIXME: Not yet handled. 01287 llvm_unreachable("Should have bailed before getting here!"); 01288 break; 01289 } 01290 } 01291 01292 // Copy this argument to the appropriate register. 01293 unsigned ArgReg; 01294 if (ArgVT == MVT::f32 || ArgVT == MVT::f64) { 01295 ArgReg = NextFPR++; 01296 ++NextGPR; 01297 } else 01298 ArgReg = NextGPR++; 01299 01300 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01301 TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg); 01302 RegArgs.push_back(ArgReg); 01303 } 01304 01305 return true; 01306 } 01307 01308 // For a call that we've determined we can fast-select, finish the 01309 // call sequence and generate a copy to obtain the return value (if any). 01310 void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, 01311 const Instruction *I, CallingConv::ID CC, 01312 unsigned &NumBytes, bool IsVarArg) { 01313 // Issue CallSEQ_END. 01314 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01315 TII.get(TII.getCallFrameDestroyOpcode())) 01316 .addImm(NumBytes).addImm(0); 01317 01318 // Next, generate a copy to obtain the return value. 01319 // FIXME: No multi-register return values yet, though I don't foresee 01320 // any real difficulties there. 
01321 if (RetVT != MVT::isVoid) { 01322 SmallVector<CCValAssign, 16> RVLocs; 01323 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context); 01324 CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); 01325 CCValAssign &VA = RVLocs[0]; 01326 assert(RVLocs.size() == 1 && "No support for multi-reg return values!"); 01327 assert(VA.isRegLoc() && "Can only return in registers!"); 01328 01329 MVT DestVT = VA.getValVT(); 01330 MVT CopyVT = DestVT; 01331 01332 // Ints smaller than a register still arrive in a full 64-bit 01333 // register, so make sure we recognize this. 01334 if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) 01335 CopyVT = MVT::i64; 01336 01337 unsigned SourcePhysReg = VA.getLocReg(); 01338 unsigned ResultReg = 0; 01339 01340 if (RetVT == CopyVT) { 01341 const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT); 01342 ResultReg = createResultReg(CpyRC); 01343 01344 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01345 TII.get(TargetOpcode::COPY), ResultReg) 01346 .addReg(SourcePhysReg); 01347 01348 // If necessary, round the floating result to single precision. 01349 } else if (CopyVT == MVT::f64) { 01350 ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); 01351 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), 01352 ResultReg).addReg(SourcePhysReg); 01353 01354 // If only the low half of a general register is needed, generate 01355 // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be 01356 // used along the fast-isel path (not lowered), and downstream logic 01357 // also doesn't like a direct subreg copy on a physical reg.) 01358 } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) { 01359 ResultReg = createResultReg(&PPC::GPRCRegClass); 01360 // Convert physical register from G8RC to GPRC. 
01361 SourcePhysReg -= PPC::X0 - PPC::R0; 01362 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01363 TII.get(TargetOpcode::COPY), ResultReg) 01364 .addReg(SourcePhysReg); 01365 } 01366 01367 assert(ResultReg && "ResultReg unset!"); 01368 UsedRegs.push_back(SourcePhysReg); 01369 updateValueMap(I, ResultReg); 01370 } 01371 } 01372 01373 // Attempt to fast-select a call instruction. 01374 bool PPCFastISel::SelectCall(const Instruction *I) { 01375 const CallInst *CI = cast<CallInst>(I); 01376 const Value *Callee = CI->getCalledValue(); 01377 01378 // Can't handle inline asm. 01379 if (isa<InlineAsm>(Callee)) 01380 return false; 01381 01382 // Allow SelectionDAG isel to handle tail calls. 01383 if (CI->isTailCall()) 01384 return false; 01385 01386 // Obtain calling convention. 01387 ImmutableCallSite CS(CI); 01388 CallingConv::ID CC = CS.getCallingConv(); 01389 01390 PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); 01391 FunctionType *FTy = cast<FunctionType>(PT->getElementType()); 01392 bool IsVarArg = FTy->isVarArg(); 01393 01394 // Not ready for varargs yet. 01395 if (IsVarArg) 01396 return false; 01397 01398 // Handle simple calls for now, with legal return types and 01399 // those that can be extended. 01400 Type *RetTy = I->getType(); 01401 MVT RetVT; 01402 if (RetTy->isVoidTy()) 01403 RetVT = MVT::isVoid; 01404 else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && 01405 RetVT != MVT::i8) 01406 return false; 01407 01408 // FIXME: No multi-register return values yet. 
01409 if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 && 01410 RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 && 01411 RetVT != MVT::f64) { 01412 SmallVector<CCValAssign, 16> RVLocs; 01413 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context); 01414 CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); 01415 if (RVLocs.size() > 1) 01416 return false; 01417 } 01418 01419 // Bail early if more than 8 arguments, as we only currently 01420 // handle arguments passed in registers. 01421 unsigned NumArgs = CS.arg_size(); 01422 if (NumArgs > 8) 01423 return false; 01424 01425 // Set up the argument vectors. 01426 SmallVector<Value*, 8> Args; 01427 SmallVector<unsigned, 8> ArgRegs; 01428 SmallVector<MVT, 8> ArgVTs; 01429 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; 01430 01431 Args.reserve(NumArgs); 01432 ArgRegs.reserve(NumArgs); 01433 ArgVTs.reserve(NumArgs); 01434 ArgFlags.reserve(NumArgs); 01435 01436 for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end(); 01437 II != IE; ++II) { 01438 // FIXME: ARM does something for intrinsic calls here, check into that. 01439 01440 unsigned AttrIdx = II - CS.arg_begin() + 1; 01441 01442 // Only handle easy calls for now. It would be reasonably easy 01443 // to handle <= 8-byte structures passed ByVal in registers, but we 01444 // have to ensure they are right-justified in the register. 
01445 if (CS.paramHasAttr(AttrIdx, Attribute::InReg) || 01446 CS.paramHasAttr(AttrIdx, Attribute::StructRet) || 01447 CS.paramHasAttr(AttrIdx, Attribute::Nest) || 01448 CS.paramHasAttr(AttrIdx, Attribute::ByVal)) 01449 return false; 01450 01451 ISD::ArgFlagsTy Flags; 01452 if (CS.paramHasAttr(AttrIdx, Attribute::SExt)) 01453 Flags.setSExt(); 01454 if (CS.paramHasAttr(AttrIdx, Attribute::ZExt)) 01455 Flags.setZExt(); 01456 01457 Type *ArgTy = (*II)->getType(); 01458 MVT ArgVT; 01459 if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8) 01460 return false; 01461 01462 if (ArgVT.isVector()) 01463 return false; 01464 01465 unsigned Arg = getRegForValue(*II); 01466 if (Arg == 0) 01467 return false; 01468 01469 unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); 01470 Flags.setOrigAlign(OriginalAlignment); 01471 01472 Args.push_back(*II); 01473 ArgRegs.push_back(Arg); 01474 ArgVTs.push_back(ArgVT); 01475 ArgFlags.push_back(Flags); 01476 } 01477 01478 // Process the arguments. 01479 SmallVector<unsigned, 8> RegArgs; 01480 unsigned NumBytes; 01481 01482 if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, 01483 RegArgs, CC, NumBytes, IsVarArg)) 01484 return false; 01485 01486 // FIXME: No handling for function pointers yet. This requires 01487 // implementing the function descriptor (OPD) setup. 01488 const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); 01489 if (!GV) 01490 return false; 01491 01492 // Build direct call with NOP for TOC restore. 01493 // FIXME: We can and should optimize away the NOP for local calls. 01494 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01495 TII.get(PPC::BL8_NOP)); 01496 // Add callee. 01497 MIB.addGlobalAddress(GV); 01498 01499 // Add implicit physical register uses to the call. 01500 for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) 01501 MIB.addReg(RegArgs[II], RegState::Implicit); 01502 01503 // Direct calls in the ELFv2 ABI need the TOC register live into the call. 
01504 if (PPCSubTarget->isELFv2ABI()) 01505 MIB.addReg(PPC::X2, RegState::Implicit); 01506 01507 // Add a register mask with the call-preserved registers. Proper 01508 // defs for return values will be added by setPhysRegsDeadExcept(). 01509 MIB.addRegMask(TRI.getCallPreservedMask(CC)); 01510 01511 // Finish off the call including any return values. 01512 SmallVector<unsigned, 4> UsedRegs; 01513 finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg); 01514 01515 // Set all unused physregs defs as dead. 01516 static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); 01517 01518 return true; 01519 } 01520 01521 // Attempt to fast-select a return instruction. 01522 bool PPCFastISel::SelectRet(const Instruction *I) { 01523 01524 if (!FuncInfo.CanLowerReturn) 01525 return false; 01526 01527 const ReturnInst *Ret = cast<ReturnInst>(I); 01528 const Function &F = *I->getParent()->getParent(); 01529 01530 // Build a list of return value registers. 01531 SmallVector<unsigned, 4> RetRegs; 01532 CallingConv::ID CC = F.getCallingConv(); 01533 01534 if (Ret->getNumOperands() > 0) { 01535 SmallVector<ISD::OutputArg, 4> Outs; 01536 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); 01537 01538 // Analyze operands of the call, assigning locations to each operand. 01539 SmallVector<CCValAssign, 16> ValLocs; 01540 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context); 01541 CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS); 01542 const Value *RV = Ret->getOperand(0); 01543 01544 // FIXME: Only one output register for now. 01545 if (ValLocs.size() > 1) 01546 return false; 01547 01548 // Special case for returning a constant integer of any size. 01549 // Materialize the constant as an i64 and copy it to the return 01550 // register. We still need to worry about properly extending the sign. E.g: 01551 // If the constant has only one bit, it means it is a boolean. 
Therefore 01552 // we can't use PPCMaterializeInt because it extends the sign which will 01553 // cause negations of the returned value to be incorrect as they are 01554 // implemented as the flip of the least significant bit. 01555 if (isa<ConstantInt>(*RV)) { 01556 const Constant *C = cast<Constant>(RV); 01557 01558 CCValAssign &VA = ValLocs[0]; 01559 01560 unsigned RetReg = VA.getLocReg(); 01561 unsigned SrcReg = PPCMaterializeInt(C, MVT::i64, 01562 VA.getLocInfo() == CCValAssign::SExt); 01563 01564 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01565 TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); 01566 01567 RetRegs.push_back(RetReg); 01568 01569 } else { 01570 unsigned Reg = getRegForValue(RV); 01571 01572 if (Reg == 0) 01573 return false; 01574 01575 // Copy the result values into the output registers. 01576 for (unsigned i = 0; i < ValLocs.size(); ++i) { 01577 01578 CCValAssign &VA = ValLocs[i]; 01579 assert(VA.isRegLoc() && "Can only return in registers!"); 01580 RetRegs.push_back(VA.getLocReg()); 01581 unsigned SrcReg = Reg + VA.getValNo(); 01582 01583 EVT RVEVT = TLI.getValueType(RV->getType()); 01584 if (!RVEVT.isSimple()) 01585 return false; 01586 MVT RVVT = RVEVT.getSimpleVT(); 01587 MVT DestVT = VA.getLocVT(); 01588 01589 if (RVVT != DestVT && RVVT != MVT::i8 && 01590 RVVT != MVT::i16 && RVVT != MVT::i32) 01591 return false; 01592 01593 if (RVVT != DestVT) { 01594 switch (VA.getLocInfo()) { 01595 default: 01596 llvm_unreachable("Unknown loc info!"); 01597 case CCValAssign::Full: 01598 llvm_unreachable("Full value assign but types don't match?"); 01599 case CCValAssign::AExt: 01600 case CCValAssign::ZExt: { 01601 const TargetRegisterClass *RC = 01602 (DestVT == MVT::i64) ? 
&PPC::G8RCRegClass : &PPC::GPRCRegClass; 01603 unsigned TmpReg = createResultReg(RC); 01604 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true)) 01605 return false; 01606 SrcReg = TmpReg; 01607 break; 01608 } 01609 case CCValAssign::SExt: { 01610 const TargetRegisterClass *RC = 01611 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 01612 unsigned TmpReg = createResultReg(RC); 01613 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false)) 01614 return false; 01615 SrcReg = TmpReg; 01616 break; 01617 } 01618 } 01619 } 01620 01621 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01622 TII.get(TargetOpcode::COPY), RetRegs[i]) 01623 .addReg(SrcReg); 01624 } 01625 } 01626 } 01627 01628 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01629 TII.get(PPC::BLR)); 01630 01631 for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) 01632 MIB.addReg(RetRegs[i], RegState::Implicit); 01633 01634 return true; 01635 } 01636 01637 // Attempt to emit an integer extend of SrcReg into DestReg. Both 01638 // signed and zero extensions are supported. Return false if we 01639 // can't handle it. 01640 bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 01641 unsigned DestReg, bool IsZExt) { 01642 if (DestVT != MVT::i32 && DestVT != MVT::i64) 01643 return false; 01644 if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32) 01645 return false; 01646 01647 // Signed extensions use EXTSB, EXTSH, EXTSW. 01648 if (!IsZExt) { 01649 unsigned Opc; 01650 if (SrcVT == MVT::i8) 01651 Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64; 01652 else if (SrcVT == MVT::i16) 01653 Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64; 01654 else { 01655 assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??"); 01656 Opc = PPC::EXTSW_32_64; 01657 } 01658 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) 01659 .addReg(SrcReg); 01660 01661 // Unsigned 32-bit extensions use RLWINM. 
01662 } else if (DestVT == MVT::i32) { 01663 unsigned MB; 01664 if (SrcVT == MVT::i8) 01665 MB = 24; 01666 else { 01667 assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??"); 01668 MB = 16; 01669 } 01670 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM), 01671 DestReg) 01672 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31); 01673 01674 // Unsigned 64-bit extensions use RLDICL (with a 32-bit source). 01675 } else { 01676 unsigned MB; 01677 if (SrcVT == MVT::i8) 01678 MB = 56; 01679 else if (SrcVT == MVT::i16) 01680 MB = 48; 01681 else 01682 MB = 32; 01683 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01684 TII.get(PPC::RLDICL_32_64), DestReg) 01685 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB); 01686 } 01687 01688 return true; 01689 } 01690 01691 // Attempt to fast-select an indirect branch instruction. 01692 bool PPCFastISel::SelectIndirectBr(const Instruction *I) { 01693 unsigned AddrReg = getRegForValue(I->getOperand(0)); 01694 if (AddrReg == 0) 01695 return false; 01696 01697 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8)) 01698 .addReg(AddrReg); 01699 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8)); 01700 01701 const IndirectBrInst *IB = cast<IndirectBrInst>(I); 01702 for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) 01703 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]); 01704 01705 return true; 01706 } 01707 01708 // Attempt to fast-select an integer truncate instruction. 
01709 bool PPCFastISel::SelectTrunc(const Instruction *I) { 01710 Value *Src = I->getOperand(0); 01711 EVT SrcVT = TLI.getValueType(Src->getType(), true); 01712 EVT DestVT = TLI.getValueType(I->getType(), true); 01713 01714 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16) 01715 return false; 01716 01717 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) 01718 return false; 01719 01720 unsigned SrcReg = getRegForValue(Src); 01721 if (!SrcReg) 01722 return false; 01723 01724 // The only interesting case is when we need to switch register classes. 01725 if (SrcVT == MVT::i64) { 01726 unsigned ResultReg = createResultReg(&PPC::GPRCRegClass); 01727 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01728 TII.get(TargetOpcode::COPY), 01729 ResultReg).addReg(SrcReg, 0, PPC::sub_32); 01730 SrcReg = ResultReg; 01731 } 01732 01733 updateValueMap(I, SrcReg); 01734 return true; 01735 } 01736 01737 // Attempt to fast-select an integer extend instruction. 01738 bool PPCFastISel::SelectIntExt(const Instruction *I) { 01739 Type *DestTy = I->getType(); 01740 Value *Src = I->getOperand(0); 01741 Type *SrcTy = Src->getType(); 01742 01743 bool IsZExt = isa<ZExtInst>(I); 01744 unsigned SrcReg = getRegForValue(Src); 01745 if (!SrcReg) return false; 01746 01747 EVT SrcEVT, DestEVT; 01748 SrcEVT = TLI.getValueType(SrcTy, true); 01749 DestEVT = TLI.getValueType(DestTy, true); 01750 if (!SrcEVT.isSimple()) 01751 return false; 01752 if (!DestEVT.isSimple()) 01753 return false; 01754 01755 MVT SrcVT = SrcEVT.getSimpleVT(); 01756 MVT DestVT = DestEVT.getSimpleVT(); 01757 01758 // If we know the register class needed for the result of this 01759 // instruction, use it. Otherwise pick the register class of the 01760 // correct size that does not contain X0/R0, since we don't know 01761 // whether downstream uses permit that assignment. 01762 unsigned AssignedReg = FuncInfo.ValueMap[I]; 01763 const TargetRegisterClass *RC = 01764 (AssignedReg ? 
MRI.getRegClass(AssignedReg) : 01765 (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass : 01766 &PPC::GPRC_and_GPRC_NOR0RegClass)); 01767 unsigned ResultReg = createResultReg(RC); 01768 01769 if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt)) 01770 return false; 01771 01772 updateValueMap(I, ResultReg); 01773 return true; 01774 } 01775 01776 // Attempt to fast-select an instruction that wasn't handled by 01777 // the table-generated machinery. 01778 bool PPCFastISel::fastSelectInstruction(const Instruction *I) { 01779 01780 switch (I->getOpcode()) { 01781 case Instruction::Load: 01782 return SelectLoad(I); 01783 case Instruction::Store: 01784 return SelectStore(I); 01785 case Instruction::Br: 01786 return SelectBranch(I); 01787 case Instruction::IndirectBr: 01788 return SelectIndirectBr(I); 01789 case Instruction::FPExt: 01790 return SelectFPExt(I); 01791 case Instruction::FPTrunc: 01792 return SelectFPTrunc(I); 01793 case Instruction::SIToFP: 01794 return SelectIToFP(I, /*IsSigned*/ true); 01795 case Instruction::UIToFP: 01796 return SelectIToFP(I, /*IsSigned*/ false); 01797 case Instruction::FPToSI: 01798 return SelectFPToI(I, /*IsSigned*/ true); 01799 case Instruction::FPToUI: 01800 return SelectFPToI(I, /*IsSigned*/ false); 01801 case Instruction::Add: 01802 return SelectBinaryIntOp(I, ISD::ADD); 01803 case Instruction::Or: 01804 return SelectBinaryIntOp(I, ISD::OR); 01805 case Instruction::Sub: 01806 return SelectBinaryIntOp(I, ISD::SUB); 01807 case Instruction::Call: 01808 if (dyn_cast<IntrinsicInst>(I)) 01809 return false; 01810 return SelectCall(I); 01811 case Instruction::Ret: 01812 return SelectRet(I); 01813 case Instruction::Trunc: 01814 return SelectTrunc(I); 01815 case Instruction::ZExt: 01816 case Instruction::SExt: 01817 return SelectIntExt(I); 01818 // Here add other flavors of Instruction::XXX that automated 01819 // cases don't catch. For example, switches are terminators 01820 // that aren't yet handled. 
01821 default: 01822 break; 01823 } 01824 return false; 01825 } 01826 01827 // Materialize a floating-point constant into a register, and return 01828 // the register number (or zero if we failed to handle it). 01829 unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { 01830 // No plans to handle long double here. 01831 if (VT != MVT::f32 && VT != MVT::f64) 01832 return 0; 01833 01834 // All FP constants are loaded from the constant pool. 01835 unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); 01836 assert(Align > 0 && "Unexpectedly missing alignment information!"); 01837 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); 01838 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 01839 CodeModel::Model CModel = TM.getCodeModel(); 01840 01841 MachineMemOperand *MMO = 01842 FuncInfo.MF->getMachineMemOperand( 01843 MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, 01844 (VT == MVT::f32) ? 4 : 8, Align); 01845 01846 unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD; 01847 unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); 01848 01849 // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)). 01850 if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) { 01851 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT), 01852 TmpReg) 01853 .addConstantPoolIndex(Idx).addReg(PPC::X2); 01854 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) 01855 .addImm(0).addReg(TmpReg).addMemOperand(MMO); 01856 } else { 01857 // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)). 01858 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), 01859 TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx); 01860 // But for large code model, we must generate a LDtocL followed 01861 // by the LF[SD]. 
01862 if (CModel == CodeModel::Large) { 01863 unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); 01864 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), 01865 TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg); 01866 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) 01867 .addImm(0).addReg(TmpReg2); 01868 } else 01869 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) 01870 .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO) 01871 .addReg(TmpReg) 01872 .addMemOperand(MMO); 01873 } 01874 01875 return DestReg; 01876 } 01877 01878 // Materialize the address of a global value into a register, and return 01879 // the register number (or zero if we failed to handle it). 01880 unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { 01881 assert(VT == MVT::i64 && "Non-address!"); 01882 const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass; 01883 unsigned DestReg = createResultReg(RC); 01884 01885 // Global values may be plain old object addresses, TLS object 01886 // addresses, constant pool entries, or jump tables. How we generate 01887 // code for these may depend on small, medium, or large code model. 01888 CodeModel::Model CModel = TM.getCodeModel(); 01889 01890 // FIXME: Jump tables are not yet required because fast-isel doesn't 01891 // handle switches; if that changes, we need them as well. For now, 01892 // what follows assumes everything's a generic (or TLS) global address. 01893 01894 // FIXME: We don't yet handle the complexity of TLS. 01895 if (GV->isThreadLocal()) 01896 return 0; 01897 01898 // For small code model, generate a simple TOC load. 
01899 if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) 01900 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc), 01901 DestReg) 01902 .addGlobalAddress(GV) 01903 .addReg(PPC::X2); 01904 else { 01905 // If the address is an externally defined symbol, a symbol with common 01906 // or externally available linkage, a non-local function address, or a 01907 // jump table address (not yet needed), or if we are generating code 01908 // for large code model, we generate: 01909 // LDtocL(GV, ADDIStocHA(%X2, GV)) 01910 // Otherwise we generate: 01911 // ADDItocL(ADDIStocHA(%X2, GV), GV) 01912 // Either way, start with the ADDIStocHA: 01913 unsigned HighPartReg = createResultReg(RC); 01914 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), 01915 HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); 01916 01917 // If/when switches are implemented, jump tables should be handled 01918 // on the "if" path here. 01919 if (CModel == CodeModel::Large || 01920 (GV->getType()->getElementType()->isFunctionTy() && 01921 (GV->isDeclaration() || GV->isWeakForLinker())) || 01922 GV->isDeclaration() || GV->hasCommonLinkage() || 01923 GV->hasAvailableExternallyLinkage()) 01924 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), 01925 DestReg).addGlobalAddress(GV).addReg(HighPartReg); 01926 else 01927 // Otherwise generate the ADDItocL. 01928 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL), 01929 DestReg).addReg(HighPartReg).addGlobalAddress(GV); 01930 } 01931 01932 return DestReg; 01933 } 01934 01935 // Materialize a 32-bit integer constant into a register, and return 01936 // the register number (or zero if we failed to handle it). 
01937 unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm, 01938 const TargetRegisterClass *RC) { 01939 unsigned Lo = Imm & 0xFFFF; 01940 unsigned Hi = (Imm >> 16) & 0xFFFF; 01941 01942 unsigned ResultReg = createResultReg(RC); 01943 bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass); 01944 01945 if (isInt<16>(Imm)) 01946 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01947 TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg) 01948 .addImm(Imm); 01949 else if (Lo) { 01950 // Both Lo and Hi have nonzero bits. 01951 unsigned TmpReg = createResultReg(RC); 01952 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01953 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg) 01954 .addImm(Hi); 01955 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01956 TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg) 01957 .addReg(TmpReg).addImm(Lo); 01958 } else 01959 // Just Hi bits. 01960 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 01961 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg) 01962 .addImm(Hi); 01963 01964 return ResultReg; 01965 } 01966 01967 // Materialize a 64-bit integer constant into a register, and return 01968 // the register number (or zero if we failed to handle it). 01969 unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, 01970 const TargetRegisterClass *RC) { 01971 unsigned Remainder = 0; 01972 unsigned Shift = 0; 01973 01974 // If the value doesn't fit in 32 bits, see if we can shift it 01975 // so that it fits in 32 bits. 01976 if (!isInt<32>(Imm)) { 01977 Shift = countTrailingZeros<uint64_t>(Imm); 01978 int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; 01979 01980 if (isInt<32>(ImmSh)) 01981 Imm = ImmSh; 01982 else { 01983 Remainder = Imm; 01984 Shift = 32; 01985 Imm >>= 32; 01986 } 01987 } 01988 01989 // Handle the high-order 32 bits (if shifted) or the whole 32 bits 01990 // (if not shifted). 
01991 unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC); 01992 if (!Shift) 01993 return TmpReg1; 01994 01995 // If upper 32 bits were not zero, we've built them and need to shift 01996 // them into place. 01997 unsigned TmpReg2; 01998 if (Imm) { 01999 TmpReg2 = createResultReg(RC); 02000 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR), 02001 TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift); 02002 } else 02003 TmpReg2 = TmpReg1; 02004 02005 unsigned TmpReg3, Hi, Lo; 02006 if ((Hi = (Remainder >> 16) & 0xFFFF)) { 02007 TmpReg3 = createResultReg(RC); 02008 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8), 02009 TmpReg3).addReg(TmpReg2).addImm(Hi); 02010 } else 02011 TmpReg3 = TmpReg2; 02012 02013 if ((Lo = Remainder & 0xFFFF)) { 02014 unsigned ResultReg = createResultReg(RC); 02015 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8), 02016 ResultReg).addReg(TmpReg3).addImm(Lo); 02017 return ResultReg; 02018 } 02019 02020 return TmpReg3; 02021 } 02022 02023 02024 // Materialize an integer constant into a register, and return 02025 // the register number (or zero if we failed to handle it). 02026 unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT, 02027 bool UseSExt) { 02028 // If we're using CR bit registers for i1 values, handle that as a special 02029 // case first. 02030 if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { 02031 const ConstantInt *CI = cast<ConstantInt>(C); 02032 unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); 02033 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02034 TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg); 02035 return ImmReg; 02036 } 02037 02038 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && 02039 VT != MVT::i8 && VT != MVT::i1) 02040 return 0; 02041 02042 const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass : 02043 &PPC::GPRCRegClass); 02044 02045 // If the constant is in range, use a load-immediate. 
02046 const ConstantInt *CI = cast<ConstantInt>(C); 02047 if (isInt<16>(CI->getSExtValue())) { 02048 unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; 02049 unsigned ImmReg = createResultReg(RC); 02050 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) 02051 .addImm( (UseSExt) ? CI->getSExtValue() : CI->getZExtValue() ); 02052 return ImmReg; 02053 } 02054 02055 // Construct the constant piecewise. 02056 int64_t Imm = CI->getZExtValue(); 02057 02058 if (VT == MVT::i64) 02059 return PPCMaterialize64BitInt(Imm, RC); 02060 else if (VT == MVT::i32) 02061 return PPCMaterialize32BitInt(Imm, RC); 02062 02063 return 0; 02064 } 02065 02066 // Materialize a constant into a register, and return the register 02067 // number (or zero if we failed to handle it). 02068 unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) { 02069 EVT CEVT = TLI.getValueType(C->getType(), true); 02070 02071 // Only handle simple types. 02072 if (!CEVT.isSimple()) return 0; 02073 MVT VT = CEVT.getSimpleVT(); 02074 02075 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 02076 return PPCMaterializeFP(CFP, VT); 02077 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 02078 return PPCMaterializeGV(GV, VT); 02079 else if (isa<ConstantInt>(C)) 02080 return PPCMaterializeInt(C, VT); 02081 02082 return 0; 02083 } 02084 02085 // Materialize the address created by an alloca into a register, and 02086 // return the register number (or zero if we failed to handle it). 02087 unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) { 02088 // Don't handle dynamic allocas. 
02089 if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; 02090 02091 MVT VT; 02092 if (!isLoadTypeLegal(AI->getType(), VT)) return 0; 02093 02094 DenseMap<const AllocaInst*, int>::iterator SI = 02095 FuncInfo.StaticAllocaMap.find(AI); 02096 02097 if (SI != FuncInfo.StaticAllocaMap.end()) { 02098 unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); 02099 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8), 02100 ResultReg).addFrameIndex(SI->second).addImm(0); 02101 return ResultReg; 02102 } 02103 02104 return 0; 02105 } 02106 02107 // Fold loads into extends when possible. 02108 // FIXME: We can have multiple redundant extend/trunc instructions 02109 // following a load. The folding only picks up one. Extend this 02110 // to check subsequent instructions for the same pattern and remove 02111 // them. Thus ResultReg should be the def reg for the last redundant 02112 // instruction in a chain, and all intervening instructions can be 02113 // removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll 02114 // to add ELF64-NOT: rldicl to the appropriate tests when this works. 02115 bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, 02116 const LoadInst *LI) { 02117 // Verify we have a legal type before going any further. 02118 MVT VT; 02119 if (!isLoadTypeLegal(LI->getType(), VT)) 02120 return false; 02121 02122 // Combine load followed by zero- or sign-extend. 
02123 bool IsZExt = false; 02124 switch(MI->getOpcode()) { 02125 default: 02126 return false; 02127 02128 case PPC::RLDICL: 02129 case PPC::RLDICL_32_64: { 02130 IsZExt = true; 02131 unsigned MB = MI->getOperand(3).getImm(); 02132 if ((VT == MVT::i8 && MB <= 56) || 02133 (VT == MVT::i16 && MB <= 48) || 02134 (VT == MVT::i32 && MB <= 32)) 02135 break; 02136 return false; 02137 } 02138 02139 case PPC::RLWINM: 02140 case PPC::RLWINM8: { 02141 IsZExt = true; 02142 unsigned MB = MI->getOperand(3).getImm(); 02143 if ((VT == MVT::i8 && MB <= 24) || 02144 (VT == MVT::i16 && MB <= 16)) 02145 break; 02146 return false; 02147 } 02148 02149 case PPC::EXTSB: 02150 case PPC::EXTSB8: 02151 case PPC::EXTSB8_32_64: 02152 /* There is no sign-extending load-byte instruction. */ 02153 return false; 02154 02155 case PPC::EXTSH: 02156 case PPC::EXTSH8: 02157 case PPC::EXTSH8_32_64: { 02158 if (VT != MVT::i16 && VT != MVT::i8) 02159 return false; 02160 break; 02161 } 02162 02163 case PPC::EXTSW: 02164 case PPC::EXTSW_32_64: { 02165 if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8) 02166 return false; 02167 break; 02168 } 02169 } 02170 02171 // See if we can handle this address. 02172 Address Addr; 02173 if (!PPCComputeAddress(LI->getOperand(0), Addr)) 02174 return false; 02175 02176 unsigned ResultReg = MI->getOperand(0).getReg(); 02177 02178 if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt)) 02179 return false; 02180 02181 MI->eraseFromParent(); 02182 return true; 02183 } 02184 02185 // Attempt to lower call arguments in a faster way than done by 02186 // the selection DAG code. 02187 bool PPCFastISel::fastLowerArguments() { 02188 // Defer to normal argument lowering for now. It's reasonably 02189 // efficient. Consider doing something like ARM to handle the 02190 // case where all args fit in registers, no varargs, no float 02191 // or vector args. 02192 return false; 02193 } 02194 02195 // Handle materializing integer constants into a register. 
This is not 02196 // automatically generated for PowerPC, so must be explicitly created here. 02197 unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { 02198 02199 if (Opc != ISD::Constant) 02200 return 0; 02201 02202 // If we're using CR bit registers for i1 values, handle that as a special 02203 // case first. 02204 if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { 02205 unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); 02206 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 02207 TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg); 02208 return ImmReg; 02209 } 02210 02211 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && 02212 VT != MVT::i8 && VT != MVT::i1) 02213 return 0; 02214 02215 const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass : 02216 &PPC::GPRCRegClass); 02217 if (VT == MVT::i64) 02218 return PPCMaterialize64BitInt(Imm, RC); 02219 else 02220 return PPCMaterialize32BitInt(Imm, RC); 02221 } 02222 02223 // Override for ADDI and ADDI8 to set the correct register class 02224 // on RHS operand 0. The automatic infrastructure naively assumes 02225 // GPRC for i32 and G8RC for i64; the concept of "no R0" is lost 02226 // for these cases. At the moment, none of the other automatically 02227 // generated RI instructions require special treatment. However, once 02228 // SelectSelect is implemented, "isel" requires similar handling. 02229 // 02230 // Also be conservative about the output register class. Avoid 02231 // assigning R0 or X0 to the output register for GPRC and G8RC 02232 // register classes, as any such result could be used in ADDI, etc., 02233 // where those regs have another meaning. 
02234 unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode, 02235 const TargetRegisterClass *RC, 02236 unsigned Op0, bool Op0IsKill, 02237 uint64_t Imm) { 02238 if (MachineInstOpcode == PPC::ADDI) 02239 MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass); 02240 else if (MachineInstOpcode == PPC::ADDI8) 02241 MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass); 02242 02243 const TargetRegisterClass *UseRC = 02244 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : 02245 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); 02246 02247 return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, 02248 Op0, Op0IsKill, Imm); 02249 } 02250 02251 // Override for instructions with one register operand to avoid use of 02252 // R0/X0. The automatic infrastructure isn't aware of the context so 02253 // we must be conservative. 02254 unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode, 02255 const TargetRegisterClass* RC, 02256 unsigned Op0, bool Op0IsKill) { 02257 const TargetRegisterClass *UseRC = 02258 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : 02259 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); 02260 02261 return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill); 02262 } 02263 02264 // Override for instructions with two register operands to avoid use 02265 // of R0/X0. The automatic infrastructure isn't aware of the context 02266 // so we must be conservative. 02267 unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, 02268 const TargetRegisterClass* RC, 02269 unsigned Op0, bool Op0IsKill, 02270 unsigned Op1, bool Op1IsKill) { 02271 const TargetRegisterClass *UseRC = 02272 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : 02273 (RC == &PPC::G8RCRegClass ? 
&PPC::G8RC_and_G8RC_NOX0RegClass : RC)); 02274 02275 return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill, 02276 Op1, Op1IsKill); 02277 } 02278 02279 namespace llvm { 02280 // Create the fast instruction selector for PowerPC64 ELF. 02281 FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo, 02282 const TargetLibraryInfo *LibInfo) { 02283 const TargetMachine &TM = FuncInfo.MF->getTarget(); 02284 02285 // Only available on 64-bit ELF for now. 02286 const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); 02287 if (Subtarget->isPPC64() && Subtarget->isSVR4ABI()) 02288 return new PPCFastISel(FuncInfo, LibInfo); 02289 02290 return nullptr; 02291 } 02292 }