// LLVM API Documentation
00001 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file contains a printer that converts from our internal representation 00011 // of machine-dependent LLVM code to NVPTX assembly language. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "NVPTXAsmPrinter.h" 00016 #include "InstPrinter/NVPTXInstPrinter.h" 00017 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 00018 #include "NVPTX.h" 00019 #include "NVPTXInstrInfo.h" 00020 #include "NVPTXMachineFunctionInfo.h" 00021 #include "NVPTXMCExpr.h" 00022 #include "NVPTXRegisterInfo.h" 00023 #include "NVPTXTargetMachine.h" 00024 #include "NVPTXUtilities.h" 00025 #include "cl_common_defines.h" 00026 #include "llvm/ADT/StringExtras.h" 00027 #include "llvm/Analysis/ConstantFolding.h" 00028 #include "llvm/CodeGen/Analysis.h" 00029 #include "llvm/CodeGen/MachineFrameInfo.h" 00030 #include "llvm/CodeGen/MachineModuleInfo.h" 00031 #include "llvm/CodeGen/MachineRegisterInfo.h" 00032 #include "llvm/IR/DebugInfo.h" 00033 #include "llvm/IR/DerivedTypes.h" 00034 #include "llvm/IR/Function.h" 00035 #include "llvm/IR/GlobalVariable.h" 00036 #include "llvm/IR/Mangler.h" 00037 #include "llvm/IR/Module.h" 00038 #include "llvm/IR/Operator.h" 00039 #include "llvm/MC/MCStreamer.h" 00040 #include "llvm/MC/MCSymbol.h" 00041 #include "llvm/Support/CommandLine.h" 00042 #include "llvm/Support/ErrorHandling.h" 00043 #include "llvm/Support/FormattedStream.h" 00044 #include "llvm/Support/Path.h" 00045 #include "llvm/Support/TargetRegistry.h" 00046 #include "llvm/Support/TimeValue.h" 00047 #include "llvm/Target/TargetLoweringObjectFile.h" 00048 #include 
<sstream> 00049 using namespace llvm; 00050 00051 #define DEPOTNAME "__local_depot" 00052 00053 static cl::opt<bool> 00054 EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden, 00055 cl::desc("NVPTX Specific: Emit Line numbers even without -G"), 00056 cl::init(true)); 00057 00058 static cl::opt<bool> 00059 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden, 00060 cl::desc("NVPTX Specific: Emit source line in ptx file"), 00061 cl::init(false)); 00062 00063 namespace { 00064 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 00065 /// depends. 00066 void DiscoverDependentGlobals(const Value *V, 00067 DenseSet<const GlobalVariable *> &Globals) { 00068 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 00069 Globals.insert(GV); 00070 else { 00071 if (const User *U = dyn_cast<User>(V)) { 00072 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 00073 DiscoverDependentGlobals(U->getOperand(i), Globals); 00074 } 00075 } 00076 } 00077 } 00078 00079 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 00080 /// instances to be emitted, but only after any dependents have been added 00081 /// first. 00082 void VisitGlobalVariableForEmission( 00083 const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order, 00084 DenseSet<const GlobalVariable *> &Visited, 00085 DenseSet<const GlobalVariable *> &Visiting) { 00086 // Have we already visited this one? 00087 if (Visited.count(GV)) 00088 return; 00089 00090 // Do we have a circular dependency? 
00091 if (Visiting.count(GV)) 00092 report_fatal_error("Circular dependency found in global variable set"); 00093 00094 // Start visiting this global 00095 Visiting.insert(GV); 00096 00097 // Make sure we visit all dependents first 00098 DenseSet<const GlobalVariable *> Others; 00099 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 00100 DiscoverDependentGlobals(GV->getOperand(i), Others); 00101 00102 for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), 00103 E = Others.end(); 00104 I != E; ++I) 00105 VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 00106 00107 // Now we can visit ourself 00108 Order.push_back(GV); 00109 Visited.insert(GV); 00110 Visiting.erase(GV); 00111 } 00112 } 00113 00114 // @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we 00115 // cannot just link to the existing version. 00116 /// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 00117 /// 00118 using namespace nvptx; 00119 const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { 00120 MCContext &Ctx = AP.OutContext; 00121 00122 if (CV->isNullValue() || isa<UndefValue>(CV)) 00123 return MCConstantExpr::Create(0, Ctx); 00124 00125 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 00126 return MCConstantExpr::Create(CI->getZExtValue(), Ctx); 00127 00128 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) 00129 return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx); 00130 00131 if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) 00132 return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); 00133 00134 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 00135 if (!CE) 00136 llvm_unreachable("Unknown constant value to lower!"); 00137 00138 switch (CE->getOpcode()) { 00139 default: 00140 // If the code isn't optimized, there may be outstanding folding 00141 // opportunities. Attempt to fold the expression using DataLayout as a 00142 // last resort before giving up. 
00143 if (Constant *C = ConstantFoldConstantExpression( 00144 CE, AP.TM.getSubtargetImpl()->getDataLayout())) 00145 if (C != CE) 00146 return LowerConstant(C, AP); 00147 00148 // Otherwise report the problem to the user. 00149 { 00150 std::string S; 00151 raw_string_ostream OS(S); 00152 OS << "Unsupported expression in static initializer: "; 00153 CE->printAsOperand(OS, /*PrintType=*/ false, 00154 !AP.MF ? nullptr : AP.MF->getFunction()->getParent()); 00155 report_fatal_error(OS.str()); 00156 } 00157 case Instruction::AddrSpaceCast: { 00158 // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be 00159 // handled by the generic() logic in the MCExpr printer 00160 PointerType *DstTy = cast<PointerType>(CE->getType()); 00161 PointerType *SrcTy = cast<PointerType>(CE->getOperand(0)->getType()); 00162 if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) { 00163 return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP); 00164 } 00165 std::string S; 00166 raw_string_ostream OS(S); 00167 OS << "Unsupported expression in static initializer: "; 00168 CE->printAsOperand(OS, /*PrintType=*/ false, 00169 !AP.MF ? nullptr : AP.MF->getFunction()->getParent()); 00170 report_fatal_error(OS.str()); 00171 } 00172 case Instruction::GetElementPtr: { 00173 const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout(); 00174 // Generate a symbolic expression for the byte address 00175 APInt OffsetAI(TD.getPointerSizeInBits(), 0); 00176 cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); 00177 00178 const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); 00179 if (!OffsetAI) 00180 return Base; 00181 00182 int64_t Offset = OffsetAI.getSExtValue(); 00183 return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), 00184 Ctx); 00185 } 00186 00187 case Instruction::Trunc: 00188 // We emit the value and depend on the assembler to truncate the generated 00189 // expression properly. 
This is important for differences between 00190 // blockaddress labels. Since the two labels are in the same function, it 00191 // is reasonable to treat their delta as a 32-bit value. 00192 // FALL THROUGH. 00193 case Instruction::BitCast: 00194 return LowerConstant(CE->getOperand(0), AP); 00195 00196 case Instruction::IntToPtr: { 00197 const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout(); 00198 // Handle casts to pointers by changing them into casts to the appropriate 00199 // integer type. This promotes constant folding and simplifies this code. 00200 Constant *Op = CE->getOperand(0); 00201 Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), 00202 false /*ZExt*/); 00203 return LowerConstant(Op, AP); 00204 } 00205 00206 case Instruction::PtrToInt: { 00207 const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout(); 00208 // Support only foldable casts to/from pointers that can be eliminated by 00209 // changing the pointer to the appropriately sized integer type. 00210 Constant *Op = CE->getOperand(0); 00211 Type *Ty = CE->getType(); 00212 00213 const MCExpr *OpExpr = LowerConstant(Op, AP); 00214 00215 // We can emit the pointer value into this slot if the slot is an 00216 // integer slot equal to the size of the pointer. 00217 if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) 00218 return OpExpr; 00219 00220 // Otherwise the pointer is smaller than the resultant integer, mask off 00221 // the high bits so we are sure to get a proper truncation if the input is 00222 // a constant expr. 00223 unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); 00224 const MCExpr *MaskExpr = 00225 MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx); 00226 return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); 00227 } 00228 00229 // The MC library also has a right-shift operator, but it isn't consistently 00230 // signed or unsigned between different targets. 
00231 case Instruction::Add: 00232 case Instruction::Sub: 00233 case Instruction::Mul: 00234 case Instruction::SDiv: 00235 case Instruction::SRem: 00236 case Instruction::Shl: 00237 case Instruction::And: 00238 case Instruction::Or: 00239 case Instruction::Xor: { 00240 const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); 00241 const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); 00242 switch (CE->getOpcode()) { 00243 default: 00244 llvm_unreachable("Unknown binary operator constant cast expr"); 00245 case Instruction::Add: 00246 return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); 00247 case Instruction::Sub: 00248 return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); 00249 case Instruction::Mul: 00250 return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); 00251 case Instruction::SDiv: 00252 return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); 00253 case Instruction::SRem: 00254 return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); 00255 case Instruction::Shl: 00256 return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); 00257 case Instruction::And: 00258 return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); 00259 case Instruction::Or: 00260 return MCBinaryExpr::CreateOr(LHS, RHS, Ctx); 00261 case Instruction::Xor: 00262 return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); 00263 } 00264 } 00265 } 00266 } 00267 00268 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { 00269 if (!EmitLineNumbers) 00270 return; 00271 if (ignoreLoc(MI)) 00272 return; 00273 00274 DebugLoc curLoc = MI.getDebugLoc(); 00275 00276 if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) 00277 return; 00278 00279 if (prevDebugLoc == curLoc) 00280 return; 00281 00282 prevDebugLoc = curLoc; 00283 00284 if (curLoc.isUnknown()) 00285 return; 00286 00287 const MachineFunction *MF = MI.getParent()->getParent(); 00288 //const TargetMachine &TM = MF->getTarget(); 00289 00290 const LLVMContext &ctx = MF->getFunction()->getContext(); 00291 DIScope Scope(curLoc.getScope(ctx)); 00292 00293 assert((!Scope || Scope.isScope()) && 00294 
"Scope of a DebugLoc should be null or a DIScope."); 00295 if (!Scope) 00296 return; 00297 00298 StringRef fileName(Scope.getFilename()); 00299 StringRef dirName(Scope.getDirectory()); 00300 SmallString<128> FullPathName = dirName; 00301 if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 00302 sys::path::append(FullPathName, fileName); 00303 fileName = FullPathName.str(); 00304 } 00305 00306 if (filenameMap.find(fileName.str()) == filenameMap.end()) 00307 return; 00308 00309 // Emit the line from the source file. 00310 if (InterleaveSrc) 00311 this->emitSrcInText(fileName.str(), curLoc.getLine()); 00312 00313 std::stringstream temp; 00314 temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() 00315 << " " << curLoc.getCol(); 00316 OutStreamer.EmitRawText(Twine(temp.str().c_str())); 00317 } 00318 00319 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 00320 SmallString<128> Str; 00321 raw_svector_ostream OS(Str); 00322 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 00323 emitLineNumberAsDotLoc(*MI); 00324 00325 MCInst Inst; 00326 lowerToMCInst(MI, Inst); 00327 EmitToStreamer(OutStreamer, Inst); 00328 } 00329 00330 // Handle symbol backtracking for targets that do not support image handles 00331 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, 00332 unsigned OpNo, MCOperand &MCOp) { 00333 const MachineOperand &MO = MI->getOperand(OpNo); 00334 const MCInstrDesc &MCID = MI->getDesc(); 00335 00336 if (MCID.TSFlags & NVPTXII::IsTexFlag) { 00337 // This is a texture fetch, so operand 4 is a texref and operand 5 is 00338 // a samplerref 00339 if (OpNo == 4 && MO.isImm()) { 00340 lowerImageHandleSymbol(MO.getImm(), MCOp); 00341 return true; 00342 } 00343 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { 00344 lowerImageHandleSymbol(MO.getImm(), MCOp); 00345 return true; 00346 } 00347 00348 return false; 00349 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { 00350 unsigned 
VecSize = 00351 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); 00352 00353 // For a surface load of vector size N, the Nth operand will be the surfref 00354 if (OpNo == VecSize && MO.isImm()) { 00355 lowerImageHandleSymbol(MO.getImm(), MCOp); 00356 return true; 00357 } 00358 00359 return false; 00360 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { 00361 // This is a surface store, so operand 0 is a surfref 00362 if (OpNo == 0 && MO.isImm()) { 00363 lowerImageHandleSymbol(MO.getImm(), MCOp); 00364 return true; 00365 } 00366 00367 return false; 00368 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { 00369 // This is a query, so operand 1 is a surfref/texref 00370 if (OpNo == 1 && MO.isImm()) { 00371 lowerImageHandleSymbol(MO.getImm(), MCOp); 00372 return true; 00373 } 00374 00375 return false; 00376 } 00377 00378 return false; 00379 } 00380 00381 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { 00382 // Ewwww 00383 TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget()); 00384 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM); 00385 const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>(); 00386 const char *Sym = MFI->getImageHandleSymbol(Index); 00387 std::string *SymNamePtr = 00388 nvTM.getManagedStrPool()->getManagedString(Sym); 00389 MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol( 00390 StringRef(SymNamePtr->c_str()))); 00391 } 00392 00393 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 00394 OutMI.setOpcode(MI->getOpcode()); 00395 const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>(); 00396 00397 // Special: Do not mangle symbol operand of CALL_PROTOTYPE 00398 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { 00399 const MachineOperand &MO = MI->getOperand(0); 00400 OutMI.addOperand(GetSymbolRef( 00401 OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName())))); 00402 return; 00403 } 00404 00405 for (unsigned i = 0, e = 
MI->getNumOperands(); i != e; ++i) { 00406 const MachineOperand &MO = MI->getOperand(i); 00407 00408 MCOperand MCOp; 00409 if (!ST.hasImageHandles()) { 00410 if (lowerImageHandleOperand(MI, i, MCOp)) { 00411 OutMI.addOperand(MCOp); 00412 continue; 00413 } 00414 } 00415 00416 if (lowerOperand(MO, MCOp)) 00417 OutMI.addOperand(MCOp); 00418 } 00419 } 00420 00421 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 00422 MCOperand &MCOp) { 00423 switch (MO.getType()) { 00424 default: llvm_unreachable("unknown operand type"); 00425 case MachineOperand::MO_Register: 00426 MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg())); 00427 break; 00428 case MachineOperand::MO_Immediate: 00429 MCOp = MCOperand::CreateImm(MO.getImm()); 00430 break; 00431 case MachineOperand::MO_MachineBasicBlock: 00432 MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( 00433 MO.getMBB()->getSymbol(), OutContext)); 00434 break; 00435 case MachineOperand::MO_ExternalSymbol: 00436 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); 00437 break; 00438 case MachineOperand::MO_GlobalAddress: 00439 MCOp = GetSymbolRef(getSymbol(MO.getGlobal())); 00440 break; 00441 case MachineOperand::MO_FPImmediate: { 00442 const ConstantFP *Cnt = MO.getFPImm(); 00443 APFloat Val = Cnt->getValueAPF(); 00444 00445 switch (Cnt->getType()->getTypeID()) { 00446 default: report_fatal_error("Unsupported FP type"); break; 00447 case Type::FloatTyID: 00448 MCOp = MCOperand::CreateExpr( 00449 NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext)); 00450 break; 00451 case Type::DoubleTyID: 00452 MCOp = MCOperand::CreateExpr( 00453 NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext)); 00454 break; 00455 } 00456 break; 00457 } 00458 } 00459 return true; 00460 } 00461 00462 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 00463 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 00464 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 00465 00466 DenseMap<unsigned, 
unsigned> &RegMap = VRegMapping[RC]; 00467 unsigned RegNum = RegMap[Reg]; 00468 00469 // Encode the register class in the upper 4 bits 00470 // Must be kept in sync with NVPTXInstPrinter::printRegName 00471 unsigned Ret = 0; 00472 if (RC == &NVPTX::Int1RegsRegClass) { 00473 Ret = (1 << 28); 00474 } else if (RC == &NVPTX::Int16RegsRegClass) { 00475 Ret = (2 << 28); 00476 } else if (RC == &NVPTX::Int32RegsRegClass) { 00477 Ret = (3 << 28); 00478 } else if (RC == &NVPTX::Int64RegsRegClass) { 00479 Ret = (4 << 28); 00480 } else if (RC == &NVPTX::Float32RegsRegClass) { 00481 Ret = (5 << 28); 00482 } else if (RC == &NVPTX::Float64RegsRegClass) { 00483 Ret = (6 << 28); 00484 } else { 00485 report_fatal_error("Bad register class"); 00486 } 00487 00488 // Insert the vreg number 00489 Ret |= (RegNum & 0x0FFFFFFF); 00490 return Ret; 00491 } else { 00492 // Some special-use registers are actually physical registers. 00493 // Encode this as the register class ID of 0 and the real register ID. 00494 return Reg & 0x0FFFFFFF; 00495 } 00496 } 00497 00498 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { 00499 const MCExpr *Expr; 00500 Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, 00501 OutContext); 00502 return MCOperand::CreateExpr(Expr); 00503 } 00504 00505 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 00506 const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); 00507 const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); 00508 00509 Type *Ty = F->getReturnType(); 00510 00511 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 00512 00513 if (Ty->getTypeID() == Type::VoidTyID) 00514 return; 00515 00516 O << " ("; 00517 00518 if (isABI) { 00519 if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) { 00520 unsigned size = 0; 00521 if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { 00522 size = ITy->getBitWidth(); 00523 if (size < 32) 00524 size = 32; 00525 } else { 00526 
assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 00527 size = Ty->getPrimitiveSizeInBits(); 00528 } 00529 00530 O << ".param .b" << size << " func_retval0"; 00531 } else if (isa<PointerType>(Ty)) { 00532 O << ".param .b" << TLI->getPointerTy().getSizeInBits() 00533 << " func_retval0"; 00534 } else { 00535 if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { 00536 unsigned totalsz = TD->getTypeAllocSize(Ty); 00537 unsigned retAlignment = 0; 00538 if (!llvm::getAlign(*F, 0, retAlignment)) 00539 retAlignment = TD->getABITypeAlignment(Ty); 00540 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 00541 << "]"; 00542 } else 00543 assert(false && "Unknown return type"); 00544 } 00545 } else { 00546 SmallVector<EVT, 16> vtparts; 00547 ComputeValueVTs(*TLI, Ty, vtparts); 00548 unsigned idx = 0; 00549 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 00550 unsigned elems = 1; 00551 EVT elemtype = vtparts[i]; 00552 if (vtparts[i].isVector()) { 00553 elems = vtparts[i].getVectorNumElements(); 00554 elemtype = vtparts[i].getVectorElementType(); 00555 } 00556 00557 for (unsigned j = 0, je = elems; j != je; ++j) { 00558 unsigned sz = elemtype.getSizeInBits(); 00559 if (elemtype.isInteger() && (sz < 32)) 00560 sz = 32; 00561 O << ".reg .b" << sz << " func_retval" << idx; 00562 if (j < je - 1) 00563 O << ", "; 00564 ++idx; 00565 } 00566 if (i < e - 1) 00567 O << ", "; 00568 } 00569 } 00570 O << ") "; 00571 return; 00572 } 00573 00574 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 00575 raw_ostream &O) { 00576 const Function *F = MF.getFunction(); 00577 printReturnValStr(F, O); 00578 } 00579 00580 void NVPTXAsmPrinter::EmitFunctionEntryLabel() { 00581 SmallString<128> Str; 00582 raw_svector_ostream O(Str); 00583 00584 if (!GlobalsEmitted) { 00585 emitGlobals(*MF->getFunction()->getParent()); 00586 GlobalsEmitted = true; 00587 } 00588 00589 // Set up 00590 MRI = &MF->getRegInfo(); 00591 F = 
MF->getFunction(); 00592 emitLinkageDirective(F, O); 00593 if (llvm::isKernelFunction(*F)) 00594 O << ".entry "; 00595 else { 00596 O << ".func "; 00597 printReturnValStr(*MF, O); 00598 } 00599 00600 O << *CurrentFnSym; 00601 00602 emitFunctionParamList(*MF, O); 00603 00604 if (llvm::isKernelFunction(*F)) 00605 emitKernelFunctionDirectives(*F, O); 00606 00607 OutStreamer.EmitRawText(O.str()); 00608 00609 prevDebugLoc = DebugLoc(); 00610 } 00611 00612 void NVPTXAsmPrinter::EmitFunctionBodyStart() { 00613 VRegMapping.clear(); 00614 OutStreamer.EmitRawText(StringRef("{\n")); 00615 setAndEmitFunctionVirtualRegisters(*MF); 00616 00617 SmallString<128> Str; 00618 raw_svector_ostream O(Str); 00619 emitDemotedVars(MF->getFunction(), O); 00620 OutStreamer.EmitRawText(O.str()); 00621 } 00622 00623 void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 00624 OutStreamer.EmitRawText(StringRef("}\n")); 00625 VRegMapping.clear(); 00626 } 00627 00628 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { 00629 unsigned RegNo = MI->getOperand(0).getReg(); 00630 const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); 00631 if (TRI->isVirtualRegister(RegNo)) { 00632 OutStreamer.AddComment(Twine("implicit-def: ") + 00633 getVirtualRegisterName(RegNo)); 00634 } else { 00635 OutStreamer.AddComment( 00636 Twine("implicit-def: ") + 00637 TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo)); 00638 } 00639 OutStreamer.AddBlankLine(); 00640 } 00641 00642 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 00643 raw_ostream &O) const { 00644 // If the NVVM IR has some of reqntid* specified, then output 00645 // the reqntid directive, and set the unspecified ones to 1. 00646 // If none of reqntid* is specified, don't output reqntid directive. 
00647 unsigned reqntidx, reqntidy, reqntidz; 00648 bool specified = false; 00649 if (llvm::getReqNTIDx(F, reqntidx) == false) 00650 reqntidx = 1; 00651 else 00652 specified = true; 00653 if (llvm::getReqNTIDy(F, reqntidy) == false) 00654 reqntidy = 1; 00655 else 00656 specified = true; 00657 if (llvm::getReqNTIDz(F, reqntidz) == false) 00658 reqntidz = 1; 00659 else 00660 specified = true; 00661 00662 if (specified) 00663 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 00664 << "\n"; 00665 00666 // If the NVVM IR has some of maxntid* specified, then output 00667 // the maxntid directive, and set the unspecified ones to 1. 00668 // If none of maxntid* is specified, don't output maxntid directive. 00669 unsigned maxntidx, maxntidy, maxntidz; 00670 specified = false; 00671 if (llvm::getMaxNTIDx(F, maxntidx) == false) 00672 maxntidx = 1; 00673 else 00674 specified = true; 00675 if (llvm::getMaxNTIDy(F, maxntidy) == false) 00676 maxntidy = 1; 00677 else 00678 specified = true; 00679 if (llvm::getMaxNTIDz(F, maxntidz) == false) 00680 maxntidz = 1; 00681 else 00682 specified = true; 00683 00684 if (specified) 00685 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 00686 << "\n"; 00687 00688 unsigned mincta; 00689 if (llvm::getMinCTASm(F, mincta)) 00690 O << ".minnctapersm " << mincta << "\n"; 00691 } 00692 00693 std::string 00694 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { 00695 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 00696 00697 std::string Name; 00698 raw_string_ostream NameStr(Name); 00699 00700 VRegRCMap::const_iterator I = VRegMapping.find(RC); 00701 assert(I != VRegMapping.end() && "Bad register class"); 00702 const DenseMap<unsigned, unsigned> &RegMap = I->second; 00703 00704 VRegMap::const_iterator VI = RegMap.find(Reg); 00705 assert(VI != RegMap.end() && "Bad virtual register"); 00706 unsigned MappedVR = VI->second; 00707 00708 NameStr << getNVPTXRegClassStr(RC) << MappedVR; 00709 00710 
NameStr.flush(); 00711 return Name; 00712 } 00713 00714 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, 00715 raw_ostream &O) { 00716 O << getVirtualRegisterName(vr); 00717 } 00718 00719 void NVPTXAsmPrinter::printVecModifiedImmediate( 00720 const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 00721 static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; 00722 int Imm = (int) MO.getImm(); 00723 if (0 == strcmp(Modifier, "vecelem")) 00724 O << "_" << vecelem[Imm]; 00725 else if (0 == strcmp(Modifier, "vecv4comm1")) { 00726 if ((Imm < 0) || (Imm > 3)) 00727 O << "//"; 00728 } else if (0 == strcmp(Modifier, "vecv4comm2")) { 00729 if ((Imm < 4) || (Imm > 7)) 00730 O << "//"; 00731 } else if (0 == strcmp(Modifier, "vecv4pos")) { 00732 if (Imm < 0) 00733 Imm = 0; 00734 O << "_" << vecelem[Imm % 4]; 00735 } else if (0 == strcmp(Modifier, "vecv2comm1")) { 00736 if ((Imm < 0) || (Imm > 1)) 00737 O << "//"; 00738 } else if (0 == strcmp(Modifier, "vecv2comm2")) { 00739 if ((Imm < 2) || (Imm > 3)) 00740 O << "//"; 00741 } else if (0 == strcmp(Modifier, "vecv2pos")) { 00742 if (Imm < 0) 00743 Imm = 0; 00744 O << "_" << vecelem[Imm % 2]; 00745 } else 00746 llvm_unreachable("Unknown Modifier on immediate operand"); 00747 } 00748 00749 00750 00751 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 00752 00753 emitLinkageDirective(F, O); 00754 if (llvm::isKernelFunction(*F)) 00755 O << ".entry "; 00756 else 00757 O << ".func "; 00758 printReturnValStr(F, O); 00759 O << *getSymbol(F) << "\n"; 00760 emitFunctionParamList(F, O); 00761 O << ";\n"; 00762 } 00763 00764 static bool usedInGlobalVarDef(const Constant *C) { 00765 if (!C) 00766 return false; 00767 00768 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 00769 if (GV->getName().str() == "llvm.used") 00770 return false; 00771 return true; 00772 } 00773 00774 for (const User *U : C->users()) 00775 if (const Constant *C = dyn_cast<Constant>(U)) 00776 if 
(usedInGlobalVarDef(C)) 00777 return true; 00778 00779 return false; 00780 } 00781 00782 static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 00783 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 00784 if (othergv->getName().str() == "llvm.used") 00785 return true; 00786 } 00787 00788 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 00789 if (instr->getParent() && instr->getParent()->getParent()) { 00790 const Function *curFunc = instr->getParent()->getParent(); 00791 if (oneFunc && (curFunc != oneFunc)) 00792 return false; 00793 oneFunc = curFunc; 00794 return true; 00795 } else 00796 return false; 00797 } 00798 00799 if (const MDNode *md = dyn_cast<MDNode>(U)) 00800 if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || 00801 (md->getName().str() == "llvm.dbg.sp"))) 00802 return true; 00803 00804 for (const User *UU : U->users()) 00805 if (usedInOneFunc(UU, oneFunc) == false) 00806 return false; 00807 00808 return true; 00809 } 00810 00811 /* Find out if a global variable can be demoted to local scope. 00812 * Currently, this is valid for CUDA shared variables, which have local 00813 * scope and global lifetime. So the conditions to check are : 00814 * 1. Is the global variable in shared address space? 00815 * 2. Does it have internal linkage? 00816 * 3. Is the global variable referenced only in one function? 
00817 */ 00818 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 00819 if (gv->hasInternalLinkage() == false) 00820 return false; 00821 const PointerType *Pty = gv->getType(); 00822 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 00823 return false; 00824 00825 const Function *oneFunc = nullptr; 00826 00827 bool flag = usedInOneFunc(gv, oneFunc); 00828 if (flag == false) 00829 return false; 00830 if (!oneFunc) 00831 return false; 00832 f = oneFunc; 00833 return true; 00834 } 00835 00836 static bool useFuncSeen(const Constant *C, 00837 llvm::DenseMap<const Function *, bool> &seenMap) { 00838 for (const User *U : C->users()) { 00839 if (const Constant *cu = dyn_cast<Constant>(U)) { 00840 if (useFuncSeen(cu, seenMap)) 00841 return true; 00842 } else if (const Instruction *I = dyn_cast<Instruction>(U)) { 00843 const BasicBlock *bb = I->getParent(); 00844 if (!bb) 00845 continue; 00846 const Function *caller = bb->getParent(); 00847 if (!caller) 00848 continue; 00849 if (seenMap.find(caller) != seenMap.end()) 00850 return true; 00851 } 00852 } 00853 return false; 00854 } 00855 00856 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 00857 llvm::DenseMap<const Function *, bool> seenMap; 00858 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 00859 const Function *F = FI; 00860 00861 if (F->isDeclaration()) { 00862 if (F->use_empty()) 00863 continue; 00864 if (F->getIntrinsicID()) 00865 continue; 00866 emitDeclaration(F, O); 00867 continue; 00868 } 00869 for (const User *U : F->users()) { 00870 if (const Constant *C = dyn_cast<Constant>(U)) { 00871 if (usedInGlobalVarDef(C)) { 00872 // The use is in the initialization of a global variable 00873 // that is a function pointer, so print a declaration 00874 // for the original function 00875 emitDeclaration(F, O); 00876 break; 00877 } 00878 // Emit a declaration of this function if the function that 00879 // uses this constant expr has 
already been seen. 00880 if (useFuncSeen(C, seenMap)) { 00881 emitDeclaration(F, O); 00882 break; 00883 } 00884 } 00885 00886 if (!isa<Instruction>(U)) 00887 continue; 00888 const Instruction *instr = cast<Instruction>(U); 00889 const BasicBlock *bb = instr->getParent(); 00890 if (!bb) 00891 continue; 00892 const Function *caller = bb->getParent(); 00893 if (!caller) 00894 continue; 00895 00896 // If a caller has already been seen, then the caller is 00897 // appearing in the module before the callee. so print out 00898 // a declaration for the callee. 00899 if (seenMap.find(caller) != seenMap.end()) { 00900 emitDeclaration(F, O); 00901 break; 00902 } 00903 } 00904 seenMap[F] = true; 00905 } 00906 } 00907 00908 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 00909 DebugInfoFinder DbgFinder; 00910 DbgFinder.processModule(M); 00911 00912 unsigned i = 1; 00913 for (DICompileUnit DIUnit : DbgFinder.compile_units()) { 00914 StringRef Filename(DIUnit.getFilename()); 00915 StringRef Dirname(DIUnit.getDirectory()); 00916 SmallString<128> FullPathName = Dirname; 00917 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 00918 sys::path::append(FullPathName, Filename); 00919 Filename = FullPathName.str(); 00920 } 00921 if (filenameMap.find(Filename.str()) != filenameMap.end()) 00922 continue; 00923 filenameMap[Filename.str()] = i; 00924 OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); 00925 ++i; 00926 } 00927 00928 for (DISubprogram SP : DbgFinder.subprograms()) { 00929 StringRef Filename(SP.getFilename()); 00930 StringRef Dirname(SP.getDirectory()); 00931 SmallString<128> FullPathName = Dirname; 00932 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 00933 sys::path::append(FullPathName, Filename); 00934 Filename = FullPathName.str(); 00935 } 00936 if (filenameMap.find(Filename.str()) != filenameMap.end()) 00937 continue; 00938 filenameMap[Filename.str()] = i; 00939 ++i; 00940 } 00941 } 00942 00943 bool 
NVPTXAsmPrinter::doInitialization(Module &M) { 00944 00945 SmallString<128> Str1; 00946 raw_svector_ostream OS1(Str1); 00947 00948 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 00949 MMI->AnalyzeModule(M); 00950 00951 // We need to call the parent's one explicitly. 00952 //bool Result = AsmPrinter::doInitialization(M); 00953 00954 // Initialize TargetLoweringObjectFile. 00955 const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) 00956 .Initialize(OutContext, TM); 00957 00958 Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout()); 00959 00960 // Emit header before any dwarf directives are emitted below. 00961 emitHeader(M, OS1); 00962 OutStreamer.EmitRawText(OS1.str()); 00963 00964 // Already commented out 00965 //bool Result = AsmPrinter::doInitialization(M); 00966 00967 // Emit module-level inline asm if it exists. 00968 if (!M.getModuleInlineAsm().empty()) { 00969 OutStreamer.AddComment("Start of file scope inline assembly"); 00970 OutStreamer.AddBlankLine(); 00971 OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm())); 00972 OutStreamer.AddBlankLine(); 00973 OutStreamer.AddComment("End of file scope inline assembly"); 00974 OutStreamer.AddBlankLine(); 00975 } 00976 00977 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 00978 recordAndEmitFilenames(M); 00979 00980 GlobalsEmitted = false; 00981 00982 return false; // success 00983 } 00984 00985 void NVPTXAsmPrinter::emitGlobals(const Module &M) { 00986 SmallString<128> Str2; 00987 raw_svector_ostream OS2(Str2); 00988 00989 emitDeclarations(M, OS2); 00990 00991 // As ptxas does not support forward references of globals, we need to first 00992 // sort the list of module-level globals in def-use order. We visit each 00993 // global variable in order, and ensure that we emit it *after* its dependent 00994 // globals. We use a little extra memory maintaining both a set and a list to 00995 // have fast searches while maintaining a strict ordering. 
00996 SmallVector<const GlobalVariable *, 8> Globals; 00997 DenseSet<const GlobalVariable *> GVVisited; 00998 DenseSet<const GlobalVariable *> GVVisiting; 00999 01000 // Visit each global variable, in order 01001 for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); 01002 I != E; ++I) 01003 VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); 01004 01005 assert(GVVisited.size() == M.getGlobalList().size() && 01006 "Missed a global variable"); 01007 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 01008 01009 // Print out module-level global variables in proper order 01010 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 01011 printModuleLevelGV(Globals[i], OS2); 01012 01013 OS2 << '\n'; 01014 01015 OutStreamer.EmitRawText(OS2.str()); 01016 } 01017 01018 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { 01019 O << "//\n"; 01020 O << "// Generated by LLVM NVPTX Back-End\n"; 01021 O << "//\n"; 01022 O << "\n"; 01023 01024 unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); 01025 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 01026 01027 O << ".target "; 01028 O << nvptxSubtarget.getTargetName(); 01029 01030 if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) 01031 O << ", texmode_independent"; 01032 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 01033 if (!nvptxSubtarget.hasDouble()) 01034 O << ", map_f64_to_f32"; 01035 } 01036 01037 if (MAI->doesSupportDebugInformation()) 01038 O << ", debug"; 01039 01040 O << "\n"; 01041 01042 O << ".address_size "; 01043 if (nvptxSubtarget.is64Bit()) 01044 O << "64"; 01045 else 01046 O << "32"; 01047 O << "\n"; 01048 01049 O << "\n"; 01050 } 01051 01052 bool NVPTXAsmPrinter::doFinalization(Module &M) { 01053 01054 // If we did not emit any functions, then the global declarations have not 01055 // yet been emitted. 
01056 if (!GlobalsEmitted) { 01057 emitGlobals(M); 01058 GlobalsEmitted = true; 01059 } 01060 01061 // XXX Temproarily remove global variables so that doFinalization() will not 01062 // emit them again (global variables are emitted at beginning). 01063 01064 Module::GlobalListType &global_list = M.getGlobalList(); 01065 int i, n = global_list.size(); 01066 GlobalVariable **gv_array = new GlobalVariable *[n]; 01067 01068 // first, back-up GlobalVariable in gv_array 01069 i = 0; 01070 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 01071 I != E; ++I) 01072 gv_array[i++] = &*I; 01073 01074 // second, empty global_list 01075 while (!global_list.empty()) 01076 global_list.remove(global_list.begin()); 01077 01078 // call doFinalization 01079 bool ret = AsmPrinter::doFinalization(M); 01080 01081 // now we restore global variables 01082 for (i = 0; i < n; i++) 01083 global_list.insert(global_list.end(), gv_array[i]); 01084 01085 clearAnnotationCache(&M); 01086 01087 delete[] gv_array; 01088 return ret; 01089 01090 //bool Result = AsmPrinter::doFinalization(M); 01091 // Instead of calling the parents doFinalization, we may 01092 // clone parents doFinalization and customize here. 01093 // Currently, we if NVISA out the EmitGlobals() in 01094 // parent's doFinalization, which is too intrusive. 01095 // 01096 // Same for the doInitialization. 01097 //return Result; 01098 } 01099 01100 // This function emits appropriate linkage directives for 01101 // functions and global variables. 01102 // 01103 // extern function declaration -> .extern 01104 // extern function definition -> .visible 01105 // external global variable with init -> .visible 01106 // external without init -> .extern 01107 // appending -> not allowed, assert. 01108 // for any linkage other than 01109 // internal, private, linker_private, 01110 // linker_private_weak, linker_private_weak_def_auto, 01111 // we emit -> .weak. 
01112 01113 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 01114 raw_ostream &O) { 01115 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 01116 if (V->hasExternalLinkage()) { 01117 if (isa<GlobalVariable>(V)) { 01118 const GlobalVariable *GVar = cast<GlobalVariable>(V); 01119 if (GVar) { 01120 if (GVar->hasInitializer()) 01121 O << ".visible "; 01122 else 01123 O << ".extern "; 01124 } 01125 } else if (V->isDeclaration()) 01126 O << ".extern "; 01127 else 01128 O << ".visible "; 01129 } else if (V->hasAppendingLinkage()) { 01130 std::string msg; 01131 msg.append("Error: "); 01132 msg.append("Symbol "); 01133 if (V->hasName()) 01134 msg.append(V->getName().str()); 01135 msg.append("has unsupported appending linkage type"); 01136 llvm_unreachable(msg.c_str()); 01137 } else if (!V->hasInternalLinkage() && 01138 !V->hasPrivateLinkage()) { 01139 O << ".weak "; 01140 } 01141 } 01142 } 01143 01144 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 01145 raw_ostream &O, 01146 bool processDemoted) { 01147 01148 // Skip meta data 01149 if (GVar->hasSection()) { 01150 if (GVar->getSection() == StringRef("llvm.metadata")) 01151 return; 01152 } 01153 01154 // Skip LLVM intrinsic global variables 01155 if (GVar->getName().startswith("llvm.") || 01156 GVar->getName().startswith("nvvm.")) 01157 return; 01158 01159 const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); 01160 01161 // GlobalVariables are always constant pointers themselves. 
01162 const PointerType *PTy = GVar->getType(); 01163 Type *ETy = PTy->getElementType(); 01164 01165 if (GVar->hasExternalLinkage()) { 01166 if (GVar->hasInitializer()) 01167 O << ".visible "; 01168 else 01169 O << ".extern "; 01170 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() || 01171 GVar->hasAvailableExternallyLinkage() || 01172 GVar->hasCommonLinkage()) { 01173 O << ".weak "; 01174 } 01175 01176 if (llvm::isTexture(*GVar)) { 01177 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 01178 return; 01179 } 01180 01181 if (llvm::isSurface(*GVar)) { 01182 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 01183 return; 01184 } 01185 01186 if (GVar->isDeclaration()) { 01187 // (extern) declarations, no definition or initializer 01188 // Currently the only known declaration is for an automatic __local 01189 // (.shared) promoted to global. 01190 emitPTXGlobalVariable(GVar, O); 01191 O << ";\n"; 01192 return; 01193 } 01194 01195 if (llvm::isSampler(*GVar)) { 01196 O << ".global .samplerref " << llvm::getSamplerName(*GVar); 01197 01198 const Constant *Initializer = nullptr; 01199 if (GVar->hasInitializer()) 01200 Initializer = GVar->getInitializer(); 01201 const ConstantInt *CI = nullptr; 01202 if (Initializer) 01203 CI = dyn_cast<ConstantInt>(Initializer); 01204 if (CI) { 01205 unsigned sample = CI->getZExtValue(); 01206 01207 O << " = { "; 01208 01209 for (int i = 0, 01210 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 01211 i < 3; i++) { 01212 O << "addr_mode_" << i << " = "; 01213 switch (addr) { 01214 case 0: 01215 O << "wrap"; 01216 break; 01217 case 1: 01218 O << "clamp_to_border"; 01219 break; 01220 case 2: 01221 O << "clamp_to_edge"; 01222 break; 01223 case 3: 01224 O << "wrap"; 01225 break; 01226 case 4: 01227 O << "mirror"; 01228 break; 01229 } 01230 O << ", "; 01231 } 01232 O << "filter_mode = "; 01233 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 01234 case 0: 01235 O << "nearest"; 
01236 break; 01237 case 1: 01238 O << "linear"; 01239 break; 01240 case 2: 01241 llvm_unreachable("Anisotropic filtering is not supported"); 01242 default: 01243 O << "nearest"; 01244 break; 01245 } 01246 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 01247 O << ", force_unnormalized_coords = 1"; 01248 } 01249 O << " }"; 01250 } 01251 01252 O << ";\n"; 01253 return; 01254 } 01255 01256 if (GVar->hasPrivateLinkage()) { 01257 01258 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 01259 return; 01260 01261 // FIXME - need better way (e.g. Metadata) to avoid generating this global 01262 if (!strncmp(GVar->getName().data(), "filename", 8)) 01263 return; 01264 if (GVar->use_empty()) 01265 return; 01266 } 01267 01268 const Function *demotedFunc = nullptr; 01269 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 01270 O << "// " << GVar->getName().str() << " has been demoted\n"; 01271 if (localDecls.find(demotedFunc) != localDecls.end()) 01272 localDecls[demotedFunc].push_back(GVar); 01273 else { 01274 std::vector<const GlobalVariable *> temp; 01275 temp.push_back(GVar); 01276 localDecls[demotedFunc] = temp; 01277 } 01278 return; 01279 } 01280 01281 O << "."; 01282 emitPTXAddressSpace(PTy->getAddressSpace(), O); 01283 01284 if (isManaged(*GVar)) { 01285 O << " .attribute(.managed)"; 01286 } 01287 01288 if (GVar->getAlignment() == 0) 01289 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 01290 else 01291 O << " .align " << GVar->getAlignment(); 01292 01293 if (ETy->isSingleValueType()) { 01294 O << " ."; 01295 // Special case: ABI requires that we use .u8 for predicates 01296 if (ETy->isIntegerTy(1)) 01297 O << "u8"; 01298 else 01299 O << getPTXFundamentalTypeStr(ETy, false); 01300 O << " "; 01301 O << *getSymbol(GVar); 01302 01303 // Ptx allows variable initilization only for constant and global state 01304 // spaces. 
01305 if (GVar->hasInitializer()) { 01306 if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 01307 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) { 01308 const Constant *Initializer = GVar->getInitializer(); 01309 // 'undef' is treated as there is no value spefied. 01310 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) { 01311 O << " = "; 01312 printScalarConstant(Initializer, O); 01313 } 01314 } else { 01315 // The frontend adds zero-initializer to variables that don't have an 01316 // initial value, so skip warning for this case. 01317 if (!GVar->getInitializer()->isNullValue()) { 01318 std::string warnMsg = "initial value of '" + GVar->getName().str() + 01319 "' is not allowed in addrspace(" + 01320 llvm::utostr_32(PTy->getAddressSpace()) + ")"; 01321 report_fatal_error(warnMsg.c_str()); 01322 } 01323 } 01324 } 01325 } else { 01326 unsigned int ElementSize = 0; 01327 01328 // Although PTX has direct support for struct type and array type and 01329 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 01330 // targets that support these high level field accesses. Structs, arrays 01331 // and vectors are lowered into arrays of bytes. 01332 switch (ETy->getTypeID()) { 01333 case Type::StructTyID: 01334 case Type::ArrayTyID: 01335 case Type::VectorTyID: 01336 ElementSize = TD->getTypeStoreSize(ETy); 01337 // Ptx allows variable initilization only for constant and 01338 // global state spaces. 
01339 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 01340 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 01341 GVar->hasInitializer()) { 01342 const Constant *Initializer = GVar->getInitializer(); 01343 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 01344 AggBuffer aggBuffer(ElementSize, O, *this); 01345 bufferAggregateConstant(Initializer, &aggBuffer); 01346 if (aggBuffer.numSymbols) { 01347 if (nvptxSubtarget.is64Bit()) { 01348 O << " .u64 " << *getSymbol(GVar) << "["; 01349 O << ElementSize / 8; 01350 } else { 01351 O << " .u32 " << *getSymbol(GVar) << "["; 01352 O << ElementSize / 4; 01353 } 01354 O << "]"; 01355 } else { 01356 O << " .b8 " << *getSymbol(GVar) << "["; 01357 O << ElementSize; 01358 O << "]"; 01359 } 01360 O << " = {"; 01361 aggBuffer.print(); 01362 O << "}"; 01363 } else { 01364 O << " .b8 " << *getSymbol(GVar); 01365 if (ElementSize) { 01366 O << "["; 01367 O << ElementSize; 01368 O << "]"; 01369 } 01370 } 01371 } else { 01372 O << " .b8 " << *getSymbol(GVar); 01373 if (ElementSize) { 01374 O << "["; 01375 O << ElementSize; 01376 O << "]"; 01377 } 01378 } 01379 break; 01380 default: 01381 llvm_unreachable("type not supported yet"); 01382 } 01383 01384 } 01385 O << ";\n"; 01386 } 01387 01388 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 01389 if (localDecls.find(f) == localDecls.end()) 01390 return; 01391 01392 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 01393 01394 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 01395 O << "\t// demoted variable\n\t"; 01396 printModuleLevelGV(gvars[i], O, true); 01397 } 01398 } 01399 01400 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 01401 raw_ostream &O) const { 01402 switch (AddressSpace) { 01403 case llvm::ADDRESS_SPACE_LOCAL: 01404 O << "local"; 01405 break; 01406 case llvm::ADDRESS_SPACE_GLOBAL: 01407 O << "global"; 01408 break; 01409 case llvm::ADDRESS_SPACE_CONST: 01410 O << 
"const"; 01411 break; 01412 case llvm::ADDRESS_SPACE_SHARED: 01413 O << "shared"; 01414 break; 01415 default: 01416 report_fatal_error("Bad address space found while emitting PTX"); 01417 break; 01418 } 01419 } 01420 01421 std::string 01422 NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { 01423 switch (Ty->getTypeID()) { 01424 default: 01425 llvm_unreachable("unexpected type"); 01426 break; 01427 case Type::IntegerTyID: { 01428 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 01429 if (NumBits == 1) 01430 return "pred"; 01431 else if (NumBits <= 64) { 01432 std::string name = "u"; 01433 return name + utostr(NumBits); 01434 } else { 01435 llvm_unreachable("Integer too large"); 01436 break; 01437 } 01438 break; 01439 } 01440 case Type::FloatTyID: 01441 return "f32"; 01442 case Type::DoubleTyID: 01443 return "f64"; 01444 case Type::PointerTyID: 01445 if (nvptxSubtarget.is64Bit()) 01446 if (useB4PTR) 01447 return "b64"; 01448 else 01449 return "u64"; 01450 else if (useB4PTR) 01451 return "b32"; 01452 else 01453 return "u32"; 01454 } 01455 llvm_unreachable("unexpected type"); 01456 return nullptr; 01457 } 01458 01459 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 01460 raw_ostream &O) { 01461 01462 const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); 01463 01464 // GlobalVariables are always constant pointers themselves. 
01465 const PointerType *PTy = GVar->getType(); 01466 Type *ETy = PTy->getElementType(); 01467 01468 O << "."; 01469 emitPTXAddressSpace(PTy->getAddressSpace(), O); 01470 if (GVar->getAlignment() == 0) 01471 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 01472 else 01473 O << " .align " << GVar->getAlignment(); 01474 01475 if (ETy->isSingleValueType()) { 01476 O << " ."; 01477 O << getPTXFundamentalTypeStr(ETy); 01478 O << " "; 01479 O << *getSymbol(GVar); 01480 return; 01481 } 01482 01483 int64_t ElementSize = 0; 01484 01485 // Although PTX has direct support for struct type and array type and LLVM IR 01486 // is very similar to PTX, the LLVM CodeGen does not support for targets that 01487 // support these high level field accesses. Structs and arrays are lowered 01488 // into arrays of bytes. 01489 switch (ETy->getTypeID()) { 01490 case Type::StructTyID: 01491 case Type::ArrayTyID: 01492 case Type::VectorTyID: 01493 ElementSize = TD->getTypeStoreSize(ETy); 01494 O << " .b8 " << *getSymbol(GVar) << "["; 01495 if (ElementSize) { 01496 O << itostr(ElementSize); 01497 } 01498 O << "]"; 01499 break; 01500 default: 01501 llvm_unreachable("type not supported yet"); 01502 } 01503 return; 01504 } 01505 01506 static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { 01507 if (Ty->isSingleValueType()) 01508 return TD->getPrefTypeAlignment(Ty); 01509 01510 const ArrayType *ATy = dyn_cast<ArrayType>(Ty); 01511 if (ATy) 01512 return getOpenCLAlignment(TD, ATy->getElementType()); 01513 01514 const VectorType *VTy = dyn_cast<VectorType>(Ty); 01515 if (VTy) { 01516 Type *ETy = VTy->getElementType(); 01517 unsigned int numE = VTy->getNumElements(); 01518 unsigned int alignE = TD->getPrefTypeAlignment(ETy); 01519 if (numE == 3) 01520 return 4 * alignE; 01521 else 01522 return numE * alignE; 01523 } 01524 01525 const StructType *STy = dyn_cast<StructType>(Ty); 01526 if (STy) { 01527 unsigned int alignStruct = 1; 01528 // Go through each element of the struct 
and find the 01529 // largest alignment. 01530 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 01531 Type *ETy = STy->getElementType(i); 01532 unsigned int align = getOpenCLAlignment(TD, ETy); 01533 if (align > alignStruct) 01534 alignStruct = align; 01535 } 01536 return alignStruct; 01537 } 01538 01539 const FunctionType *FTy = dyn_cast<FunctionType>(Ty); 01540 if (FTy) 01541 return TD->getPointerPrefAlignment(); 01542 return TD->getPrefTypeAlignment(Ty); 01543 } 01544 01545 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 01546 int paramIndex, raw_ostream &O) { 01547 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 01548 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) 01549 O << *getSymbol(I->getParent()) << "_param_" << paramIndex; 01550 else { 01551 std::string argName = I->getName(); 01552 const char *p = argName.c_str(); 01553 while (*p) { 01554 if (*p == '.') 01555 O << "_"; 01556 else 01557 O << *p; 01558 p++; 01559 } 01560 } 01561 } 01562 01563 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { 01564 Function::const_arg_iterator I, E; 01565 int i = 0; 01566 01567 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 01568 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { 01569 O << *CurrentFnSym << "_param_" << paramIndex; 01570 return; 01571 } 01572 01573 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { 01574 if (i == paramIndex) { 01575 printParamName(I, paramIndex, O); 01576 return; 01577 } 01578 } 01579 llvm_unreachable("paramIndex out of bound"); 01580 } 01581 01582 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 01583 const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); 01584 const AttributeSet &PAL = F->getAttributes(); 01585 const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); 01586 Function::const_arg_iterator I, E; 01587 unsigned paramIndex = 0; 01588 bool first = true; 01589 bool isKernelFunc = 
llvm::isKernelFunction(*F); 01590 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 01591 MVT thePointerTy = TLI->getPointerTy(); 01592 01593 O << "(\n"; 01594 01595 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 01596 Type *Ty = I->getType(); 01597 01598 if (!first) 01599 O << ",\n"; 01600 01601 first = false; 01602 01603 // Handle image/sampler parameters 01604 if (isKernelFunction(*F)) { 01605 if (isSampler(*I) || isImage(*I)) { 01606 if (isImage(*I)) { 01607 std::string sname = I->getName(); 01608 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { 01609 if (nvptxSubtarget.hasImageHandles()) 01610 O << "\t.param .u64 .ptr .surfref "; 01611 else 01612 O << "\t.param .surfref "; 01613 O << *CurrentFnSym << "_param_" << paramIndex; 01614 } 01615 else { // Default image is read_only 01616 if (nvptxSubtarget.hasImageHandles()) 01617 O << "\t.param .u64 .ptr .texref "; 01618 else 01619 O << "\t.param .texref "; 01620 O << *CurrentFnSym << "_param_" << paramIndex; 01621 } 01622 } else { 01623 if (nvptxSubtarget.hasImageHandles()) 01624 O << "\t.param .u64 .ptr .samplerref "; 01625 else 01626 O << "\t.param .samplerref "; 01627 O << *CurrentFnSym << "_param_" << paramIndex; 01628 } 01629 continue; 01630 } 01631 } 01632 01633 if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { 01634 if (Ty->isAggregateType() || Ty->isVectorTy()) { 01635 // Just print .param .align <a> .b8 .param[size]; 01636 // <a> = PAL.getparamalignment 01637 // size = typeallocsize of element type 01638 unsigned align = PAL.getParamAlignment(paramIndex + 1); 01639 if (align == 0) 01640 align = TD->getABITypeAlignment(Ty); 01641 01642 unsigned sz = TD->getTypeAllocSize(Ty); 01643 O << "\t.param .align " << align << " .b8 "; 01644 printParamName(I, paramIndex, O); 01645 O << "[" << sz << "]"; 01646 01647 continue; 01648 } 01649 // Just a scalar 01650 const PointerType *PTy = dyn_cast<PointerType>(Ty); 01651 if (isKernelFunc) { 01652 if (PTy) { 01653 // Special 
handling for pointer arguments to kernel 01654 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 01655 01656 if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { 01657 Type *ETy = PTy->getElementType(); 01658 int addrSpace = PTy->getAddressSpace(); 01659 switch (addrSpace) { 01660 default: 01661 O << ".ptr "; 01662 break; 01663 case llvm::ADDRESS_SPACE_CONST: 01664 O << ".ptr .const "; 01665 break; 01666 case llvm::ADDRESS_SPACE_SHARED: 01667 O << ".ptr .shared "; 01668 break; 01669 case llvm::ADDRESS_SPACE_GLOBAL: 01670 O << ".ptr .global "; 01671 break; 01672 } 01673 O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; 01674 } 01675 printParamName(I, paramIndex, O); 01676 continue; 01677 } 01678 01679 // non-pointer scalar to kernel func 01680 O << "\t.param ."; 01681 // Special case: predicate operands become .u8 types 01682 if (Ty->isIntegerTy(1)) 01683 O << "u8"; 01684 else 01685 O << getPTXFundamentalTypeStr(Ty); 01686 O << " "; 01687 printParamName(I, paramIndex, O); 01688 continue; 01689 } 01690 // Non-kernel function, just print .param .b<size> for ABI 01691 // and .reg .b<size> for non-ABI 01692 unsigned sz = 0; 01693 if (isa<IntegerType>(Ty)) { 01694 sz = cast<IntegerType>(Ty)->getBitWidth(); 01695 if (sz < 32) 01696 sz = 32; 01697 } else if (isa<PointerType>(Ty)) 01698 sz = thePointerTy.getSizeInBits(); 01699 else 01700 sz = Ty->getPrimitiveSizeInBits(); 01701 if (isABI) 01702 O << "\t.param .b" << sz << " "; 01703 else 01704 O << "\t.reg .b" << sz << " "; 01705 printParamName(I, paramIndex, O); 01706 continue; 01707 } 01708 01709 // param has byVal attribute. 
So should be a pointer 01710 const PointerType *PTy = dyn_cast<PointerType>(Ty); 01711 assert(PTy && "Param with byval attribute should be a pointer type"); 01712 Type *ETy = PTy->getElementType(); 01713 01714 if (isABI || isKernelFunc) { 01715 // Just print .param .align <a> .b8 .param[size]; 01716 // <a> = PAL.getparamalignment 01717 // size = typeallocsize of element type 01718 unsigned align = PAL.getParamAlignment(paramIndex + 1); 01719 if (align == 0) 01720 align = TD->getABITypeAlignment(ETy); 01721 01722 unsigned sz = TD->getTypeAllocSize(ETy); 01723 O << "\t.param .align " << align << " .b8 "; 01724 printParamName(I, paramIndex, O); 01725 O << "[" << sz << "]"; 01726 continue; 01727 } else { 01728 // Split the ETy into constituent parts and 01729 // print .param .b<size> <name> for each part. 01730 // Further, if a part is vector, print the above for 01731 // each vector element. 01732 SmallVector<EVT, 16> vtparts; 01733 ComputeValueVTs(*TLI, ETy, vtparts); 01734 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 01735 unsigned elems = 1; 01736 EVT elemtype = vtparts[i]; 01737 if (vtparts[i].isVector()) { 01738 elems = vtparts[i].getVectorNumElements(); 01739 elemtype = vtparts[i].getVectorElementType(); 01740 } 01741 01742 for (unsigned j = 0, je = elems; j != je; ++j) { 01743 unsigned sz = elemtype.getSizeInBits(); 01744 if (elemtype.isInteger() && (sz < 32)) 01745 sz = 32; 01746 O << "\t.reg .b" << sz << " "; 01747 printParamName(I, paramIndex, O); 01748 if (j < je - 1) 01749 O << ",\n"; 01750 ++paramIndex; 01751 } 01752 if (i < e - 1) 01753 O << ",\n"; 01754 } 01755 --paramIndex; 01756 continue; 01757 } 01758 } 01759 01760 O << "\n)\n"; 01761 } 01762 01763 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 01764 raw_ostream &O) { 01765 const Function *F = MF.getFunction(); 01766 emitFunctionParamList(F, O); 01767 } 01768 01769 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 01770 const MachineFunction &MF) { 01771 
SmallString<128> Str; 01772 raw_svector_ostream O(Str); 01773 01774 // Map the global virtual register number to a register class specific 01775 // virtual register number starting from 1 with that class. 01776 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 01777 //unsigned numRegClasses = TRI->getNumRegClasses(); 01778 01779 // Emit the Fake Stack Object 01780 const MachineFrameInfo *MFI = MF.getFrameInfo(); 01781 int NumBytes = (int) MFI->getStackSize(); 01782 if (NumBytes) { 01783 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME 01784 << getFunctionNumber() << "[" << NumBytes << "];\n"; 01785 if (nvptxSubtarget.is64Bit()) { 01786 O << "\t.reg .b64 \t%SP;\n"; 01787 O << "\t.reg .b64 \t%SPL;\n"; 01788 } else { 01789 O << "\t.reg .b32 \t%SP;\n"; 01790 O << "\t.reg .b32 \t%SPL;\n"; 01791 } 01792 } 01793 01794 // Go through all virtual registers to establish the mapping between the 01795 // global virtual 01796 // register number and the per class virtual register number. 01797 // We use the per class virtual register number in the ptx output. 
01798 unsigned int numVRs = MRI->getNumVirtRegs(); 01799 for (unsigned i = 0; i < numVRs; i++) { 01800 unsigned int vr = TRI->index2VirtReg(i); 01801 const TargetRegisterClass *RC = MRI->getRegClass(vr); 01802 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 01803 int n = regmap.size(); 01804 regmap.insert(std::make_pair(vr, n + 1)); 01805 } 01806 01807 // Emit register declarations 01808 // @TODO: Extract out the real register usage 01809 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 01810 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 01811 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 01812 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 01813 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n"; 01814 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 01815 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n"; 01816 01817 // Emit declaration of the virtual registers or 'physical' registers for 01818 // each register class 01819 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) { 01820 const TargetRegisterClass *RC = TRI->getRegClass(i); 01821 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 01822 std::string rcname = getNVPTXRegClassName(RC); 01823 std::string rcStr = getNVPTXRegClassStr(RC); 01824 int n = regmap.size(); 01825 01826 // Only declare those registers that may be used. 
01827 if (n) { 01828 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 01829 << ">;\n"; 01830 } 01831 } 01832 01833 OutStreamer.EmitRawText(O.str()); 01834 } 01835 01836 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 01837 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 01838 bool ignored; 01839 unsigned int numHex; 01840 const char *lead; 01841 01842 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 01843 numHex = 8; 01844 lead = "0f"; 01845 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); 01846 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 01847 numHex = 16; 01848 lead = "0d"; 01849 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); 01850 } else 01851 llvm_unreachable("unsupported fp type"); 01852 01853 APInt API = APF.bitcastToAPInt(); 01854 std::string hexstr(utohexstr(API.getZExtValue())); 01855 O << lead; 01856 if (hexstr.length() < numHex) 01857 O << std::string(numHex - hexstr.length(), '0'); 01858 O << utohexstr(API.getZExtValue()); 01859 } 01860 01861 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 01862 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 01863 O << CI->getValue(); 01864 return; 01865 } 01866 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 01867 printFPConstant(CFP, O); 01868 return; 01869 } 01870 if (isa<ConstantPointerNull>(CPV)) { 01871 O << "0"; 01872 return; 01873 } 01874 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 01875 PointerType *PTy = dyn_cast<PointerType>(GVar->getType()); 01876 bool IsNonGenericPointer = false; 01877 if (PTy && PTy->getAddressSpace() != 0) { 01878 IsNonGenericPointer = true; 01879 } 01880 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) { 01881 O << "generic("; 01882 O << *getSymbol(GVar); 01883 O << ")"; 01884 } else { 01885 O << *getSymbol(GVar); 01886 } 01887 return; 01888 } 01889 if (const ConstantExpr *Cexpr = 
dyn_cast<ConstantExpr>(CPV)) { 01890 const Value *v = Cexpr->stripPointerCasts(); 01891 PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType()); 01892 bool IsNonGenericPointer = false; 01893 if (PTy && PTy->getAddressSpace() != 0) { 01894 IsNonGenericPointer = true; 01895 } 01896 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 01897 if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { 01898 O << "generic("; 01899 O << *getSymbol(GVar); 01900 O << ")"; 01901 } else { 01902 O << *getSymbol(GVar); 01903 } 01904 return; 01905 } else { 01906 O << *LowerConstant(CPV, *this); 01907 return; 01908 } 01909 } 01910 llvm_unreachable("Not scalar type found in printScalarConstant()"); 01911 } 01912 01913 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 01914 AggBuffer *aggBuffer) { 01915 01916 const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); 01917 01918 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 01919 int s = TD->getTypeAllocSize(CPV->getType()); 01920 if (s < Bytes) 01921 s = Bytes; 01922 aggBuffer->addZeros(s); 01923 return; 01924 } 01925 01926 unsigned char *ptr; 01927 switch (CPV->getType()->getTypeID()) { 01928 01929 case Type::IntegerTyID: { 01930 const Type *ETy = CPV->getType(); 01931 if (ETy == Type::getInt8Ty(CPV->getContext())) { 01932 unsigned char c = 01933 (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 01934 ptr = &c; 01935 aggBuffer->addBytes(ptr, 1, Bytes); 01936 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 01937 short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 01938 ptr = (unsigned char *)&int16; 01939 aggBuffer->addBytes(ptr, 2, Bytes); 01940 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 01941 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 01942 int int32 = (int)(constInt->getZExtValue()); 01943 ptr = (unsigned char *)&int32; 01944 aggBuffer->addBytes(ptr, 4, Bytes); 01945 break; 01946 } else if (const ConstantExpr *Cexpr = 
dyn_cast<ConstantExpr>(CPV)) { 01947 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 01948 ConstantFoldConstantExpression(Cexpr, TD))) { 01949 int int32 = (int)(constInt->getZExtValue()); 01950 ptr = (unsigned char *)&int32; 01951 aggBuffer->addBytes(ptr, 4, Bytes); 01952 break; 01953 } 01954 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 01955 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 01956 aggBuffer->addSymbol(v); 01957 aggBuffer->addZeros(4); 01958 break; 01959 } 01960 } 01961 llvm_unreachable("unsupported integer const type"); 01962 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 01963 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 01964 long long int64 = (long long)(constInt->getZExtValue()); 01965 ptr = (unsigned char *)&int64; 01966 aggBuffer->addBytes(ptr, 8, Bytes); 01967 break; 01968 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 01969 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 01970 ConstantFoldConstantExpression(Cexpr, TD))) { 01971 long long int64 = (long long)(constInt->getZExtValue()); 01972 ptr = (unsigned char *)&int64; 01973 aggBuffer->addBytes(ptr, 8, Bytes); 01974 break; 01975 } 01976 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 01977 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 01978 aggBuffer->addSymbol(v); 01979 aggBuffer->addZeros(8); 01980 break; 01981 } 01982 } 01983 llvm_unreachable("unsupported integer const type"); 01984 } else 01985 llvm_unreachable("unsupported integer const type"); 01986 break; 01987 } 01988 case Type::FloatTyID: 01989 case Type::DoubleTyID: { 01990 const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 01991 const Type *Ty = CFP->getType(); 01992 if (Ty == Type::getFloatTy(CPV->getContext())) { 01993 float float32 = (float) CFP->getValueAPF().convertToFloat(); 01994 ptr = (unsigned char *)&float32; 01995 aggBuffer->addBytes(ptr, 4, Bytes); 01996 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 01997 double 
float64 = CFP->getValueAPF().convertToDouble(); 01998 ptr = (unsigned char *)&float64; 01999 aggBuffer->addBytes(ptr, 8, Bytes); 02000 } else { 02001 llvm_unreachable("unsupported fp const type"); 02002 } 02003 break; 02004 } 02005 case Type::PointerTyID: { 02006 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 02007 aggBuffer->addSymbol(GVar); 02008 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 02009 const Value *v = Cexpr->stripPointerCasts(); 02010 aggBuffer->addSymbol(v); 02011 } 02012 unsigned int s = TD->getTypeAllocSize(CPV->getType()); 02013 aggBuffer->addZeros(s); 02014 break; 02015 } 02016 02017 case Type::ArrayTyID: 02018 case Type::VectorTyID: 02019 case Type::StructTyID: { 02020 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 02021 isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) { 02022 int ElementSize = TD->getTypeAllocSize(CPV->getType()); 02023 bufferAggregateConstant(CPV, aggBuffer); 02024 if (Bytes > ElementSize) 02025 aggBuffer->addZeros(Bytes - ElementSize); 02026 } else if (isa<ConstantAggregateZero>(CPV)) 02027 aggBuffer->addZeros(Bytes); 02028 else 02029 llvm_unreachable("Unexpected Constant type"); 02030 break; 02031 } 02032 02033 default: 02034 llvm_unreachable("unsupported type"); 02035 } 02036 } 02037 02038 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 02039 AggBuffer *aggBuffer) { 02040 const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); 02041 int Bytes; 02042 02043 // Old constants 02044 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 02045 if (CPV->getNumOperands()) 02046 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 02047 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 02048 return; 02049 } 02050 02051 if (const ConstantDataSequential *CDS = 02052 dyn_cast<ConstantDataSequential>(CPV)) { 02053 if (CDS->getNumElements()) 02054 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 02055 
bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 02056 aggBuffer); 02057 return; 02058 } 02059 02060 if (isa<ConstantStruct>(CPV)) { 02061 if (CPV->getNumOperands()) { 02062 StructType *ST = cast<StructType>(CPV->getType()); 02063 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 02064 if (i == (e - 1)) 02065 Bytes = TD->getStructLayout(ST)->getElementOffset(0) + 02066 TD->getTypeAllocSize(ST) - 02067 TD->getStructLayout(ST)->getElementOffset(i); 02068 else 02069 Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - 02070 TD->getStructLayout(ST)->getElementOffset(i); 02071 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 02072 } 02073 } 02074 return; 02075 } 02076 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 02077 } 02078 02079 // buildTypeNameMap - Run through symbol table looking for type names. 02080 // 02081 02082 bool NVPTXAsmPrinter::isImageType(const Type *Ty) { 02083 02084 std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); 02085 02086 if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || 02087 !PI->second.compare("struct._image2d_t") || 02088 !PI->second.compare("struct._image3d_t"))) 02089 return true; 02090 02091 return false; 02092 } 02093 02094 02095 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 02096 switch (MI.getOpcode()) { 02097 default: 02098 return false; 02099 case NVPTX::CallArgBeginInst: 02100 case NVPTX::CallArgEndInst0: 02101 case NVPTX::CallArgEndInst1: 02102 case NVPTX::CallArgF32: 02103 case NVPTX::CallArgF64: 02104 case NVPTX::CallArgI16: 02105 case NVPTX::CallArgI32: 02106 case NVPTX::CallArgI32imm: 02107 case NVPTX::CallArgI64: 02108 case NVPTX::CallArgParam: 02109 case NVPTX::CallVoidInst: 02110 case NVPTX::CallVoidInstReg: 02111 case NVPTX::Callseq_End: 02112 case NVPTX::CallVoidInstReg64: 02113 case NVPTX::DeclareParamInst: 02114 case NVPTX::DeclareRetMemInst: 02115 case NVPTX::DeclareRetRegInst: 
02116 case NVPTX::DeclareRetScalarInst: 02117 case NVPTX::DeclareScalarParamInst: 02118 case NVPTX::DeclareScalarRegInst: 02119 case NVPTX::StoreParamF32: 02120 case NVPTX::StoreParamF64: 02121 case NVPTX::StoreParamI16: 02122 case NVPTX::StoreParamI32: 02123 case NVPTX::StoreParamI64: 02124 case NVPTX::StoreParamI8: 02125 case NVPTX::StoreRetvalF32: 02126 case NVPTX::StoreRetvalF64: 02127 case NVPTX::StoreRetvalI16: 02128 case NVPTX::StoreRetvalI32: 02129 case NVPTX::StoreRetvalI64: 02130 case NVPTX::StoreRetvalI8: 02131 case NVPTX::LastCallArgF32: 02132 case NVPTX::LastCallArgF64: 02133 case NVPTX::LastCallArgI16: 02134 case NVPTX::LastCallArgI32: 02135 case NVPTX::LastCallArgI32imm: 02136 case NVPTX::LastCallArgI64: 02137 case NVPTX::LastCallArgParam: 02138 case NVPTX::LoadParamMemF32: 02139 case NVPTX::LoadParamMemF64: 02140 case NVPTX::LoadParamMemI16: 02141 case NVPTX::LoadParamMemI32: 02142 case NVPTX::LoadParamMemI64: 02143 case NVPTX::LoadParamMemI8: 02144 case NVPTX::PrototypeInst: 02145 case NVPTX::DBG_VALUE: 02146 return true; 02147 } 02148 return false; 02149 } 02150 02151 /// PrintAsmOperand - Print out an operand for an inline asm expression. 02152 /// 02153 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 02154 unsigned AsmVariant, 02155 const char *ExtraCode, raw_ostream &O) { 02156 if (ExtraCode && ExtraCode[0]) { 02157 if (ExtraCode[1] != 0) 02158 return true; // Unknown modifier. 
02159 02160 switch (ExtraCode[0]) { 02161 default: 02162 // See if this is a generic print operand 02163 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 02164 case 'r': 02165 break; 02166 } 02167 } 02168 02169 printOperand(MI, OpNo, O); 02170 02171 return false; 02172 } 02173 02174 bool NVPTXAsmPrinter::PrintAsmMemoryOperand( 02175 const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, 02176 const char *ExtraCode, raw_ostream &O) { 02177 if (ExtraCode && ExtraCode[0]) 02178 return true; // Unknown modifier 02179 02180 O << '['; 02181 printMemOperand(MI, OpNo, O); 02182 O << ']'; 02183 02184 return false; 02185 } 02186 02187 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 02188 raw_ostream &O, const char *Modifier) { 02189 const MachineOperand &MO = MI->getOperand(opNum); 02190 switch (MO.getType()) { 02191 case MachineOperand::MO_Register: 02192 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 02193 if (MO.getReg() == NVPTX::VRDepot) 02194 O << DEPOTNAME << getFunctionNumber(); 02195 else 02196 O << NVPTXInstPrinter::getRegisterName(MO.getReg()); 02197 } else { 02198 emitVirtualRegister(MO.getReg(), O); 02199 } 02200 return; 02201 02202 case MachineOperand::MO_Immediate: 02203 if (!Modifier) 02204 O << MO.getImm(); 02205 else if (strstr(Modifier, "vec") == Modifier) 02206 printVecModifiedImmediate(MO, Modifier, O); 02207 else 02208 llvm_unreachable( 02209 "Don't know how to handle modifier on immediate operand"); 02210 return; 02211 02212 case MachineOperand::MO_FPImmediate: 02213 printFPConstant(MO.getFPImm(), O); 02214 break; 02215 02216 case MachineOperand::MO_GlobalAddress: 02217 O << *getSymbol(MO.getGlobal()); 02218 break; 02219 02220 case MachineOperand::MO_MachineBasicBlock: 02221 O << *MO.getMBB()->getSymbol(); 02222 return; 02223 02224 default: 02225 llvm_unreachable("Operand type not supported."); 02226 } 02227 } 02228 02229 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int 
opNum, 02230 raw_ostream &O, const char *Modifier) { 02231 printOperand(MI, opNum, O); 02232 02233 if (Modifier && !strcmp(Modifier, "add")) { 02234 O << ", "; 02235 printOperand(MI, opNum + 1, O); 02236 } else { 02237 if (MI->getOperand(opNum + 1).isImm() && 02238 MI->getOperand(opNum + 1).getImm() == 0) 02239 return; // don't print ',0' or '+0' 02240 O << "+"; 02241 printOperand(MI, opNum + 1, O); 02242 } 02243 } 02244 02245 02246 // Force static initialization. 02247 extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { 02248 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 02249 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 02250 } 02251 02252 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 02253 std::stringstream temp; 02254 LineReader *reader = this->getReader(filename.str()); 02255 temp << "\n//"; 02256 temp << filename.str(); 02257 temp << ":"; 02258 temp << line; 02259 temp << " "; 02260 temp << reader->readLine(line); 02261 temp << "\n"; 02262 this->OutStreamer.EmitRawText(Twine(temp.str())); 02263 } 02264 02265 LineReader *NVPTXAsmPrinter::getReader(std::string filename) { 02266 if (!reader) { 02267 reader = new LineReader(filename); 02268 } 02269 02270 if (reader->fileName() != filename) { 02271 delete reader; 02272 reader = new LineReader(filename); 02273 } 02274 02275 return reader; 02276 } 02277 02278 std::string LineReader::readLine(unsigned lineNum) { 02279 if (lineNum < theCurLine) { 02280 theCurLine = 0; 02281 fstr.seekg(0, std::ios::beg); 02282 } 02283 while (theCurLine < lineNum) { 02284 fstr.getline(buff, 500); 02285 theCurLine++; 02286 } 02287 return buff; 02288 } 02289 02290 // Force static initialization. 02291 extern "C" void LLVMInitializeNVPTXAsmPrinter() { 02292 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 02293 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 02294 }