Lines Matching +full:max +full:- +full:rt
1 //===- AMDGPU.cpp ---------------------------------------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
18 //===----------------------------------------------------------------------===//
37 if (PtrTy && PtrTy->getAddressSpace() == FromAS) in coerceKernelArgumentType()
38 return llvm::PointerType::get(Ty->getContext(), ToAS); in coerceKernelArgumentType()
72 if (const VectorType *VT = Ty->getAs<VectorType>()) { in numRegsForType()
74 // in-memory size, which includes the padding 4th element for 3-vectors. in numRegsForType()
75 QualType EltTy = VT->getElementType(); in numRegsForType()
78 // 16-bit element vectors should be passed as packed. in numRegsForType()
80 return (VT->getNumElements() + 1) / 2; in numRegsForType()
83 return EltNumRegs * VT->getNumElements(); in numRegsForType()
86 if (const RecordType *RT = Ty->getAs<RecordType>()) { in numRegsForType() local
87 const RecordDecl *RD = RT->getDecl(); in numRegsForType()
88 assert(!RD->hasFlexibleArrayMember()); in numRegsForType()
90 for (const FieldDecl *Field : RD->fields()) { in numRegsForType()
91 QualType FieldTy = Field->getType(); in numRegsForType()
132 // Records with non-trivial destructors/copy-constructors should not be in classifyReturnType()
139 // Lower single-element structs to just return a regular value. in classifyReturnType()
143 if (const RecordType *RT = RetTy->getAs<RecordType>()) { in classifyReturnType() local
144 const RecordDecl *RD = RT->getDecl(); in classifyReturnType()
145 if (RD->hasFlexibleArrayMember()) in classifyReturnType()
194 // new kind of coercion of the in-memory type for indirect arguments. in classifyKernelArgumentType()
224 // Records with non-trivial destructors/copy-constructors should not be in classifyArgumentType()
233 // Lower single-element structs to just pass a regular value. TODO: We in classifyArgumentType()
234 // could do reasonable-size multiple-element structs too, using getExpand(), in classifyArgumentType()
239 if (const RecordType *RT = Ty->getAs<RecordType>()) { in classifyArgumentType() local
240 const RecordDecl *RD = RT->getDecl(); in classifyArgumentType()
241 if (RD->hasFlexibleArrayMember()) in classifyArgumentType()
249 NumRegsLeft -= std::min(NumRegsLeft, NumRegs); in classifyArgumentType()
265 NumRegsLeft -= NumRegs; in classifyArgumentType()
270 // Use pass-by-reference instead of pass-by-value for struct arguments in in classifyArgumentType()
281 NumRegsLeft -= std::min(NumRegs, NumRegsLeft); in classifyArgumentType()
325 if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility) in requiresAMDGPUProtectedVisibility()
328 return !D->hasAttr<OMPDeclareTargetDeclAttr>() && in requiresAMDGPUProtectedVisibility()
329 (D->hasAttr<OpenCLKernelAttr>() || in requiresAMDGPUProtectedVisibility()
330 (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) || in requiresAMDGPUProtectedVisibility()
332 (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || in requiresAMDGPUProtectedVisibility()
333 cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() || in requiresAMDGPUProtectedVisibility()
334 cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType()))); in requiresAMDGPUProtectedVisibility()
340 M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; in setFunctionDeclAttributes()
342 M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>(); in setFunctionDeclAttributes()
343 const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>(); in setFunctionDeclAttributes()
345 const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); in setFunctionDeclAttributes()
350 // --gpu-max-threads-per-block=n or its default value for HIP. in setFunctionDeclAttributes()
357 F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); in setFunctionDeclAttributes()
360 if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) in setFunctionDeclAttributes()
363 if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) { in setFunctionDeclAttributes()
364 unsigned NumSGPR = Attr->getNumSGPR(); in setFunctionDeclAttributes()
367 F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR)); in setFunctionDeclAttributes()
370 if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) { in setFunctionDeclAttributes()
371 uint32_t NumVGPR = Attr->getNumVGPR(); in setFunctionDeclAttributes()
374 F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); in setFunctionDeclAttributes()
377 if (const auto *Attr = FD->getAttr<AMDGPUMaxNumWorkGroupsAttr>()) { in setFunctionDeclAttributes()
378 uint32_t X = Attr->getMaxNumWorkGroupsX() in setFunctionDeclAttributes()
379 ->EvaluateKnownConstInt(M.getContext()) in setFunctionDeclAttributes()
382 uint32_t Y = Attr->getMaxNumWorkGroupsY() in setFunctionDeclAttributes()
383 ? Attr->getMaxNumWorkGroupsY() in setFunctionDeclAttributes()
384 ->EvaluateKnownConstInt(M.getContext()) in setFunctionDeclAttributes()
387 uint32_t Z = Attr->getMaxNumWorkGroupsZ() in setFunctionDeclAttributes()
388 ? Attr->getMaxNumWorkGroupsZ() in setFunctionDeclAttributes()
389 ->EvaluateKnownConstInt(M.getContext()) in setFunctionDeclAttributes()
397 F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str()); in setFunctionDeclAttributes()
401 /// Emits control constants used to change per-architecture behaviour in the
407 if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage())) in emitTargetGlobals()
424 GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); in emitTargetGlobals()
425 GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility); in emitTargetGlobals()
429 OriginalGV->replaceAllUsesWith(GV); in emitTargetGlobals()
430 GV->takeName(OriginalGV); in emitTargetGlobals()
431 OriginalGV->eraseFromParent(); in emitTargetGlobals()
438 GV->setVisibility(llvm::GlobalValue::ProtectedVisibility); in setTargetAttributes()
439 GV->setDSOLocal(true); in setTargetAttributes()
442 if (GV->isDeclaration()) in setTargetAttributes()
454 F->addFnAttr("amdgpu-unsafe-fp-atomics", "true"); in setTargetAttributes()
457 F->addFnAttr("amdgpu-ieee", "false"); in setTargetAttributes()
477 PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic)); in getNullPointer()
493 LangAS AddrSpace = D->getType().getAddressSpace(); in getGlobalVarAddressSpace()
498 if (D->getType().isConstantStorage(CGM.getContext(), false, false) && in getGlobalVarAddressSpace()
499 D->hasConstantInitialization()) { in getGlobalVarAddressSpace()
541 Name = Twine(Twine(Name) + Twine("-")).str(); in getLLVMSyncScopeID()
543 Name = Twine(Twine(Name) + Twine("one-as")).str(); in getLLVMSyncScopeID()
560 FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); in setCUDAKernelCallingConvention()
570 /// has "enqueued-block" function attribute and kernel argument metadata.
576 auto *InvokeFT = Invoke->getFunctionType(); in createEnqueuedBlockKernel()
592 for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) { in createEnqueuedBlockKernel()
593 ArgTys.push_back(InvokeFT->getParamType(I)); in createEnqueuedBlockKernel()
602 std::string Name = Invoke->getName().str() + "_kernel"; in createEnqueuedBlockKernel()
606 F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); in createEnqueuedBlockKernel()
612 KernelAttrs.addAttribute("enqueued-block"); in createEnqueuedBlockKernel()
613 F->addFnAttrs(KernelAttrs); in createEnqueuedBlockKernel()
620 BlockPtr->setAlignment(BlockAlign); in createEnqueuedBlockKernel()
621 Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign); in createEnqueuedBlockKernel()
622 auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0)); in createEnqueuedBlockKernel()
625 for (llvm::Argument &A : llvm::drop_begin(F->args())) in createEnqueuedBlockKernel()
628 call->setCallingConv(Invoke->getCallingConv()); in createEnqueuedBlockKernel()
632 F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals)); in createEnqueuedBlockKernel()
633 F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals)); in createEnqueuedBlockKernel()
634 F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames)); in createEnqueuedBlockKernel()
635 F->setMetadata("kernel_arg_base_type", in createEnqueuedBlockKernel()
637 F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals)); in createEnqueuedBlockKernel()
639 F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames)); in createEnqueuedBlockKernel()
649 unsigned Max = 0; in handleAMDGPUFlatWorkGroupSizeAttr() local
651 Min = FlatWGS->getMin()->EvaluateKnownConstInt(getContext()).getExtValue(); in handleAMDGPUFlatWorkGroupSizeAttr()
652 Max = FlatWGS->getMax()->EvaluateKnownConstInt(getContext()).getExtValue(); in handleAMDGPUFlatWorkGroupSizeAttr()
654 if (ReqdWGS && Min == 0 && Max == 0) in handleAMDGPUFlatWorkGroupSizeAttr()
655 Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); in handleAMDGPUFlatWorkGroupSizeAttr()
658 assert(Min <= Max && "Min must be less than or equal Max"); in handleAMDGPUFlatWorkGroupSizeAttr()
663 *MaxThreadsVal = Max; in handleAMDGPUFlatWorkGroupSizeAttr()
664 std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); in handleAMDGPUFlatWorkGroupSizeAttr()
666 F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); in handleAMDGPUFlatWorkGroupSizeAttr()
668 assert(Max == 0 && "Max must be zero"); in handleAMDGPUFlatWorkGroupSizeAttr()
674 Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue(); in handleAMDGPUWavesPerEUAttr()
675 unsigned Max = in handleAMDGPUWavesPerEUAttr() local
676 Attr->getMax() in handleAMDGPUWavesPerEUAttr()
677 ? Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue() in handleAMDGPUWavesPerEUAttr()
681 assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); in handleAMDGPUWavesPerEUAttr()
684 if (Max != 0) in handleAMDGPUWavesPerEUAttr()
685 AttrVal = AttrVal + "," + llvm::utostr(Max); in handleAMDGPUWavesPerEUAttr()
686 F->addFnAttr("amdgpu-waves-per-eu", AttrVal); in handleAMDGPUWavesPerEUAttr()
688 assert(Max == 0 && "Max must be zero"); in handleAMDGPUWavesPerEUAttr()