AMDGPU.cpp - OpenGrok cross reference for /freebsd/contrib/llvm-project/clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines Matching +full:max +full:- +full:rt
1 //===- AMDGPU.cpp ---------------------------------------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
18 //===----------------------------------------------------------------------===//
37     if (PtrTy && PtrTy->getAddressSpace() == FromAS)  in coerceKernelArgumentType()
38       return llvm::PointerType::get(Ty->getContext(), ToAS);  in coerceKernelArgumentType()
72   if (const VectorType *VT = Ty->getAs<VectorType>()) {  in numRegsForType()
74     // in-memory size, which includes the padding 4th element for 3-vectors.  in numRegsForType()
75     QualType EltTy = VT->getElementType();  in numRegsForType()
78     // 16-bit element vectors should be passed as packed.  in numRegsForType()
80       return (VT->getNumElements() + 1) / 2;  in numRegsForType()
83     return EltNumRegs * VT->getNumElements();  in numRegsForType()
86   if (const RecordType *RT = Ty->getAs<RecordType>()) {  in numRegsForType()  local
87     const RecordDecl *RD = RT->getDecl();  in numRegsForType()
88     assert(!RD->hasFlexibleArrayMember());  in numRegsForType()
90     for (const FieldDecl *Field : RD->fields()) {  in numRegsForType()
91       QualType FieldTy = Field->getType();  in numRegsForType()
132     // Records with non-trivial destructors/copy-constructors should not be  in classifyReturnType()
139       // Lower single-element structs to just return a regular value.  in classifyReturnType()
143       if (const RecordType *RT = RetTy->getAs<RecordType>()) {  in classifyReturnType()  local
144         const RecordDecl *RD = RT->getDecl();  in classifyReturnType()
145         if (RD->hasFlexibleArrayMember())  in classifyReturnType()
194   // new kind of coercion of the in-memory type for indirect arguments.  in classifyKernelArgumentType()
224     // Records with non-trivial destructors/copy-constructors should not be  in classifyArgumentType()
233     // Lower single-element structs to just pass a regular value. TODO: We  in classifyArgumentType()
234     // could do reasonable-size multiple-element structs too, using getExpand(),  in classifyArgumentType()
239     if (const RecordType *RT = Ty->getAs<RecordType>()) {  in classifyArgumentType()  local
240       const RecordDecl *RD = RT->getDecl();  in classifyArgumentType()
241       if (RD->hasFlexibleArrayMember())  in classifyArgumentType()
249       NumRegsLeft -= std::min(NumRegsLeft, NumRegs);  in classifyArgumentType()
265         NumRegsLeft -= NumRegs;  in classifyArgumentType()
270     // Use pass-by-reference instead of pass-by-value for struct arguments in  in classifyArgumentType()
281     NumRegsLeft -= std::min(NumRegs, NumRegsLeft);  in classifyArgumentType()
325   if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)  in requiresAMDGPUProtectedVisibility()
328   return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&  in requiresAMDGPUProtectedVisibility()
329          (D->hasAttr<OpenCLKernelAttr>() ||  in requiresAMDGPUProtectedVisibility()
330           (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||  in requiresAMDGPUProtectedVisibility()
332            (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||  in requiresAMDGPUProtectedVisibility()
333             cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||  in requiresAMDGPUProtectedVisibility()
334             cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())));  in requiresAMDGPUProtectedVisibility()
340       M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;  in setFunctionDeclAttributes()
342       M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();  in setFunctionDeclAttributes()
343   const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();  in setFunctionDeclAttributes()
345   const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();  in setFunctionDeclAttributes()
350     // --gpu-max-threads-per-block=n or its default value for HIP.  in setFunctionDeclAttributes()
357     F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);  in setFunctionDeclAttributes()
360   if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>())  in setFunctionDeclAttributes()
363   if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {  in setFunctionDeclAttributes()
364     unsigned NumSGPR = Attr->getNumSGPR();  in setFunctionDeclAttributes()
367       F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));  in setFunctionDeclAttributes()
370   if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {  in setFunctionDeclAttributes()
371     uint32_t NumVGPR = Attr->getNumVGPR();  in setFunctionDeclAttributes()
374       F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));  in setFunctionDeclAttributes()
377   if (const auto *Attr = FD->getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {  in setFunctionDeclAttributes()
378     uint32_t X = Attr->getMaxNumWorkGroupsX()  in setFunctionDeclAttributes()
379                      ->EvaluateKnownConstInt(M.getContext())  in setFunctionDeclAttributes()
382     uint32_t Y = Attr->getMaxNumWorkGroupsY()  in setFunctionDeclAttributes()
383                      ? Attr->getMaxNumWorkGroupsY()  in setFunctionDeclAttributes()
384                            ->EvaluateKnownConstInt(M.getContext())  in setFunctionDeclAttributes()
387     uint32_t Z = Attr->getMaxNumWorkGroupsZ()  in setFunctionDeclAttributes()
388                      ? Attr->getMaxNumWorkGroupsZ()  in setFunctionDeclAttributes()
389                            ->EvaluateKnownConstInt(M.getContext())  in setFunctionDeclAttributes()
397     F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());  in setFunctionDeclAttributes()
401 /// Emits control constants used to change per-architecture behaviour in the
407   if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))  in emitTargetGlobals()
424   GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);  in emitTargetGlobals()
425   GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);  in emitTargetGlobals()
429     OriginalGV->replaceAllUsesWith(GV);  in emitTargetGlobals()
430     GV->takeName(OriginalGV);  in emitTargetGlobals()
431     OriginalGV->eraseFromParent();  in emitTargetGlobals()
438     GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);  in setTargetAttributes()
439     GV->setDSOLocal(true);  in setTargetAttributes()
442   if (GV->isDeclaration())  in setTargetAttributes()
454     F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");  in setTargetAttributes()
457     F->addFnAttr("amdgpu-ieee", "false");  in setTargetAttributes()
477       PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));  in getNullPointer()
493   LangAS AddrSpace = D->getType().getAddressSpace();  in getGlobalVarAddressSpace()
498   if (D->getType().isConstantStorage(CGM.getContext(), false, false) &&  in getGlobalVarAddressSpace()
499       D->hasConstantInitialization()) {  in getGlobalVarAddressSpace()
541       Name = Twine(Twine(Name) + Twine("-")).str();  in getLLVMSyncScopeID()
543     Name = Twine(Twine(Name) + Twine("one-as")).str();  in getLLVMSyncScopeID()
560       FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));  in setCUDAKernelCallingConvention()
570 /// has "enqueued-block" function attribute and kernel argument metadata.
576   auto *InvokeFT = Invoke->getFunctionType();  in createEnqueuedBlockKernel()
592   for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {  in createEnqueuedBlockKernel()
593     ArgTys.push_back(InvokeFT->getParamType(I));  in createEnqueuedBlockKernel()
602   std::string Name = Invoke->getName().str() + "_kernel";  in createEnqueuedBlockKernel()
606   F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);  in createEnqueuedBlockKernel()
612   KernelAttrs.addAttribute("enqueued-block");  in createEnqueuedBlockKernel()
613   F->addFnAttrs(KernelAttrs);  in createEnqueuedBlockKernel()
620   BlockPtr->setAlignment(BlockAlign);  in createEnqueuedBlockKernel()
621   Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);  in createEnqueuedBlockKernel()
622   auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));  in createEnqueuedBlockKernel()
625   for (llvm::Argument &A : llvm::drop_begin(F->args()))  in createEnqueuedBlockKernel()
628   call->setCallingConv(Invoke->getCallingConv());  in createEnqueuedBlockKernel()
632   F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));  in createEnqueuedBlockKernel()
633   F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));  in createEnqueuedBlockKernel()
634   F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));  in createEnqueuedBlockKernel()
635   F->setMetadata("kernel_arg_base_type",  in createEnqueuedBlockKernel()
637   F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));  in createEnqueuedBlockKernel()
639     F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));  in createEnqueuedBlockKernel()
649   unsigned Max = 0;  in handleAMDGPUFlatWorkGroupSizeAttr()  local
651     Min = FlatWGS->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();  in handleAMDGPUFlatWorkGroupSizeAttr()
652     Max = FlatWGS->getMax()->EvaluateKnownConstInt(getContext()).getExtValue();  in handleAMDGPUFlatWorkGroupSizeAttr()
654   if (ReqdWGS && Min == 0 && Max == 0)  in handleAMDGPUFlatWorkGroupSizeAttr()
655     Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();  in handleAMDGPUFlatWorkGroupSizeAttr()
658     assert(Min <= Max && "Min must be less than or equal Max");  in handleAMDGPUFlatWorkGroupSizeAttr()
663       *MaxThreadsVal = Max;  in handleAMDGPUFlatWorkGroupSizeAttr()
664     std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);  in handleAMDGPUFlatWorkGroupSizeAttr()
666       F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);  in handleAMDGPUFlatWorkGroupSizeAttr()
668     assert(Max == 0 && "Max must be zero");  in handleAMDGPUFlatWorkGroupSizeAttr()
674       Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();  in handleAMDGPUWavesPerEUAttr()
675   unsigned Max =  in handleAMDGPUWavesPerEUAttr()  local
676       Attr->getMax()  in handleAMDGPUWavesPerEUAttr()
677           ? Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue()  in handleAMDGPUWavesPerEUAttr()
681     assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");  in handleAMDGPUWavesPerEUAttr()
684     if (Max != 0)  in handleAMDGPUWavesPerEUAttr()
685       AttrVal = AttrVal + "," + llvm::utostr(Max);  in handleAMDGPUWavesPerEUAttr()
686     F->addFnAttr("amdgpu-waves-per-eu", AttrVal);  in handleAMDGPUWavesPerEUAttr()
688     assert(Max == 0 && "Max must be zero");  in handleAMDGPUWavesPerEUAttr()