Lines Matching +full:high +full:- +full:fidelity

3 //===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass --===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
15 //===----------------------------------------------------------------------===//
62 // Check if a value can be converted to a 16-bit value without losing
68 if (VTy->isHalfTy() || VTy->isIntegerTy(16)) { in canSafelyConvertTo16Bit()
69 // The value is already 16-bit, so we don't want to convert to 16-bit again! in canSafelyConvertTo16Bit()
76 APFloat FloatValue(ConstFloat->getValueAPF()); in canSafelyConvertTo16Bit()
86 APInt IntValue(ConstInt->getValue()); in canSafelyConvertTo16Bit()
95 Type *CastSrcTy = CastSrc->getType(); in canSafelyConvertTo16Bit()
96 if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16)) in canSafelyConvertTo16Bit()
103 // Convert a value to 16-bit.
107 return cast<Instruction>(&V)->getOperand(0); in convertTo16Bit()
108 if (VTy->isIntegerTy()) in convertTo16Bit()
110 if (VTy->isFloatingPointTy()) in convertTo16Bit()
136 NewCall->takeName(&OldIntr); in modifyIntrinsicCall()
137 NewCall->copyMetadata(OldIntr); in modifyIntrinsicCall()
139 NewCall->copyFastMathFlags(&OldIntr); in modifyIntrinsicCall()
142 if (!InstToReplace.getType()->isVoidTy()) in modifyIntrinsicCall()
160 AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) { in simplifyAMDGCNImageIntrinsic()
162 dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) { in simplifyAMDGCNImageIntrinsic()
163 if (ConstantLod->isZero() || ConstantLod->isNegative()) { in simplifyAMDGCNImageIntrinsic()
165 AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ, in simplifyAMDGCNImageIntrinsic()
166 ImageDimIntr->Dim); in simplifyAMDGCNImageIntrinsic()
168 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { in simplifyAMDGCNImageIntrinsic()
169 Args.erase(Args.begin() + ImageDimIntr->LodIndex); in simplifyAMDGCNImageIntrinsic()
177 AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) { in simplifyAMDGCNImageIntrinsic()
179 dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) { in simplifyAMDGCNImageIntrinsic()
180 if (ConstantMip->isZero()) { in simplifyAMDGCNImageIntrinsic()
182 AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP, in simplifyAMDGCNImageIntrinsic()
183 ImageDimIntr->Dim); in simplifyAMDGCNImageIntrinsic()
185 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { in simplifyAMDGCNImageIntrinsic()
186 Args.erase(Args.begin() + ImageDimIntr->MipIndex); in simplifyAMDGCNImageIntrinsic()
194 AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) { in simplifyAMDGCNImageIntrinsic()
196 dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) { in simplifyAMDGCNImageIntrinsic()
197 if (ConstantBias->isZero()) { in simplifyAMDGCNImageIntrinsic()
199 AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias, in simplifyAMDGCNImageIntrinsic()
200 ImageDimIntr->Dim); in simplifyAMDGCNImageIntrinsic()
202 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { in simplifyAMDGCNImageIntrinsic()
203 Args.erase(Args.begin() + ImageDimIntr->BiasIndex); in simplifyAMDGCNImageIntrinsic()
204 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg); in simplifyAMDGCNImageIntrinsic()
212 AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) { in simplifyAMDGCNImageIntrinsic()
214 dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) { in simplifyAMDGCNImageIntrinsic()
215 if (ConstantOffset->isZero()) { in simplifyAMDGCNImageIntrinsic()
218 OffsetMappingInfo->NoOffset, ImageDimIntr->Dim); in simplifyAMDGCNImageIntrinsic()
220 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { in simplifyAMDGCNImageIntrinsic()
221 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex); in simplifyAMDGCNImageIntrinsic()
228 if (ST->hasD16Images()) { in simplifyAMDGCNImageIntrinsic()
231 AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode); in simplifyAMDGCNImageIntrinsic()
233 if (BaseOpcode->HasD16) { in simplifyAMDGCNImageIntrinsic()
241 if (User->getOpcode() == Instruction::FPTrunc && in simplifyAMDGCNImageIntrinsic()
242 User->getType()->getScalarType()->isHalfTy()) { in simplifyAMDGCNImageIntrinsic()
244 return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC, in simplifyAMDGCNImageIntrinsic()
248 ArgTys[0] = User->getType(); in simplifyAMDGCNImageIntrinsic()
256 if (!ST->hasA16() && !ST->hasG16()) in simplifyAMDGCNImageIntrinsic()
262 AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler; in simplifyAMDGCNImageIntrinsic()
267 for (unsigned OperandIndex = ImageDimIntr->GradientStart; in simplifyAMDGCNImageIntrinsic()
268 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) { in simplifyAMDGCNImageIntrinsic()
270 // If the values are not derived from 16-bit values, we cannot optimize. in simplifyAMDGCNImageIntrinsic()
272 if (OperandIndex < ImageDimIntr->CoordStart || in simplifyAMDGCNImageIntrinsic()
273 ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) { in simplifyAMDGCNImageIntrinsic()
281 assert(OperandIndex == ImageDimIntr->GradientStart || in simplifyAMDGCNImageIntrinsic()
282 FloatCoord == Coord->getType()->isFloatingPointTy()); in simplifyAMDGCNImageIntrinsic()
283 FloatCoord = Coord->getType()->isFloatingPointTy(); in simplifyAMDGCNImageIntrinsic()
286 if (!OnlyDerivatives && !ST->hasA16()) in simplifyAMDGCNImageIntrinsic()
290 if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) { in simplifyAMDGCNImageIntrinsic()
291 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex); in simplifyAMDGCNImageIntrinsic()
298 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart == in simplifyAMDGCNImageIntrinsic()
299 ImageDimIntr->CoordStart)) in simplifyAMDGCNImageIntrinsic()
307 ArgTys[ImageDimIntr->GradientTyArg] = CoordType; in simplifyAMDGCNImageIntrinsic()
309 ArgTys[ImageDimIntr->CoordTyArg] = CoordType; in simplifyAMDGCNImageIntrinsic()
312 if (ImageDimIntr->NumBiasArgs != 0) in simplifyAMDGCNImageIntrinsic()
313 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext()); in simplifyAMDGCNImageIntrinsic()
317 OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd; in simplifyAMDGCNImageIntrinsic()
318 for (unsigned OperandIndex = ImageDimIntr->GradientStart; in simplifyAMDGCNImageIntrinsic()
325 if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) { in simplifyAMDGCNImageIntrinsic()
326 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex); in simplifyAMDGCNImageIntrinsic()
327 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder); in simplifyAMDGCNImageIntrinsic()
335 // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or in canSimplifyLegacyMulToMul()
358 return FPExtSrc->getType()->isHalfTy(); in matchFPExtFromF16()
363 APFloat Val(CFP->getValueAPF()); in matchFPExtFromF16()
368 FPExtSrc = ConstantFP::get(Type::getHalfTy(Arg->getContext()), Val); in matchFPExtFromF16()
379 auto *VTy = cast<FixedVectorType>(UseV->getType()); in trimTrailingZerosInVector()
380 unsigned VWidth = VTy->getNumElements(); in trimTrailingZerosInVector()
383 for (int i = VWidth - 1; i > 0; --i) { in trimTrailingZerosInVector()
389 if (!ConstElt->isNullValue() && !isa<UndefValue>(Elt)) in trimTrailingZerosInVector()
404 auto *VTy = cast<FixedVectorType>(V->getType()); in defaultComponentBroadcast()
405 unsigned VWidth = VTy->getNumElements(); in defaultComponentBroadcast()
411 SVI->getShuffleMask(ShuffleMask); in defaultComponentBroadcast()
413 for (int I = VWidth - 1; I > 0; --I) { in defaultComponentBroadcast()
433 int DMaskIdx = -1,
438 return (SqrtOp->getType()->isFloatTy() && in canContractSqrtToRsq()
439 (SqrtOp->hasApproxFunc() || SqrtOp->getFPAccuracy() >= 1.0f)) || in canContractSqrtToRsq()
440 SqrtOp->getType()->isHalfTy(); in canContractSqrtToRsq()
453 auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); in instCombineIntrinsic()
461 const APFloat &ArgVal = C->getValueAPF(); in instCombineIntrinsic()
479 auto IID = SrcCI->getIntrinsicID(); in instCombineIntrinsic()
480 // llvm.amdgcn.rcp(llvm.amdgcn.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable in instCombineIntrinsic()
482 // llvm.amdgcn.rcp(llvm.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable and in instCombineIntrinsic()
486 FastMathFlags InnerFMF = SqrtOp->getFastMathFlags(); in instCombineIntrinsic()
487 if (!InnerFMF.allowContract() || !SrcCI->hasOneUse()) in instCombineIntrinsic()
494 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()}); in instCombineIntrinsic()
500 return IC.replaceOperand(II, 0, SrcCI->getArgOperand(0)); in instCombineIntrinsic()
512 auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); in instCombineIntrinsic()
517 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) { in instCombineIntrinsic()
540 if (C->isInfinity()) { in instCombineIntrinsic()
541 // exp2(+inf) -> +inf in instCombineIntrinsic()
542 // log2(+inf) -> +inf in instCombineIntrinsic()
543 if (!C->isNegative()) in instCombineIntrinsic()
546 // exp2(-inf) -> 0 in instCombineIntrinsic()
547 if (IsExp && C->isNegative()) in instCombineIntrinsic()
554 if (C->isNaN()) { in instCombineIntrinsic()
555 Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet()); in instCombineIntrinsic()
560 if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) { in instCombineIntrinsic()
566 if (IsLog && C->isNegative()) in instCombineIntrinsic()
580 frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven); in instCombineIntrinsic()
606 II.getModule(), Intrinsic::is_fpclass, Src0->getType())); in instCombineIntrinsic()
609 II.setArgOperand(1, ConstantInt::get(Src1->getType(), in instCombineIntrinsic()
610 CMask->getZExtValue() & fcAllFlags)); in instCombineIntrinsic()
618 // llvm.amdgcn.class(_, undef) -> false in instCombineIntrinsic()
622 // llvm.amdgcn.class(undef, mask) -> mask != 0 in instCombineIntrinsic()
625 Src1, ConstantInt::getNullValue(Src1->getType())); in instCombineIntrinsic()
636 II.getType()->getScalarType()->getFltSemantics(); in instCombineIntrinsic()
638 APFloat Val0 = C0->getValueAPF(); in instCombineIntrinsic()
639 APFloat Val1 = C1->getValueAPF(); in instCombineIntrinsic()
679 unsigned IntSize = Ty->getIntegerBitWidth(); in instCombineIntrinsic()
683 Width = CWidth->getZExtValue(); in instCombineIntrinsic()
684 if ((Width & (IntSize - 1)) == 0) { in instCombineIntrinsic()
688 // Hardware ignores high bits, so remove those. in instCombineIntrinsic()
691 II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1))); in instCombineIntrinsic()
698 Offset = COffset->getZExtValue(); in instCombineIntrinsic()
702 ConstantInt::get(COffset->getType(), Offset & (IntSize - 1))); in instCombineIntrinsic()
719 Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width); in instCombineIntrinsic()
720 Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width) in instCombineIntrinsic()
721 : IC.Builder.CreateLShr(Shl, IntSize - Width); in instCombineIntrinsic()
722 RightShift->takeName(&II); in instCombineIntrinsic()
729 RightShift->takeName(&II); in instCombineIntrinsic()
736 unsigned EnBits = En->getZExtValue(); in instCombineIntrinsic()
747 IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType())); in instCombineIntrinsic()
760 // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled in instCombineIntrinsic()
767 // Checking for NaN before canonicalization provides better fidelity when in instCombineIntrinsic()
781 CI->copyFastMathFlags(&II); in instCombineIntrinsic()
782 CI->takeName(&II); in instCombineIntrinsic()
790 // fmed3(c0, x, c1) -> fmed3(x, c0, c1) in instCombineIntrinsic()
816 APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(), in instCombineIntrinsic()
817 C2->getValueAPF()); in instCombineIntrinsic()
824 if (!ST->hasMed3_16()) in instCombineIntrinsic()
829 // Repeat floating-point width reduction done for minnum/maxnum. in instCombineIntrinsic()
830 // fmed3((fpext X), (fpext Y), (fpext Z)) -> fpext (fmed3(X, Y, Z)) in instCombineIntrinsic()
833 Value *NewCall = IC.Builder.CreateIntrinsic(IID, {X->getType()}, in instCombineIntrinsic()
844 int64_t CCVal = CC->getZExtValue(); in instCombineIntrinsic()
859 if (CCmp && CCmp->isNullValue()) { in instCombineIntrinsic()
875 NewCall->addFnAttr(Attribute::Convergent); in instCombineIntrinsic()
876 NewCall->takeName(&II); in instCombineIntrinsic()
886 2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred))); in instCombineIntrinsic()
895 // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne) in instCombineIntrinsic()
896 // llvm.amdgcn.icmp(sext (i1 x), -1, eq) in instCombineIntrinsic()
897 // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne) in instCombineIntrinsic()
904 ExtSrc->getType()->isIntegerTy(1)) { in instCombineIntrinsic()
905 IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType())); in instCombineIntrinsic()
907 ConstantInt::get(CC->getType(), CmpInst::ICMP_NE)); in instCombineIntrinsic()
921 // -> llvm.amdgcn.[if]cmp(a, b, pred) in instCombineIntrinsic()
924 // -> llvm.amdgcn.[if]cmp(a, b, inv pred) in instCombineIntrinsic()
936 Type *Ty = SrcLHS->getType(); in instCombineIntrinsic()
939 unsigned Width = CmpType->getBitWidth(); in instCombineIntrinsic()
965 } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy()) in instCombineIntrinsic()
969 II.getModule(), NewIID, {II.getType(), SrcLHS->getType()}); in instCombineIntrinsic()
971 ConstantInt::get(CC->getType(), SrcPred)}; in instCombineIntrinsic()
973 NewCall->takeName(&II); in instCombineIntrinsic()
981 if (ST->isWave32()) in instCombineIntrinsic()
987 if (Src->isZero()) { in instCombineIntrinsic()
992 if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) { in instCombineIntrinsic()
1002 Call->takeName(&II); in instCombineIntrinsic()
1016 if (!C || !C->getZExtValue()) in instCombineIntrinsic()
1019 // amdgcn.kill(i1 1) is a no-op in instCombineIntrinsic()
1028 if (BC->isZeroValue() || RM->getZExtValue() != 0xF || in instCombineIntrinsic()
1029 BM->getZExtValue() != 0xF || isa<UndefValue>(Old)) in instCombineIntrinsic()
1033 return IC.replaceOperand(II, 0, UndefValue::get(Old->getType())); in instCombineIntrinsic()
1057 if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue()) in instCombineIntrinsic()
1060 return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType())); in instCombineIntrinsic()
1079 if (SrcInst && SrcInst->getParent() != II.getParent()) in instCombineIntrinsic()
1082 // readfirstlane (readfirstlane x) -> readfirstlane x in instCombineIntrinsic()
1083 // readlane (readfirstlane x), y -> readfirstlane x in instCombineIntrinsic()
1090 // readfirstlane (readlane x, y) -> readlane x, y in instCombineIntrinsic()
1095 // readlane (readlane x, y), y -> readlane x, y in instCombineIntrinsic()
1108 if (!II.getType()->isDoubleTy()) in instCombineIntrinsic()
1118 II.getType(), APFloat::getQNaN(II.getType()->getFltSemantics())); in instCombineIntrinsic()
1129 const APFloat &Fsrc = Csrc->getValueAPF(); in instCombineIntrinsic()
1140 unsigned SegmentVal = Cseg->getValue().trunc(5).getZExtValue(); in instCombineIntrinsic()
1143 Shift += Exponent - 1077; in instCombineIntrinsic()
1158 APFloat Zero = APFloat::getZero(II.getType()->getFltSemantics()); in instCombineIntrinsic()
1166 Thi = (Thi << BShift) | (Tlo >> (64 - BShift)); in instCombineIntrinsic()
1170 int Scale = -53 - Shift; in instCombineIntrinsic()
1175 return IC.replaceInstUsesWith(II, ConstantFP::get(Src->getType(), Result)); in instCombineIntrinsic()
1181 // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or in instCombineIntrinsic()
1192 FMul->takeName(&II); in instCombineIntrinsic()
1202 // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or in instCombineIntrinsic()
1208 // result if Op2 was -0.0. in instCombineIntrinsic()
1211 FAdd->takeName(&II); in instCombineIntrinsic()
1251 if (!isa<FixedVectorType>(II.getArgOperand(0)->getType())) in instCombineIntrinsic()
1255 if (ST->hasDefaultComponentBroadcast()) in instCombineIntrinsic()
1257 else if (ST->hasDefaultComponentZero()) in instCombineIntrinsic()
1262 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1; in instCombineIntrinsic()
1283 /// Note: This only supports non-TFE/LWE image intrinsic calls; those have
1291 : II.getOperand(0)->getType()); in simplifyAMDGCNMemoryIntrinsicDemanded()
1292 unsigned VWidth = IIVTy->getNumElements(); in simplifyAMDGCNMemoryIntrinsicDemanded()
1295 Type *EltTy = IIVTy->getElementType(); in simplifyAMDGCNMemoryIntrinsicDemanded()
1312 DemandedElts = (1 << ActiveBits) - 1; in simplifyAMDGCNMemoryIntrinsicDemanded()
1344 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1); in simplifyAMDGCNMemoryIntrinsicDemanded()
1350 auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd); in simplifyAMDGCNMemoryIntrinsicDemanded()
1358 unsigned DMaskVal = DMask->getZExtValue() & 0xf; in simplifyAMDGCNMemoryIntrinsicDemanded()
1365 DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1; in simplifyAMDGCNMemoryIntrinsicDemanded()
1379 Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal); in simplifyAMDGCNMemoryIntrinsicDemanded()
1417 NewCall->takeName(&II); in simplifyAMDGCNMemoryIntrinsicDemanded()
1418 NewCall->copyMetadata(II); in simplifyAMDGCNMemoryIntrinsicDemanded()