Lines Matching full:s16 (all matches below are from llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp)
70 /// additional element. This is mostly to handle <3 x s16> -> <4 x s16>. This
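A minimal sketch of that widening, assuming a MachineIRBuilder B and a Register Reg holding a <3 x s16> value (both names are illustrative, not from this file):

    // Widen <3 x s16> to <4 x s16> by appending a single undef element.
    const LLT S16 = LLT::scalar(16);
    const LLT V4S16 = LLT::fixed_vector(4, S16);
    Register Wide = B.buildPadVectorWithUndefElements(V4S16, Reg).getReg(0);
    // Wide is <4 x s16>; the padded lane is undefined and must not be read.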
180 // <2 x s8> -> s16 in getBitcastRegisterType()
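A sketch of the bitcast that comment describes, again with illustrative names (Reg holds a <2 x s8> value):

    // Reinterpret the 16 bits of a <2 x s8> as one s16 scalar; G_BITCAST
    // only relabels the value, so no real instructions result.
    Register Scalar = B.buildBitcast(LLT::scalar(16), Reg).getReg(0);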
284 static const LLT S16 = LLT::scalar(16); variable
373 // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
486 // The current selector can't handle <6 x s16>, <8 x s16>, s96, s128 etc., so
693 S32, S64, S16 in AMDGPULegalizerInfo()
697 S32, S64, S16, V2S16 in AMDGPULegalizerInfo()
700 const LLT MinScalarFPTy = ST.has16BitInsts() ? S16 : S32; in AMDGPULegalizerInfo()
708 .legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256}) in AMDGPULegalizerInfo()
715 .clampScalar(0, S16, S256) in AMDGPULegalizerInfo()
725 .legalFor({S64, S32, S16, V2S16}) in AMDGPULegalizerInfo()
726 .clampMaxNumElementsStrict(0, S16, 2) in AMDGPULegalizerInfo()
728 .minScalar(0, S16) in AMDGPULegalizerInfo()
733 .legalFor({S32, S16, V2S16}) in AMDGPULegalizerInfo()
734 .clampMaxNumElementsStrict(0, S16, 2) in AMDGPULegalizerInfo()
736 .minScalar(0, S16) in AMDGPULegalizerInfo()
743 .legalFor({S64, S32, S16, V2S16}) in AMDGPULegalizerInfo()
744 .clampMaxNumElementsStrict(0, S16, 2) in AMDGPULegalizerInfo()
746 .minScalar(0, S16) in AMDGPULegalizerInfo()
751 .legalFor({S32, S16, V2S16}) in AMDGPULegalizerInfo()
752 .clampMaxNumElementsStrict(0, S16, 2) in AMDGPULegalizerInfo()
754 .minScalar(0, S16) in AMDGPULegalizerInfo()
761 .legalFor({S32, S16, V2S16}) // Clamp modifier in AMDGPULegalizerInfo()
762 .minScalarOrElt(0, S16) in AMDGPULegalizerInfo()
763 .clampMaxNumElementsStrict(0, S16, 2) in AMDGPULegalizerInfo()
769 .legalFor({S32, S16}) in AMDGPULegalizerInfo()
770 .minScalar(0, S16) in AMDGPULegalizerInfo()
776 .legalFor({S32, S16}) in AMDGPULegalizerInfo()
778 .minScalar(0, S16) in AMDGPULegalizerInfo()
786 .legalFor({S32, S16}) // Clamp modifier in AMDGPULegalizerInfo()
787 .minScalar(0, S16) in AMDGPULegalizerInfo()
795 .minScalar(0, S16) in AMDGPULegalizerInfo()
862 .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16}) in AMDGPULegalizerInfo()
881 .legalFor({S1, S32, S64, S16, GlobalPtr, in AMDGPULegalizerInfo()
888 .legalFor({S32, S64, S16}) in AMDGPULegalizerInfo()
889 .clampScalar(0, S16, S64); in AMDGPULegalizerInfo()
893 // s1 and s16 are special cases because they have legal operations on in AMDGPULegalizerInfo()
895 .legalFor({S1, S16}) in AMDGPULegalizerInfo()
931 FPOpActions.legalFor({S16, V2S16}); in AMDGPULegalizerInfo()
933 FPOpActions.legalFor({S16}); in AMDGPULegalizerInfo()
935 TrigActions.customFor({S16}); in AMDGPULegalizerInfo()
936 FDIVActions.customFor({S16}); in AMDGPULegalizerInfo()
950 .clampMaxNumElements(0, S16, 2) in AMDGPULegalizerInfo()
951 .clampScalar(0, S16, S64) in AMDGPULegalizerInfo()
955 .clampScalar(0, S16, S64) in AMDGPULegalizerInfo()
964 FPOpActions.clampMaxNumElementsStrict(0, S16, 2); in AMDGPULegalizerInfo()
968 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); in AMDGPULegalizerInfo()
972 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); in AMDGPULegalizerInfo()
976 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); in AMDGPULegalizerInfo()
980 .clampMaxNumElementsStrict(0, S16, 2) in AMDGPULegalizerInfo()
982 .clampScalar(0, S16, S64); in AMDGPULegalizerInfo()
986 .legalFor({S16}) in AMDGPULegalizerInfo()
991 .legalFor({S32, S64, S16}) in AMDGPULegalizerInfo()
993 .clampScalar(0, S16, S64); in AMDGPULegalizerInfo()
996 .legalFor({{S32, S32}, {S64, S32}, {S16, S16}}) in AMDGPULegalizerInfo()
998 .maxScalarIf(typeIs(0, S16), 1, S16) in AMDGPULegalizerInfo()
1003 .customFor({{S32, S32}, {S64, S32}, {S16, S16}, {S16, S32}}) in AMDGPULegalizerInfo()
1008 .customFor({S32, S64, S16}) in AMDGPULegalizerInfo()
1042 .legalFor({{S32, S64}, {S16, S32}}) in AMDGPULegalizerInfo()
1047 .legalFor({{S64, S32}, {S32, S16}}) in AMDGPULegalizerInfo()
1048 .narrowScalarFor({{S64, S16}}, changeTo(0, S32)) in AMDGPULegalizerInfo()
1055 .legalFor({S32, S16}) in AMDGPULegalizerInfo()
1063 .lowerFor({S64, S16, V2S16}); in AMDGPULegalizerInfo()
1073 FMad.customFor({S32, S16}); in AMDGPULegalizerInfo()
1077 FMad.customFor({S16}); in AMDGPULegalizerInfo()
1083 FRem.customFor({S16, S32, S64}); in AMDGPULegalizerInfo()
1094 .clampMaxNumElements(0, S16, 2) in AMDGPULegalizerInfo()
1102 .legalFor({{S64, S32}, {S32, S16}, {S64, S16}, in AMDGPULegalizerInfo()
1103 {S32, S1}, {S64, S1}, {S16, S1}}) in AMDGPULegalizerInfo()
1110 .legalFor({{S32, S32}, {S64, S32}, {S16, S32}}) in AMDGPULegalizerInfo()
1114 IToFP.legalFor({{S16, S16}}); in AMDGPULegalizerInfo()
1121 .legalFor({{S32, S32}, {S32, S64}, {S32, S16}}) in AMDGPULegalizerInfo()
1123 .narrowScalarFor({{S64, S16}}, changeTo(0, S32)); in AMDGPULegalizerInfo()
1125 FPToI.legalFor({{S16, S16}}); in AMDGPULegalizerInfo()
1135 .customFor({S16, S32}) in AMDGPULegalizerInfo()
1147 .legalFor({S16, S32, S64}) in AMDGPULegalizerInfo()
1148 .clampScalar(0, S16, S64) in AMDGPULegalizerInfo()
1193 CmpBuilder.legalFor({{S1, S16}}); in AMDGPULegalizerInfo()
1207 FCmpBuilder.legalForCartesianProduct({S32}, {S16, S32}); in AMDGPULegalizerInfo()
1217 ExpOps.customFor({{S32}, {S16}}); in AMDGPULegalizerInfo()
1230 Log2Ops.legalFor({S16}); in AMDGPULegalizerInfo()
1232 Log2Ops.customFor({S16}); in AMDGPULegalizerInfo()
1238 LogOps.customFor({S32, S16}); in AMDGPULegalizerInfo()
1261 .lowerFor({S1, S16}) in AMDGPULegalizerInfo()
1305 .legalFor({S16, S32, V2S16}) in AMDGPULegalizerInfo()
1306 .clampMaxNumElementsStrict(0, S16, 2) in AMDGPULegalizerInfo()
1310 .clampScalar(0, S16, S32) in AMDGPULegalizerInfo()
1315 .legalFor({S32, S16, V2S16}) in AMDGPULegalizerInfo()
1316 .clampMaxNumElements(0, S16, 2) in AMDGPULegalizerInfo()
1317 .minScalar(0, S16) in AMDGPULegalizerInfo()
1323 .legalFor({S32, S16}) in AMDGPULegalizerInfo()
1325 .minScalar(0, S16) in AMDGPULegalizerInfo()
1438 {S32, GlobalPtr, S16, GlobalAlign16}, in AMDGPULegalizerInfo()
1444 {S32, LocalPtr, S16, 16}, in AMDGPULegalizerInfo()
1449 {S32, PrivatePtr, S16, 16}, in AMDGPULegalizerInfo()
1600 {S32, GlobalPtr, S16, 2 * 8}, in AMDGPULegalizerInfo()
1602 {S32, LocalPtr, S16, 16}, in AMDGPULegalizerInfo()
1604 {S32, PrivatePtr, S16, 16}, in AMDGPULegalizerInfo()
1606 {S32, ConstantPtr, S16, 2 * 8}}) in AMDGPULegalizerInfo()
1614 {{S32, FlatPtr, S8, 8}, {S32, FlatPtr, S16, 16}}); in AMDGPULegalizerInfo()
1700 .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16, GlobalPtr, in AMDGPULegalizerInfo()
1705 .clampScalar(0, S16, S64) in AMDGPULegalizerInfo()
1722 Shifts.legalFor({{S16, S16}, {V2S16, V2S16}}) in AMDGPULegalizerInfo()
1723 .clampMaxNumElements(0, S16, 2); in AMDGPULegalizerInfo()
1725 Shifts.legalFor({{S16, S16}}); in AMDGPULegalizerInfo()
1736 }, changeTo(1, S16)); in AMDGPULegalizerInfo()
1737 Shifts.maxScalarIf(typeIs(0, S16), 1, S16); in AMDGPULegalizerInfo()
1740 Shifts.clampScalar(0, S16, S64); in AMDGPULegalizerInfo()
1743 .minScalar(0, S16) in AMDGPULegalizerInfo()
1832 .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32))) in AMDGPULegalizerInfo()
1877 .minScalarOrElt(0, S16) in AMDGPULegalizerInfo()
1878 .minScalar(1, S16); in AMDGPULegalizerInfo()
1884 BuildVector.customFor({V2S16, S16}); in AMDGPULegalizerInfo()
1898 .clampMaxNumElements(1, S16, 2) // TODO: Make 4? in AMDGPULegalizerInfo()
1899 .clampMaxNumElements(0, S16, 64); in AMDGPULegalizerInfo()
1922 .lowerFor({{S16, V2S16}}) in AMDGPULegalizerInfo()
1927 // Try to widen to s16 first for small types. in AMDGPULegalizerInfo()
1928 // TODO: Only do this on targets with legal s16 shifts in AMDGPULegalizerInfo()
1929 .minScalarOrEltIf(scalarNarrowerThan(LitTyIdx, 16), LitTyIdx, S16) in AMDGPULegalizerInfo()
1932 .fewerElementsIf(all(typeIs(0, S16), vectorWiderThan(1, 32), in AMDGPULegalizerInfo()
1933 elementTypeIs(1, S16)), in AMDGPULegalizerInfo()
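A hedged sketch of how the predicate combinators in the matches above compose into one rule; the opcode and the changeTo mutation are chosen for illustration and are not the exact rule in this file:

    // Widen sub-16-bit scalars/elements to s16 first, then split vectors
    // of s16 wider than 32 bits. Predicates come from
    // llvm::LegalityPredicates, mutations from llvm::LegalizeMutations.
    getActionDefinitionsBuilder(TargetOpcode::G_UNMERGE_VALUES)
        .minScalarOrEltIf(scalarNarrowerThan(1, 16), 1, LLT::scalar(16))
        .fewerElementsIf(all(typeIs(0, LLT::scalar(16)),
                             vectorWiderThan(1, 32),
                             elementTypeIs(1, LLT::scalar(16))),
                         changeTo(1, LLT::fixed_vector(2, 16)))
        .lower();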
1991 .clampMaxNumElementsStrict(0, S16, 2); in AMDGPULegalizerInfo()
1993 SextInReg.lowerFor({{S32}, {S64}, {S16}}); in AMDGPULegalizerInfo()
2013 .clampMaxNumElementsStrict(0, S16, 2) in AMDGPULegalizerInfo()
2020 .clampMaxNumElementsStrict(0, S16, 2) in AMDGPULegalizerInfo()
2062 .clampMaxNumElements(0, S16, 2) in AMDGPULegalizerInfo()
2706 // TODO: Promote dynamic indexing of s16 to s32 in legalizeExtractVectorElt()
2757 // TODO: Promote dynamic indexing of s16 to s32 in legalizeInsertVectorElt()
3140 // from a widened register (e.g. <3 x s16> -> <4 x s16>) in legalizeLoad()
3839 const LLT S16 = LLT::scalar(16); in legalizeBuildVector() local
3847 Src0 = B.buildTrunc(S16, MI.getOperand(1).getReg()).getReg(0); in legalizeBuildVector()
3848 Src1 = B.buildTrunc(S16, MI.getOperand(2).getReg()).getReg(0); in legalizeBuildVector()
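Taken together, these matches show the two 32-bit build-vector sources being truncated to s16. A sketch of the implied pattern, with the final packing step assumed from the surrounding function rather than quoted (older LLVM releases spell buildMergeLikeInstr as buildMerge):

    const LLT S16 = LLT::scalar(16);
    Register Src0 = B.buildTrunc(S16, MI.getOperand(1).getReg()).getReg(0);
    Register Src1 = B.buildTrunc(S16, MI.getOperand(2).getReg()).getReg(0);
    // Assumed continuation: merge the two halves into one s32, bitcast
    // that to the <2 x s16> result, and drop the original instruction.
    auto Merge = B.buildMergeLikeInstr(LLT::scalar(32), {Src0, Src1});
    B.buildBitcast(MI.getOperand(0).getReg(), Merge);
    MI.eraseFromParent();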
4454 LLT S16 = LLT::scalar(16); in legalizeFDIV() local
4458 if (DstTy == S16) in legalizeFDIV()
4887 LLT S16 = LLT::scalar(16); in legalizeFDIV16() local
4898 auto RDst = B.buildFPTrunc(S16, QUOT, Flags); in legalizeFDIV16()
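These two matches outline the f16 division expansion: operands are widened to f32, a quotient is formed against a fast reciprocal, and the result is truncated back to f16. A sketch under those assumptions (LHS, RHS, Flags are illustrative; the hardware reciprocal is reduced to a portable G_FDIV stand-in):

    const LLT S16 = LLT::scalar(16);
    const LLT S32 = LLT::scalar(32);
    auto LHSExt = B.buildFPExt(S32, LHS, Flags);      // f16 -> f32
    auto RHSExt = B.buildFPExt(S32, RHS, Flags);      // f16 -> f32
    // Stand-in for the amdgcn.rcp intrinsic the real lowering uses:
    auto One = B.buildFConstant(S32, 1.0);
    auto RCP = B.buildInstr(TargetOpcode::G_FDIV, {S32}, {One, RHSExt}, Flags);
    auto QUOT = B.buildFMul(S32, LHSExt, RCP, Flags); // LHS * (1 / RHS)
    auto RDst = B.buildFPTrunc(S16, QUOT, Flags);     // f32 -> f16
    // The real function then runs RDst through amdgcn.div.fixup to
    // handle rounding and edge cases.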
5718 const LLT S16 = LLT::scalar(16); in handleD16VData() local
5721 assert(StoreVT.isVector() && StoreVT.getElementType() == S16); in handleD16VData()
5724 auto Unmerge = B.buildUnmerge(S16, Reg); in handleD16VData()
5748 auto Unmerge = B.buildUnmerge(S16, Reg); in handleD16VData()
5751 PackedRegs.resize(6, B.buildUndef(S16).getReg(0)); in handleD16VData()
5752 Reg = B.buildBuildVector(LLT::fixed_vector(6, S16), PackedRegs).getReg(0); in handleD16VData()
5770 if (StoreVT == LLT::fixed_vector(3, S16)) { in handleD16VData()
5771 Reg = B.buildPadVectorWithUndefElements(LLT::fixed_vector(4, S16), Reg) in handleD16VData()
5782 const LLT S16 = LLT::scalar(16); in fixStoreSourceType() local
5789 if (Ty == LLT::scalar(8) || Ty == S16) { in fixStoreSourceType()
5795 if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) { in fixStoreSourceType()
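From these checks, a sketch of what fixStoreSourceType appears to do with small store sources (B, Reg, Ty are illustrative; the vector branch is summarized in a comment):

    const LLT S32 = LLT::scalar(32);
    // Sub-dword scalar store sources are widened to a full dword; the
    // high bits are don't-care, so an any-extend is enough.
    if (Ty == LLT::scalar(8) || Ty == LLT::scalar(16))
      Reg = B.buildAnyExt(S32, Reg).getReg(0);
    // <N x s16> sources with N <= 4 instead take the packed D16 path
    // shown in handleD16VData above.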
6213 /// Turn a set of s16 typed registers in \p AddrRegs into a dword sized
6214 /// vector with s16 typed elements.
6220 const LLT S16 = LLT::scalar(16); in packImage16bitOpsToDwords() local
6235 (B.getMRI()->getType(AddrReg) == S16)) { in packImage16bitOpsToDwords()
6240 B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)}) in packImage16bitOpsToDwords()
6261 B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)}) in packImage16bitOpsToDwords()
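Combining the doc comment above with these matches, a sketch of the packing loop in packImage16bitOpsToDwords (AddrRegs and B are illustrative; the real helper also special-cases certain operands):

    const LLT S16 = LLT::scalar(16);
    const LLT V2S16 = LLT::fixed_vector(2, S16);
    SmallVector<Register, 8> PackedRegs;
    for (unsigned I = 0, E = AddrRegs.size(); I != E; I += 2) {
      Register Lo = AddrRegs[I];
      // An odd trailing s16 operand is paired with undef so that every
      // packed register fills a full dword.
      Register Hi = (I + 1 != E) ? AddrRegs[I + 1]
                                 : B.buildUndef(S16).getReg(0);
      PackedRegs.push_back(B.buildBuildVector(V2S16, {Lo, Hi}).getReg(0));
    }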
6333 const LLT S16 = LLT::scalar(16); in legalizeImageIntrinsic() local
6355 ST.hasG16() ? (BaseOpcode->Gradients && GradTy == S16) : GradTy == S16; in legalizeImageIntrinsic()
6356 const bool IsA16 = AddrTy == S16; in legalizeImageIntrinsic()
6357 const bool IsD16 = !IsAtomicPacked16Bit && Ty.getScalarType() == S16; in legalizeImageIntrinsic()
6543 // truncated from v2s16 or v4s16 to s16 type. in legalizeImageIntrinsic()
6554 // s16 -> <2 x s16>, and <3 x s16> -> <4 x s16>, in legalizeImageIntrinsic()
6579 // TODO: Should we change s16 case to s32 or <2 x s16>? in legalizeImageIntrinsic()
6589 // s16> instead of s32, we would only need 1 bitcast instead of multiple. in legalizeImageIntrinsic()
6638 // For an s16 scalar result, we form an s32 result with a truncate regardless in legalizeImageIntrinsic()
6664 Reg = B.buildTrunc(S16, Reg).getReg(0); in legalizeImageIntrinsic()
6931 const LLT S16 = LLT::scalar(16); in legalizeBVHIntrinsic() local
6997 auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16}, RayDir); in legalizeBVHIntrinsic()
6998 auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16}, RayInvDir); in legalizeBVHIntrinsic()
7037 auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16}, RayDir); in legalizeBVHIntrinsic()
7038 auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16}, RayInvDir); in legalizeBVHIntrinsic()
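A sketch of the repacking these unmerges feed, for the 16-bit (A16) BVH intersect variant: the six s16 components are re-merged pairwise into dword-sized registers (the exact pairing is assumed from context, and Ops is an illustrative operand list):

    const LLT V2S16 = LLT::fixed_vector(2, LLT::scalar(16));
    Register Halves[6] = {
        UnmergeRayDir.getReg(0),    UnmergeRayDir.getReg(1),
        UnmergeRayDir.getReg(2),    UnmergeRayInvDir.getReg(0),
        UnmergeRayInvDir.getReg(1), UnmergeRayInvDir.getReg(2)};
    for (unsigned I = 0; I != 6; I += 2)
      Ops.push_back(
          B.buildBuildVector(V2S16, {Halves[I], Halves[I + 1]}).getReg(0));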
7325 // TODO: Could insert G_ASSERT_ZEXT from s16 in legalizeIntrinsic()
7328 // TODO: Could insert G_ASSERT_ZEXT from s16 in legalizeIntrinsic()
7330 // TODO: Could insert G_ASSERT_ZEXT from s16 in legalizeIntrinsic()