Lines matching refs: S32
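All hits below are from the AMDGPU GlobalISel legalizer; the enclosing function names suggest llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp. S32 names the 32-bit scalar low-level type, LLT::scalar(32). It appears in two roles: declaring legalization rules inside the AMDGPULegalizerInfo constructor, and building replacement MIR in the custom legalize* helpers. A minimal sketch of the first role, using a hypothetical target class and an illustrative opcode rather than anything taken from the file:

    // Minimal sketch (illustrative, not from the file above): how S32 is
    // defined and consumed by GlobalISel legalization rules. The class name
    // MyTargetLegalizerInfo and the choice of G_ADD are hypothetical.
    #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
    #include "llvm/CodeGen/TargetOpcodes.h"

    class MyTargetLegalizerInfo : public llvm::LegalizerInfo {
    public:
      MyTargetLegalizerInfo() {
        using namespace llvm;
        const LLT S32 = LLT::scalar(32); // 32-bit scalar low-level type
        const LLT S64 = LLT::scalar(64);

        // G_ADD is legal at 32 and 64 bits; clamp other widths into range.
        getActionDefinitionsBuilder(TargetOpcode::G_ADD)
            .legalFor({S32, S64})
            .clampScalar(0, S32, S64);

        // Finalize the rule tables (the real constructor ends the same way).
        getLegacyLegalizerInfo().computeTables();
      }
    };
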

285 static const LLT S32 = LLT::scalar(32); variable
335 static std::initializer_list<LLT> AllScalarTypes = {S32, S64, S96, S128,
599 const LLT S32 = LLT::scalar(32); in castBufferRsrcFromV4I32() local
606 B.buildExtractVectorElementConstant(S32, VectorReg, I).getReg(0); in castBufferRsrcFromV4I32()
689 S32, S64 in AMDGPULegalizerInfo()
693 S32, S64, S16 in AMDGPULegalizerInfo()
697 S32, S64, S16, V2S16 in AMDGPULegalizerInfo()
700 const LLT MinScalarFPTy = ST.has16BitInsts() ? S16 : S32; in AMDGPULegalizerInfo()
703 getActionDefinitionsBuilder(G_BRCOND).legalFor({S1, S32}); in AMDGPULegalizerInfo()
708 .legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256}) in AMDGPULegalizerInfo()
717 .clampMaxNumElements(0, S32, 16) in AMDGPULegalizerInfo()
725 .legalFor({S64, S32, S16, V2S16}) in AMDGPULegalizerInfo()
730 .maxScalar(0, S32); in AMDGPULegalizerInfo()
733 .legalFor({S32, S16, V2S16}) in AMDGPULegalizerInfo()
738 .maxScalar(0, S32); in AMDGPULegalizerInfo()
743 .legalFor({S64, S32, S16, V2S16}) in AMDGPULegalizerInfo()
751 .legalFor({S32, S16, V2S16}) in AMDGPULegalizerInfo()
761 .legalFor({S32, S16, V2S16}) // Clamp modifier in AMDGPULegalizerInfo()
769 .legalFor({S32, S16}) in AMDGPULegalizerInfo()
772 .maxScalar(0, S32) in AMDGPULegalizerInfo()
776 .legalFor({S32, S16}) in AMDGPULegalizerInfo()
786 .legalFor({S32, S16}) // Clamp modifier in AMDGPULegalizerInfo()
800 .legalFor({S32}) in AMDGPULegalizerInfo()
802 .clampScalar(0, S32, S32) in AMDGPULegalizerInfo()
806 .legalFor({S32}) in AMDGPULegalizerInfo()
808 .minScalar(0, S32) in AMDGPULegalizerInfo()
814 Mul.maxScalar(0, S32); in AMDGPULegalizerInfo()
818 .legalFor({S32}) // Clamp modifier. in AMDGPULegalizerInfo()
820 .minScalarOrElt(0, S32) in AMDGPULegalizerInfo()
825 .minScalar(0, S32) in AMDGPULegalizerInfo()
833 .minScalar(0, S32) in AMDGPULegalizerInfo()
840 .customFor({S32, S64}) in AMDGPULegalizerInfo()
841 .clampScalar(0, S32, S64) in AMDGPULegalizerInfo()
846 .legalFor({S32}) in AMDGPULegalizerInfo()
847 .maxScalar(0, S32); in AMDGPULegalizerInfo()
862 .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16}) in AMDGPULegalizerInfo()
863 .clampScalar(0, S32, S64) in AMDGPULegalizerInfo()
871 .legalFor({{S32, S1}, {S32, S32}}) in AMDGPULegalizerInfo()
872 .clampScalar(0, S32, S32) in AMDGPULegalizerInfo()
881 .legalFor({S1, S32, S64, S16, GlobalPtr, in AMDGPULegalizerInfo()
884 .clampScalar(0, S32, S64) in AMDGPULegalizerInfo()
888 .legalFor({S32, S64, S16}) in AMDGPULegalizerInfo()
897 .clampScalarOrElt(0, S32, MaxScalar) in AMDGPULegalizerInfo()
899 .clampMaxNumElements(0, S32, 16); in AMDGPULegalizerInfo()
906 .legalFor({{PrivatePtr, S32}}); in AMDGPULegalizerInfo()
923 .legalFor({S32, S64}); in AMDGPULegalizerInfo()
925 .customFor({S32, S64}); in AMDGPULegalizerInfo()
927 .customFor({S32, S64}); in AMDGPULegalizerInfo()
941 FPOpActions.clampMaxNumElementsStrict(0, S32, 2); in AMDGPULegalizerInfo()
959 .clampScalar(0, S32, S64) in AMDGPULegalizerInfo()
968 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); in AMDGPULegalizerInfo()
972 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); in AMDGPULegalizerInfo()
976 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); in AMDGPULegalizerInfo()
987 .customFor({S32, S64}) in AMDGPULegalizerInfo()
991 .legalFor({S32, S64, S16}) in AMDGPULegalizerInfo()
996 .legalFor({{S32, S32}, {S64, S32}, {S16, S16}}) in AMDGPULegalizerInfo()
999 .clampScalar(1, S32, S32) in AMDGPULegalizerInfo()
1003 .customFor({{S32, S32}, {S64, S32}, {S16, S16}, {S16, S32}}) in AMDGPULegalizerInfo()
1008 .customFor({S32, S64, S16}) in AMDGPULegalizerInfo()
1016 .legalFor({S32, S64}) in AMDGPULegalizerInfo()
1018 .clampScalar(0, S32, S64); in AMDGPULegalizerInfo()
1021 .legalFor({S32, S64}) in AMDGPULegalizerInfo()
1023 .clampScalar(0, S32, S64); in AMDGPULegalizerInfo()
1027 .legalFor({{S32, S32}, {S64, S32}}) in AMDGPULegalizerInfo()
1029 .clampScalar(0, S32, S64) in AMDGPULegalizerInfo()
1030 .clampScalar(1, S32, S32) in AMDGPULegalizerInfo()
1034 .customFor({{S32, S32}, {S64, S32}}) in AMDGPULegalizerInfo()
1036 .minScalar(0, S32) in AMDGPULegalizerInfo()
1037 .clampScalar(1, S32, S32) in AMDGPULegalizerInfo()
1042 .legalFor({{S32, S64}, {S16, S32}}) in AMDGPULegalizerInfo()
1047 .legalFor({{S64, S32}, {S32, S16}}) in AMDGPULegalizerInfo()
1048 .narrowScalarFor({{S64, S16}}, changeTo(0, S32)) in AMDGPULegalizerInfo()
1055 .legalFor({S32, S16}) in AMDGPULegalizerInfo()
1061 .legalFor({S32}) in AMDGPULegalizerInfo()
1068 .clampScalar(0, S32, S64); in AMDGPULegalizerInfo()
1073 FMad.customFor({S32, S16}); in AMDGPULegalizerInfo()
1075 FMad.customFor({S32}); in AMDGPULegalizerInfo()
1083 FRem.customFor({S16, S32, S64}); in AMDGPULegalizerInfo()
1085 FRem.minScalar(0, S32) in AMDGPULegalizerInfo()
1086 .customFor({S32, S64}); in AMDGPULegalizerInfo()
1102 .legalFor({{S64, S32}, {S32, S16}, {S64, S16}, in AMDGPULegalizerInfo()
1103 {S32, S1}, {S64, S1}, {S16, S1}}) in AMDGPULegalizerInfo()
1105 .clampScalar(0, S32, S64) in AMDGPULegalizerInfo()
1110 .legalFor({{S32, S32}, {S64, S32}, {S16, S32}}) in AMDGPULegalizerInfo()
1112 .customFor({{S32, S64}, {S64, S64}}); in AMDGPULegalizerInfo()
1115 IToFP.clampScalar(1, S32, S64) in AMDGPULegalizerInfo()
1116 .minScalar(0, S32) in AMDGPULegalizerInfo()
1121 .legalFor({{S32, S32}, {S32, S64}, {S32, S16}}) in AMDGPULegalizerInfo()
1122 .customFor({{S64, S32}, {S64, S64}}) in AMDGPULegalizerInfo()
1123 .narrowScalarFor({{S64, S16}}, changeTo(0, S32)); in AMDGPULegalizerInfo()
1127 FPToI.minScalar(1, S32); in AMDGPULegalizerInfo()
1129 FPToI.minScalar(0, S32) in AMDGPULegalizerInfo()
1135 .customFor({S16, S32}) in AMDGPULegalizerInfo()
1147 .legalFor({S16, S32, S64}) in AMDGPULegalizerInfo()
1153 .legalFor({S32, S64}) in AMDGPULegalizerInfo()
1154 .clampScalar(0, S32, S64) in AMDGPULegalizerInfo()
1159 .legalFor({S32}) in AMDGPULegalizerInfo()
1161 .clampScalar(0, S32, S64) in AMDGPULegalizerInfo()
1172 .legalIf(all(sameSize(0, 1), typeInSet(1, {S64, S32}))) in AMDGPULegalizerInfo()
1189 {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr}) in AMDGPULegalizerInfo()
1191 {S32}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr}); in AMDGPULegalizerInfo()
1198 .clampScalar(1, S32, S64) in AMDGPULegalizerInfo()
1200 .legalIf(all(typeInSet(0, {S1, S32}), isPointer(1))); in AMDGPULegalizerInfo()
1207 FCmpBuilder.legalForCartesianProduct({S32}, {S16, S32}); in AMDGPULegalizerInfo()
1211 .clampScalar(1, S32, S64) in AMDGPULegalizerInfo()
1217 ExpOps.customFor({{S32}, {S16}}); in AMDGPULegalizerInfo()
1219 ExpOps.customFor({S32}); in AMDGPULegalizerInfo()
1220 ExpOps.clampScalar(0, MinScalarFPTy, S32) in AMDGPULegalizerInfo()
1224 .clampScalar(0, MinScalarFPTy, S32) in AMDGPULegalizerInfo()
1228 Log2Ops.customFor({S32}); in AMDGPULegalizerInfo()
1238 LogOps.customFor({S32, S16}); in AMDGPULegalizerInfo()
1239 LogOps.clampScalar(0, MinScalarFPTy, S32) in AMDGPULegalizerInfo()
1244 .legalFor({{S32, S32}, {S32, S64}}) in AMDGPULegalizerInfo()
1245 .clampScalar(0, S32, S32) in AMDGPULegalizerInfo()
1247 .clampScalar(1, S32, S64) in AMDGPULegalizerInfo()
1271 .clampScalar(0, S32, S32) in AMDGPULegalizerInfo()
1272 .clampScalar(1, S32, S64) in AMDGPULegalizerInfo()
1279 .legalFor({{S32, S32}, {S32, S64}}) in AMDGPULegalizerInfo()
1281 .clampScalar(0, S32, S32) in AMDGPULegalizerInfo()
1282 .clampScalar(1, S32, S64) in AMDGPULegalizerInfo()
1288 .legalFor({{S32, S32}, {S32, S64}}) in AMDGPULegalizerInfo()
1289 .clampScalar(0, S32, S32) in AMDGPULegalizerInfo()
1290 .clampScalar(1, S32, S64) in AMDGPULegalizerInfo()
1298 .legalFor({S32, S64}) in AMDGPULegalizerInfo()
1299 .clampScalar(0, S32, S64) in AMDGPULegalizerInfo()
1305 .legalFor({S16, S32, V2S16}) in AMDGPULegalizerInfo()
1310 .clampScalar(0, S16, S32) in AMDGPULegalizerInfo()
1315 .legalFor({S32, S16, V2S16}) in AMDGPULegalizerInfo()
1323 .legalFor({S32, S16}) in AMDGPULegalizerInfo()
1332 .legalFor({S32}) in AMDGPULegalizerInfo()
1337 .maxScalar(0, S32) in AMDGPULegalizerInfo()
1342 .legalFor({S32}) in AMDGPULegalizerInfo()
1343 .minScalar(0, S32) in AMDGPULegalizerInfo()
1352 .legalForCartesianProduct(AddrSpaces32, {S32}) in AMDGPULegalizerInfo()
1368 .legalForCartesianProduct(AddrSpaces32, {S32}) in AMDGPULegalizerInfo()
1431 Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, S32, GlobalAlign32}, in AMDGPULegalizerInfo()
1437 {S32, GlobalPtr, S8, GlobalAlign8}, in AMDGPULegalizerInfo()
1438 {S32, GlobalPtr, S16, GlobalAlign16}, in AMDGPULegalizerInfo()
1440 {S32, LocalPtr, S32, 32}, in AMDGPULegalizerInfo()
1443 {S32, LocalPtr, S8, 8}, in AMDGPULegalizerInfo()
1444 {S32, LocalPtr, S16, 16}, in AMDGPULegalizerInfo()
1445 {V2S16, LocalPtr, S32, 32}, in AMDGPULegalizerInfo()
1447 {S32, PrivatePtr, S32, 32}, in AMDGPULegalizerInfo()
1448 {S32, PrivatePtr, S8, 8}, in AMDGPULegalizerInfo()
1449 {S32, PrivatePtr, S16, 16}, in AMDGPULegalizerInfo()
1450 {V2S16, PrivatePtr, S32, 32}, in AMDGPULegalizerInfo()
1452 {S32, ConstantPtr, S32, GlobalAlign32}, in AMDGPULegalizerInfo()
1590 .minScalar(0, S32) in AMDGPULegalizerInfo()
1591 .narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, S32)) in AMDGPULegalizerInfo()
1599 .legalForTypesWithMemDesc({{S32, GlobalPtr, S8, 8}, in AMDGPULegalizerInfo()
1600 {S32, GlobalPtr, S16, 2 * 8}, in AMDGPULegalizerInfo()
1601 {S32, LocalPtr, S8, 8}, in AMDGPULegalizerInfo()
1602 {S32, LocalPtr, S16, 16}, in AMDGPULegalizerInfo()
1603 {S32, PrivatePtr, S8, 8}, in AMDGPULegalizerInfo()
1604 {S32, PrivatePtr, S16, 16}, in AMDGPULegalizerInfo()
1605 {S32, ConstantPtr, S8, 8}, in AMDGPULegalizerInfo()
1606 {S32, ConstantPtr, S16, 2 * 8}}) in AMDGPULegalizerInfo()
1614 {{S32, FlatPtr, S8, 8}, {S32, FlatPtr, S16, 16}}); in AMDGPULegalizerInfo()
1624 ExtLoads.clampScalar(0, S32, S32) in AMDGPULegalizerInfo()
1633 .legalFor({{S32, GlobalPtr}, {S32, LocalPtr}, in AMDGPULegalizerInfo()
1635 {S32, RegionPtr}, {S64, RegionPtr}}); in AMDGPULegalizerInfo()
1637 Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}}); in AMDGPULegalizerInfo()
1643 Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}}); in AMDGPULegalizerInfo()
1650 Atomic.legalFor({{S32, GlobalPtr}}); in AMDGPULegalizerInfo()
1652 Atomic.legalFor({{S32, FlatPtr}}); in AMDGPULegalizerInfo()
1659 {S32, GlobalPtr}, in AMDGPULegalizerInfo()
1692 .customFor({{S32, GlobalPtr}, {S64, GlobalPtr}, in AMDGPULegalizerInfo()
1693 {S32, FlatPtr}, {S64, FlatPtr}}) in AMDGPULegalizerInfo()
1694 .legalFor({{S32, LocalPtr}, {S64, LocalPtr}, in AMDGPULegalizerInfo()
1695 {S32, RegionPtr}, {S64, RegionPtr}}); in AMDGPULegalizerInfo()
1700 .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16, GlobalPtr, in AMDGPULegalizerInfo()
1704 {S1, S32}) in AMDGPULegalizerInfo()
1709 .clampMaxNumElements(0, S32, 2) in AMDGPULegalizerInfo()
1714 .legalIf(all(isPointer(0), typeInSet(1, {S1, S32}))); in AMDGPULegalizerInfo()
1719 .legalFor({{S32, S32}, {S64, S32}}); in AMDGPULegalizerInfo()
1738 Shifts.clampScalar(1, S32, S32); in AMDGPULegalizerInfo()
1750 Shifts.clampScalar(1, S32, S32); in AMDGPULegalizerInfo()
1752 Shifts.clampScalar(0, S32, S64); in AMDGPULegalizerInfo()
1755 .minScalar(0, S32) in AMDGPULegalizerInfo()
1807 .clampScalar(EltTypeIdx, S32, S64) in AMDGPULegalizerInfo()
1808 .clampScalar(VecTypeIdx, S32, S64) in AMDGPULegalizerInfo()
1809 .clampScalar(IdxTypeIdx, S32, S32) in AMDGPULegalizerInfo()
1810 .clampMaxNumElements(VecTypeIdx, S32, 32) in AMDGPULegalizerInfo()
1865 .legalForCartesianProduct(AllS32Vectors, {S32}) in AMDGPULegalizerInfo()
1881 .legalFor({V2S16, S32}) in AMDGPULegalizerInfo()
1885 BuildVector.minScalarOrElt(0, S32); in AMDGPULegalizerInfo()
1888 .customFor({V2S16, S32}) in AMDGPULegalizerInfo()
1897 .clampMaxNumElements(0, S32, 32) in AMDGPULegalizerInfo()
1938 .clampScalar(LitTyIdx, S32, S512) in AMDGPULegalizerInfo()
1947 .clampScalar(BigTyIdx, S32, MaxScalar); in AMDGPULegalizerInfo()
1956 changeTo(LitTyIdx, S32)); in AMDGPULegalizerInfo()
1984 .legalFor({{S32}, {S64}}); in AMDGPULegalizerInfo()
1993 SextInReg.lowerFor({{S32}, {S64}, {S16}}); in AMDGPULegalizerInfo()
1997 SextInReg.lowerFor({{S32}, {S64}}); in AMDGPULegalizerInfo()
2002 .clampScalar(0, S32, S64) in AMDGPULegalizerInfo()
2011 .legalFor({{S32, S32}}) in AMDGPULegalizerInfo()
2039 .minScalar(0, S32) in AMDGPULegalizerInfo()
2043 .legalFor({{S32, S32}, {S64, S32}}) in AMDGPULegalizerInfo()
2044 .clampScalar(1, S32, S32) in AMDGPULegalizerInfo()
2045 .clampScalar(0, S32, S64) in AMDGPULegalizerInfo()
2197 const LLT S32 = LLT::scalar(32); in getSegmentAperture() local
2219 return B.buildUnmerge(S32, Dst).getReg(1); in getSegmentAperture()
2253 return B.buildLoad(S32, LoadAddr, *MMO).getReg(0); in getSegmentAperture()
2274 return B.buildLoad(S32, LoadAddr, *MMO).getReg(0); in getSegmentAperture()
2309 const LLT S32 = LLT::scalar(32); in legalizeAddrSpaceCast() local
2366 Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0); in legalizeAddrSpaceCast()
2404 auto PtrLo = B.buildPtrToInt(S32, Src); in legalizeAddrSpaceCast()
2405 auto HighAddr = B.buildConstant(S32, AddrHiVal); in legalizeAddrSpaceCast()
2497 LLT S32 = LLT::scalar(32); in extractF64Exponent() local
2499 auto Const0 = B.buildConstant(S32, FractBits - 32); in extractF64Exponent()
2500 auto Const1 = B.buildConstant(S32, ExpBits); in extractF64Exponent()
2502 auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}) in extractF64Exponent()
2507 return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023)); in extractF64Exponent()
2514 const LLT S32 = LLT::scalar(32); in legalizeIntrinsicTrunc() local
2521 auto Unmerge = B.buildUnmerge({S32, S32}, Src); in legalizeIntrinsicTrunc()
2531 const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31); in legalizeIntrinsicTrunc()
2532 auto SignBit = B.buildAnd(S32, Hi, SignBitMask); in legalizeIntrinsicTrunc()
2536 const auto Zero32 = B.buildConstant(S32, 0); in legalizeIntrinsicTrunc()
2544 auto FiftyOne = B.buildConstant(S32, FractBits - 1); in legalizeIntrinsicTrunc()
2563 const LLT S32 = LLT::scalar(32); in legalizeITOFP() local
2567 auto Unmerge = B.buildUnmerge({S32, S32}, Src); in legalizeITOFP()
2568 auto ThirtyTwo = B.buildConstant(S32, 32); in legalizeITOFP()
2583 assert(MRI.getType(Dst) == S32); in legalizeITOFP()
2585 auto One = B.buildConstant(S32, 1); in legalizeITOFP()
2589 auto ThirtyOne = B.buildConstant(S32, 31); in legalizeITOFP()
2590 auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1)); in legalizeITOFP()
2591 auto OppositeSign = B.buildAShr(S32, X, ThirtyOne); in legalizeITOFP()
2592 auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign); in legalizeITOFP()
2593 auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32}) in legalizeITOFP()
2595 auto LS2 = B.buildSub(S32, LS, One); in legalizeITOFP()
2596 ShAmt = B.buildUMin(S32, LS2, MaxShAmt); in legalizeITOFP()
2598 ShAmt = B.buildCTLZ(S32, Unmerge.getReg(1)); in legalizeITOFP()
2600 auto Unmerge2 = B.buildUnmerge({S32, S32}, Norm); in legalizeITOFP()
2601 auto Adjust = B.buildUMin(S32, One, Unmerge2.getReg(0)); in legalizeITOFP()
2602 auto Norm2 = B.buildOr(S32, Unmerge2.getReg(1), Adjust); in legalizeITOFP()
2603 auto FVal = Signed ? B.buildSITOFP(S32, Norm2) : B.buildUITOFP(S32, Norm2); in legalizeITOFP()
2604 auto Scale = B.buildSub(S32, ThirtyTwo, ShAmt); in legalizeITOFP()
2621 const LLT S32 = LLT::scalar(32); in legalizeFPTOI() local
2624 assert((SrcLT == S32 || SrcLT == S64) && MRI.getType(Dst) == S64); in legalizeFPTOI()
2639 if (Signed && SrcLT == S32) { in legalizeFPTOI()
2645 Sign = B.buildAShr(S32, Src, B.buildConstant(S32, 31)); in legalizeFPTOI()
2646 Trunc = B.buildFAbs(S32, Trunc, Flags); in legalizeFPTOI()
2656 S32, llvm::bit_cast<float>(UINT32_C(/*2^-32*/ 0x2f800000))); in legalizeFPTOI()
2658 S32, llvm::bit_cast<float>(UINT32_C(/*-2^32*/ 0xcf800000))); in legalizeFPTOI()
2665 auto Hi = (Signed && SrcLT == S64) ? B.buildFPTOSI(S32, FloorMul) in legalizeFPTOI()
2666 : B.buildFPTOUI(S32, FloorMul); in legalizeFPTOI()
2667 auto Lo = B.buildFPTOUI(S32, Fma); in legalizeFPTOI()
2669 if (Signed && SrcLT == S32) { in legalizeFPTOI()
2900 LLT S32 = LLT::scalar(32); in buildAbsGlobalAddress() local
2906 : MRI.createGenericVirtualRegister(S32); in buildAbsGlobalAddress()
2921 Register AddrHi = MRI.createGenericVirtualRegister(S32); in buildAbsGlobalAddress()
3000 LLT S32 = LLT::scalar(32); in legalizeGlobalValue() local
3001 auto Sz = B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}); in legalizeGlobalValue()
3838 const LLT S32 = LLT::scalar(32); in legalizeBuildVector() local
3846 assert(MRI.getType(Src0) == S32); in legalizeBuildVector()
3851 auto Merge = B.buildMergeLikeInstr(S32, {Src0, Src1}); in legalizeBuildVector()
3881 const LLT S32 = LLT::scalar(32); in buildMultiply() local
3889 Zero32 = B.buildConstant(S32, 0).getReg(0); in buildMultiply()
3917 LocalAccum = B.buildZExt(S32, CarryIn[0]).getReg(0); in buildMultiply()
3923 CarryAccum = B.buildZExt(S32, CarryIn[0]).getReg(0); in buildMultiply()
3926 B.buildUAdde(S32, S1, CarryAccum, getZero32(), CarryIn[i]) in buildMultiply()
3937 B.buildUAdde(S32, S1, CarryAccum, LocalAccum, CarryIn.back()); in buildMultiply()
3972 auto Mul = B.buildMul(S32, Src0[j0], Src1[j1]); in buildMultiply()
3977 LocalAccum[0] = B.buildAdd(S32, LocalAccum[0], Mul).getReg(0); in buildMultiply()
3980 B.buildUAdde(S32, S1, LocalAccum[0], Mul, CarryIn.back()) in buildMultiply()
4027 auto Unmerge = B.buildUnmerge(S32, Tmp); in buildMultiply()
4085 Lo = B.buildUAddo(S32, S1, Accum[2 * i - 1], SeparateOddOut[0]); in buildMultiply()
4087 Lo = B.buildAdd(S32, Accum[2 * i - 1], SeparateOddOut[0]); in buildMultiply()
4089 Lo = B.buildUAdde(S32, S1, Accum[2 * i - 1], SeparateOddOut[0], in buildMultiply()
4095 auto Hi = B.buildUAdde(S32, S1, Accum[2 * i], SeparateOddOut[1], in buildMultiply()
4150 LLT S32 = LLT::scalar(32); in legalizeMul() local
4153 Src0Parts.push_back(MRI.createGenericVirtualRegister(S32)); in legalizeMul()
4154 Src1Parts.push_back(MRI.createGenericVirtualRegister(S32)); in legalizeMul()
4199 auto ShiftAmt = B.buildConstant(S32, 32u - NumBits); in legalizeCTLZ_ZERO_UNDEF()
4200 auto Extend = B.buildAnyExt(S32, {Src}).getReg(0u); in legalizeCTLZ_ZERO_UNDEF()
4201 auto Shift = B.buildShl(S32, Extend, ShiftAmt); in legalizeCTLZ_ZERO_UNDEF()
4202 auto Ctlz = B.buildInstr(AMDGPU::G_AMDGPU_FFBH_U32, {S32}, {Shift}); in legalizeCTLZ_ZERO_UNDEF()
4271 const LLT S32 = LLT::scalar(32); in loadInputValue() local
4280 auto ShiftAmt = B.buildConstant(S32, Shift); in loadInputValue()
4281 AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0); in loadInputValue()
4284 B.buildAnd(DstReg, AndMaskSrc, B.buildConstant(S32, Mask >> Shift)); in loadInputValue()
4455 LLT S32 = LLT::scalar(32); in legalizeFDIV() local
4460 if (DstTy == S32) in legalizeFDIV()
4474 const LLT S32 = LLT::scalar(32); in legalizeUnsignedDIV_REM32Impl() local
4480 auto FloatY = B.buildUITOFP(S32, Y); in legalizeUnsignedDIV_REM32Impl()
4481 auto RcpIFlag = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {FloatY}); in legalizeUnsignedDIV_REM32Impl()
4482 auto Scale = B.buildFConstant(S32, llvm::bit_cast<float>(0x4f7ffffe)); in legalizeUnsignedDIV_REM32Impl()
4483 auto ScaledY = B.buildFMul(S32, RcpIFlag, Scale); in legalizeUnsignedDIV_REM32Impl()
4484 auto Z = B.buildFPTOUI(S32, ScaledY); in legalizeUnsignedDIV_REM32Impl()
4487 auto NegY = B.buildSub(S32, B.buildConstant(S32, 0), Y); in legalizeUnsignedDIV_REM32Impl()
4488 auto NegYZ = B.buildMul(S32, NegY, Z); in legalizeUnsignedDIV_REM32Impl()
4489 Z = B.buildAdd(S32, Z, B.buildUMulH(S32, Z, NegYZ)); in legalizeUnsignedDIV_REM32Impl()
4492 auto Q = B.buildUMulH(S32, X, Z); in legalizeUnsignedDIV_REM32Impl()
4493 auto R = B.buildSub(S32, X, B.buildMul(S32, Q, Y)); in legalizeUnsignedDIV_REM32Impl()
4496 auto One = B.buildConstant(S32, 1); in legalizeUnsignedDIV_REM32Impl()
4499 Q = B.buildSelect(S32, Cond, B.buildAdd(S32, Q, One), Q); in legalizeUnsignedDIV_REM32Impl()
4500 R = B.buildSelect(S32, Cond, B.buildSub(S32, R, Y), R); in legalizeUnsignedDIV_REM32Impl()
4505 B.buildSelect(DstDivReg, Cond, B.buildAdd(S32, Q, One), Q); in legalizeUnsignedDIV_REM32Impl()
4508 B.buildSelect(DstRemReg, Cond, B.buildSub(S32, R, Y), R); in legalizeUnsignedDIV_REM32Impl()
4526 const LLT S32 = LLT::scalar(32); in emitReciprocalU64() local
4527 auto Unmerge = B.buildUnmerge(S32, Val); in emitReciprocalU64()
4529 auto CvtLo = B.buildUITOFP(S32, Unmerge.getReg(0)); in emitReciprocalU64()
4530 auto CvtHi = B.buildUITOFP(S32, Unmerge.getReg(1)); in emitReciprocalU64()
4533 S32, CvtHi, // 2**32 in emitReciprocalU64()
4534 B.buildFConstant(S32, llvm::bit_cast<float>(0x4f800000)), CvtLo); in emitReciprocalU64()
4536 auto Rcp = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {Mad}); in emitReciprocalU64()
4538 S32, Rcp, B.buildFConstant(S32, llvm::bit_cast<float>(0x5f7ffffc))); in emitReciprocalU64()
4542 S32, Mul1, B.buildFConstant(S32, llvm::bit_cast<float>(0x2f800000))); in emitReciprocalU64()
4543 auto Trunc = B.buildIntrinsicTrunc(S32, Mul2); in emitReciprocalU64()
4547 S32, Trunc, B.buildFConstant(S32, llvm::bit_cast<float>(0xcf800000)), in emitReciprocalU64()
4550 auto ResultLo = B.buildFPTOUI(S32, Mad2); in emitReciprocalU64()
4551 auto ResultHi = B.buildFPTOUI(S32, Trunc); in emitReciprocalU64()
4561 const LLT S32 = LLT::scalar(32); in legalizeUnsignedDIV_REM64Impl() local
4576 auto UnmergeMulHi1 = B.buildUnmerge(S32, MulHi1); in legalizeUnsignedDIV_REM64Impl()
4580 auto Add1_Lo = B.buildUAddo(S32, S1, RcpLo, MulHi1_Lo); in legalizeUnsignedDIV_REM64Impl()
4581 auto Add1_Hi = B.buildUAdde(S32, S1, RcpHi, MulHi1_Hi, Add1_Lo.getReg(1)); in legalizeUnsignedDIV_REM64Impl()
4586 auto UnmergeMulHi2 = B.buildUnmerge(S32, MulHi2); in legalizeUnsignedDIV_REM64Impl()
4590 auto Zero32 = B.buildConstant(S32, 0); in legalizeUnsignedDIV_REM64Impl()
4591 auto Add2_Lo = B.buildUAddo(S32, S1, Add1_Lo, MulHi2_Lo); in legalizeUnsignedDIV_REM64Impl()
4592 auto Add2_Hi = B.buildUAdde(S32, S1, Add1_Hi, MulHi2_Hi, Add2_Lo.getReg(1)); in legalizeUnsignedDIV_REM64Impl()
4595 auto UnmergeNumer = B.buildUnmerge(S32, Numer); in legalizeUnsignedDIV_REM64Impl()
4601 auto UnmergeMul3 = B.buildUnmerge(S32, Mul3); in legalizeUnsignedDIV_REM64Impl()
4604 auto Sub1_Lo = B.buildUSubo(S32, S1, NumerLo, Mul3_Lo); in legalizeUnsignedDIV_REM64Impl()
4605 auto Sub1_Hi = B.buildUSube(S32, S1, NumerHi, Mul3_Hi, Sub1_Lo.getReg(1)); in legalizeUnsignedDIV_REM64Impl()
4606 auto Sub1_Mi = B.buildSub(S32, NumerHi, Mul3_Hi); in legalizeUnsignedDIV_REM64Impl()
4609 auto UnmergeDenom = B.buildUnmerge(S32, Denom); in legalizeUnsignedDIV_REM64Impl()
4614 auto C1 = B.buildSExt(S32, CmpHi); in legalizeUnsignedDIV_REM64Impl()
4617 auto C2 = B.buildSExt(S32, CmpLo); in legalizeUnsignedDIV_REM64Impl()
4620 auto C3 = B.buildSelect(S32, CmpEq, C2, C1); in legalizeUnsignedDIV_REM64Impl()
4627 auto Sub2_Lo = B.buildUSubo(S32, S1, Sub1_Lo, DenomLo); in legalizeUnsignedDIV_REM64Impl()
4628 auto Sub2_Mi = B.buildUSube(S32, S1, Sub1_Mi, DenomHi, Sub1_Lo.getReg(1)); in legalizeUnsignedDIV_REM64Impl()
4629 auto Sub2_Hi = B.buildUSube(S32, S1, Sub2_Mi, Zero32, Sub2_Lo.getReg(1)); in legalizeUnsignedDIV_REM64Impl()
4636 B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Hi, DenomHi)); in legalizeUnsignedDIV_REM64Impl()
4638 B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Lo, DenomLo)); in legalizeUnsignedDIV_REM64Impl()
4640 S32, B.buildICmp(CmpInst::ICMP_EQ, S1, Sub2_Hi, DenomHi), C5, C4); in legalizeUnsignedDIV_REM64Impl()
4644 auto Sub3_Lo = B.buildUSubo(S32, S1, Sub2_Lo, DenomLo); in legalizeUnsignedDIV_REM64Impl()
4646 auto Sub3_Mi = B.buildUSube(S32, S1, Sub2_Mi, DenomHi, Sub2_Lo.getReg(1)); in legalizeUnsignedDIV_REM64Impl()
4647 auto Sub3_Hi = B.buildUSube(S32, S1, Sub3_Mi, Zero32, Sub3_Lo.getReg(1)); in legalizeUnsignedDIV_REM64Impl()
4691 const LLT S32 = LLT::scalar(32); in legalizeUnsignedDIV_REM() local
4697 if (Ty == S32) in legalizeUnsignedDIV_REM()
4712 const LLT S32 = LLT::scalar(32); in legalizeSignedDIV_REM() local
4715 if (Ty != S32 && Ty != S64) in legalizeSignedDIV_REM()
4722 auto SignBitOffset = B.buildConstant(S32, Ty.getSizeInBits() - 1); in legalizeSignedDIV_REM()
4755 if (Ty == S32) in legalizeSignedDIV_REM()
4888 LLT S32 = LLT::scalar(32); in legalizeFDIV16() local
4890 auto LHSExt = B.buildFPExt(S32, LHS, Flags); in legalizeFDIV16()
4891 auto RHSExt = B.buildFPExt(S32, RHS, Flags); in legalizeFDIV16()
4893 auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}) in legalizeFDIV16()
4897 auto QUOT = B.buildFMul(S32, LHSExt, RCP, Flags); in legalizeFDIV16()
4951 LLT S32 = LLT::scalar(32); in legalizeFDIV32() local
4954 auto One = B.buildFConstant(S32, 1.0f); in legalizeFDIV32()
4957 B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}) in legalizeFDIV32()
4963 B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}) in legalizeFDIV32()
4969 auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}) in legalizeFDIV32()
4972 auto NegDivScale0 = B.buildFNeg(S32, DenominatorScaled, Flags); in legalizeFDIV32()
4990 auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags); in legalizeFDIV32()
4991 auto Fma1 = B.buildFMA(S32, Fma0, ApproxRcp, ApproxRcp, Flags); in legalizeFDIV32()
4992 auto Mul = B.buildFMul(S32, NumeratorScaled, Fma1, Flags); in legalizeFDIV32()
4993 auto Fma2 = B.buildFMA(S32, NegDivScale0, Mul, NumeratorScaled, Flags); in legalizeFDIV32()
4994 auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags); in legalizeFDIV32()
4995 auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags); in legalizeFDIV32()
5007 auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}) in legalizeFDIV32()
5072 LLT S32 = LLT::scalar(32); in legalizeFDIV64() local
5074 auto NumUnmerge = B.buildUnmerge(S32, LHS); in legalizeFDIV64()
5075 auto DenUnmerge = B.buildUnmerge(S32, RHS); in legalizeFDIV64()
5076 auto Scale0Unmerge = B.buildUnmerge(S32, DivScale0); in legalizeFDIV64()
5077 auto Scale1Unmerge = B.buildUnmerge(S32, DivScale1); in legalizeFDIV64()
5148 LLT S32 = LLT::scalar(32); in legalizeFDIVFastIntrin() local
5151 auto Abs = B.buildFAbs(S32, RHS, Flags); in legalizeFDIVFastIntrin()
5154 auto C0 = B.buildFConstant(S32, 0x1p+96f); in legalizeFDIVFastIntrin()
5155 auto C1 = B.buildFConstant(S32, 0x1p-32f); in legalizeFDIVFastIntrin()
5156 auto C2 = B.buildFConstant(S32, 1.0f); in legalizeFDIVFastIntrin()
5159 auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags); in legalizeFDIVFastIntrin()
5161 auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags); in legalizeFDIVFastIntrin()
5163 auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}) in legalizeFDIVFastIntrin()
5167 auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags); in legalizeFDIVFastIntrin()
5297 const LLT S32 = LLT::scalar(32); in legalizeFSQRTF64() local
5308 auto ZeroInt = B.buildConstant(S32, 0); in legalizeFSQRTF64()
5312 auto ScaleUpFactor = B.buildConstant(S32, 256); in legalizeFSQRTF64()
5313 auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt); in legalizeFSQRTF64()
5340 auto ScaleDownFactor = B.buildConstant(S32, -128); in legalizeFSQRTF64()
5341 auto ScaleDown = B.buildSelect(S32, Scaling, ScaleDownFactor, ZeroInt); in legalizeFSQRTF64()
5478 Src0 = B.buildAnyExt(S32, Src0).getReg(0); in legalizeLaneOp()
5486 Register LaneOpDst = createLaneOp(Src0, Src1, Src2, S32); in legalizeLaneOp()
5495 LLT PartialResTy = S32; in legalizeLaneOp()
5570 LLT S32 = LLT::scalar(32); in legalizePointerAsRsrcIntrin() local
5573 auto Unmerge = B.buildUnmerge(S32, Pointer); in legalizePointerAsRsrcIntrin()
5577 auto AndMask = B.buildConstant(S32, 0x0000ffff); in legalizePointerAsRsrcIntrin()
5578 auto Masked = B.buildAnd(S32, HighHalf, AndMask); in legalizePointerAsRsrcIntrin()
5588 ShiftedStride = B.buildConstant(S32, ShiftedStrideVal); in legalizePointerAsRsrcIntrin()
5590 auto ExtStride = B.buildAnyExt(S32, Stride); in legalizePointerAsRsrcIntrin()
5591 auto ShiftConst = B.buildConstant(S32, 16); in legalizePointerAsRsrcIntrin()
5592 ShiftedStride = B.buildShl(S32, ExtStride, ShiftConst); in legalizePointerAsRsrcIntrin()
5594 NewHighHalf = B.buildOr(S32, Masked, ShiftedStride); in legalizePointerAsRsrcIntrin()
5673 const LLT S32 = LLT::scalar(32); in splitBufferOffsets() local
5700 BaseReg = B.buildConstant(S32, Overflow).getReg(0); in splitBufferOffsets()
5702 auto OverflowVal = B.buildConstant(S32, Overflow); in splitBufferOffsets()
5703 BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0); in splitBufferOffsets()
5708 BaseReg = B.buildConstant(S32, 0).getReg(0); in splitBufferOffsets()
5719 const LLT S32 = LLT::scalar(32); in handleD16VData() local
5728 WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0)); in handleD16VData()
5732 return B.buildBuildVector(LLT::fixed_vector(NumElts, S32), WideRegs) in handleD16VData()
5739 Reg = B.buildBitcast(S32, Reg).getReg(0); in handleD16VData()
5741 PackedRegs.resize(2, B.buildUndef(S32).getReg(0)); in handleD16VData()
5742 return B.buildBuildVector(LLT::fixed_vector(2, S32), PackedRegs) in handleD16VData()
5753 return B.buildBitcast(LLT::fixed_vector(3, S32), Reg).getReg(0); in handleD16VData()
5758 Reg = B.buildBitcast(LLT::fixed_vector(2, S32), Reg).getReg(0); in handleD16VData()
5759 auto Unmerge = B.buildUnmerge(S32, Reg); in handleD16VData()
5762 PackedRegs.resize(4, B.buildUndef(S32).getReg(0)); in handleD16VData()
5763 return B.buildBuildVector(LLT::fixed_vector(4, S32), PackedRegs) in handleD16VData()
5813 const LLT S32 = LLT::scalar(32); in legalizeBufferStore() local
5835 VIndex = B.buildConstant(S32, 0).getReg(0); in legalizeBufferStore()
5920 const LLT S32 = LLT::scalar(32); in legalizeBufferLoad() local
5946 VIndex = B.buildConstant(S32, 0).getReg(0); in legalizeBufferLoad()
6011 LLT LoadTy = LLT::fixed_vector(NumLoadDWords, S32); in legalizeBufferLoad()
6016 Register ExtDst = B.getMRI()->createGenericVirtualRegister(S32); in legalizeBufferLoad()
6024 LoadElts.push_back(B.getMRI()->createGenericVirtualRegister(S32)); in legalizeBufferLoad()
6032 Register LoadDstReg = B.getMRI()->createGenericVirtualRegister(S32); in legalizeBufferLoad()
6044 auto Unmerge = B.buildUnmerge(S32, LoadDstReg); in legalizeBufferLoad()
6278 const LLT S32 = LLT::scalar(32); in convertImageAddrToPacked() local
6279 (void)S32; in convertImageAddrToPacked()
6285 assert(B.getMRI()->getType(SrcOp.getReg()) == S32); in convertImageAddrToPacked()
6332 const LLT S32 = LLT::scalar(32); in legalizeImageIntrinsic() local
6567 RegTy = S32; in legalizeImageIntrinsic()
6574 TFETy = LLT::fixed_vector(RoundedSize / 32 + 1, S32); in legalizeImageIntrinsic()
6575 RegTy = !IsTFE && EltSize == 16 ? V2S16 : S32; in legalizeImageIntrinsic()
6604 if (MRI->getType(Dst1Reg) != S32) in legalizeImageIntrinsic()
6611 if (Ty == S32) { in legalizeImageIntrinsic()
6932 const LLT S32 = LLT::scalar(32); in legalizeBVHIntrinsic() local
6985 auto packLanes = [&Ops, &S32, &V3S32, &B](Register Src) { in legalizeBVHIntrinsic()
6986 auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src); in legalizeBVHIntrinsic()
7002 S32, B.buildMergeLikeInstr(V2S16, {UnmergeRayInvDir.getReg(0), in legalizeBVHIntrinsic()
7006 S32, B.buildMergeLikeInstr(V2S16, {UnmergeRayInvDir.getReg(1), in legalizeBVHIntrinsic()
7010 S32, B.buildMergeLikeInstr(V2S16, {UnmergeRayInvDir.getReg(2), in legalizeBVHIntrinsic()
7020 auto Unmerge = B.buildUnmerge({S32, S32}, NodePtr); in legalizeBVHIntrinsic()
7028 auto packLanes = [&Ops, &S32, &B](Register Src) { in legalizeBVHIntrinsic()
7029 auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src); in legalizeBVHIntrinsic()
7039 Register R1 = MRI.createGenericVirtualRegister(S32); in legalizeBVHIntrinsic()
7040 Register R2 = MRI.createGenericVirtualRegister(S32); in legalizeBVHIntrinsic()
7041 Register R3 = MRI.createGenericVirtualRegister(S32); in legalizeBVHIntrinsic()
7117 LLT S32 = LLT::scalar(32); in legalizeWaveID() local
7119 auto TTMP8 = B.buildCopy(S32, Register(AMDGPU::TTMP8)); in legalizeWaveID()
7120 auto LSB = B.buildConstant(S32, 25); in legalizeWaveID()
7121 auto Width = B.buildConstant(S32, 5); in legalizeWaveID()
7141 B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {S32}, in legalizeGetFPEnv()
7145 B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {S32}, in legalizeGetFPEnv()
7160 auto Unmerge = B.buildUnmerge({S32, S32}, MI.getOperand(0)); in legalizeSetFPEnv()
7462 LLT S32 = LLT::scalar(32); in legalizeIntrinsic() local
7463 if (MRI.getType(Index) != S32) in legalizeIntrinsic()
7464 MI.getOperand(5).setReg(B.buildAnyExt(S32, Index).getReg(0)); in legalizeIntrinsic()
7471 LLT S32 = LLT::scalar(32); in legalizeIntrinsic() local
7472 if (MRI.getType(Index) != S32) in legalizeIntrinsic()
7473 MI.getOperand(7).setReg(B.buildAnyExt(S32, Index).getReg(0)); in legalizeIntrinsic()
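
The remaining hits show the second role: each legalize* helper declares a local S32 and uses MachineIRBuilder to emit 32-bit pieces (buildConstant, buildUnmerge, buildAdd, buildMergeLikeInstr, and so on, all visible above). A self-contained sketch of that idiom; the helper name addToHighHalf is hypothetical, and it assumes a builder B that already has an insertion point:

    // Hypothetical helper illustrating the pattern used by the legalize*
    // functions above: split a 64-bit register into 32-bit halves, adjust
    // the high half, and merge the result back together.
    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

    llvm::Register addToHighHalf(llvm::MachineIRBuilder &B,
                                 llvm::Register Src64, int64_t Imm) {
      using namespace llvm;
      const LLT S32 = LLT::scalar(32);
      const LLT S64 = LLT::scalar(64);

      auto Unmerge = B.buildUnmerge(S32, Src64);       // {lo, hi} halves
      auto C = B.buildConstant(S32, Imm);              // 32-bit immediate
      auto Hi = B.buildAdd(S32, Unmerge.getReg(1), C); // adjust high half
      return B.buildMergeLikeInstr(S64, {Unmerge.getReg(0), Hi.getReg(0)})
          .getReg(0);
    }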