//===-- AMDGPUGIsel.td - AMDGPU GlobalISel Patterns---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains patterns that should only be used by GlobalISel. For
// example, patterns for V_* instructions that have S_* equivalents.
// SelectionDAG does not support selecting V_* instructions.
//===----------------------------------------------------------------------===//

include "AMDGPU.td"

//===----------------------------------------------------------------------===//
// Complex operand matchers.
//
// Each gi_* record below names a selector function (the string argument of
// GIComplexOperandMatcher) and ties it, via GIComplexPatternEquiv, to a
// ComplexPattern record.
//===----------------------------------------------------------------------===//

// sd_vsrc0 and sd_vcsrc are declared here with an empty select-function
// string; they are referenced only through their gi_* equivalents.
def sd_vsrc0 : ComplexPattern<i32, 1, "">;
def gi_vsrc0 : GIComplexOperandMatcher<s32, "selectVSRC0">,
               GIComplexPatternEquiv<sd_vsrc0>;

def sd_vcsrc : ComplexPattern<i32, 1, "">;
def gi_vcsrc : GIComplexOperandMatcher<s32, "selectVCSRC">,
               GIComplexPatternEquiv<sd_vcsrc>;

// VOP3 source/output modifier matchers.
def gi_vop3mods0 : GIComplexOperandMatcher<s32, "selectVOP3Mods0">,
                   GIComplexPatternEquiv<VOP3Mods0>;

def gi_vop3mods : GIComplexOperandMatcher<s32, "selectVOP3Mods">,
                  GIComplexPatternEquiv<VOP3Mods>;

def gi_vop3mods_nnan : GIComplexOperandMatcher<s32, "selectVOP3Mods_nnan">,
                       GIComplexPatternEquiv<VOP3Mods_nnan>;

def gi_vop3omods : GIComplexOperandMatcher<s32, "selectVOP3OMods">,
                   GIComplexPatternEquiv<VOP3OMods>;

def gi_vop3opselmods0 : GIComplexOperandMatcher<s32, "selectVOP3OpSelMods0">,
                        GIComplexPatternEquiv<VOP3OpSelMods0>;

def gi_vop3opselmods : GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">,
                       GIComplexPatternEquiv<VOP3OpSelMods>;

// SMRD addressing matchers (s64 operands).
def gi_smrd_imm : GIComplexOperandMatcher<s64, "selectSmrdImm">,
                  GIComplexPatternEquiv<SMRDImm>;

def gi_smrd_imm32 : GIComplexOperandMatcher<s64, "selectSmrdImm32">,
                    GIComplexPatternEquiv<SMRDImm32>;

def gi_smrd_sgpr : GIComplexOperandMatcher<s64, "selectSmrdSgpr">,
                   GIComplexPatternEquiv<SMRDSgpr>;

// FLAT addressing matchers. The atomic records reuse the same selector
// functions as their non-atomic counterparts.
// FIXME: Why are the atomic versions separated?
def gi_flat_offset : GIComplexOperandMatcher<s64, "selectFlatOffset">,
                     GIComplexPatternEquiv<FLATOffset>;
def gi_flat_offset_signed :
    GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
    GIComplexPatternEquiv<FLATOffsetSigned>;
def gi_flat_atomic : GIComplexOperandMatcher<s64, "selectFlatOffset">,
                     GIComplexPatternEquiv<FLATAtomic>;
def gi_flat_signed_atomic :
    GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
    GIComplexPatternEquiv<FLATSignedAtomic>;

// MUBUF scratch addressing matchers.
def gi_mubuf_scratch_offset :
    GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
    GIComplexPatternEquiv<MUBUFScratchOffset>;
def gi_mubuf_scratch_offen :
    GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
    GIComplexPatternEquiv<MUBUFScratchOffen>;

// DS single-address, single-offset matcher.
def gi_ds_1addr_1offset : GIComplexOperandMatcher<s32, "selectDS1Addr1Offset">,
                          GIComplexPatternEquiv<DS1Addr1Offset>;


// Separate load nodes are defined to glue m0 initialization in
// SelectionDAG. The GISel selector can just insert m0 initialization
// directly before selecting a glue-less load, so hide this
// distinction.

//===----------------------------------------------------------------------===//
// Generic opcode <-> SelectionDAG node equivalences.
//===----------------------------------------------------------------------===//

// Glued load/store nodes map onto the plain generic opcodes; these two
// equivalences are restricted to non-atomic memory operands.
let CheckMMOIsNonAtomic = 1 in {
  def : GINodeEquiv<G_LOAD, AMDGPUld_glue>;
  def : GINodeEquiv<G_STORE, AMDGPUst_glue>;
}

// The glued atomic load also maps onto G_LOAD, flagged as atomic instead.
def : GINodeEquiv<G_LOAD, AMDGPUatomic_ld_glue> {
  bit CheckMMOIsAtomic = 1;
}

// Glued atomic read-modify-write nodes.
def : GINodeEquiv<G_ATOMIC_CMPXCHG, atomic_cmp_swap_glue>;
def : GINodeEquiv<G_ATOMICRMW_XCHG, atomic_swap_glue>;
def : GINodeEquiv<G_ATOMICRMW_ADD,  atomic_load_add_glue>;
def : GINodeEquiv<G_ATOMICRMW_SUB,  atomic_load_sub_glue>;
def : GINodeEquiv<G_ATOMICRMW_AND,  atomic_load_and_glue>;
def : GINodeEquiv<G_ATOMICRMW_OR,   atomic_load_or_glue>;
def : GINodeEquiv<G_ATOMICRMW_XOR,  atomic_load_xor_glue>;
def : GINodeEquiv<G_ATOMICRMW_MIN,  atomic_load_min_glue>;
def : GINodeEquiv<G_ATOMICRMW_MAX,  atomic_load_max_glue>;
def : GINodeEquiv<G_ATOMICRMW_UMIN, atomic_load_umin_glue>;
def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax_glue>;
def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>;

// AMDGPU-specific generic opcodes.
def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;


//===----------------------------------------------------------------------===//
// Pattern helper classes.
//
// All helpers share the same template parameters: the operator to match
// (node), the instruction to emit (inst), the result type (dst_vt), and the
// two source types, which default to dst_vt and src0_vt respectively.
//===----------------------------------------------------------------------===//

// Two-operand pattern with both sources in SReg_32.
class GISelSop2Pat <SDPatternOperator node,
                    Instruction inst,
                    ValueType dst_vt,
                    ValueType src0_vt = dst_vt,
                    ValueType src1_vt = src0_vt>
  : GCNPat <
      (dst_vt (node (src0_vt SReg_32:$src0), (src1_vt SReg_32:$src1))),
      (inst src0_vt:$src0, src1_vt:$src1)
    >;

// Two-operand pattern: src0 is matched through sd_vsrc0, src1 is in VGPR_32.
class GISelVop2Pat <SDPatternOperator node,
                    Instruction inst,
                    ValueType dst_vt,
                    ValueType src0_vt = dst_vt,
                    ValueType src1_vt = src0_vt>
  : GCNPat <
      (dst_vt (node (src0_vt (sd_vsrc0 src0_vt:$src0)),
                    (src1_vt VGPR_32:$src1))),
      (inst src0_vt:$src0, src1_vt:$src1)
    >;

// Same as GISelVop2Pat, but the matched operands appear in the opposite
// order: the VGPR_32 operand first, the sd_vsrc0 operand second.
class GISelVop2CommutePat <SDPatternOperator node,
                           Instruction inst,
                           ValueType dst_vt,
                           ValueType src0_vt = dst_vt,
                           ValueType src1_vt = src0_vt>
  : GCNPat <
      (dst_vt (node (src1_vt VGPR_32:$src1),
                    (src0_vt (sd_vsrc0 src0_vt:$src0)))),
      (inst src0_vt:$src0, src1_vt:$src1)
    >;

// Two-operand pattern with both sources matched through sd_vcsrc.
class GISelVop3Pat2 <SDPatternOperator node,
                     Instruction inst,
                     ValueType dst_vt,
                     ValueType src0_vt = dst_vt,
                     ValueType src1_vt = src0_vt>
  : GCNPat <
      (dst_vt (node (src0_vt (sd_vcsrc src0_vt:$src0)),
                    (src1_vt (sd_vcsrc src1_vt:$src1)))),
      (inst src0_vt:$src0, src1_vt:$src1)
    >;

// Matches the same input as GISelVop3Pat2, but passes $src1 and $src0 to the
// instruction in swapped positions.
class GISelVop3Pat2CommutePat <SDPatternOperator node,
                               Instruction inst,
                               ValueType dst_vt,
                               ValueType src0_vt = dst_vt,
                               ValueType src1_vt = src0_vt>
  : GCNPat <
      (dst_vt (node (src0_vt (sd_vcsrc src0_vt:$src0)),
                    (src1_vt (sd_vcsrc src1_vt:$src1)))),
      (inst src0_vt:$src1, src1_vt:$src0)
    >;

// Two-operand pattern with modifiers: src0 is matched through VOP3Mods0
// (which also yields $clamp and $omods) and src1 through VOP3Mods. The
// emitted instruction receives each modifier operand ahead of its source,
// followed by $clamp and $omods.
class GISelVop3Pat2ModsPat <SDPatternOperator node,
                            Instruction inst,
                            ValueType dst_vt,
                            ValueType src0_vt = dst_vt,
                            ValueType src1_vt = src0_vt>
  : GCNPat <
      (dst_vt (node (src0_vt (VOP3Mods0 src0_vt:$src0, i32:$src0_modifiers,
                                        i1:$clamp, i32:$omods)),
                    (src1_vt (VOP3Mods src1_vt:$src1, i32:$src1_modifiers)))),
      (inst i32:$src0_modifiers, src0_vt:$src0,
            i32:$src1_modifiers, src1_vt:$src1, $clamp, $omods)
    >;

// Instantiates both the direct and the commuted two-operand pattern for an
// intrinsic, using src_vt for both sources.
multiclass GISelVop2IntrPat <SDPatternOperator node,
                             Instruction inst,
                             ValueType dst_vt,
                             ValueType src_vt = dst_vt> {
  def : GISelVop2Pat <node, inst, dst_vt, src_vt>;

  // FIXME: Intrinsics aren't marked as commutable, so we need to add an
  // explicit pattern to handle commuting. This is another reason why
  // legalizing to a generic machine instruction may be better than matching
  // the intrinsic directly.
  def : GISelVop2CommutePat <node, inst, dst_vt, src_vt>;
}

//===----------------------------------------------------------------------===//
// Pattern instantiations.
//===----------------------------------------------------------------------===//

def : GISelSop2Pat <or, S_OR_B32, i32>;
def : GISelVop2Pat <or, V_OR_B32_e32, i32>;

// Since GlobalISel is more flexible than SelectionDAG, I think we can get
// away with adding patterns for integer types and not legalizing all
// loads and stores to vector types. This should help simplify the load/store
// legalization.
foreach Ty = [i64, p0, p1, p4] in
  defm : SMRD_Pattern <"S_LOAD_DWORDX2", Ty>;

//===----------------------------------------------------------------------===//
// Custom operand renderers.
//
// Each record names a renderer function and ties it, via GISDNodeXFormEquiv,
// to an existing SDNodeXForm.
//===----------------------------------------------------------------------===//

def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
                    GISDNodeXFormEquiv<as_i32timm>;

def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm">,
                    GISDNodeXFormEquiv<as_i16timm>;

def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">,
                   GISDNodeXFormEquiv<NegateImm>;

def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastImm">,
                              GISDNodeXFormEquiv<bitcast_fpimm_to_i32>;

def gi_IMMPopCount : GICustomOperandRenderer<"renderPopcntImm">,
                     GISDNodeXFormEquiv<IMMPopCount>;