//===- HexagonPatternsHVX.td - Selection Patterns for HVX --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//


def SDTVecUnaryOp:
  SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDTVecBinOp:
  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>;

def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2,
  [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>;
def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>;

def SDTHexagonVINSERTW0: SDTypeProfile<1, 2,
  [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;

def HwLen2: SDNodeXForm<imm, [{
  const auto &ST = static_cast<const HexagonSubtarget&>(CurDAG->getSubtarget());
  return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32);
}]>;

def Q2V: OutPatFrag<(ops node:$Qs), (V6_vandqrt $Qs, (A2_tfrsi -1))>;

def Combinev: OutPatFrag<(ops node:$Vs, node:$Vt),
  (REG_SEQUENCE HvxWR, $Vs, vsub_hi, $Vt, vsub_lo)>;

def Combineq: OutPatFrag<(ops node:$Qs, node:$Qt),
  (V6_vandvrt
    (V6_vor
      (V6_vror (V6_vpackeb (V6_vd0), (Q2V $Qs)),
               (A2_tfrsi (HwLen2 (i32 0)))),  // Half the vector length
      (V6_vpackeb (V6_vd0), (Q2V $Qt))),
    (A2_tfrsi -1))>;

def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>;
def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>;

def HexagonQCAT:     SDNode<"HexagonISD::QCAT",     SDTVecBinOp>;
def HexagonQTRUE:    SDNode<"HexagonISD::QTRUE",    SDTVecLeaf>;
def HexagonQFALSE:   SDNode<"HexagonISD::QFALSE",   SDTVecLeaf>;
def HexagonVPACKL:   SDNode<"HexagonISD::VPACKL",   SDTVecUnaryOp>;
def HexagonVUNPACK:  SDNode<"HexagonISD::VUNPACK",  SDTVecUnaryOp>;
def HexagonVUNPACKU: SDNode<"HexagonISD::VUNPACKU", SDTVecUnaryOp>;

def vzero:  PatFrag<(ops), (splat_vector (i32 0))>;
def qtrue:  PatFrag<(ops), (HexagonQTRUE)>;
def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
def qcat:   PatFrag<(ops node:$Qs, node:$Qt),
                    (HexagonQCAT node:$Qs, node:$Qt)>;

def qnot:     PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
def vpackl:   PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>;
def vunpack:  PatFrag<(ops node:$Vs), (HexagonVUNPACK node:$Vs)>;
def vunpacku: PatFrag<(ops node:$Vs), (HexagonVUNPACKU node:$Vs)>;

def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb  $Vs)>;
def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh  $Vs)>;
def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>;
def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>;

def IsVecOff : PatLeaf<(i32 imm), [{
  int32_t V = N->getSExtValue();
  int32_t VecSize = HRI->getSpillSize(Hexagon::HvxVRRegClass);
  assert(isPowerOf2_32(VecSize));
  if ((uint32_t(V) & (uint32_t(VecSize)-1)) != 0)
    return false;
  int32_t L = Log2_32(VecSize);
  return isInt<4>(V >> L);
}]>;
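// Note (illustrative, assuming 64-byte HVX vectors): IsVecOff only accepts
// byte offsets that are a multiple of the vector length and whose scaled
// value fits in a signed 4-bit field, i.e. -512, -448, ..., 0, 64, ..., 448.
// Offsets outside that set are not folded into the *_ai addressing forms
// below and end up selected as a separate address computation instead.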

def alignedload: PatFrag<(ops node:$a), (load $a), [{
  return isAlignedMemNode(dyn_cast<MemSDNode>(N));
}]>;

def unalignedload: PatFrag<(ops node:$a), (load $a), [{
  return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
}]>;

def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
  return isAlignedMemNode(dyn_cast<MemSDNode>(N));
}]>;

def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
  return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
}]>;


// HVX loads

multiclass HvxLdfi_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                       PatFrag ImmPred> {
  def: Pat<(ResType (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
           (MI AddrFI:$fi, imm:$Off)>;
  def: Pat<(ResType (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))),
           (MI AddrFI:$fi, imm:$Off)>;
  def: Pat<(ResType (Load AddrFI:$fi)), (ResType (MI AddrFI:$fi, 0))>;
}

multiclass HvxLdgi_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                       PatFrag ImmPred> {
  def: Pat<(ResType (Load (add I32:$Rt, ImmPred:$Off))),
           (MI I32:$Rt, imm:$Off)>;
  def: Pat<(ResType (Load I32:$Rt)),
           (MI I32:$Rt, 0)>;
}

multiclass HvxLdc_pat<InstHexagon MI, PatFrag Load, ValueType ResType> {
  // The HVX selection code for shuffles can generate vector constants.
  // Calling "Select" on the resulting loads from CP fails without these
  // patterns.
  def: Pat<(ResType (Load (HexagonCP tconstpool:$Addr))),
           (MI (A2_tfrsi imm:$Addr), 0)>;
  def: Pat<(ResType (Load (HexagonAtPcrel tconstpool:$Addr))),
           (MI (C4_addipc imm:$Addr), 0)>;
}

multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                     PatFrag ImmPred> {
  defm: HvxLdfi_pat<MI, Load, ResType, ImmPred>;
  defm: HvxLdgi_pat<MI, Load, ResType, ImmPred>;
  defm: HvxLdc_pat <MI, Load, ResType>;
}

// Aligned loads: everything, plus loads with valignaddr node.
multiclass HvxLda_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                      PatFrag ImmPred> {
  let AddedComplexity = 50 in {
    def: Pat<(ResType (Load (valignaddr I32:$Rt))),
             (MI I32:$Rt, 0)>;
    def: Pat<(ResType (Load (add (valignaddr I32:$Rt), ImmPred:$Off))),
             (MI I32:$Rt, imm:$Off)>;
  }
  defm: HvxLd_pat<MI, Load, ResType, ImmPred>;
}
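// For instance (with 64-byte vectors), the HvxLda_pat<V6_vL32b_ai,
// alignedload, VecI32, IsVecOff> instantiation below ends up containing,
// among others, a pattern that selects
//   (v16i32 (load (add (i32 AddrFI:$fi), 64)))
// as "V6_vL32b_ai $fi, 64", i.e. the frame-index-plus-offset form of the
// aligned vector load.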

let Predicates = [UseHVX] in {
  // alignedload will match a non-temporal load as well, so try non-temporal
  // first.
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI8,  IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI16, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI32, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,    alignedload,            VecI8,  IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,    alignedload,            VecI16, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,    alignedload,            VecI32, IsVecOff>;

  defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI8,  IsVecOff>;
  defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI16, IsVecOff>;
  defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI32, IsVecOff>;
}


// HVX stores

multiclass HvxStfi_pat<InstHexagon MI, PatFrag Store, PatFrag Value,
                       PatFrag ImmPred> {
  def: Pat<(Store Value:$Vs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
           (MI AddrFI:$fi, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)),
           (MI AddrFI:$fi, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, AddrFI:$fi),
           (MI AddrFI:$fi, 0, Value:$Vs)>;
}

multiclass HvxStgi_pat<InstHexagon MI, PatFrag Store, PatFrag Value,
                       PatFrag ImmPred> {
  def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$Off)),
           (MI I32:$Rt, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, (IsOrAdd I32:$Rt, ImmPred:$Off)),
           (MI I32:$Rt, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, I32:$Rt),
           (MI I32:$Rt, 0, Value:$Vs)>;
}

multiclass HvxSt_pat<InstHexagon MI, PatFrag Store, PatFrag Value,
                     PatFrag ImmPred> {
  defm: HvxStfi_pat<MI, Store, Value, ImmPred>;
  defm: HvxStgi_pat<MI, Store, Value, ImmPred>;
}

let Predicates = [UseHVX] in {
  // alignedstore will match a non-temporal store as well, so try non-temporal
  // first.
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVI8,  IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVI16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVI32, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,    alignedstore,            HVI8,  IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,    alignedstore,            HVI16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,    alignedstore,            HVI32, IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,   unalignedstore,          HVI8,  IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,   unalignedstore,          HVI16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,   unalignedstore,          HVI32, IsVecOff>;
}

// Bitcasts between same-size vector types are no-ops, except for the
// actual type change.
let Predicates = [UseHVX] in {
  defm: NopCast_pat<VecI8,   VecI16,  HvxVR>;
  defm: NopCast_pat<VecI8,   VecI32,  HvxVR>;
  defm: NopCast_pat<VecI16,  VecI32,  HvxVR>;

  defm: NopCast_pat<VecPI8,  VecPI16, HvxWR>;
  defm: NopCast_pat<VecPI8,  VecPI32, HvxWR>;
  defm: NopCast_pat<VecPI16, VecPI32, HvxWR>;
}
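// With 64-byte vectors, for example, the casts above cover bitcasts between
// v64i8, v32i16 and v16i32 (and between the corresponding pair types held in
// HvxWR); no instruction is emitted, the register is simply reinterpreted.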

let Predicates = [UseHVX] in {
  let AddedComplexity = 100 in {
    // These should be preferred over a vsplat of 0.
    def: Pat<(VecI8   vzero), (V6_vd0)>;
    def: Pat<(VecI16  vzero), (V6_vd0)>;
    def: Pat<(VecI32  vzero), (V6_vd0)>;
    def: Pat<(VecPI8  vzero), (PS_vdd0)>;
    def: Pat<(VecPI16 vzero), (PS_vdd0)>;
    def: Pat<(VecPI32 vzero), (PS_vdd0)>;

    def: Pat<(concat_vectors (VecI8  vzero), (VecI8  vzero)), (PS_vdd0)>;
    def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>;
    def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>;
  }

  def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;

  def: Pat<(VecQ8  (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qt, $Qs)>;
  def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qt, $Qs)>;

  def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs),
           (V6_extractw HvxVR:$Vu, I32:$Rs)>;
  def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs),
           (V6_extractw HvxVR:$Vu, I32:$Rs)>;
  def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs),
           (V6_extractw HvxVR:$Vu, I32:$Rs)>;

  def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
}

// Splats for HvxV60
def V60splatib: OutPatFrag<(ops node:$V),  (V6_lvsplatw (ToI32 (SplatB $V)))>;
def V60splatih: OutPatFrag<(ops node:$V),  (V6_lvsplatw (ToI32 (SplatH $V)))>;
def V60splatiw: OutPatFrag<(ops node:$V),  (V6_lvsplatw (ToI32 $V))>;
def V60splatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatw (S2_vsplatrb $Rs))>;
def V60splatrh: OutPatFrag<(ops node:$Rs),
                           (V6_lvsplatw (A2_combine_ll $Rs, $Rs))>;
def V60splatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;

// Splats for HvxV62+
def V62splatib: OutPatFrag<(ops node:$V),  (V6_lvsplatb (ToI32 $V))>;
def V62splatih: OutPatFrag<(ops node:$V),  (V6_lvsplath (ToI32 $V))>;
def V62splatiw: OutPatFrag<(ops node:$V),  (V6_lvsplatw (ToI32 $V))>;
def V62splatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatb $Rs)>;
def V62splatrh: OutPatFrag<(ops node:$Rs), (V6_lvsplath $Rs)>;
def V62splatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;

def Rep: OutPatFrag<(ops node:$N), (Combinev $N, $N)>;

let Predicates = [UseHVX,UseHVXV60] in {
  let AddedComplexity = 10 in {
    def: Pat<(VecI8   (splat_vector u8_0ImmPred:$V)),  (V60splatib $V)>;
    def: Pat<(VecI16  (splat_vector u16_0ImmPred:$V)), (V60splatih $V)>;
    def: Pat<(VecI32  (splat_vector anyimm:$V)),       (V60splatiw $V)>;
    def: Pat<(VecPI8  (splat_vector u8_0ImmPred:$V)),  (Rep (V60splatib $V))>;
    def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$V)), (Rep (V60splatih $V))>;
    def: Pat<(VecPI32 (splat_vector anyimm:$V)),       (Rep (V60splatiw $V))>;
  }
  def: Pat<(VecI8   (splat_vector I32:$Rs)), (V60splatrb $Rs)>;
  def: Pat<(VecI16  (splat_vector I32:$Rs)), (V60splatrh $Rs)>;
  def: Pat<(VecI32  (splat_vector I32:$Rs)), (V60splatrw $Rs)>;
  def: Pat<(VecPI8  (splat_vector I32:$Rs)), (Rep (V60splatrb $Rs))>;
  def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (V60splatrh $Rs))>;
  def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V60splatrw $Rs))>;
}
let Predicates = [UseHVX,UseHVXV62] in {
  let AddedComplexity = 30 in {
    def: Pat<(VecI8   (splat_vector u8_0ImmPred:$V)),  (V62splatib imm:$V)>;
    def: Pat<(VecI16  (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>;
    def: Pat<(VecI32  (splat_vector anyimm:$V)),       (V62splatiw imm:$V)>;
    def: Pat<(VecPI8  (splat_vector u8_0ImmPred:$V)),
             (Rep (V62splatib imm:$V))>;
    def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$V)),
             (Rep (V62splatih imm:$V))>;
    def: Pat<(VecPI32 (splat_vector anyimm:$V)),
             (Rep (V62splatiw imm:$V))>;
  }
  let AddedComplexity = 20 in {
    def: Pat<(VecI8   (splat_vector I32:$Rs)), (V62splatrb $Rs)>;
    def: Pat<(VecI16  (splat_vector I32:$Rs)), (V62splatrh $Rs)>;
    def: Pat<(VecI32  (splat_vector I32:$Rs)), (V62splatrw $Rs)>;
    def: Pat<(VecPI8  (splat_vector I32:$Rs)), (Rep (V62splatrb $Rs))>;
    def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (V62splatrh $Rs))>;
    def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V62splatrw $Rs))>;
  }
}
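// The V62 patterns above carry a higher AddedComplexity than the V60 ones, so
// on HVX v62+ the dedicated byte/halfword splat instructions
// (V6_lvsplatb/V6_lvsplath) win; on v60 a byte or halfword splat has to be
// synthesized by replicating the value into a word (SplatB/SplatH or
// S2_vsplatrb/A2_combine_ll) and doing a word splat with V6_lvsplatw.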

class Vneg1<ValueType VecTy>
  : PatFrag<(ops), (VecTy (splat_vector (i32 -1)))>;

class Vnot<ValueType VecTy>
  : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;

let Predicates = [UseHVX] in {
  let AddedComplexity = 200 in {
    def: Pat<(Vnot<VecI8>  HVI8:$Vs),  (V6_vnot HvxVR:$Vs)>;
    def: Pat<(Vnot<VecI16> HVI16:$Vs), (V6_vnot HvxVR:$Vs)>;
    def: Pat<(Vnot<VecI32> HVI32:$Vs), (V6_vnot HvxVR:$Vs)>;
  }

  def: OpR_RR_pat<V6_vaddb,    Add,  VecI8,   HVI8>;
  def: OpR_RR_pat<V6_vaddh,    Add,  VecI16,  HVI16>;
  def: OpR_RR_pat<V6_vaddw,    Add,  VecI32,  HVI32>;
  def: OpR_RR_pat<V6_vaddb_dv, Add,  VecPI8,  HWI8>;
  def: OpR_RR_pat<V6_vaddh_dv, Add,  VecPI16, HWI16>;
  def: OpR_RR_pat<V6_vaddw_dv, Add,  VecPI32, HWI32>;
  def: OpR_RR_pat<V6_vsubb,    Sub,  VecI8,   HVI8>;
  def: OpR_RR_pat<V6_vsubh,    Sub,  VecI16,  HVI16>;
  def: OpR_RR_pat<V6_vsubw,    Sub,  VecI32,  HVI32>;
  def: OpR_RR_pat<V6_vsubb_dv, Sub,  VecPI8,  HWI8>;
  def: OpR_RR_pat<V6_vsubh_dv, Sub,  VecPI16, HWI16>;
  def: OpR_RR_pat<V6_vsubw_dv, Sub,  VecPI32, HWI32>;
  def: OpR_RR_pat<V6_vand,     And,  VecI8,   HVI8>;
  def: OpR_RR_pat<V6_vand,     And,  VecI16,  HVI16>;
  def: OpR_RR_pat<V6_vand,     And,  VecI32,  HVI32>;
  def: OpR_RR_pat<V6_vor,      Or,   VecI8,   HVI8>;
  def: OpR_RR_pat<V6_vor,      Or,   VecI16,  HVI16>;
  def: OpR_RR_pat<V6_vor,      Or,   VecI32,  HVI32>;
  def: OpR_RR_pat<V6_vxor,     Xor,  VecI8,   HVI8>;
  def: OpR_RR_pat<V6_vxor,     Xor,  VecI16,  HVI16>;
  def: OpR_RR_pat<V6_vxor,     Xor,  VecI32,  HVI32>;

  def: OpR_RR_pat<V6_vminb,    Smin, VecI8,   HVI8>;
  def: OpR_RR_pat<V6_vmaxb,    Smax, VecI8,   HVI8>;
  def: OpR_RR_pat<V6_vminub,   Umin, VecI8,   HVI8>;
  def: OpR_RR_pat<V6_vmaxub,   Umax, VecI8,   HVI8>;
  def: OpR_RR_pat<V6_vminh,    Smin, VecI16,  HVI16>;
  def: OpR_RR_pat<V6_vmaxh,    Smax, VecI16,  HVI16>;
  def: OpR_RR_pat<V6_vminuh,   Umin, VecI16,  HVI16>;
  def: OpR_RR_pat<V6_vmaxuh,   Umax, VecI16,  HVI16>;
  def: OpR_RR_pat<V6_vminw,    Smin, VecI32,  HVI32>;
  def: OpR_RR_pat<V6_vmaxw,    Smax, VecI32,  HVI32>;

  def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;

  def: Pat<(vselect (qnot HQ8:$Qu), HVI8:$Vs, HVI8:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(vselect (qnot HQ16:$Qu), HVI16:$Vs, HVI16:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(vselect (qnot HQ32:$Qu), HVI32:$Vs, HVI32:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}

let Predicates = [UseHVX] in {
  // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
  // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
  // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
  def: Pat<(mul HVI8:$Vs, HVI8:$Vt),
           (V6_vshuffeb (HiVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)),
                        (LoVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)))>;
  def: Pat<(mul HVI16:$Vs, HVI16:$Vt),
           (V6_vmpyih HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(mul HVI32:$Vs, HVI32:$Vt),
           (V6_vmpyiewuh_acc (V6_vmpyieoh HvxVR:$Vs, HvxVR:$Vt),
                             HvxVR:$Vs, HvxVR:$Vt)>;
}
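// Informal sketch of the i8 case above: the even bytes of each 16-bit product
// hold its low 8 bits, so V6_vshuffeb, which interleaves the even bytes of
// its two inputs, reassembles the per-element products in their original
// order, already truncated to 8 bits as an i8 "mul" requires.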

let Predicates = [UseHVX] in {
  def: Pat<(VecPI16 (sext HVI8:$Vs)),  (VSxtb $Vs)>;
  def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;
  def: Pat<(VecPI16 (zext HVI8:$Vs)),  (VZxtb $Vs)>;
  def: Pat<(VecPI32 (zext HVI16:$Vs)), (VZxth $Vs)>;

  def: Pat<(VecI16 (sext_invec HVI8:$Vs)),  (LoVec (VSxtb $Vs))>;
  def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>;
  def: Pat<(VecI32 (sext_invec HVI8:$Vs)),
           (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
  def: Pat<(VecPI16 (sext_invec HWI8:$Vss)),  (VSxtb (LoVec $Vss))>;
  def: Pat<(VecPI32 (sext_invec HWI16:$Vss)), (VSxth (LoVec $Vss))>;
  def: Pat<(VecPI32 (sext_invec HWI8:$Vss)),
           (VSxth (LoVec (VSxtb (LoVec $Vss))))>;

  def: Pat<(VecI16 (zext_invec HVI8:$Vs)),  (LoVec (VZxtb $Vs))>;
  def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>;
  def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
           (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
  def: Pat<(VecPI16 (zext_invec HWI8:$Vss)),  (VZxtb (LoVec $Vss))>;
  def: Pat<(VecPI32 (zext_invec HWI16:$Vss)), (VZxth (LoVec $Vss))>;
  def: Pat<(VecPI32 (zext_invec HWI8:$Vss)),
           (VZxth (LoVec (VZxtb (LoVec $Vss))))>;

  def: Pat<(VecI8 (trunc HWI16:$Vss)),
           (V6_vpackeb (HiVec $Vss), (LoVec $Vss))>;
  def: Pat<(VecI16 (trunc HWI32:$Vss)),
           (V6_vpackeh (HiVec $Vss), (LoVec $Vss))>;

  def: Pat<(VecQ8 (trunc HVI8:$Vs)),
           (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
  def: Pat<(VecQ16 (trunc HVI16:$Vs)),
           (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
  def: Pat<(VecQ32 (trunc HVI32:$Vs)),
           (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
}
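// A sketch of the intent of the truncations to predicate types above:
// V6_vandvrt ands each byte of the source with the corresponding byte of the
// replicated word 0x01010101 and sets the predicate bits for the non-zero
// results, so truncation to a bool vector effectively tests bit 0 of each
// lane.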

let Predicates = [UseHVX] in {
  // The "source" types are not legal, and there are no parameterized
  // definitions for them, but they are length-specific.
  let Predicates = [UseHVX,UseHVX64B] in {
    def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v32i8)),
             (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i8)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i16)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
  }
  let Predicates = [UseHVX,UseHVX128B] in {
    def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v64i8)),
             (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i8)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i16)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
  }

  // Take a pair of vectors Vt:Vs and shift them towards LSB by (Rt & HwLen).
  def: Pat<(VecI8 (valign HVI8:$Vt, HVI8:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
  def: Pat<(VecI16 (valign HVI16:$Vt, HVI16:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
  def: Pat<(VecI32 (valign HVI32:$Vt, HVI32:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;

  def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vaslh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vaslh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
  def: Pat<(HexagonVASR HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vasrh (HiVec (VSxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vasrh (LoVec (VSxtb HvxVR:$Vs)), I32:$Rt))>;
  def: Pat<(HexagonVLSR HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vlsrh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vlsrh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;

  def: Pat<(HexagonVASL HVI16:$Vs, I32:$Rt), (V6_vaslh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASL HVI32:$Vs, I32:$Rt), (V6_vaslw HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASR HVI16:$Vs, I32:$Rt), (V6_vasrh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASR HVI32:$Vs, I32:$Rt), (V6_vasrw HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVLSR HVI16:$Vs, I32:$Rt), (V6_vlsrh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVLSR HVI32:$Vs, I32:$Rt), (V6_vlsrw HvxVR:$Vs, I32:$Rt)>;

  def: Pat<(add HVI32:$Vx, (HexagonVASL HVI32:$Vu, I32:$Rt)),
           (V6_vaslw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(add HVI32:$Vx, (HexagonVASR HVI32:$Vu, I32:$Rt)),
           (V6_vasrw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;

  def: Pat<(shl HVI16:$Vs, HVI16:$Vt), (V6_vaslhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(shl HVI32:$Vs, HVI32:$Vt), (V6_vaslwv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(sra HVI16:$Vs, HVI16:$Vt), (V6_vasrhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(sra HVI32:$Vs, HVI32:$Vt), (V6_vasrwv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>;

  // Vpackl is a pseudo-op that is used when legalizing widened truncates.
  // It should never be produced with a register pair in the output, but
  // it can happen to have a pair as an input.
  def: Pat<(VecI8 (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>;
  def: Pat<(VecI8 (vpackl HVI32:$Vs)),
           (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>;
  def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>;
  def: Pat<(VecI8 (vpackl HWI16:$Vs)),
           (V6_vpackeb (HiVec $Vs), (LoVec $Vs))>;
  def: Pat<(VecI8 (vpackl HWI32:$Vs)),
           (V6_vpackeb (IMPLICIT_DEF), (V6_vpackeh (HiVec $Vs), (LoVec $Vs)))>;
  def: Pat<(VecI16 (vpackl HWI32:$Vs)),
           (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>;
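  // Informally: vpackl keeps the least significant sub-element of each wider
  // lane. For a single input vector the "deal" instructions move those low
  // bytes/halfwords into the low half of the result, and for a pair input the
  // pack-even instructions compress both halves into one vector; only the low
  // portion of the result is relied on by the widened-truncate users, which
  // is why an IMPLICIT_DEF operand is acceptable in the 32-to-8-bit cases.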

  def: Pat<(VecI16 (vunpack HVI8:$Vs)),  (LoVec (VSxtb $Vs))>;
  def: Pat<(VecI32 (vunpack HVI8:$Vs)),  (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
  def: Pat<(VecI32 (vunpack HVI16:$Vs)), (LoVec (VSxth $Vs))>;
  def: Pat<(VecPI16 (vunpack HVI8:$Vs)),  (VSxtb $Vs)>;
  def: Pat<(VecPI32 (vunpack HVI8:$Vs)),  (VSxth (LoVec (VSxtb $Vs)))>;
  def: Pat<(VecPI32 (vunpack HVI16:$Vs)), (VSxth $Vs)>;

  def: Pat<(VecI16 (vunpacku HVI8:$Vs)),  (LoVec (VZxtb $Vs))>;
  def: Pat<(VecI32 (vunpacku HVI8:$Vs)),  (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
  def: Pat<(VecI32 (vunpacku HVI16:$Vs)), (LoVec (VZxth $Vs))>;
  def: Pat<(VecPI16 (vunpacku HVI8:$Vs)),  (VZxtb $Vs)>;
  def: Pat<(VecPI32 (vunpacku HVI8:$Vs)),  (VZxth (LoVec (VZxtb $Vs)))>;
  def: Pat<(VecPI32 (vunpacku HVI16:$Vs)), (VZxth $Vs)>;

  let Predicates = [UseHVX,UseHVXV60] in {
    def: Pat<(VecI16 (bswap HVI16:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x01)))>;
    def: Pat<(VecI32 (bswap HVI32:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x03)))>;
  }
  let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in {
    def: Pat<(VecI16 (bswap HVI16:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x01)))>;
    def: Pat<(VecI32 (bswap HVI32:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x03)))>;
  }

  def: Pat<(VecI8 (ctpop HVI8:$Vs)),
           (V6_vpackeb (V6_vpopcounth (HiVec (V6_vunpackub HvxVR:$Vs))),
                       (V6_vpopcounth (LoVec (V6_vunpackub HvxVR:$Vs))))>;
  def: Pat<(VecI16 (ctpop HVI16:$Vs)), (V6_vpopcounth HvxVR:$Vs)>;
  def: Pat<(VecI32 (ctpop HVI32:$Vs)),
           (V6_vaddw (LoVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))),
                     (HiVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))))>;

  let Predicates = [UseHVX,UseHVXV60] in
  def: Pat<(VecI8 (ctlz HVI8:$Vs)),
           (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
                                 (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
                     (V60splatib (i32 0x08)))>;
  let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in
  def: Pat<(VecI8 (ctlz HVI8:$Vs)),
           (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
                                 (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
                     (V62splatib (i32 0x08)))>;
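  // The i8 ctlz above counts leading zeros on zero-extended halfwords, so the
  // result is 8 too high in every lane; the trailing V6_vsubb of a splatted 8
  // compensates for the widening.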

  def: Pat<(VecI16 (ctlz HVI16:$Vs)), (V6_vcl0h HvxVR:$Vs)>;
  def: Pat<(VecI32 (ctlz HVI32:$Vs)), (V6_vcl0w HvxVR:$Vs)>;
}

class HvxSel_pat<InstHexagon MI, PatFrag RegPred>
  : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt),
        (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>;

let Predicates = [UseHVX] in {
  def: HvxSel_pat<PS_vselect, HVI8>;
  def: HvxSel_pat<PS_vselect, HVI16>;
  def: HvxSel_pat<PS_vselect, HVI32>;
  def: HvxSel_pat<PS_wselect, HWI8>;
  def: HvxSel_pat<PS_wselect, HWI16>;
  def: HvxSel_pat<PS_wselect, HWI32>;
}

let Predicates = [UseHVX] in {
  def: Pat<(VecQ8  (qtrue)),  (PS_qtrue)>;
  def: Pat<(VecQ16 (qtrue)),  (PS_qtrue)>;
  def: Pat<(VecQ32 (qtrue)),  (PS_qtrue)>;
  def: Pat<(VecQ8  (qfalse)), (PS_qfalse)>;
  def: Pat<(VecQ16 (qfalse)), (PS_qfalse)>;
  def: Pat<(VecQ32 (qfalse)), (PS_qfalse)>;

  def: Pat<(vnot HQ8:$Qs),  (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(vnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(vnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(qnot HQ8:$Qs),  (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(qnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(qnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>;

  def: OpR_RR_pat<V6_pred_and, And, VecQ8,  HQ8>;
  def: OpR_RR_pat<V6_pred_and, And, VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_and, And, VecQ32, HQ32>;
  def: OpR_RR_pat<V6_pred_or,  Or,  VecQ8,  HQ8>;
  def: OpR_RR_pat<V6_pred_or,  Or,  VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_or,  Or,  VecQ32, HQ32>;
  def: OpR_RR_pat<V6_pred_xor, Xor, VecQ8,  HQ8>;
  def: OpR_RR_pat<V6_pred_xor, Xor, VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_xor, Xor, VecQ32, HQ32>;

  def: OpR_RR_pat<V6_pred_and_n, VNot2<And, qnot>, VecQ8,  HQ8>;
  def: OpR_RR_pat<V6_pred_and_n, VNot2<And, qnot>, VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_and_n, VNot2<And, qnot>, VecQ32, HQ32>;
  def: OpR_RR_pat<V6_pred_or_n,  VNot2<Or, qnot>,  VecQ8,  HQ8>;
  def: OpR_RR_pat<V6_pred_or_n,  VNot2<Or, qnot>,  VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_or_n,  VNot2<Or, qnot>,  VecQ32, HQ32>;

  def: OpR_RR_pat<V6_veqb,  seteq,  VecQ8,  HVI8>;
  def: OpR_RR_pat<V6_veqh,  seteq,  VecQ16, HVI16>;
  def: OpR_RR_pat<V6_veqw,  seteq,  VecQ32, HVI32>;
  def: OpR_RR_pat<V6_vgtb,  setgt,  VecQ8,  HVI8>;
  def: OpR_RR_pat<V6_vgth,  setgt,  VecQ16, HVI16>;
  def: OpR_RR_pat<V6_vgtw,  setgt,  VecQ32, HVI32>;
  def: OpR_RR_pat<V6_vgtub, setugt, VecQ8,  HVI8>;
  def: OpR_RR_pat<V6_vgtuh, setugt, VecQ16, HVI16>;
  def: OpR_RR_pat<V6_vgtuw, setugt, VecQ32, HVI32>;

  def: AccRRR_pat<V6_veqb_and,  And, seteq, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_veqb_or,   Or,  seteq, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_veqb_xor,  Xor, seteq, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_veqh_and,  And, seteq, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqh_or,   Or,  seteq, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqh_xor,  Xor, seteq, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqw_and,  And, seteq, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_veqw_or,   Or,  seteq, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_veqw_xor,  Xor, seteq, HQ32, HVI32, HVI32>;

  def: AccRRR_pat<V6_vgtb_and,  And, setgt, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtb_or,   Or,  setgt, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtb_xor,  Xor, setgt, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgth_and,  And, setgt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgth_or,   Or,  setgt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgth_xor,  Xor, setgt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtw_and,  And, setgt, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtw_or,   Or,  setgt, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtw_xor,  Xor, setgt, HQ32, HVI32, HVI32>;

  def: AccRRR_pat<V6_vgtub_and, And, setugt, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtub_or,  Or,  setugt, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtub_xor, Xor, setugt, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtuh_and, And, setugt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuh_or,  Or,  setugt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuh_xor, Xor, setugt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuw_and, And, setugt, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtuw_or,  Or,  setugt, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtuw_xor, Xor, setugt, HQ32, HVI32, HVI32>;
}