//===- HexagonPatternsHVX.td - Selection Patterns for HVX --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Typed operand leaves. Each leaf matches an operand of one register class
// (HvxQR, HvxVR or HvxWR) constrained to a single value type, so that a
// pattern can state the class and the element type with one name.
def HQ8:    PatLeaf<(VecQ8   HvxQR:$R)>;
def HQ16:   PatLeaf<(VecQ16  HvxQR:$R)>;
def HQ32:   PatLeaf<(VecQ32  HvxQR:$R)>;

def HVI8:   PatLeaf<(VecI8   HvxVR:$R)>;
def HVI16:  PatLeaf<(VecI16  HvxVR:$R)>;
def HVI32:  PatLeaf<(VecI32  HvxVR:$R)>;
def HVF16:  PatLeaf<(VecF16  HvxVR:$R)>;
def HVF32:  PatLeaf<(VecF32  HvxVR:$R)>;

def HWI8:   PatLeaf<(VecPI8  HvxWR:$R)>;
def HWI16:  PatLeaf<(VecPI16 HvxWR:$R)>;
def HWI32:  PatLeaf<(VecPI32 HvxWR:$R)>;
def HWF16:  PatLeaf<(VecPF16 HvxWR:$R)>;
def HWF32:  PatLeaf<(VecPF32 HvxWR:$R)>;

// Type profile: one vector result, one vector operand.
def SDTVecUnaryOp:
  SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

// Type profile: one vector result, two vector operands of the same type.
def SDTVecBinOp:
  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>;

// VEXTRACTW: i32 result from (vector, i32).
def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2,
  [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>;
def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>;

// VINSERTW0: vector result of the same type as the vector operand, from
// (vector, i32).
def SDTHexagonVINSERTW0: SDTypeProfile<1, 2,
  [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;

// Transform that yields half of the subtarget's vector length as an i32
// target constant. The matched immediate is used only for its debug
// location; its value is ignored.
def HwLen2: SDNodeXForm<imm, [{
  const auto &ST = CurDAG->getSubtarget<HexagonSubtarget>();
  return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32);
}]>;

// V6_vandqrt of $Qs with the scalar -1.
// NOTE(review): presumably this expands a predicate into a vector register;
// confirm against the V6_vandqrt definition.
def Q2V: OutPatFrag<(ops node:$Qs), (V6_vandqrt $Qs, (A2_tfrsi -1))>;

// Build an HvxWR value with $Vs in vsub_hi and $Vt in vsub_lo.
def Combinev: OutPatFrag<(ops node:$Vs, node:$Vt),
  (REG_SEQUENCE HvxWR, $Vs, vsub_hi, $Vt, vsub_lo)>;

// Combine two Q values: each is converted with Q2V and packed with
// V6_vpackeb; the $Qs half is rotated by half the vector length, OR-ed with
// the $Qt half, and the result is converted back with V6_vandvrt.
def Combineq: OutPatFrag<(ops node:$Qs, node:$Qt),
  (V6_vandvrt
    (V6_vor
      (V6_vror (V6_vpackeb (V6_vd0), (Q2V $Qs)),
               (A2_tfrsi (HwLen2 (i32 0)))),  // Half the vector length
      (V6_vpackeb (V6_vd0), (Q2V $Qt))),
    (A2_tfrsi -1))>;

// Low / high sub-register of a vector pair.
def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>;
def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>;

// Target-specific DAG nodes used by the patterns in this file.
def HexagonQCAT:       SDNode<"HexagonISD::QCAT",       SDTVecBinOp>;
def HexagonQTRUE:      SDNode<"HexagonISD::QTRUE",      SDTVecLeaf>;
def HexagonQFALSE:     SDNode<"HexagonISD::QFALSE",     SDTVecLeaf>;
def HexagonVPACKL:     SDNode<"HexagonISD::VPACKL",     SDTVecUnaryOp>;
def HexagonVUNPACK:    SDNode<"HexagonISD::VUNPACK",    SDTVecUnaryOp>;
def HexagonVUNPACKU:   SDNode<"HexagonISD::VUNPACKU",   SDTVecUnaryOp>;

// vzero matches a splat of integer 0 or of f32zero.
def vzero:  PatFrags<(ops), [(splat_vector (i32 0)), (splat_vector (f32zero))]>;
def qtrue:  PatFrag<(ops), (HexagonQTRUE)>;
def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
def qcat:   PatFrag<(ops node:$Qs, node:$Qt),
                    (HexagonQCAT node:$Qs, node:$Qt)>;

// qnot is expressed as xor with qtrue.
def qnot:     PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
def vpackl:   PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>;
def vunpack:  PatFrag<(ops node:$Vs), (HexagonVUNPACK node:$Vs)>;
def vunpacku: PatFrag<(ops node:$Vs), (HexagonVUNPACKU node:$Vs)>;

// Shorthands for the byte/halfword unpack instructions (signed: VSxt*,
// unsigned: VZxt*).
def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb  $Vs)>;
def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh  $Vs)>;
def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>;
def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>;

// Accepts an i32 immediate V only if V is a multiple of the HvxVR spill size
// (asserted to be a power of two) and V divided by that size fits in a
// signed 4-bit field.
def IsVecOff : PatLeaf<(i32 imm), [{
  int32_t V = N->getSExtValue();
  int32_t VecSize = HRI->getSpillSize(Hexagon::HvxVRRegClass);
  assert(isPowerOf2_32(VecSize));
  if ((uint32_t(V) & (uint32_t(VecSize)-1)) != 0)
    return false;
  int32_t L = Log2_32(VecSize);
  return isInt<4>(V >> L);
}]>;


// Loads and stores split by the result of isAlignedMemNode on the memory
// node: the "aligned" fragments require it to return true, the "unaligned"
// ones require it to return false.
def alignedload: PatFrag<(ops node:$a), (load $a), [{
  return isAlignedMemNode(cast<MemSDNode>(N));
}]>;

def unalignedload: PatFrag<(ops node:$a), (load $a), [{
  return !isAlignedMemNode(cast<MemSDNode>(N));
}]>;

def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
  return isAlignedMemNode(cast<MemSDNode>(N));
}]>;

def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
  return !isAlignedMemNode(cast<MemSDNode>(N));
}]>;


// HVX loads

// Loads addressed through a frame index: frame index plus an immediate
// accepted by ImmPred (joined by add or by IsOrAdd), or the bare frame index
// with offset 0.
multiclass HvxLdfi_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                       PatFrag ImmPred> {
  def: Pat<(ResType (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
           (MI AddrFI:$fi, imm:$Off)>;
  def: Pat<(ResType (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))),
           (MI AddrFI:$fi, imm:$Off)>;
  def: Pat<(ResType (Load AddrFI:$fi)), (ResType (MI AddrFI:$fi, 0))>;
}

// Loads addressed through an i32 register: register plus an immediate
// accepted by ImmPred, or the bare register with offset 0.
// NOTE(review): unlike HvxStgi_pat there is no IsOrAdd form here; confirm
// that this is intentional.
multiclass HvxLdgi_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                     PatFrag ImmPred> {
  def: Pat<(ResType (Load (add I32:$Rt, ImmPred:$Off))),
           (MI I32:$Rt, imm:$Off)>;
  def: Pat<(ResType (Load I32:$Rt)),
           (MI I32:$Rt, 0)>;
}

multiclass HvxLdc_pat<InstHexagon MI, PatFrag Load, ValueType ResType> {
  // The HVX selection code for shuffles can generate vector constants.
  // Calling "Select" on the resulting loads from CP fails without these
  // patterns.
  def: Pat<(ResType (Load (HexagonCP tconstpool:$Addr))),
           (MI (A2_tfrsi imm:$Addr), 0)>;
  def: Pat<(ResType (Load (HexagonAtPcrel tconstpool:$Addr))),
           (MI (C4_addipc imm:$Addr), 0)>;
}

// All generic load forms: frame-index, register and constant-pool addresses.
multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                     PatFrag ImmPred> {
  defm: HvxLdfi_pat<MI, Load, ResType, ImmPred>;
  defm: HvxLdgi_pat<MI, Load, ResType, ImmPred>;
  defm: HvxLdc_pat <MI, Load, ResType>;
}

// Aligned loads: everything, plus loads with valignaddr node.
// The two valignaddr forms are given AddedComplexity = 50; the generic
// HvxLd_pat forms are instantiated after them.
multiclass HvxLda_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                      PatFrag ImmPred> {
  let AddedComplexity = 50 in {
    def: Pat<(ResType (Load (valignaddr I32:$Rt))),
             (MI I32:$Rt, 0)>;
    def: Pat<(ResType (Load (add (valignaddr I32:$Rt), ImmPred:$Off))),
             (MI I32:$Rt, imm:$Off)>;
  }
  defm: HvxLd_pat<MI, Load, ResType, ImmPred>;
}

let Predicates = [UseHVX] in {
  // alignedload will match a non-temporal load as well, so try non-temporal
  // first.
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI8,  IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI16, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI32, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,               alignedload, VecI8,  IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,               alignedload, VecI16, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,               alignedload, VecI32, IsVecOff>;
  defm: HvxLd_pat<V6_vL32Ub_ai,             unalignedload, VecI8,  IsVecOff>;
  defm: HvxLd_pat<V6_vL32Ub_ai,             unalignedload, VecI16, IsVecOff>;
  defm: HvxLd_pat<V6_vL32Ub_ai,             unalignedload, VecI32, IsVecOff>;
}

// Same load forms for the floating-point vector types, in the same
// non-temporal / aligned / unaligned order.
let Predicates = [UseHVXV68] in {
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF16, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF32, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,               alignedload, VecF16, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,               alignedload, VecF32, IsVecOff>;
  defm: HvxLd_pat<V6_vL32Ub_ai,             unalignedload, VecF16, IsVecOff>;
  defm: HvxLd_pat<V6_vL32Ub_ai,             unalignedload, VecF32, IsVecOff>;
}

// HVX stores

// Stores addressed through a frame index: frame index plus an immediate
// accepted by ImmPred (joined by add or by IsOrAdd), or the bare frame index
// with offset 0.
multiclass HvxStfi_pat<InstHexagon MI, PatFrag Store, PatFrag Value,
                       PatFrag ImmPred> {
  def: Pat<(Store Value:$Vs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
           (MI AddrFI:$fi, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)),
           (MI AddrFI:$fi, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, AddrFI:$fi),
           (MI AddrFI:$fi, 0, Value:$Vs)>;
}

// Stores addressed through an i32 register: register plus an immediate
// accepted by ImmPred (joined by add or by IsOrAdd), or the bare register
// with offset 0.
multiclass HvxStgi_pat<InstHexagon MI, PatFrag Store, PatFrag Value,
                       PatFrag ImmPred> {
  def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$Off)),
           (MI I32:$Rt, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, (IsOrAdd I32:$Rt, ImmPred:$Off)),
           (MI I32:$Rt, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, I32:$Rt),
           (MI I32:$Rt, 0, Value:$Vs)>;
}

// All store forms: frame-index and register addresses.
multiclass HvxSt_pat<InstHexagon MI, PatFrag Store, PatFrag Value,
                     PatFrag ImmPred> {
  defm: HvxStfi_pat<MI, Store, Value, ImmPred>;
  defm: HvxStgi_pat<MI, Store, Value, ImmPred>;
}

let Predicates = [UseHVX] in {
  // alignedstore will match a non-temporal store as well, so try non-temporal
  // first.
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore,  HVI8, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVI16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVI32, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,               alignedstore,  HVI8, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,               alignedstore, HVI16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,               alignedstore, HVI32, IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,            unalignedstore,  HVI8, IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,            unalignedstore, HVI16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,            unalignedstore, HVI32, IsVecOff>;
}

// Same store forms for the floating-point vector types, in the same
// non-temporal / aligned / unaligned order.
let Predicates = [UseHVXV68] in {
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF32, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,               alignedstore, HVF16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,               alignedstore, HVF32, IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,            unalignedstore, HVF16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,            unalignedstore, HVF32, IsVecOff>;
}

// Bitcasts between same-size vector types are no-ops, except for the
// actual type change.
// Casts among the integer vector types (single vectors in HvxVR, pairs in
// HvxWR) are guarded by UseHVX only.
let Predicates = [UseHVX] in {
  defm: NopCast_pat<VecI8,   VecI16,  HvxVR>;
  defm: NopCast_pat<VecI8,   VecI32,  HvxVR>;
  defm: NopCast_pat<VecI16,  VecI32,  HvxVR>;

  defm: NopCast_pat<VecPI8,  VecPI16, HvxWR>;
  defm: NopCast_pat<VecPI8,  VecPI32, HvxWR>;
  defm: NopCast_pat<VecPI16, VecPI32, HvxWR>;
}

// Casts that involve a floating-point vector type additionally require
// UseHVXFloatingPoint.
let Predicates = [UseHVX, UseHVXFloatingPoint] in {
  defm: NopCast_pat<VecI8,   VecF16,  HvxVR>;
  defm: NopCast_pat<VecI8,   VecF32,  HvxVR>;
  defm: NopCast_pat<VecI16,  VecF16,  HvxVR>;
  defm: NopCast_pat<VecI16,  VecF32,  HvxVR>;
  defm: NopCast_pat<VecI32,  VecF16,  HvxVR>;
  defm: NopCast_pat<VecI32,  VecF32,  HvxVR>;
  defm: NopCast_pat<VecF16,  VecF32,  HvxVR>;

  defm: NopCast_pat<VecPI8,  VecPF16, HvxWR>;
  defm: NopCast_pat<VecPI8,  VecPF32, HvxWR>;
  defm: NopCast_pat<VecPI16, VecPF16, HvxWR>;
  defm: NopCast_pat<VecPI16, VecPF32, HvxWR>;
  defm: NopCast_pat<VecPI32, VecPF16, HvxWR>;
  defm: NopCast_pat<VecPI32, VecPF32, HvxWR>;
  defm: NopCast_pat<VecPF16, VecPF32, HvxWR>;
}

let Predicates = [UseHVX] in {
  let AddedComplexity = 100 in {
  // These should be preferred over a vsplat of 0.
  def: Pat<(VecI8   vzero), (V6_vd0)>;
  def: Pat<(VecI16  vzero), (V6_vd0)>;
  def: Pat<(VecI32  vzero), (V6_vd0)>;
  def: Pat<(VecPI8  vzero), (PS_vdd0)>;
  def: Pat<(VecPI16 vzero), (PS_vdd0)>;
  def: Pat<(VecPI32 vzero), (PS_vdd0)>;
  // NOTE(review): VecPF32 is the only floating-point type in this block, and
  // the same pattern is also defined under [UseHVX, UseHVXFloatingPoint]
  // below; confirm that the duplicate is intentional.
  def: Pat<(VecPF32 vzero), (PS_vdd0)>;

  def: Pat<(concat_vectors  (VecI8 vzero),  (VecI8 vzero)), (PS_vdd0)>;
  def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>;
  def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>;
  }

  // concat_vectors $Vs, $Vt: Combinev takes the high half first, so the
  // operands are passed in swapped order ($Vt -> vsub_hi, $Vs -> vsub_lo).
  def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;

  // qcat likewise passes its operands to Combineq in swapped order.
  def: Pat<(VecQ8  (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qt, $Qs)>;
  def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qt, $Qs)>;

  def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs),
           (V6_extractw HvxVR:$Vu, I32:$Rs)>;
  def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs),
           (V6_extractw HvxVR:$Vu, I32:$Rs)>;
  def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs),
           (V6_extractw HvxVR:$Vu, I32:$Rs)>;

  def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
}

// Floating-point counterparts of the zero, concat and insert patterns.
let Predicates = [UseHVX, UseHVXFloatingPoint] in {
  let AddedComplexity = 100 in {
    def: Pat<(VecF16  vzero), (V6_vd0)>;
    def: Pat<(VecF32  vzero), (V6_vd0)>;
    def: Pat<(VecPF16 vzero), (PS_vdd0)>;
    def: Pat<(VecPF32 vzero), (PS_vdd0)>;

    def: Pat<(concat_vectors (VecF16 vzero), (VecF16 vzero)), (PS_vdd0)>;
    def: Pat<(concat_vectors (VecF32 vzero), (VecF32 vzero)), (PS_vdd0)>;
  }

  def: Pat<(VecPF16 (concat_vectors HVF16:$Vs, HVF16:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(VecPF32 (concat_vectors HVF32:$Vs, HVF32:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;

  def: Pat<(HexagonVINSERTW0 HVF16:$Vu, I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(HexagonVINSERTW0 HVF32:$Vu, I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
}

// Splats for HvxV60
// Every form goes through V6_lvsplatw: byte and halfword values are first
// replicated into a 32-bit scalar (SplatB/SplatH for immediates,
// S2_vsplatrb/A2_combine_ll for registers).
def V60splatib: OutPatFrag<(ops node:$V),  (V6_lvsplatw (ToI32 (SplatB $V)))>;
def V60splatih: OutPatFrag<(ops node:$V),  (V6_lvsplatw (ToI32 (SplatH $V)))>;
def V60splatiw: OutPatFrag<(ops node:$V),  (V6_lvsplatw (ToI32 $V))>;
def V60splatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatw (S2_vsplatrb $Rs))>;
def V60splatrh: OutPatFrag<(ops node:$Rs),
                           (V6_lvsplatw (A2_combine_ll $Rs, $Rs))>;
def V60splatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;

// Splats for HvxV62+
// Byte and halfword splats use V6_lvsplatb / V6_lvsplath directly.
def V62splatib: OutPatFrag<(ops node:$V),  (V6_lvsplatb (ToI32 $V))>;
def V62splatih: OutPatFrag<(ops node:$V),  (V6_lvsplath (ToI32 $V))>;
def V62splatiw: OutPatFrag<(ops node:$V),  (V6_lvsplatw (ToI32 $V))>;
def V62splatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatb $Rs)>;
def V62splatrh: OutPatFrag<(ops node:$Rs), (V6_lvsplath $Rs)>;
def V62splatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;

// Replicate a single vector into both halves of a vector pair.
def Rep: OutPatFrag<(ops node:$N), (Combinev $N, $N)>;

// V60 splat patterns: immediate forms carry AddedComplexity = 10, register
// forms carry none.
let Predicates = [UseHVX,UseHVXV60] in {
  let AddedComplexity = 10 in {
    def: Pat<(VecI8   (splat_vector u8_0ImmPred:$V)),  (V60splatib $V)>;
    def: Pat<(VecI16  (splat_vector u16_0ImmPred:$V)), (V60splatih $V)>;
    def: Pat<(VecI32  (splat_vector anyimm:$V)),       (V60splatiw $V)>;
    def: Pat<(VecPI8  (splat_vector u8_0ImmPred:$V)),  (Rep (V60splatib $V))>;
    def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$V)), (Rep (V60splatih $V))>;
    def: Pat<(VecPI32 (splat_vector anyimm:$V)),       (Rep (V60splatiw $V))>;
  }
  def: Pat<(VecI8   (splat_vector I32:$Rs)), (V60splatrb $Rs)>;
  def: Pat<(VecI16  (splat_vector I32:$Rs)), (V60splatrh $Rs)>;
  def: Pat<(VecI32  (splat_vector I32:$Rs)), (V60splatrw $Rs)>;
  def: Pat<(VecPI8  (splat_vector I32:$Rs)), (Rep (V60splatrb $Rs))>;
  def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (V60splatrh $Rs))>;
  def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V60splatrw $Rs))>;
}
// V62 splat patterns use higher AddedComplexity values (30 for immediates,
// 20 for registers) than the V60 patterns above (10 and none).
let Predicates = [UseHVX,UseHVXV62] in {
  let AddedComplexity = 30 in {
    def: Pat<(VecI8   (splat_vector u8_0ImmPred:$V)),  (V62splatib imm:$V)>;
    def: Pat<(VecI16  (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>;
    def: Pat<(VecI32  (splat_vector anyimm:$V)),       (V62splatiw imm:$V)>;
    def: Pat<(VecPI8  (splat_vector u8_0ImmPred:$V)),
             (Rep (V62splatib imm:$V))>;
    def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$V)),
             (Rep (V62splatih imm:$V))>;
    def: Pat<(VecPI32 (splat_vector anyimm:$V)),
             (Rep (V62splatiw imm:$V))>;
  }
  let AddedComplexity = 20 in {
    def: Pat<(VecI8   (splat_vector I32:$Rs)), (V62splatrb $Rs)>;
    def: Pat<(VecI16  (splat_vector I32:$Rs)), (V62splatrh $Rs)>;
    def: Pat<(VecI32  (splat_vector I32:$Rs)), (V62splatrw $Rs)>;
    def: Pat<(VecPI8  (splat_vector I32:$Rs)), (Rep (V62splatrb $Rs))>;
    def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (V62splatrh $Rs))>;
    def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V62splatrw $Rs))>;
  }
}
// Floating-point splats reuse the V62 fragments; an f32 immediate is
// converted with ftoi before being splatted.
let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
  let AddedComplexity = 30 in {
    def: Pat<(VecF16  (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>;
    def: Pat<(VecF32  (splat_vector anyint:$V)),       (V62splatiw imm:$V)>;
    def: Pat<(VecF32  (splat_vector f32ImmPred:$V)),   (V62splatiw (ftoi $V))>;
  }
  let AddedComplexity = 20 in {
    def: Pat<(VecF16  (splat_vector I32:$Rs)), (V62splatrh $Rs)>;
    def: Pat<(VecF32  (splat_vector I32:$Rs)), (V62splatrw $Rs)>;
    def: Pat<(VecF32  (splat_vector F32:$Rs)), (V62splatrw $Rs)>;
  }
}

// All-ones vector of type VecTy, written as a splat of -1.
class Vneg1<ValueType VecTy>
  : PatFrag<(ops), (VecTy (splat_vector (i32 -1)))>;

// Bitwise NOT of a VecTy vector, written as xor with all-ones.
class Vnot<ValueType VecTy>
  : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;

let Predicates = [UseHVX] in {
  // High complexity so that xor-with-all-ones selects V6_vnot rather than
  // the generic V6_vxor pattern below.
  let AddedComplexity = 200 in {
    def: Pat<(Vnot<VecI8>   HVI8:$Vs), (V6_vnot HvxVR:$Vs)>;
    def: Pat<(Vnot<VecI16> HVI16:$Vs), (V6_vnot HvxVR:$Vs)>;
    def: Pat<(Vnot<VecI32> HVI32:$Vs), (V6_vnot HvxVR:$Vs)>;
  }

  // Element-wise add/sub for single vectors and pairs (_dv), and the
  // type-independent bitwise operations.
  def: OpR_RR_pat<V6_vaddb,    Add,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vaddh,    Add,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vaddw,    Add,  VecI32, HVI32>;
  def: OpR_RR_pat<V6_vaddb_dv, Add,  VecPI8,  HWI8>;
  def: OpR_RR_pat<V6_vaddh_dv, Add, VecPI16, HWI16>;
  def: OpR_RR_pat<V6_vaddw_dv, Add, VecPI32, HWI32>;
  def: OpR_RR_pat<V6_vsubb,    Sub,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vsubh,    Sub,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vsubw,    Sub,  VecI32, HVI32>;
  def: OpR_RR_pat<V6_vsubb_dv, Sub,  VecPI8,  HWI8>;
  def: OpR_RR_pat<V6_vsubh_dv, Sub, VecPI16, HWI16>;
  def: OpR_RR_pat<V6_vsubw_dv, Sub, VecPI32, HWI32>;
  def: OpR_RR_pat<V6_vand,     And,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vand,     And,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vand,     And,  VecI32, HVI32>;
  def: OpR_RR_pat<V6_vor,       Or,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vor,       Or,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vor,       Or,  VecI32, HVI32>;
  def: OpR_RR_pat<V6_vxor,     Xor,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vxor,     Xor,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vxor,     Xor,  VecI32, HVI32>;

  // Integer min/max. No unsigned word (Umin/Umax on VecI32) pattern is
  // defined here.
  def: OpR_RR_pat<V6_vminb,   Smin,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vmaxb,   Smax,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vminub,  Umin,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vmaxub,  Umax,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vminh,   Smin,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vmaxh,   Smax,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vminuh,  Umin,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vmaxuh,  Umax,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vminw,   Smin,  VecI32, HVI32>;
  def: OpR_RR_pat<V6_vmaxw,   Smax,  VecI32, HVI32>;

  def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;

  // A negated condition is folded away by swapping the two data operands of
  // V6_vmux.
  def: Pat<(vselect (qnot HQ8:$Qu), HVI8:$Vs, HVI8:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(vselect (qnot HQ16:$Qu), HVI16:$Vs, HVI16:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(vselect (qnot HQ32:$Qu), HVI32:$Vs, HVI32:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}

// For now, we always deal with vector floating point in SF mode.
// Binary operation whose result is passed through V6_vconv_sf_qf32.
class OpR_RR_pat_conv<InstHexagon MI, PatFrag Op, ValueType ResType,
                      PatFrag RsPred, PatFrag RtPred = RsPred>
  : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
        (V6_vconv_sf_qf32 (VecF32 (MI RsPred:$Rs, RtPred:$Rt)))>;

// Halfword variant: the result is passed through V6_vconv_hf_qf16.
class OpR_RR_pat_conv_hf<InstHexagon MI, PatFrag Op, ValueType ResType,
                      PatFrag RsPred, PatFrag RtPred = RsPred>
  : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
        (V6_vconv_hf_qf16 (VecF16 (MI RsPred:$Rs, RtPred:$Rt)))>;

let Predicates = [UseHVXV68, UseHVXQFloat] in {
  def: OpR_RR_pat_conv_hf<V6_vsub_hf,        pf2<fsub>,  VecF16, HVF16>;
  def: OpR_RR_pat_conv_hf<V6_vadd_hf,        pf2<fadd>,  VecF16, HVF16>;
  def: OpR_RR_pat_conv_hf<V6_vmpy_qf16_hf,   pf2<fmul>,  VecF16, HVF16>;
  def: OpR_RR_pat_conv<V6_vsub_sf,        pf2<fsub>,  VecF32, HVF32>;
  def: OpR_RR_pat_conv<V6_vadd_sf,        pf2<fadd>,  VecF32, HVF32>;
  def: OpR_RR_pat_conv<V6_vmpy_qf32_sf,   pf2<fmul>,  VecF32, HVF32>;

  // For now we assume that the fp32 register is always coming in as IEEE float
  // since the qfloat arithmetic instructions above always generate the
  // accompanying conversions as part of their pattern
  def: Pat<(VecF16 (pf1<fpround> HWF32:$Vuu)),
           (V6_vdealh (V6_vconv_hf_qf32
             (VecPF32 (Combinev (V6_vadd_sf (HiVec HvxWR:$Vuu), (V6_vd0)),
                                (V6_vadd_sf (LoVec HvxWR:$Vuu), (V6_vd0))
             ))))>;
  // fpextend for QFloat is handled manually in HexagonISelLoweringHVX.cpp.
}

// HVX IEEE arithmetic Instructions
let Predicates = [UseHVXV68, UseHVXIEEEFP] in {
  def: Pat<(fadd HVF16:$Rs, HVF16:$Rt),
           (V6_vadd_hf_hf HVF16:$Rs, HVF16:$Rt)>;
  def: Pat<(fadd HVF32:$Rs, HVF32:$Rt),
           (V6_vadd_sf_sf HVF32:$Rs, HVF32:$Rt)>;
  def: Pat<(fsub HVF16:$Rs, HVF16:$Rt),
           (V6_vsub_hf_hf HVF16:$Rs, HVF16:$Rt)>;
  def: Pat<(fsub HVF32:$Rs, HVF32:$Rt),
           (V6_vsub_sf_sf HVF32:$Rs, HVF32:$Rt)>;
  def: Pat<(fmul HVF16:$Rs, HVF16:$Rt),
           (V6_vmpy_hf_hf HVF16:$Rs, HVF16:$Rt)>;
  def: Pat<(fmul HVF32:$Rs, HVF32:$Rt),
           (V6_vmpy_sf_sf HVF32:$Rs, HVF32:$Rt)>;

  // f32 pair <-> f16 vector conversions.
  def: Pat<(VecF16 (pf1<fpround> HWF32:$Vuu)),
           (V6_vdealh (V6_vcvt_hf_sf (HiVec HvxWR:$Vuu), (LoVec HvxWR:$Vuu)))>;
  def: Pat<(VecPF32 (pf1<fpextend> HVF16:$Vu)),
           (V6_vcvt_sf_hf (V6_vshuffh HvxVR:$Vu))>;

  // f16 <-> i16 conversions on single vectors.
  def: OpR_R_pat<V6_vcvt_h_hf,  Fptosi, VecI16, HVF16>;
  def: OpR_R_pat<V6_vcvt_uh_hf, Fptoui, VecI16, HVF16>;
  def: OpR_R_pat<V6_vcvt_hf_h,  Sitofp, VecF16, HVI16>;
  def: OpR_R_pat<V6_vcvt_hf_uh, Uitofp, VecF16, HVI16>;

  // f16 pair <-> i8 vector conversions.
  def: Pat<(VecI8 (Fptosi HWF16:$Vu)),
           (V6_vcvt_b_hf (HiVec $Vu), (LoVec $Vu))>;
  def: Pat<(VecI8 (Fptoui HWF16:$Vu)),
           (V6_vcvt_ub_hf (HiVec $Vu), (LoVec $Vu))>;
  def: Pat<(VecPF16 (Sitofp HVI8:$Vu)), (V6_vcvt_hf_b HvxVR:$Vu)>;
  def: Pat<(VecPF16 (Uitofp HVI8:$Vu)), (V6_vcvt_hf_ub HvxVR:$Vu)>;
}

// vselect on floating-point vectors; as for the integer types, a negated
// condition swaps the V6_vmux data operands.
let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
  def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(vselect (qnot HQ16:$Qu), HVF16:$Vs, HVF16:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;

  def: Pat<(vselect HQ32:$Qu, HVF32:$Vs, HVF32:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(vselect (qnot HQ32:$Qu), HVF32:$Vs, HVF32:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}

// Floating-point min/max in QFloat mode: select-of-compare idioms
// (AddedComplexity = 220) and the fminnum/fmaxnum nodes.
let Predicates = [UseHVXV68, UseHVX128B, UseHVXQFloat] in {
  let AddedComplexity = 220 in {
    defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect,  setgt, VecQ16, HVF16>;
    defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect, setogt, VecQ16, HVF16>;
    defm: MinMax_pats<V6_vmin_sf, V6_vmax_sf, vselect,  setgt, VecQ32, HVF32>;
    defm: MinMax_pats<V6_vmin_sf, V6_vmax_sf, vselect, setogt, VecQ32, HVF32>;
  }
  def: OpR_RR_pat<V6_vmin_hf, pf2<fminnum>, VecF16, HVF16>;
  def: OpR_RR_pat<V6_vmax_hf, pf2<fmaxnum>, VecF16, HVF16>;
  def: OpR_RR_pat<V6_vmin_sf, pf2<fminnum>, VecF32, HVF32>;
  def: OpR_RR_pat<V6_vmax_sf, pf2<fmaxnum>, VecF32, HVF32>;
}

// The same min/max patterns in IEEE mode, using the V6_vfmin/V6_vfmax
// instructions.
let Predicates = [UseHVXV68, UseHVX128B, UseHVXIEEEFP] in {
  let AddedComplexity = 220 in {
    defm: MinMax_pats<V6_vfmin_hf, V6_vfmax_hf, vselect,  setgt, VecQ16, HVF16>;
    defm: MinMax_pats<V6_vfmin_hf, V6_vfmax_hf, vselect, setogt, VecQ16, HVF16>;
    defm: MinMax_pats<V6_vfmin_sf, V6_vfmax_sf, vselect,  setgt, VecQ32, HVF32>;
    defm: MinMax_pats<V6_vfmin_sf, V6_vfmax_sf, vselect, setogt, VecQ32, HVF32>;
  }
  def: OpR_RR_pat<V6_vfmin_hf, pf2<fminnum>, VecF16, HVF16>;
  def: OpR_RR_pat<V6_vfmax_hf, pf2<fmaxnum>, VecF16, HVF16>;
  def: OpR_RR_pat<V6_vfmin_sf, pf2<fminnum>, VecF32, HVF32>;
  def: OpR_RR_pat<V6_vfmax_sf, pf2<fmaxnum>, VecF32, HVF32>;
}

let Predicates = [UseHVX] in {
  // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
  // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
  // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
  def: Pat<(mul HVI8:$Vs, HVI8:$Vt),
           (V6_vshuffeb (HiVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)),
                        (LoVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)))>;
  def: Pat<(mul HVI16:$Vs, HVI16:$Vt),
           (V6_vmpyih HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(mul HVI32:$Vs, HVI32:$Vt),
           (V6_vmpyiewuh_acc (V6_vmpyieoh HvxVR:$Vs, HvxVR:$Vt),
                             HvxVR:$Vs, HvxVR:$Vt)>;
}

let Predicates = [UseHVX] in {
  // Full-width extensions: a single vector widens into a vector pair.
  def: Pat<(VecPI16 (sext HVI8:$Vs)),  (VSxtb $Vs)>;
  def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;
  def: Pat<(VecPI16 (zext HVI8:$Vs)),  (VZxtb $Vs)>;
  def: Pat<(VecPI32 (zext HVI16:$Vs)), (VZxth $Vs)>;

  // In-vector sign extensions: the unpack is applied to the input (for a
  // pair input, to its low half) and, for a single-vector result, only the
  // low half of the unpacked pair is kept. i8 -> i32 chains two unpack
  // steps.
  def: Pat<(VecI16 (sext_invec HVI8:$Vs)),  (LoVec (VSxtb $Vs))>;
  def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>;
  def: Pat<(VecI32 (sext_invec HVI8:$Vs)),
           (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
  def: Pat<(VecPI16 (sext_invec HWI8:$Vss)),  (VSxtb (LoVec $Vss))>;
  def: Pat<(VecPI32 (sext_invec HWI16:$Vss)), (VSxth (LoVec $Vss))>;
  def: Pat<(VecPI32 (sext_invec HWI8:$Vss)),
           (VSxth (LoVec (VSxtb (LoVec $Vss))))>;

  // In-vector zero extensions, same structure with the unsigned unpacks.
  def: Pat<(VecI16 (zext_invec HVI8:$Vs)),  (LoVec (VZxtb $Vs))>;
  def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>;
  def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
           (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
  def: Pat<(VecPI16 (zext_invec HWI8:$Vss)),  (VZxtb (LoVec $Vss))>;
  def: Pat<(VecPI32 (zext_invec HWI16:$Vss)), (VZxth (LoVec $Vss))>;
  def: Pat<(VecPI32 (zext_invec HWI8:$Vss)),
           (VZxth (LoVec (VZxtb (LoVec $Vss))))>;

  // Truncation of a pair to a single vector with half-width elements.
  def: Pat<(VecI8 (trunc HWI16:$Vss)),
           (V6_vpackeb (HiVec $Vss), (LoVec $Vss))>;
  def: Pat<(VecI16 (trunc HWI32:$Vss)),
           (V6_vpackeh (HiVec $Vss), (LoVec $Vss))>;

  // Truncation of a vector to a Q value: V6_vandvrt with the scalar
  // 0x01010101 for every element width.
  def: Pat<(VecQ8 (trunc HVI8:$Vs)),
           (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
  def: Pat<(VecQ16 (trunc HVI16:$Vs)),
           (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
  def: Pat<(VecQ32 (trunc HVI32:$Vs)),
           (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
}

let Predicates = [UseHVX] in {
  // The "source" types are not legal, and there are no parameterized
  // definitions for them, but they are length-specific.
  // sext_inreg is implemented as a left shift followed by an arithmetic
  // right shift by the same amount (8, 16 or 24 bits).
  let Predicates = [UseHVX,UseHVX64B] in {
    def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v32i8)),
             (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i8)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i16)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
  }
  let Predicates = [UseHVX,UseHVX128B] in {
    def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v64i8)),
             (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i8)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i16)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
  }

  // Take a pair of vectors Vt:Vs and shift them towards LSB by (Rt & HwLen).
  def: Pat<(VecI8 (valign HVI8:$Vt, HVI8:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
  def: Pat<(VecI16 (valign HVI16:$Vt, HVI16:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
  def: Pat<(VecI32 (valign HVI32:$Vt, HVI32:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;

  // Byte shifts by a scalar amount: the bytes are widened to halfwords
  // (zero-extended for VASL/VLSR, sign-extended for VASR), each half is
  // shifted with the halfword instruction, and the results are packed back
  // to bytes with V6_vpackeb.
  def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vaslh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vaslh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
  def: Pat<(HexagonVASR HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vasrh (HiVec (VSxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vasrh (LoVec (VSxtb HvxVR:$Vs)), I32:$Rt))>;
  def: Pat<(HexagonVLSR HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vlsrh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vlsrh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;

  // Halfword and word shifts by a scalar amount.
  def: Pat<(HexagonVASL HVI16:$Vs, I32:$Rt), (V6_vaslh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASL HVI32:$Vs, I32:$Rt), (V6_vaslw HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASR HVI16:$Vs, I32:$Rt), (V6_vasrh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASR HVI32:$Vs, I32:$Rt), (V6_vasrw HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVLSR HVI16:$Vs, I32:$Rt), (V6_vlsrh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVLSR HVI32:$Vs, I32:$Rt), (V6_vlsrw HvxVR:$Vs, I32:$Rt)>;

  // Word shift feeding an add is selected as the accumulating shift.
  def: Pat<(add HVI32:$Vx, (HexagonVASL HVI32:$Vu, I32:$Rt)),
           (V6_vaslw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(add HVI32:$Vx, (HexagonVASR HVI32:$Vu, I32:$Rt)),
           (V6_vasrw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;

  // Shifts by a per-element vector amount (halfword and word only).
  def: Pat<(shl HVI16:$Vs, HVI16:$Vt), (V6_vaslhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(shl HVI32:$Vs, HVI32:$Vt), (V6_vaslwv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(sra HVI16:$Vs, HVI16:$Vt), (V6_vasrhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(sra HVI32:$Vs, HVI32:$Vt), (V6_vasrwv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>;

  // Vpackl is a pseudo-op that is used when legalizing widened truncates.
  // It should never be produced with a register pair in the output, but
  // it can happen to have a pair as an input.
  def: Pat<(VecI8 (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>;
  def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>;
  def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>;
  def: Pat<(VecI8 (vpackl HWI16:$Vs)), (V6_vpackeb (HiVec $Vs), (LoVec $Vs))>;
  def: Pat<(VecI8 (vpackl HWI32:$Vs)),
           (V6_vpackeb (IMPLICIT_DEF), (V6_vpackeh (HiVec $Vs), (LoVec $Vs)))>;
  def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>;

  // vunpack uses the signed unpack fragments (VSxt*).
  // NOTE(review): the last pattern takes HVI32 as its input while applying
  // the halfword unpack, whereas the single-vector sibling above it takes
  // HVI16; confirm the intended input type.
  def: Pat<(VecI16  (vunpack   HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
  def: Pat<(VecI32  (vunpack   HVI8:$Vs)), (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
  def: Pat<(VecI32  (vunpack  HVI16:$Vs)), (LoVec (VSxth $Vs))>;
  def: Pat<(VecPI16 (vunpack   HVI8:$Vs)), (VSxtb $Vs)>;
  def: Pat<(VecPI32 (vunpack   HVI8:$Vs)), (VSxth (LoVec (VSxtb $Vs)))>;
  def: Pat<(VecPI32 (vunpack  HVI32:$Vs)), (VSxth $Vs)>;

  // vunpacku uses the unsigned unpack fragments (VZxt*); its last pattern
  // has the same HVI32 input as the vunpack one above.
  def: Pat<(VecI16  (vunpacku  HVI8:$Vs)), (LoVec (VZxtb $Vs))>;
  def: Pat<(VecI32  (vunpacku  HVI8:$Vs)), (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
  def: Pat<(VecI32  (vunpacku HVI16:$Vs)), (LoVec (VZxth $Vs))>;
  def: Pat<(VecPI16 (vunpacku  HVI8:$Vs)), (VZxtb $Vs)>;
  def: Pat<(VecPI32 (vunpacku  HVI8:$Vs)), (VZxth (LoVec (VZxtb $Vs)))>;
  def: Pat<(VecPI32 (vunpacku HVI32:$Vs)), (VZxth $Vs)>;

  // bswap is a V6_vdelta with a byte-splatted control value (0x01 for
  // halfwords, 0x03 for words). The V62 variant differs only in how the
  // splat is produced and carries AddedComplexity = 10.
  let Predicates = [UseHVX,UseHVXV60] in {
    def: Pat<(VecI16 (bswap HVI16:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x01)))>;
    def: Pat<(VecI32 (bswap HVI32:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x03)))>;
  }
  let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in {
    def: Pat<(VecI16 (bswap HVI16:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x01)))>;
    def: Pat<(VecI32 (bswap HVI32:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x03)))>;
  }

  // Population count is native for halfwords. Bytes are zero-extended to
  // halfwords, counted and packed back; words add the two zero-extended
  // halfword counts.
  def: Pat<(VecI8 (ctpop HVI8:$Vs)),
           (V6_vpackeb (V6_vpopcounth (HiVec (V6_vunpackub HvxVR:$Vs))),
                       (V6_vpopcounth (LoVec (V6_vunpackub HvxVR:$Vs))))>;
  def: Pat<(VecI16 (ctpop HVI16:$Vs)), (V6_vpopcounth HvxVR:$Vs)>;
  def: Pat<(VecI32 (ctpop HVI32:$Vs)),
           (V6_vaddw (LoVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))),
                     (HiVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))))>;

  // Byte ctlz: count leading zeros on the zero-extended halfwords, pack back
  // to bytes, then subtract 8 for the extra high bits added by the
  // extension.
  let Predicates = [UseHVX,UseHVXV60] in
  def: Pat<(VecI8 (ctlz HVI8:$Vs)),
           (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
                                 (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
                     (V60splatib (i32 0x08)))>;
  let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in
  def: Pat<(VecI8 (ctlz HVI8:$Vs)),
           (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
                                 (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
                     (V62splatib (i32 0x08)))>;

  def: Pat<(VecI16 (ctlz HVI16:$Vs)), (V6_vcl0h HvxVR:$Vs)>;
  def: Pat<(VecI32 (ctlz HVI32:$Vs)), (V6_vcl0w HvxVR:$Vs)>;
}

// Scalar-predicate (I1) select between two vector operands matched by
// RegPred.
class HvxSel_pat<InstHexagon MI, PatFrag RegPred>
  : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt),
        (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>;

let Predicates = [UseHVX] in {
  def: HvxSel_pat<PS_vselect, HVI8>;
  def: HvxSel_pat<PS_vselect, HVI16>;
  def: HvxSel_pat<PS_vselect, HVI32>;
  def: HvxSel_pat<PS_wselect, HWI8>;
  def: HvxSel_pat<PS_wselect, HWI16>;
  def: HvxSel_pat<PS_wselect, HWI32>;
}

// V6_vandvrt of $Vs with the scalar -1; used below to turn the result of a
// vector select back into a Q value.
def V2Q: OutPatFrag<(ops node:$Vs), (V6_vandvrt $Vs, (A2_tfrsi -1))>;

// Scalar-predicate select between two Q values: both are converted with
// Q2V, selected with PS_vselect, and converted back with V2Q.
let Predicates = [UseHVX] in {
  def: Pat<(select I1:$Pu, VecQ8:$Qs, VecQ8:$Qt),
           (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>;
  def: Pat<(select I1:$Pu, VecQ16:$Qs, VecQ16:$Qt),
           (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>;
  def: Pat<(select I1:$Pu, VecQ32:$Qs, VecQ32:$Qt),
           (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>;
}

let Predicates = [UseHVX] in {
  // Constant Q values.
  def: Pat<(VecQ8  (qtrue)), (PS_qtrue)>;
  def: Pat<(VecQ16 (qtrue)), (PS_qtrue)>;
  def: Pat<(VecQ32 (qtrue)), (PS_qtrue)>;
  def: Pat<(VecQ8  (qfalse)), (PS_qfalse)>;
  def: Pat<(VecQ16 (qfalse)), (PS_qfalse)>;
  def: Pat<(VecQ32 (qfalse)), (PS_qfalse)>;

  // Both spellings of negation (vnot and qnot) select V6_pred_not.
  def: Pat<(vnot  HQ8:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(vnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(vnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(qnot  HQ8:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(qnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(qnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>;

  // Logical operations on Q values.
  def: OpR_RR_pat<V6_pred_and,  And,  VecQ8,   HQ8>;
  def: OpR_RR_pat<V6_pred_and,  And, VecQ16,  HQ16>;
  def: OpR_RR_pat<V6_pred_and,  And, VecQ32,  HQ32>;
  def: OpR_RR_pat<V6_pred_or,    Or,  VecQ8,   HQ8>;
  def: OpR_RR_pat<V6_pred_or,    Or, VecQ16,  HQ16>;
  def: OpR_RR_pat<V6_pred_or,    Or, VecQ32,  HQ32>;
  def: OpR_RR_pat<V6_pred_xor,  Xor,  VecQ8,   HQ8>;
  def: OpR_RR_pat<V6_pred_xor,  Xor, VecQ16,  HQ16>;
  def: OpR_RR_pat<V6_pred_xor,  Xor, VecQ32,  HQ32>;

  // And/Or wrapped in VNot2 with qnot select the _n instruction forms.
  def: OpR_RR_pat<V6_pred_and_n,  VNot2<And, qnot>,  VecQ8,   HQ8>;
  def: OpR_RR_pat<V6_pred_and_n,  VNot2<And, qnot>, VecQ16,  HQ16>;
  def: OpR_RR_pat<V6_pred_and_n,  VNot2<And, qnot>, VecQ32,  HQ32>;
  def: OpR_RR_pat<V6_pred_or_n,    VNot2<Or, qnot>,  VecQ8,   HQ8>;
  def: OpR_RR_pat<V6_pred_or_n,    VNot2<Or, qnot>, VecQ16,  HQ16>;
  def: OpR_RR_pat<V6_pred_or_n,    VNot2<Or, qnot>, VecQ32,  HQ32>;

  // Integer vector compares producing a Q value.
  def: OpR_RR_pat<V6_veqb,      seteq,  VecQ8,  HVI8>;
  def: OpR_RR_pat<V6_veqh,      seteq, VecQ16, HVI16>;
  def: OpR_RR_pat<V6_veqw,      seteq, VecQ32, HVI32>;
  def: OpR_RR_pat<V6_vgtb,      setgt,  VecQ8,  HVI8>;
  def: OpR_RR_pat<V6_vgth,      setgt, VecQ16, HVI16>;
  def: OpR_RR_pat<V6_vgtw,      setgt, VecQ32, HVI32>;
  def: OpR_RR_pat<V6_vgtub,    setugt,  VecQ8,  HVI8>;
  def: OpR_RR_pat<V6_vgtuh,    setugt, VecQ16, HVI16>;
  def: OpR_RR_pat<V6_vgtuw,    setugt, VecQ32, HVI32>;

  // Compares whose result is combined (And/Or/Xor) with an existing Q value
  // select the corresponding accumulating compare instruction.
  def: AccRRR_pat<V6_veqb_and,    And,  seteq,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_veqb_or,      Or,  seteq,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_veqb_xor,    Xor,  seteq,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_veqh_and,    And,  seteq,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqh_or,      Or,  seteq,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqh_xor,    Xor,  seteq,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqw_and,    And,  seteq,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_veqw_or,      Or,  seteq,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_veqw_xor,    Xor,  seteq,   HQ32, HVI32, HVI32>;

  def: AccRRR_pat<V6_vgtb_and,    And,  setgt,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtb_or,      Or,  setgt,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtb_xor,    Xor,  setgt,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgth_and,    And,  setgt,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgth_or,      Or,  setgt,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgth_xor,    Xor,  setgt,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtw_and,    And,  setgt,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtw_or,      Or,  setgt,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtw_xor,    Xor,  setgt,   HQ32, HVI32, HVI32>;

  def: AccRRR_pat<V6_vgtub_and,   And, setugt,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtub_or,     Or, setugt,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtub_xor,   Xor, setugt,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtuh_and,   And, setugt,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuh_or,     Or, setugt,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuh_xor,   Xor, setugt,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuw_and,   And, setugt,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtuw_or,     Or, setugt,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtuw_xor,   Xor, setugt,   HQ32, HVI32, HVI32>;
}

let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
  def: OpR_RR_pat<V6_veqh,              seteq,  VecQ16, HVF16>;
  def: OpR_RR_pat<V6_veqh,             setoeq,  VecQ16, HVF16>;
  def: OpR_RR_pat<V6_veqh,             setueq,  VecQ16, HVF16>;
  def: OpR_RR_pat<V6_vgthf,             setgt,  VecQ16, HVF16>;
  def: OpR_RR_pat<V6_vgthf,
setogt, VecQ16, HVF16>; 829 def: OpR_RR_pat<V6_vgthf, setugt, VecQ16, HVF16>; 830 831 def: OpR_RR_pat<V6_veqw, seteq, VecQ32, HVF32>; 832 def: OpR_RR_pat<V6_veqw, setoeq, VecQ32, HVF32>; 833 def: OpR_RR_pat<V6_veqw, setueq, VecQ32, HVF32>; 834 def: OpR_RR_pat<V6_vgtsf, setgt, VecQ32, HVF32>; 835 def: OpR_RR_pat<V6_vgtsf, setogt, VecQ32, HVF32>; 836 def: OpR_RR_pat<V6_vgtsf, setugt, VecQ32, HVF32>; 837 838 def: AccRRR_pat<V6_veqh_and, And, seteq, HQ16, HVF16, HVF16>; 839 def: AccRRR_pat<V6_veqh_or, Or, seteq, HQ16, HVF16, HVF16>; 840 def: AccRRR_pat<V6_veqh_xor, Xor, seteq, HQ16, HVF16, HVF16>; 841 def: AccRRR_pat<V6_veqh_and, And, setoeq, HQ16, HVF16, HVF16>; 842 def: AccRRR_pat<V6_veqh_or, Or, setoeq, HQ16, HVF16, HVF16>; 843 def: AccRRR_pat<V6_veqh_xor, Xor, setoeq, HQ16, HVF16, HVF16>; 844 def: AccRRR_pat<V6_veqh_and, And, setueq, HQ16, HVF16, HVF16>; 845 def: AccRRR_pat<V6_veqh_or, Or, setueq, HQ16, HVF16, HVF16>; 846 def: AccRRR_pat<V6_veqh_xor, Xor, setueq, HQ16, HVF16, HVF16>; 847 def: AccRRR_pat<V6_vgthf_and, And, setgt, HQ16, HVF16, HVF16>; 848 def: AccRRR_pat<V6_vgthf_or, Or, setgt, HQ16, HVF16, HVF16>; 849 def: AccRRR_pat<V6_vgthf_xor, Xor, setgt, HQ16, HVF16, HVF16>; 850 def: AccRRR_pat<V6_vgthf_and, And, setogt, HQ16, HVF16, HVF16>; 851 def: AccRRR_pat<V6_vgthf_or, Or, setogt, HQ16, HVF16, HVF16>; 852 def: AccRRR_pat<V6_vgthf_xor, Xor, setogt, HQ16, HVF16, HVF16>; 853 def: AccRRR_pat<V6_vgthf_and, And, setugt, HQ16, HVF16, HVF16>; 854 def: AccRRR_pat<V6_vgthf_or, Or, setugt, HQ16, HVF16, HVF16>; 855 def: AccRRR_pat<V6_vgthf_xor, Xor, setugt, HQ16, HVF16, HVF16>; 856 857 def: AccRRR_pat<V6_veqw_and, And, seteq, HQ32, HVF32, HVF32>; 858 def: AccRRR_pat<V6_veqw_or, Or, seteq, HQ32, HVF32, HVF32>; 859 def: AccRRR_pat<V6_veqw_xor, Xor, seteq, HQ32, HVF32, HVF32>; 860 def: AccRRR_pat<V6_veqw_and, And, setoeq, HQ32, HVF32, HVF32>; 861 def: AccRRR_pat<V6_veqw_or, Or, setoeq, HQ32, HVF32, HVF32>; 862 def: AccRRR_pat<V6_veqw_xor, Xor, setoeq, HQ32, HVF32, 
HVF32>; 863 def: AccRRR_pat<V6_veqw_and, And, setueq, HQ32, HVF32, HVF32>; 864 def: AccRRR_pat<V6_veqw_or, Or, setueq, HQ32, HVF32, HVF32>; 865 def: AccRRR_pat<V6_veqw_xor, Xor, setueq, HQ32, HVF32, HVF32>; 866 def: AccRRR_pat<V6_vgtsf_and, And, setgt, HQ32, HVF32, HVF32>; 867 def: AccRRR_pat<V6_vgtsf_or, Or, setgt, HQ32, HVF32, HVF32>; 868 def: AccRRR_pat<V6_vgtsf_xor, Xor, setgt, HQ32, HVF32, HVF32>; 869 def: AccRRR_pat<V6_vgtsf_and, And, setogt, HQ32, HVF32, HVF32>; 870 def: AccRRR_pat<V6_vgtsf_or, Or, setogt, HQ32, HVF32, HVF32>; 871 def: AccRRR_pat<V6_vgtsf_xor, Xor, setogt, HQ32, HVF32, HVF32>; 872 def: AccRRR_pat<V6_vgtsf_and, And, setugt, HQ32, HVF32, HVF32>; 873 def: AccRRR_pat<V6_vgtsf_or, Or, setugt, HQ32, HVF32, HVF32>; 874 def: AccRRR_pat<V6_vgtsf_xor, Xor, setugt, HQ32, HVF32, HVF32>; 875 876 def: Pat<(VecQ16 (setone HVF16:$Vt, HVF16:$Vu)), 877 (V6_pred_not (V6_veqh HvxVR:$Vt, HvxVR:$Vu))>; 878 879 def: Pat<(VecQ32 (setone HVF32:$Vt, HVF32:$Vu)), 880 (V6_pred_not (V6_veqw HvxVR:$Vt, HvxVR:$Vu))>; 881} 882