1//===- HexagonPatternsHVX.td - Selection Patterns for HVX --*- tablegen -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9def HQ8: PatLeaf<(VecQ8 HvxQR:$R)>; 10def HQ16: PatLeaf<(VecQ16 HvxQR:$R)>; 11def HQ32: PatLeaf<(VecQ32 HvxQR:$R)>; 12 13def HVI8: PatLeaf<(VecI8 HvxVR:$R)>; 14def HVI16: PatLeaf<(VecI16 HvxVR:$R)>; 15def HVI32: PatLeaf<(VecI32 HvxVR:$R)>; 16def HVF16: PatLeaf<(VecF16 HvxVR:$R)>; 17def HVF32: PatLeaf<(VecF32 HvxVR:$R)>; 18 19def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; 20def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; 21def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; 22def HWF16: PatLeaf<(VecPF16 HvxWR:$R)>; 23def HWF32: PatLeaf<(VecPF32 HvxWR:$R)>; 24 25def SDTVecUnaryOp: 26 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; 27 28def SDTVecBinOp: 29 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>; 30 31def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2, 32 [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>; 33def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>; 34 35def SDTHexagonVINSERTW0: SDTypeProfile<1, 2, 36 [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; 37def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>; 38 39def HwLen2: SDNodeXForm<imm, [{ 40 const auto &ST = CurDAG->getSubtarget<HexagonSubtarget>(); 41 return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32); 42}]>; 43 44def Q2V: OutPatFrag<(ops node:$Qs), (V6_vandqrt $Qs, (ToI32 -1))>; 45 46def Combinev: OutPatFrag<(ops node:$Vs, node:$Vt), 47 (REG_SEQUENCE HvxWR, $Vs, vsub_hi, $Vt, vsub_lo)>; 48 49def Combineq: OutPatFrag<(ops node:$Qs, node:$Qt), 50 (V6_vandvrt 51 (V6_vor 52 (V6_vror (V6_vpackeb (V6_vd0), (Q2V $Qs)), 53 (ToI32 (HwLen2 (i32 0)))), // Half the vector length 54 (V6_vpackeb (V6_vd0), (Q2V $Qt))), 55 (ToI32 -1))>; 56 57def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>; 58def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>; 59 60def HexagonQCAT: SDNode<"HexagonISD::QCAT", SDTVecBinOp>; 61def HexagonQTRUE: SDNode<"HexagonISD::QTRUE", SDTVecLeaf>; 62def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>; 63 64def vzero: PatFrags<(ops), [(splat_vector (i32 0)), (splat_vector (f32zero))]>; 65def qtrue: PatFrag<(ops), (HexagonQTRUE)>; 66def qfalse: PatFrag<(ops), (HexagonQFALSE)>; 67def qcat: PatFrag<(ops node:$Qs, node:$Qt), 68 (HexagonQCAT node:$Qs, node:$Qt)>; 69 70def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>; 71 72def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>; 73def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>; 74def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>; 75def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>; 76 77class VSubi<InstHexagon VSub, InstHexagon VSplati>: 78 OutPatFrag<(ops node:$Imm, node:$Vs), (VSub (VSplati (i32 $Imm)), $Vs)>; 79 80def VSubib: VSubi<V6_vsubb, PS_vsplatib>; 81def VSubih: VSubi<V6_vsubh, PS_vsplatih>; 82def VSubiw: VSubi<V6_vsubw, PS_vsplatiw>; 83 84def VNegb: OutPatFrag<(ops node:$Vs), (VSubib 0, $Vs)>; 85def VNegh: OutPatFrag<(ops node:$Vs), (VSubih 0, $Vs)>; 86def VNegw: OutPatFrag<(ops node:$Vs), (VSubiw 0, $Vs)>; 87 88class pf3<SDNode Op>: PatFrag<(ops node:$a, node:$b, node:$c), 89 (Op node:$a, node:$b, node:$c)>; 90 91def Mfshl: pf3<HexagonMFSHL>; 92def Mfshr: pf3<HexagonMFSHR>; 93 94def IsVecOff : PatLeaf<(i32 imm), [{ 95 int32_t V = N->getSExtValue(); 96 int32_t VecSize = HRI->getSpillSize(Hexagon::HvxVRRegClass); 97 assert(isPowerOf2_32(VecSize)); 98 if ((uint32_t(V) & (uint32_t(VecSize)-1)) != 0) 99 return false; 100 int32_t L = Log2_32(VecSize); 101 return isInt<4>(V >> L); 102}]>; 103 104 105def alignedload: PatFrag<(ops node:$a), (load $a), [{ 106 return isAlignedMemNode(cast<MemSDNode>(N)); 107}]>; 108 109def unalignedload: PatFrag<(ops node:$a), (load $a), [{ 110 return !isAlignedMemNode(cast<MemSDNode>(N)); 111}]>; 112 113def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ 114 return isAlignedMemNode(cast<MemSDNode>(N)); 115}]>; 116 117def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ 118 return !isAlignedMemNode(cast<MemSDNode>(N)); 119}]>; 120 121 122// HVX loads 123 124multiclass HvxLdfi_pat<InstHexagon MI, PatFrag Load, ValueType ResType, 125 PatFrag ImmPred> { 126 def: Pat<(ResType (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), 127 (MI AddrFI:$fi, imm:$Off)>; 128 def: Pat<(ResType (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))), 129 (MI AddrFI:$fi, imm:$Off)>; 130 def: Pat<(ResType (Load AddrFI:$fi)), (ResType (MI AddrFI:$fi, 0))>; 131} 132 133multiclass HvxLdgi_pat<InstHexagon MI, PatFrag Load, ValueType ResType, 134 PatFrag ImmPred> { 135 def: Pat<(ResType (Load (add I32:$Rt, ImmPred:$Off))), 136 (MI I32:$Rt, imm:$Off)>; 137 def: Pat<(ResType (Load I32:$Rt)), 138 (MI I32:$Rt, 0)>; 139} 140 141multiclass HvxLdc_pat<InstHexagon MI, PatFrag Load, ValueType ResType> { 142 // The HVX selection code for shuffles can generate vector constants. 143 // Calling "Select" on the resulting loads from CP fails without these 144 // patterns. 145 def: Pat<(ResType (Load (HexagonCP tconstpool:$Addr))), 146 (MI (ToI32 imm:$Addr), 0)>; 147 def: Pat<(ResType (Load (HexagonAtPcrel tconstpool:$Addr))), 148 (MI (C4_addipc imm:$Addr), 0)>; 149} 150 151multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType ResType, 152 PatFrag ImmPred> { 153 defm: HvxLdfi_pat<MI, Load, ResType, ImmPred>; 154 defm: HvxLdgi_pat<MI, Load, ResType, ImmPred>; 155 defm: HvxLdc_pat <MI, Load, ResType>; 156} 157 158// Aligned loads: everything, plus loads with valignaddr node. 159multiclass HvxLda_pat<InstHexagon MI, PatFrag Load, ValueType ResType, 160 PatFrag ImmPred> { 161 let AddedComplexity = 50 in { 162 def: Pat<(ResType (Load (valignaddr I32:$Rt))), 163 (MI I32:$Rt, 0)>; 164 def: Pat<(ResType (Load (add (valignaddr I32:$Rt), ImmPred:$Off))), 165 (MI I32:$Rt, imm:$Off)>; 166 } 167 defm: HvxLd_pat<MI, Load, ResType, ImmPred>; 168} 169 170let Predicates = [UseHVX] in { 171 // alignedload will match a non-temporal load as well, so try non-temporal 172 // first. 173 defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI8, IsVecOff>; 174 defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI16, IsVecOff>; 175 defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI32, IsVecOff>; 176 defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI8, IsVecOff>; 177 defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI16, IsVecOff>; 178 defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI32, IsVecOff>; 179 defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI8, IsVecOff>; 180 defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI16, IsVecOff>; 181 defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI32, IsVecOff>; 182} 183 184let Predicates = [UseHVXV68] in { 185 defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF16, IsVecOff>; 186 defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF32, IsVecOff>; 187 defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecF16, IsVecOff>; 188 defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecF32, IsVecOff>; 189 defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF16, IsVecOff>; 190 defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF32, IsVecOff>; 191} 192 193// HVX stores 194 195multiclass HvxStfi_pat<InstHexagon MI, PatFrag Store, PatFrag Value, 196 PatFrag ImmPred> { 197 def: Pat<(Store Value:$Vs, (add (i32 AddrFI:$fi), ImmPred:$Off)), 198 (MI AddrFI:$fi, imm:$Off, Value:$Vs)>; 199 def: Pat<(Store Value:$Vs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)), 200 (MI AddrFI:$fi, imm:$Off, Value:$Vs)>; 201 def: Pat<(Store Value:$Vs, AddrFI:$fi), 202 (MI AddrFI:$fi, 0, Value:$Vs)>; 203} 204 205multiclass HvxStgi_pat<InstHexagon MI, PatFrag Store, PatFrag Value, 206 PatFrag ImmPred> { 207 def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$Off)), 208 (MI I32:$Rt, imm:$Off, Value:$Vs)>; 209 def: Pat<(Store Value:$Vs, (IsOrAdd I32:$Rt, ImmPred:$Off)), 210 (MI I32:$Rt, imm:$Off, Value:$Vs)>; 211 def: Pat<(Store Value:$Vs, I32:$Rt), 212 (MI I32:$Rt, 0, Value:$Vs)>; 213} 214 215multiclass HvxSt_pat<InstHexagon MI, PatFrag Store, PatFrag Value, 216 PatFrag ImmPred> { 217 defm: HvxStfi_pat<MI, Store, Value, ImmPred>; 218 defm: HvxStgi_pat<MI, Store, Value, ImmPred>; 219} 220 221let Predicates = [UseHVX] in { 222 // alignedstore will match a non-temporal store as well, so try non-temporal 223 // first. 224 defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVI8, IsVecOff>; 225 defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVI16, IsVecOff>; 226 defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVI32, IsVecOff>; 227 defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVI8, IsVecOff>; 228 defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVI16, IsVecOff>; 229 defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVI32, IsVecOff>; 230 defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVI8, IsVecOff>; 231 defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVI16, IsVecOff>; 232 defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVI32, IsVecOff>; 233} 234 235let Predicates = [UseHVXV68] in { 236 defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF16, IsVecOff>; 237 defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF32, IsVecOff>; 238 defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVF16, IsVecOff>; 239 defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVF32, IsVecOff>; 240 defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVF16, IsVecOff>; 241 defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVF32, IsVecOff>; 242} 243 244// Bitcasts between same-size vector types are no-ops, except for the 245// actual type change. 246let Predicates = [UseHVX] in { 247 defm: NopCast_pat<VecI8, VecI16, HvxVR>; 248 defm: NopCast_pat<VecI8, VecI32, HvxVR>; 249 defm: NopCast_pat<VecI16, VecI32, HvxVR>; 250 251 defm: NopCast_pat<VecPI8, VecPI16, HvxWR>; 252 defm: NopCast_pat<VecPI8, VecPI32, HvxWR>; 253 defm: NopCast_pat<VecPI16, VecPI32, HvxWR>; 254} 255 256let Predicates = [UseHVX, UseHVXFloatingPoint] in { 257 defm: NopCast_pat<VecI8, VecF16, HvxVR>; 258 defm: NopCast_pat<VecI8, VecF32, HvxVR>; 259 defm: NopCast_pat<VecI16, VecF16, HvxVR>; 260 defm: NopCast_pat<VecI16, VecF32, HvxVR>; 261 defm: NopCast_pat<VecI32, VecF16, HvxVR>; 262 defm: NopCast_pat<VecI32, VecF32, HvxVR>; 263 defm: NopCast_pat<VecF16, VecF32, HvxVR>; 264 265 defm: NopCast_pat<VecPI8, VecPF16, HvxWR>; 266 defm: NopCast_pat<VecPI8, VecPF32, HvxWR>; 267 defm: NopCast_pat<VecPI16, VecPF16, HvxWR>; 268 defm: NopCast_pat<VecPI16, VecPF32, HvxWR>; 269 defm: NopCast_pat<VecPI32, VecPF16, HvxWR>; 270 defm: NopCast_pat<VecPI32, VecPF32, HvxWR>; 271 defm: NopCast_pat<VecPF16, VecPF32, HvxWR>; 272} 273 274let Predicates = [UseHVX] in { 275 let AddedComplexity = 100 in { 276 // These should be preferred over a vsplat of 0. 277 def: Pat<(VecI8 vzero), (V6_vd0)>; 278 def: Pat<(VecI16 vzero), (V6_vd0)>; 279 def: Pat<(VecI32 vzero), (V6_vd0)>; 280 def: Pat<(VecPI8 vzero), (PS_vdd0)>; 281 def: Pat<(VecPI16 vzero), (PS_vdd0)>; 282 def: Pat<(VecPI32 vzero), (PS_vdd0)>; 283 def: Pat<(VecPF32 vzero), (PS_vdd0)>; 284 285 def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>; 286 def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>; 287 def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>; 288 } 289 290 def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)), 291 (Combinev HvxVR:$Vt, HvxVR:$Vs)>; 292 def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)), 293 (Combinev HvxVR:$Vt, HvxVR:$Vs)>; 294 def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)), 295 (Combinev HvxVR:$Vt, HvxVR:$Vs)>; 296 297 def: Pat<(VecQ8 (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qt, $Qs)>; 298 def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qt, $Qs)>; 299 300 def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs), 301 (V6_extractw HvxVR:$Vu, I32:$Rs)>; 302 def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs), 303 (V6_extractw HvxVR:$Vu, I32:$Rs)>; 304 def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs), 305 (V6_extractw HvxVR:$Vu, I32:$Rs)>; 306 307 def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt), 308 (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; 309 def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt), 310 (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; 311 def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt), 312 (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; 313} 314 315let Predicates = [UseHVX, UseHVXFloatingPoint] in { 316 let AddedComplexity = 100 in { 317 def: Pat<(VecF16 vzero), (V6_vd0)>; 318 def: Pat<(VecF32 vzero), (V6_vd0)>; 319 def: Pat<(VecPF16 vzero), (PS_vdd0)>; 320 def: Pat<(VecPF32 vzero), (PS_vdd0)>; 321 322 def: Pat<(concat_vectors (VecF16 vzero), (VecF16 vzero)), (PS_vdd0)>; 323 def: Pat<(concat_vectors (VecF32 vzero), (VecF32 vzero)), (PS_vdd0)>; 324 } 325 326 def: Pat<(VecPF16 (concat_vectors HVF16:$Vs, HVF16:$Vt)), 327 (Combinev HvxVR:$Vt, HvxVR:$Vs)>; 328 def: Pat<(VecPF32 (concat_vectors HVF32:$Vs, HVF32:$Vt)), 329 (Combinev HvxVR:$Vt, HvxVR:$Vs)>; 330 331 def: Pat<(HexagonVINSERTW0 HVF16:$Vu, I32:$Rt), 332 (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; 333 def: Pat<(HexagonVINSERTW0 HVF32:$Vu, I32:$Rt), 334 (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; 335} 336 337def Rep: OutPatFrag<(ops node:$N), (Combinev $N, $N)>; 338 339let Predicates = [UseHVX] in { 340 let AddedComplexity = 10 in { 341 def: Pat<(VecI8 (splat_vector u8_0ImmPred:$V)), (PS_vsplatib imm:$V)>; 342 def: Pat<(VecI16 (splat_vector u16_0ImmPred:$V)), (PS_vsplatih imm:$V)>; 343 def: Pat<(VecI32 (splat_vector anyimm:$V)), (PS_vsplatiw imm:$V)>; 344 def: Pat<(VecPI8 (splat_vector u8_0ImmPred:$V)), (Rep (PS_vsplatib imm:$V))>; 345 def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$V)), (Rep (PS_vsplatih imm:$V))>; 346 def: Pat<(VecPI32 (splat_vector anyimm:$V)), (Rep (PS_vsplatiw imm:$V))>; 347 } 348 def: Pat<(VecI8 (splat_vector I32:$Rs)), (PS_vsplatrb $Rs)>; 349 def: Pat<(VecI16 (splat_vector I32:$Rs)), (PS_vsplatrh $Rs)>; 350 def: Pat<(VecI32 (splat_vector I32:$Rs)), (PS_vsplatrw $Rs)>; 351 def: Pat<(VecPI8 (splat_vector I32:$Rs)), (Rep (PS_vsplatrb $Rs))>; 352 def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (PS_vsplatrh $Rs))>; 353 def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (PS_vsplatrw $Rs))>; 354} 355let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { 356 let AddedComplexity = 30 in { 357 def: Pat<(VecF16 (splat_vector u16_0ImmPred:$V)), (PS_vsplatih imm:$V)>; 358 def: Pat<(VecF32 (splat_vector anyint:$V)), (PS_vsplatiw imm:$V)>; 359 def: Pat<(VecF32 (splat_vector f32ImmPred:$V)), (PS_vsplatiw (ftoi $V))>; 360 } 361 let AddedComplexity = 20 in { 362 def: Pat<(VecF16 (splat_vector I32:$Rs)), (PS_vsplatrh $Rs)>; 363 def: Pat<(VecF32 (splat_vector I32:$Rs)), (PS_vsplatrw $Rs)>; 364 def: Pat<(VecF32 (splat_vector F32:$Rs)), (PS_vsplatrw $Rs)>; 365 } 366} 367 368class Vneg1<ValueType VecTy> 369 : PatFrag<(ops), (VecTy (splat_vector (i32 -1)))>; 370 371class Vnot<ValueType VecTy> 372 : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>; 373 374let Predicates = [UseHVX] in { 375 let AddedComplexity = 200 in { 376 def: Pat<(Vnot<VecI8> HVI8:$Vs), (V6_vnot HvxVR:$Vs)>; 377 def: Pat<(Vnot<VecI16> HVI16:$Vs), (V6_vnot HvxVR:$Vs)>; 378 def: Pat<(Vnot<VecI32> HVI32:$Vs), (V6_vnot HvxVR:$Vs)>; 379 } 380 381 def: OpR_RR_pat<V6_vaddb, Add, VecI8, HVI8>; 382 def: OpR_RR_pat<V6_vaddh, Add, VecI16, HVI16>; 383 def: OpR_RR_pat<V6_vaddw, Add, VecI32, HVI32>; 384 def: OpR_RR_pat<V6_vaddb_dv, Add, VecPI8, HWI8>; 385 def: OpR_RR_pat<V6_vaddh_dv, Add, VecPI16, HWI16>; 386 def: OpR_RR_pat<V6_vaddw_dv, Add, VecPI32, HWI32>; 387 def: OpR_RR_pat<V6_vsubb, Sub, VecI8, HVI8>; 388 def: OpR_RR_pat<V6_vsubh, Sub, VecI16, HVI16>; 389 def: OpR_RR_pat<V6_vsubw, Sub, VecI32, HVI32>; 390 def: OpR_RR_pat<V6_vsubb_dv, Sub, VecPI8, HWI8>; 391 def: OpR_RR_pat<V6_vsubh_dv, Sub, VecPI16, HWI16>; 392 def: OpR_RR_pat<V6_vsubw_dv, Sub, VecPI32, HWI32>; 393 def: OpR_RR_pat<V6_vand, And, VecI8, HVI8>; 394 def: OpR_RR_pat<V6_vand, And, VecI16, HVI16>; 395 def: OpR_RR_pat<V6_vand, And, VecI32, HVI32>; 396 def: OpR_RR_pat<V6_vor, Or, VecI8, HVI8>; 397 def: OpR_RR_pat<V6_vor, Or, VecI16, HVI16>; 398 def: OpR_RR_pat<V6_vor, Or, VecI32, HVI32>; 399 def: OpR_RR_pat<V6_vxor, Xor, VecI8, HVI8>; 400 def: OpR_RR_pat<V6_vxor, Xor, VecI16, HVI16>; 401 def: OpR_RR_pat<V6_vxor, Xor, VecI32, HVI32>; 402 403 def: OpR_RR_pat<V6_vminb, Smin, VecI8, HVI8>; 404 def: OpR_RR_pat<V6_vmaxb, Smax, VecI8, HVI8>; 405 def: OpR_RR_pat<V6_vminub, Umin, VecI8, HVI8>; 406 def: OpR_RR_pat<V6_vmaxub, Umax, VecI8, HVI8>; 407 def: OpR_RR_pat<V6_vminh, Smin, VecI16, HVI16>; 408 def: OpR_RR_pat<V6_vmaxh, Smax, VecI16, HVI16>; 409 def: OpR_RR_pat<V6_vminuh, Umin, VecI16, HVI16>; 410 def: OpR_RR_pat<V6_vmaxuh, Umax, VecI16, HVI16>; 411 def: OpR_RR_pat<V6_vminw, Smin, VecI32, HVI32>; 412 def: OpR_RR_pat<V6_vmaxw, Smax, VecI32, HVI32>; 413 414 def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt), 415 (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; 416 def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt), 417 (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; 418 def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt), 419 (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; 420 421 def: Pat<(vselect (qnot HQ8:$Qu), HVI8:$Vs, HVI8:$Vt), 422 (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; 423 def: Pat<(vselect (qnot HQ16:$Qu), HVI16:$Vs, HVI16:$Vt), 424 (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; 425 def: Pat<(vselect (qnot HQ32:$Qu), HVI32:$Vs, HVI32:$Vt), 426 (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; 427} 428 429// For now, we always deal with vector floating point in SF mode. 430class OpR_RR_pat_conv<InstHexagon MI, PatFrag Op, ValueType ResType, 431 PatFrag RsPred, PatFrag RtPred = RsPred> 432 : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), 433 (V6_vconv_sf_qf32 (VecF32 (MI RsPred:$Rs, RtPred:$Rt)))>; 434 435class OpR_RR_pat_conv_hf<InstHexagon MI, PatFrag Op, ValueType ResType, 436 PatFrag RsPred, PatFrag RtPred = RsPred> 437 : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), 438 (V6_vconv_hf_qf16 (VecF16 (MI RsPred:$Rs, RtPred:$Rt)))>; 439 440let Predicates = [UseHVXV68, UseHVXQFloat] in { 441 def: OpR_RR_pat_conv_hf<V6_vsub_hf, pf2<fsub>, VecF16, HVF16>; 442 def: OpR_RR_pat_conv_hf<V6_vadd_hf, pf2<fadd>, VecF16, HVF16>; 443 def: OpR_RR_pat_conv_hf<V6_vmpy_qf16_hf, pf2<fmul>, VecF16, HVF16>; 444 def: OpR_RR_pat_conv<V6_vsub_sf, pf2<fsub>, VecF32, HVF32>; 445 def: OpR_RR_pat_conv<V6_vadd_sf, pf2<fadd>, VecF32, HVF32>; 446 def: OpR_RR_pat_conv<V6_vmpy_qf32_sf, pf2<fmul>, VecF32, HVF32>; 447 448 // For now we assume that the fp32 register is always coming in as IEEE float 449 // since the qfloat arithmetic instructions above always generate the 450 // accompanying conversions as part of their pattern 451 def: Pat<(VecF16 (pf1<fpround> HWF32:$Vuu)), 452 (V6_vdealh (V6_vconv_hf_qf32 453 (VecPF32 (Combinev (V6_vadd_sf (HiVec HvxWR:$Vuu), (V6_vd0)), 454 (V6_vadd_sf (LoVec HvxWR:$Vuu), (V6_vd0)) 455 ))))>; 456 // fpextend for QFloat is handled manually in HexagonISelLoweringHVX.cpp. 457} 458 459// HVX IEEE arithmetic Instructions 460let Predicates = [UseHVXV68, UseHVXIEEEFP] in { 461 def: Pat<(fadd HVF16:$Rs, HVF16:$Rt), 462 (V6_vadd_hf_hf HVF16:$Rs, HVF16:$Rt)>; 463 def: Pat<(fadd HVF32:$Rs, HVF32:$Rt), 464 (V6_vadd_sf_sf HVF32:$Rs, HVF32:$Rt)>; 465 def: Pat<(fsub HVF16:$Rs, HVF16:$Rt), 466 (V6_vsub_hf_hf HVF16:$Rs, HVF16:$Rt)>; 467 def: Pat<(fsub HVF32:$Rs, HVF32:$Rt), 468 (V6_vsub_sf_sf HVF32:$Rs, HVF32:$Rt)>; 469 def: Pat<(fmul HVF16:$Rs, HVF16:$Rt), 470 (V6_vmpy_hf_hf HVF16:$Rs, HVF16:$Rt)>; 471 def: Pat<(fmul HVF32:$Rs, HVF32:$Rt), 472 (V6_vmpy_sf_sf HVF32:$Rs, HVF32:$Rt)>; 473 474 def: Pat<(VecF16 (pf1<fpround> HWF32:$Vuu)), 475 (V6_vdealh (V6_vcvt_hf_sf (HiVec HvxWR:$Vuu), (LoVec HvxWR:$Vuu)))>; 476 def: Pat<(VecPF32 (pf1<fpextend> HVF16:$Vu)), 477 (V6_vcvt_sf_hf (V6_vshuffh HvxVR:$Vu))>; 478 479 def: OpR_R_pat<V6_vcvt_h_hf, Fptosi, VecI16, HVF16>; 480 def: OpR_R_pat<V6_vcvt_uh_hf, Fptoui, VecI16, HVF16>; 481 def: OpR_R_pat<V6_vcvt_hf_h, Sitofp, VecF16, HVI16>; 482 def: OpR_R_pat<V6_vcvt_hf_uh, Uitofp, VecF16, HVI16>; 483 484 def: Pat<(VecI8 (Fptosi HWF16:$Vu)), 485 (V6_vcvt_b_hf (HiVec $Vu), (LoVec $Vu))>; 486 def: Pat<(VecI8 (Fptoui HWF16:$Vu)), 487 (V6_vcvt_ub_hf (HiVec $Vu), (LoVec $Vu))>; 488 def: Pat<(VecPF16 (Sitofp HVI8:$Vu)), (V6_vcvt_hf_b HvxVR:$Vu)>; 489 def: Pat<(VecPF16 (Uitofp HVI8:$Vu)), (V6_vcvt_hf_ub HvxVR:$Vu)>; 490} 491 492let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { 493 def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt), 494 (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; 495 def: Pat<(vselect (qnot HQ16:$Qu), HVF16:$Vs, HVF16:$Vt), 496 (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; 497 498 def: Pat<(vselect HQ32:$Qu, HVF32:$Vs, HVF32:$Vt), 499 (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; 500 def: Pat<(vselect (qnot HQ32:$Qu), HVF32:$Vs, HVF32:$Vt), 501 (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; 502} 503 504let Predicates = [UseHVXV68, UseHVX128B, UseHVXQFloat] in { 505 let AddedComplexity = 220 in { 506 defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect, setgt, VecQ16, HVF16>; 507 defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect, setogt, VecQ16, HVF16>; 508 defm: MinMax_pats<V6_vmin_sf, V6_vmax_sf, vselect, setgt, VecQ32, HVF32>; 509 defm: MinMax_pats<V6_vmin_sf, V6_vmax_sf, vselect, setogt, VecQ32, HVF32>; 510 } 511 def: OpR_RR_pat<V6_vmin_hf, pf2<fminnum>, VecF16, HVF16>; 512 def: OpR_RR_pat<V6_vmax_hf, pf2<fmaxnum>, VecF16, HVF16>; 513 def: OpR_RR_pat<V6_vmin_sf, pf2<fminnum>, VecF32, HVF32>; 514 def: OpR_RR_pat<V6_vmax_sf, pf2<fmaxnum>, VecF32, HVF32>; 515} 516 517let Predicates = [UseHVXV68, UseHVX128B, UseHVXIEEEFP] in { 518 let AddedComplexity = 220 in { 519 defm: MinMax_pats<V6_vfmin_hf, V6_vfmax_hf, vselect, setgt, VecQ16, HVF16>; 520 defm: MinMax_pats<V6_vfmin_hf, V6_vfmax_hf, vselect, setogt, VecQ16, HVF16>; 521 defm: MinMax_pats<V6_vfmin_sf, V6_vfmax_sf, vselect, setgt, VecQ32, HVF32>; 522 defm: MinMax_pats<V6_vfmin_sf, V6_vfmax_sf, vselect, setogt, VecQ32, HVF32>; 523 } 524 def: OpR_RR_pat<V6_vfmin_hf, pf2<fminnum>, VecF16, HVF16>; 525 def: OpR_RR_pat<V6_vfmax_hf, pf2<fmaxnum>, VecF16, HVF16>; 526 def: OpR_RR_pat<V6_vfmin_sf, pf2<fminnum>, VecF32, HVF32>; 527 def: OpR_RR_pat<V6_vfmax_sf, pf2<fmaxnum>, VecF32, HVF32>; 528} 529 530let Predicates = [UseHVX] in { 531 // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), 532 // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, 533 // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). 534 def: Pat<(mul HVI8:$Vs, HVI8:$Vt), 535 (V6_vshuffeb (HiVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)), 536 (LoVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)))>; 537 def: Pat<(mul HVI16:$Vs, HVI16:$Vt), 538 (V6_vmpyih HvxVR:$Vs, HvxVR:$Vt)>; 539 def: Pat<(mul HVI32:$Vs, HVI32:$Vt), 540 (V6_vmpyiewuh_acc (V6_vmpyieoh HvxVR:$Vs, HvxVR:$Vt), 541 HvxVR:$Vs, HvxVR:$Vt)>; 542} 543 544let Predicates = [UseHVX] in { 545 def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>; 546 def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>; 547 def: Pat<(VecPI16 (zext HVI8:$Vs)), (VZxtb $Vs)>; 548 def: Pat<(VecPI32 (zext HVI16:$Vs)), (VZxth $Vs)>; 549 550 def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (VSxtb $Vs))>; 551 def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>; 552 def: Pat<(VecI32 (sext_invec HVI8:$Vs)), 553 (LoVec (VSxth (LoVec (VSxtb $Vs))))>; 554 def: Pat<(VecPI16 (sext_invec HWI8:$Vss)), (VSxtb (LoVec $Vss))>; 555 def: Pat<(VecPI32 (sext_invec HWI16:$Vss)), (VSxth (LoVec $Vss))>; 556 def: Pat<(VecPI32 (sext_invec HWI8:$Vss)), 557 (VSxth (LoVec (VSxtb (LoVec $Vss))))>; 558 559 def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (VZxtb $Vs))>; 560 def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>; 561 def: Pat<(VecI32 (zext_invec HVI8:$Vs)), 562 (LoVec (VZxth (LoVec (VZxtb $Vs))))>; 563 def: Pat<(VecPI16 (zext_invec HWI8:$Vss)), (VZxtb (LoVec $Vss))>; 564 def: Pat<(VecPI32 (zext_invec HWI16:$Vss)), (VZxth (LoVec $Vss))>; 565 def: Pat<(VecPI32 (zext_invec HWI8:$Vss)), 566 (VZxth (LoVec (VZxtb (LoVec $Vss))))>; 567 568 def: Pat<(VecI8 (trunc HWI16:$Vss)), 569 (V6_vpackeb (HiVec $Vss), (LoVec $Vss))>; 570 def: Pat<(VecI16 (trunc HWI32:$Vss)), 571 (V6_vpackeh (HiVec $Vss), (LoVec $Vss))>; 572 // Pattern for (v32i8 (trunc v32i32:$Vs)) after widening: 573 def: Pat<(VecI8 (trunc 574 (concat_vectors 575 (VecI16 (trunc (concat_vectors HVI32:$Vs, undef))), 576 undef))), 577 (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>; 578 579 def: Pat<(VecQ8 (trunc HVI8:$Vs)), 580 (V6_vandvrt HvxVR:$Vs, (ToI32 0x01010101))>; 581 def: Pat<(VecQ16 (trunc HVI16:$Vs)), 582 (V6_vandvrt HvxVR:$Vs, (ToI32 0x01010101))>; 583 def: Pat<(VecQ32 (trunc HVI32:$Vs)), 584 (V6_vandvrt HvxVR:$Vs, (ToI32 0x01010101))>; 585} 586 587let Predicates = [UseHVX] in { 588 // The "source" types are not legal, and there are no parameterized 589 // definitions for them, but they are length-specific. 590 let Predicates = [UseHVX,UseHVX64B] in { 591 def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v32i8)), 592 (V6_vasrh (V6_vaslh HVI16:$Vs, (ToI32 8)), (ToI32 8))>; 593 def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i8)), 594 (V6_vasrw (V6_vaslw HVI32:$Vs, (ToI32 24)), (ToI32 24))>; 595 def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i16)), 596 (V6_vasrw (V6_vaslw HVI32:$Vs, (ToI32 16)), (ToI32 16))>; 597 } 598 let Predicates = [UseHVX,UseHVX128B] in { 599 def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v64i8)), 600 (V6_vasrh (V6_vaslh HVI16:$Vs, (ToI32 8)), (ToI32 8))>; 601 def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i8)), 602 (V6_vasrw (V6_vaslw HVI32:$Vs, (ToI32 24)), (ToI32 24))>; 603 def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i16)), 604 (V6_vasrw (V6_vaslw HVI32:$Vs, (ToI32 16)), (ToI32 16))>; 605 } 606 607 // Take a pair of vectors Vt:Vs and shift them towards LSB by (Rt & HwLen). 608 def: Pat<(VecI8 (valign HVI8:$Vt, HVI8:$Vs, I32:$Rt)), 609 (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>; 610 def: Pat<(VecI16 (valign HVI16:$Vt, HVI16:$Vs, I32:$Rt)), 611 (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>; 612 def: Pat<(VecI32 (valign HVI32:$Vt, HVI32:$Vs, I32:$Rt)), 613 (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>; 614 615 def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt), 616 (V6_vshuffeb (V6_vaslh (HiVec (V6_vzb HvxVR:$Vs)), I32:$Rt), 617 (V6_vaslh (LoVec (V6_vzb HvxVR:$Vs)), I32:$Rt))>; 618 def: Pat<(HexagonVASR HVI8:$Vs, I32:$Rt), 619 (V6_vshuffeb (V6_vasrh (HiVec (V6_vsb HvxVR:$Vs)), I32:$Rt), 620 (V6_vasrh (LoVec (V6_vsb HvxVR:$Vs)), I32:$Rt))>; 621 def: Pat<(HexagonVLSR HVI8:$Vs, I32:$Rt), 622 (V6_vshuffeb (V6_vlsrh (HiVec (V6_vzb HvxVR:$Vs)), I32:$Rt), 623 (V6_vlsrh (LoVec (V6_vzb HvxVR:$Vs)), I32:$Rt))>; 624 625 def: Pat<(HexagonVASL HVI16:$Vs, I32:$Rt), (V6_vaslh HvxVR:$Vs, I32:$Rt)>; 626 def: Pat<(HexagonVASL HVI32:$Vs, I32:$Rt), (V6_vaslw HvxVR:$Vs, I32:$Rt)>; 627 def: Pat<(HexagonVASR HVI16:$Vs, I32:$Rt), (V6_vasrh HvxVR:$Vs, I32:$Rt)>; 628 def: Pat<(HexagonVASR HVI32:$Vs, I32:$Rt), (V6_vasrw HvxVR:$Vs, I32:$Rt)>; 629 def: Pat<(HexagonVLSR HVI16:$Vs, I32:$Rt), (V6_vlsrh HvxVR:$Vs, I32:$Rt)>; 630 def: Pat<(HexagonVLSR HVI32:$Vs, I32:$Rt), (V6_vlsrw HvxVR:$Vs, I32:$Rt)>; 631 632 def: Pat<(add HVI32:$Vx, (HexagonVASL HVI32:$Vu, I32:$Rt)), 633 (V6_vaslw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>; 634 def: Pat<(add HVI32:$Vx, (HexagonVASR HVI32:$Vu, I32:$Rt)), 635 (V6_vasrw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>; 636 637 def: Pat<(shl HVI8:$Vs, HVI8:$Vt), 638 (V6_vshuffeb (V6_vaslhv (HiVec (V6_vzb $Vs)), (HiVec (V6_vzb $Vt))), 639 (V6_vaslhv (LoVec (V6_vzb $Vs)), (LoVec (V6_vzb $Vt))))>; 640 def: Pat<(sra HVI8:$Vs, HVI8:$Vt), 641 (V6_vshuffeb (V6_vasrhv (HiVec (V6_vsb $Vs)), (HiVec (V6_vzb $Vt))), 642 (V6_vasrhv (LoVec (V6_vsb $Vs)), (LoVec (V6_vzb $Vt))))>; 643 def: Pat<(srl HVI8:$Vs, HVI8:$Vt), 644 (V6_vshuffeb (V6_vlsrhv (HiVec (V6_vzb $Vs)), (HiVec (V6_vzb $Vt))), 645 (V6_vlsrhv (LoVec (V6_vzb $Vs)), (LoVec (V6_vzb $Vt))))>; 646 647 def: Pat<(shl HVI16:$Vs, HVI16:$Vt), (V6_vaslhv HvxVR:$Vs, HvxVR:$Vt)>; 648 def: Pat<(shl HVI32:$Vs, HVI32:$Vt), (V6_vaslwv HvxVR:$Vs, HvxVR:$Vt)>; 649 def: Pat<(sra HVI16:$Vs, HVI16:$Vt), (V6_vasrhv HvxVR:$Vs, HvxVR:$Vt)>; 650 def: Pat<(sra HVI32:$Vs, HVI32:$Vt), (V6_vasrwv HvxVR:$Vs, HvxVR:$Vt)>; 651 def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>; 652 def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>; 653 654 // Mfshl hi, lo, amt 655 def: Pat<(Mfshl HVI8:$Vu, HVI8:$Vv, HVI8:$Vs), 656 (V6_vshuffob (V6_vaslhv (HiVec (V6_vshufoeb $Vu, $Vv)), 657 (HiVec (V6_vzb $Vs))), 658 (V6_vaslhv (LoVec (V6_vshufoeb $Vu, $Vv)), 659 (LoVec (V6_vzb $Vs))))>; 660 let Predicates = [UseHVX,UseHVXV60] in { 661 // V60 doesn't produce 0 on shifts by bitwidth, e.g. Vv.h << 16-0 662 def: Pat<(Mfshl HVI16:$Vu, HVI16:$Vv, HVI16:$Vs), 663 (V6_vmux (V6_veqh $Vs, (V6_vd0)), 664 $Vu, 665 (V6_vor (V6_vaslhv $Vu, $Vs), 666 (V6_vlsrhv $Vv, (VSubih 16, $Vs))))>; 667 def: Pat<(Mfshl HVI32:$Vu, HVI32:$Vv, HVI32:$Vs), 668 (V6_vmux (V6_veqw (V6_vand $Vs, (PS_vsplatiw (i32 31))), (V6_vd0)), 669 $Vu, 670 (V6_vor (V6_vaslwv $Vu, $Vs), 671 (V6_vlsrwv $Vv, (VSubiw 32, $Vs))))>; 672 } 673 let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in { 674 // Do it as (Vu << Vs) | (Vv >> (BW-Vs)). 675 // For Vs == 0 becomes Vu | (Vv >> -BW), since the shift amount is 676 // sign-extended. Then this becomes Vu | (Vv << BW) == Vu. 677 def: Pat<(Mfshl HVI16:$Vu, HVI16:$Vv, HVI16:$Vs), 678 (V6_vor (V6_vaslhv $Vu, $Vs), 679 (V6_vlsrhv $Vv, (VSubih 16, $Vs)))>; 680 def: Pat<(Mfshl HVI32:$Vu, HVI32:$Vv, HVI32:$Vs), 681 (V6_vor (V6_vaslwv $Vu, $Vs), 682 (V6_vlsrwv $Vv, (VSubiw 32, $Vs)))>; 683 } 684 let Predicates = [UseHVX,UseHVXV66], AddedComplexity = 20 in { 685 // Assume Vs > 0 (and within bit width) 686 // Vx[1]:Vx[0] = V6_vasr_into Vx[0], Vv, Vs 687 // --> (Vx[0]:Vx[0] & (ffffffff << -Vs)) | (Vv:00000000 << -Vs) 688 // i.e. Vx[1] = insert ((Vv << -Vs) -> Vx[0]) 689 def: Pat<(Mfshl HVI32:$Vu, HVI32:$Vv, HVI32:$Vs), 690 (HiVec (V6_vasr_into (Combinev (VecI32 (IMPLICIT_DEF)), 691 (V6_vlsrwv $Vv, (VSubiw 32, $Vs))), 692 $Vu, 693 (V6_vsubw (V6_vd0), $Vs)))>; 694 } 695 696 // Mfshr hi, lo, amt 697 def: Pat<(Mfshr HVI8:$Vu, HVI8:$Vv, HVI8:$Vs), 698 (V6_vshuffeb (V6_vlsrhv (HiVec (V6_vshufoeb $Vu, $Vv)), 699 (HiVec (V6_vzb $Vs))), 700 (V6_vlsrhv (LoVec (V6_vshufoeb $Vu, $Vv)), 701 (LoVec (V6_vzb $Vs))))>; 702 let Predicates = [UseHVX,UseHVXV60] in { 703 def: Pat<(Mfshr HVI16:$Vu, HVI16:$Vv, HVI16:$Vs), 704 (V6_vmux (V6_veqh $Vs, (V6_vd0)), 705 $Vv, 706 (V6_vor (V6_vaslhv $Vu, (VSubih 16, $Vs)), 707 (V6_vlsrhv $Vv, $Vs)))>; 708 def: Pat<(Mfshr HVI32:$Vu, HVI32:$Vv, HVI32:$Vs), 709 (V6_vmux (V6_veqw $Vs, (V6_vd0)), 710 $Vv, 711 (V6_vor (V6_vaslwv $Vu, (VSubiw 32, $Vs)), 712 (V6_vlsrwv $Vv, $Vs)))>; 713 } 714 let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in { 715 // Do it as (Vu >> -(BW-Vs)) | (Vv >> Vs). 716 // For Vs == 0 becomes (Vu << BW) | Vs == 0 | Vv 717 def: Pat<(Mfshr HVI16:$Vu, HVI16:$Vv, HVI16:$Vs), 718 (V6_vor (V6_vlsrhv $Vu, (V6_vsubh $Vs, (PS_vsplatih (i32 16)))), 719 (V6_vlsrhv $Vv, $Vs))>; 720 def: Pat<(Mfshr HVI32:$Vu, HVI32:$Vv, HVI32:$Vs), 721 (V6_vor (V6_vlsrwv $Vu, (V6_vsubw $Vs, (PS_vsplatiw (i32 32)))), 722 (V6_vlsrwv $Vv, $Vs))>; 723 } 724 let Predicates = [UseHVX,UseHVXV66], AddedComplexity = 20 in { 725 // Assume Vs > 0 (and within bit width) 726 // Vx[1]:Vx[0] = V6_vasr_into Vx[0], Vv, Vs 727 // --> (Vx[0]:Vx[0] & (ffffffff >> Vs)) | (Vv:00000000 >> Vs) 728 // i.e. Vx[0] = insert ((Vv >> Vs) -> Vx[0]) 729 def: Pat<(Mfshr HVI32:$Vu, HVI32:$Vv, HVI32:$Vs), 730 (LoVec (V6_vasr_into (Combinev (VecI32 (IMPLICIT_DEF)), 731 (V6_vlsrwv $Vv, $Vs)), 732 $Vu, 733 $Vs))>; 734 } 735 736 def: Pat<(VecI16 (bswap HVI16:$Vs)), 737 (V6_vdelta HvxVR:$Vs, (PS_vsplatib (i32 0x01)))>; 738 def: Pat<(VecI32 (bswap HVI32:$Vs)), 739 (V6_vdelta HvxVR:$Vs, (PS_vsplatib (i32 0x03)))>; 740 741 def: Pat<(VecI8 (ctpop HVI8:$Vs)), 742 (V6_vshuffeb (V6_vpopcounth (HiVec (V6_vzb HvxVR:$Vs))), 743 (V6_vpopcounth (LoVec (V6_vzb HvxVR:$Vs))))>; 744 def: Pat<(VecI16 (ctpop HVI16:$Vs)), (V6_vpopcounth HvxVR:$Vs)>; 745 def: Pat<(VecI32 (ctpop HVI32:$Vs)), 746 (V6_vaddw (LoVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))), 747 (HiVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))))>; 748 749 def: Pat<(VecI8 (ctlz HVI8:$Vs)), 750 (V6_vsubb (V6_vshuffeb (V6_vcl0h (HiVec (V6_vzb HvxVR:$Vs))), 751 (V6_vcl0h (LoVec (V6_vzb HvxVR:$Vs)))), 752 (PS_vsplatib (i32 0x08)))>; 753 754 def: Pat<(VecI16 (ctlz HVI16:$Vs)), (V6_vcl0h HvxVR:$Vs)>; 755 def: Pat<(VecI32 (ctlz HVI32:$Vs)), (V6_vcl0w HvxVR:$Vs)>; 756} 757 758class HvxSel_pat<InstHexagon MI, PatFrag RegPred> 759 : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt), 760 (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>; 761 762let Predicates = [UseHVX] in { 763 def: HvxSel_pat<PS_vselect, HVI8>; 764 def: HvxSel_pat<PS_vselect, HVI16>; 765 def: HvxSel_pat<PS_vselect, HVI32>; 766 def: HvxSel_pat<PS_wselect, HWI8>; 767 def: HvxSel_pat<PS_wselect, HWI16>; 768 def: HvxSel_pat<PS_wselect, HWI32>; 769} 770 771def V2Q: OutPatFrag<(ops node:$Vs), (V6_vandvrt $Vs, (ToI32 -1))>; 772 773let Predicates = [UseHVX] in { 774 def: Pat<(select I1:$Pu, VecQ8:$Qs, VecQ8:$Qt), 775 (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>; 776 def: Pat<(select I1:$Pu, VecQ16:$Qs, VecQ16:$Qt), 777 (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>; 778 def: Pat<(select I1:$Pu, VecQ32:$Qs, VecQ32:$Qt), 779 (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>; 780} 781 782let Predicates = [UseHVX] in { 783 def: Pat<(VecQ8 (qtrue)), (PS_qtrue)>; 784 def: Pat<(VecQ16 (qtrue)), (PS_qtrue)>; 785 def: Pat<(VecQ32 (qtrue)), (PS_qtrue)>; 786 def: Pat<(VecQ8 (qfalse)), (PS_qfalse)>; 787 def: Pat<(VecQ16 (qfalse)), (PS_qfalse)>; 788 def: Pat<(VecQ32 (qfalse)), (PS_qfalse)>; 789 790 def: Pat<(vnot HQ8:$Qs), (V6_pred_not HvxQR:$Qs)>; 791 def: Pat<(vnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>; 792 def: Pat<(vnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>; 793 def: Pat<(qnot HQ8:$Qs), (V6_pred_not HvxQR:$Qs)>; 794 def: Pat<(qnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>; 795 def: Pat<(qnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>; 796 797 def: OpR_RR_pat<V6_pred_and, And, VecQ8, HQ8>; 798 def: OpR_RR_pat<V6_pred_and, And, VecQ16, HQ16>; 799 def: OpR_RR_pat<V6_pred_and, And, VecQ32, HQ32>; 800 def: OpR_RR_pat<V6_pred_or, Or, VecQ8, HQ8>; 801 def: OpR_RR_pat<V6_pred_or, Or, VecQ16, HQ16>; 802 def: OpR_RR_pat<V6_pred_or, Or, VecQ32, HQ32>; 803 def: OpR_RR_pat<V6_pred_xor, Xor, VecQ8, HQ8>; 804 def: OpR_RR_pat<V6_pred_xor, Xor, VecQ16, HQ16>; 805 def: OpR_RR_pat<V6_pred_xor, Xor, VecQ32, HQ32>; 806 807 def: OpR_RR_pat<V6_pred_and_n, VNot2<And, qnot>, VecQ8, HQ8>; 808 def: OpR_RR_pat<V6_pred_and_n, VNot2<And, qnot>, VecQ16, HQ16>; 809 def: OpR_RR_pat<V6_pred_and_n, VNot2<And, qnot>, VecQ32, HQ32>; 810 def: OpR_RR_pat<V6_pred_or_n, VNot2<Or, qnot>, VecQ8, HQ8>; 811 def: OpR_RR_pat<V6_pred_or_n, VNot2<Or, qnot>, VecQ16, HQ16>; 812 def: OpR_RR_pat<V6_pred_or_n, VNot2<Or, qnot>, VecQ32, HQ32>; 813 814 def: OpR_RR_pat<V6_veqb, seteq, VecQ8, HVI8>; 815 def: OpR_RR_pat<V6_veqh, seteq, VecQ16, HVI16>; 816 def: OpR_RR_pat<V6_veqw, seteq, VecQ32, HVI32>; 817 def: OpR_RR_pat<V6_vgtb, setgt, VecQ8, HVI8>; 818 def: OpR_RR_pat<V6_vgth, setgt, VecQ16, HVI16>; 819 def: OpR_RR_pat<V6_vgtw, setgt, VecQ32, HVI32>; 820 def: OpR_RR_pat<V6_vgtub, setugt, VecQ8, HVI8>; 821 def: OpR_RR_pat<V6_vgtuh, setugt, VecQ16, HVI16>; 822 def: OpR_RR_pat<V6_vgtuw, setugt, VecQ32, HVI32>; 823 824 def: AccRRR_pat<V6_veqb_and, And, seteq, HQ8, HVI8, HVI8>; 825 def: AccRRR_pat<V6_veqb_or, Or, seteq, HQ8, HVI8, HVI8>; 826 def: AccRRR_pat<V6_veqb_xor, Xor, seteq, HQ8, HVI8, HVI8>; 827 def: AccRRR_pat<V6_veqh_and, And, seteq, HQ16, HVI16, HVI16>; 828 def: AccRRR_pat<V6_veqh_or, Or, seteq, HQ16, HVI16, HVI16>; 829 def: AccRRR_pat<V6_veqh_xor, Xor, seteq, HQ16, HVI16, HVI16>; 830 def: AccRRR_pat<V6_veqw_and, And, seteq, HQ32, HVI32, HVI32>; 831 def: AccRRR_pat<V6_veqw_or, Or, seteq, HQ32, HVI32, HVI32>; 832 def: AccRRR_pat<V6_veqw_xor, Xor, seteq, HQ32, HVI32, HVI32>; 833 834 def: AccRRR_pat<V6_vgtb_and, And, setgt, HQ8, HVI8, HVI8>; 835 def: AccRRR_pat<V6_vgtb_or, Or, setgt, HQ8, HVI8, HVI8>; 836 def: AccRRR_pat<V6_vgtb_xor, Xor, setgt, HQ8, HVI8, HVI8>; 837 def: AccRRR_pat<V6_vgth_and, And, setgt, HQ16, HVI16, HVI16>; 838 def: AccRRR_pat<V6_vgth_or, Or, setgt, HQ16, HVI16, HVI16>; 839 def: AccRRR_pat<V6_vgth_xor, Xor, setgt, HQ16, HVI16, HVI16>; 840 def: AccRRR_pat<V6_vgtw_and, And, setgt, HQ32, HVI32, HVI32>; 841 def: AccRRR_pat<V6_vgtw_or, Or, setgt, HQ32, HVI32, HVI32>; 842 def: AccRRR_pat<V6_vgtw_xor, Xor, setgt, HQ32, HVI32, HVI32>; 843 844 def: AccRRR_pat<V6_vgtub_and, And, setugt, HQ8, HVI8, HVI8>; 845 def: AccRRR_pat<V6_vgtub_or, Or, setugt, HQ8, HVI8, HVI8>; 846 def: AccRRR_pat<V6_vgtub_xor, Xor, setugt, HQ8, HVI8, HVI8>; 847 def: AccRRR_pat<V6_vgtuh_and, And, setugt, HQ16, HVI16, HVI16>; 848 def: AccRRR_pat<V6_vgtuh_or, Or, setugt, HQ16, HVI16, HVI16>; 849 def: AccRRR_pat<V6_vgtuh_xor, Xor, setugt, HQ16, HVI16, HVI16>; 850 def: AccRRR_pat<V6_vgtuw_and, And, setugt, HQ32, HVI32, HVI32>; 851 def: AccRRR_pat<V6_vgtuw_or, Or, setugt, HQ32, HVI32, HVI32>; 852 def: AccRRR_pat<V6_vgtuw_xor, Xor, setugt, HQ32, HVI32, HVI32>; 853} 854 855let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { 856 def: OpR_RR_pat<V6_veqh, seteq, VecQ16, HVF16>; 857 def: OpR_RR_pat<V6_veqh, setoeq, VecQ16, HVF16>; 858 def: OpR_RR_pat<V6_veqh, setueq, VecQ16, HVF16>; 859 def: OpR_RR_pat<V6_vgthf, setgt, VecQ16, HVF16>; 860 def: OpR_RR_pat<V6_vgthf, setogt, VecQ16, HVF16>; 861 def: OpR_RR_pat<V6_vgthf, setugt, VecQ16, HVF16>; 862 863 def: OpR_RR_pat<V6_veqw, seteq, VecQ32, HVF32>; 864 def: OpR_RR_pat<V6_veqw, setoeq, VecQ32, HVF32>; 865 def: OpR_RR_pat<V6_veqw, setueq, VecQ32, HVF32>; 866 def: OpR_RR_pat<V6_vgtsf, setgt, VecQ32, HVF32>; 867 def: OpR_RR_pat<V6_vgtsf, setogt, VecQ32, HVF32>; 868 def: OpR_RR_pat<V6_vgtsf, setugt, VecQ32, HVF32>; 869 870 def: AccRRR_pat<V6_veqh_and, And, seteq, HQ16, HVF16, HVF16>; 871 def: AccRRR_pat<V6_veqh_or, Or, seteq, HQ16, HVF16, HVF16>; 872 def: AccRRR_pat<V6_veqh_xor, Xor, seteq, HQ16, HVF16, HVF16>; 873 def: AccRRR_pat<V6_veqh_and, And, setoeq, HQ16, HVF16, HVF16>; 874 def: AccRRR_pat<V6_veqh_or, Or, setoeq, HQ16, HVF16, HVF16>; 875 def: AccRRR_pat<V6_veqh_xor, Xor, setoeq, HQ16, HVF16, HVF16>; 876 def: AccRRR_pat<V6_veqh_and, And, setueq, HQ16, HVF16, HVF16>; 877 def: AccRRR_pat<V6_veqh_or, Or, setueq, HQ16, HVF16, HVF16>; 878 def: AccRRR_pat<V6_veqh_xor, Xor, setueq, HQ16, HVF16, HVF16>; 879 def: AccRRR_pat<V6_vgthf_and, And, setgt, HQ16, HVF16, HVF16>; 880 def: AccRRR_pat<V6_vgthf_or, Or, setgt, HQ16, HVF16, HVF16>; 881 def: AccRRR_pat<V6_vgthf_xor, Xor, setgt, HQ16, HVF16, HVF16>; 882 def: AccRRR_pat<V6_vgthf_and, And, setogt, HQ16, HVF16, HVF16>; 883 def: AccRRR_pat<V6_vgthf_or, Or, setogt, HQ16, HVF16, HVF16>; 884 def: AccRRR_pat<V6_vgthf_xor, Xor, setogt, HQ16, HVF16, HVF16>; 885 def: AccRRR_pat<V6_vgthf_and, And, setugt, HQ16, HVF16, HVF16>; 886 def: AccRRR_pat<V6_vgthf_or, Or, setugt, HQ16, HVF16, HVF16>; 887 def: AccRRR_pat<V6_vgthf_xor, Xor, setugt, HQ16, HVF16, HVF16>; 888 889 def: AccRRR_pat<V6_veqw_and, And, seteq, HQ32, HVF32, HVF32>; 890 def: AccRRR_pat<V6_veqw_or, Or, seteq, HQ32, HVF32, HVF32>; 891 def: AccRRR_pat<V6_veqw_xor, Xor, seteq, HQ32, HVF32, HVF32>; 892 def: AccRRR_pat<V6_veqw_and, And, setoeq, HQ32, HVF32, HVF32>; 893 def: AccRRR_pat<V6_veqw_or, Or, setoeq, HQ32, HVF32, HVF32>; 894 def: AccRRR_pat<V6_veqw_xor, Xor, setoeq, HQ32, HVF32, HVF32>; 895 def: AccRRR_pat<V6_veqw_and, And, setueq, HQ32, HVF32, HVF32>; 896 def: AccRRR_pat<V6_veqw_or, Or, setueq, HQ32, HVF32, HVF32>; 897 def: AccRRR_pat<V6_veqw_xor, Xor, setueq, HQ32, HVF32, HVF32>; 898 def: AccRRR_pat<V6_vgtsf_and, And, setgt, HQ32, HVF32, HVF32>; 899 def: AccRRR_pat<V6_vgtsf_or, Or, setgt, HQ32, HVF32, HVF32>; 900 def: AccRRR_pat<V6_vgtsf_xor, Xor, setgt, HQ32, HVF32, HVF32>; 901 def: AccRRR_pat<V6_vgtsf_and, And, setogt, HQ32, HVF32, HVF32>; 902 def: AccRRR_pat<V6_vgtsf_or, Or, setogt, HQ32, HVF32, HVF32>; 903 def: AccRRR_pat<V6_vgtsf_xor, Xor, setogt, HQ32, HVF32, HVF32>; 904 def: AccRRR_pat<V6_vgtsf_and, And, setugt, HQ32, HVF32, HVF32>; 905 def: AccRRR_pat<V6_vgtsf_or, Or, setugt, HQ32, HVF32, HVF32>; 906 def: AccRRR_pat<V6_vgtsf_xor, Xor, setugt, HQ32, HVF32, HVF32>; 907 908 def: Pat<(VecQ16 (setone HVF16:$Vt, HVF16:$Vu)), 909 (V6_pred_not (V6_veqh HvxVR:$Vt, HvxVR:$Vu))>; 910 911 def: Pat<(VecQ32 (setone HVF32:$Vt, HVF32:$Vu)), 912 (V6_pred_not (V6_veqw HvxVR:$Vt, HvxVR:$Vu))>; 913} 914 915// Multiply high for non-i32 types 916def: Pat<(VecI8 (mulhs HVI8:$Vu, HVI8:$Vv)), 917 (V6_vshuffob (HiVec (V6_vmpybv $Vu, $Vv)), 918 (LoVec (V6_vmpybv $Vu, $Vv)))>; 919def: Pat<(VecI16 (mulhs HVI16:$Vu, HVI16:$Vv)), 920 (V6_vshufoh (HiVec (V6_vmpyhv $Vu, $Vv)), 921 (LoVec (V6_vmpyhv $Vu, $Vv)))>; 922def: Pat<(VecI8 (mulhu HVI8:$Vu, HVI8:$Vv)), 923 (V6_vshuffob (HiVec (V6_vmpyubv $Vu, $Vv)), 924 (LoVec (V6_vmpyubv $Vu, $Vv)))>; 925def: Pat<(VecI16 (mulhu HVI16:$Vu, HVI16:$Vv)), 926 (V6_vshufoh (HiVec (V6_vmpyuhv $Vu, $Vv)), 927 (LoVec (V6_vmpyuhv $Vu, $Vv)))>; 928let Predicates = [UseHVXV69], AddedComplexity = 20 in { 929 def: Pat<(VecI16 (mulhu HVI16:$Vu, HVI16:$Vv)), 930 (V6_vmpyuhvs $Vu, $Vv)>; 931} 932 933let Predicates = [UseHVXV60] in { 934 // V60 doesn't have vabsb or byte shifts. 935 // Do the "mask = x >> width-1; abs = (x + mask) ^ mask" trick. 936 // v31:30.h = vsxt(Inp.b) ; generate masks in odd bytes in 937 // ; interleaved half-words 938 // v29:28.b = vshuffoe(v31.b,v30.b) ; collect odd/even bytes, masks = v29 939 // v27.b = vadd(Inp.b,v29.b) ; x + masks 940 // Abs = vxor(v27,v29) ; ^ masks 941 def: Pat<(VecI8 (abs HVI8:$Vs)), 942 (V6_vxor HvxVR:$Vs, 943 (V6_vaddb HvxVR:$Vs, 944 (HiVec 945 (V6_vshufoeb 946 (HiVec (V6_vsb HvxVR:$Vs)), 947 (LoVec (V6_vsb HvxVR:$Vs))))))>; 948} 949 950let Predicates = [UseHVXV62], AddedComplexity = 20 in { 951 def: Pat<(VecI8 (abs HVI8:$Vs)), (V6_vabsb HvxVR:$Vs)>; 952} 953 954def: Pat<(VecI16 (abs HVI16:$Vs)), (V6_vabsh HvxVR:$Vs)>; 955def: Pat<(VecI32 (abs HVI32:$Vs)), (V6_vabsw HvxVR:$Vs)>; 956 957// If a node takes an MVT type as a parameter, the argument must be 958// a name of a member of MVT. 959multiclass Saturates<ValueType HvxTy_i8, ValueType HvxTy_i16> { 960 def: Pat<(VecI8 (ssat HWI16:$Vss, HvxTy_i8)), 961 (V6_vpackhb_sat (HiVec $Vss), (LoVec $Vss))>; 962 def: Pat<(VecI8 (ssat (concat_vectors HWI32:$Vss, HWI32:$Vtt), HvxTy_i8)), 963 (V6_vpackhb_sat (V6_vpackwh_sat (HiVec $Vtt), (LoVec $Vtt)), 964 (V6_vpackwh_sat (HiVec $Vss), (LoVec $Vss)))>; 965 def: Pat<(VecI16 (ssat HWI32:$Vss, HvxTy_i16)), 966 (V6_vpackwh_sat (HiVec $Vss), (LoVec $Vss))>; 967 968 def: Pat<(VecI8 (usat HWI16:$Vss, HvxTy_i8)), 969 (V6_vpackhub_sat (HiVec $Vss), (LoVec $Vss))>; 970 def: Pat<(VecI8 (usat (concat_vectors HWI32:$Vss, HWI32:$Vtt), HvxTy_i8)), 971 (V6_vpackhub_sat (V6_vpackwuh_sat (HiVec $Vtt), (LoVec $Vtt)), 972 (V6_vpackwuh_sat (HiVec $Vss), (LoVec $Vss)))>; 973 def: Pat<(VecI16 (usat HWI32:$Vss, HvxTy_i16)), 974 (V6_vpackwuh_sat (HiVec $Vss), (LoVec $Vss))>; 975} 976let Predicates = [UseHVX64B] in { 977 defm: Saturates<v64i8, v32i16>; 978} 979let Predicates = [UseHVX128B] in { 980 defm: Saturates<v128i8, v64i16>; 981} 982