xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td (revision 963f5dc7a30624e95d72fb7f87b8892651164e46)
1//===- HexagonPatternsHVX.td - Selection Patterns for HVX --*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9
// Type profile for a node with one result and one operand, both of which
// must be vectors. No relation between the two types is imposed.
def SDTVecUnaryOp: SDTypeProfile<1, 1, [
  SDTCisVec<0>,
  SDTCisVec<1>
]>;

// Type profile for a node with one result and two operands. The result and
// the first operand must be vectors, and the second operand must have the
// same type as the first.
def SDTVecBinOp: SDTypeProfile<1, 2, [
  SDTCisVec<0>,
  SDTCisVec<1>,
  SDTCisSameAs<1, 2>
]>;
15
// VEXTRACTW: i32 result; operands are a vector and an i32.
def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2, [
  SDTCisVT<0, i32>,
  SDTCisVec<1>,
  SDTCisVT<2, i32>
]>;
def HexagonVEXTRACTW: SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>;

// VINSERTW0: vector result with the same type as the first operand; the
// second operand is an i32.
def SDTHexagonVINSERTW0: SDTypeProfile<1, 2, [
  SDTCisVec<0>,
  SDTCisSameAs<0, 1>,
  SDTCisVT<2, i32>
]>;
def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;
23
// Immediate transform that yields half of the subtarget's HVX vector length
// as an i32 target constant. The value of the matched immediate itself is
// not used; only its debug location is taken from N.
def HwLen2: SDNodeXForm<imm, [{
  const auto &HST = static_cast<const HexagonSubtarget&>(CurDAG->getSubtarget());
  auto HalfLen = HST.getVectorLength()/2;
  return CurDAG->getTargetConstant(HalfLen, SDLoc(N), MVT::i32);
}]>;
28
// Turn a vector predicate into a vector register value by applying
// V6_vandqrt to the predicate and a scalar register holding -1.
def Q2V: OutPatFrag<(ops node:$Pred),
  (V6_vandqrt $Pred, (A2_tfrsi -1))>;
30
// Build an HvxWR register pair from two vectors: the first operand is
// placed in vsub_hi, the second in vsub_lo.
def Combinev: OutPatFrag<(ops node:$Hi, node:$Lo),
  (REG_SEQUENCE HvxWR, $Hi, vsub_hi, $Lo, vsub_lo)>;
33
// Combine two vector predicates into one. Each predicate is converted to a
// vector (Q2V) and packed with a zero vector via V6_vpackeb; the result for
// the first operand is rotated by half the vector length, OR-ed with the
// result for the second operand, and converted back to a predicate with
// V6_vandvrt against -1.
def Combineq: OutPatFrag<(ops node:$Qa, node:$Qb),
  (V6_vandvrt
    (V6_vor
      (V6_vror
        (V6_vpackeb (V6_vd0), (Q2V $Qa)),
        (A2_tfrsi (HwLen2 (i32 0)))),  // Half the vector length
      (V6_vpackeb (V6_vd0), (Q2V $Qb))),
    (A2_tfrsi -1))>;
41
// Extract the vsub_lo / vsub_hi subregister of a vector pair.
def LoVec: OutPatFrag<(ops node:$Pair), (EXTRACT_SUBREG $Pair, vsub_lo)>;
def HiVec: OutPatFrag<(ops node:$Pair), (EXTRACT_SUBREG $Pair, vsub_hi)>;
44
// Hexagon-specific SelectionDAG nodes used by the HVX patterns.
// Two-operand node (operands of identical vector type):
def HexagonQCAT: SDNode<"HexagonISD::QCAT", SDTVecBinOp>;
// Leaf nodes (SDTVecLeaf is declared outside of this section):
def HexagonQTRUE: SDNode<"HexagonISD::QTRUE", SDTVecLeaf>;
def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>;
// One-operand vector-to-vector nodes:
def HexagonVPACKL: SDNode<"HexagonISD::VPACKL", SDTVecUnaryOp>;
def HexagonVUNPACK: SDNode<"HexagonISD::VUNPACK", SDTVecUnaryOp>;
def HexagonVUNPACKU: SDNode<"HexagonISD::VUNPACKU", SDTVecUnaryOp>;
51
// Operand-less fragments: a splat of i32 0, and the QTRUE / QFALSE nodes.
def vzero: PatFrag<(ops), (splat_vector (i32 0))>;
def qtrue: PatFrag<(ops), (HexagonQTRUE)>;
def qfalse: PatFrag<(ops), (HexagonQFALSE)>;

// Fragment wrapping the two-operand QCAT node.
def qcat: PatFrag<(ops node:$Qa, node:$Qb), (HexagonQCAT node:$Qa, node:$Qb)>;
57
// Predicate negation, expressed as an xor with the all-true predicate.
def qnot: PatFrag<(ops node:$Pred), (xor node:$Pred, qtrue)>;

// Single-operand fragments wrapping the VPACKL / VUNPACK / VUNPACKU nodes.
def vpackl: PatFrag<(ops node:$Vec), (HexagonVPACKL node:$Vec)>;
def vunpack: PatFrag<(ops node:$Vec), (HexagonVUNPACK node:$Vec)>;
def vunpacku: PatFrag<(ops node:$Vec), (HexagonVUNPACKU node:$Vec)>;
62
// Output fragments for the unpack instructions. The patterns in this file
// use VSxt* for sign extension and VZxt* for zero extension; the b/h suffix
// selects the byte or halfword variant of the instruction.
def VSxtb: OutPatFrag<(ops node:$Vec), (V6_vunpackb $Vec)>;
def VSxth: OutPatFrag<(ops node:$Vec), (V6_vunpackh $Vec)>;
def VZxtb: OutPatFrag<(ops node:$Vec), (V6_vunpackub $Vec)>;
def VZxth: OutPatFrag<(ops node:$Vec), (V6_vunpackuh $Vec)>;
67
// Matches an i32 immediate that is usable as a vector memory offset: it
// must be a multiple of the HvxVR spill size, and the resulting multiple
// must fit in a signed 4-bit value.
def IsVecOff : PatLeaf<(i32 imm), [{
  const int32_t Off = N->getSExtValue();
  const int32_t VecBytes = HRI->getSpillSize(Hexagon::HvxVRRegClass);
  assert(isPowerOf2_32(VecBytes));
  // The low bits must be clear, i.e. the offset is a whole number of vectors.
  const bool WholeVectors = (uint32_t(Off) & (uint32_t(VecBytes)-1)) == 0;
  // The vector count is only computed (and range-checked) when that holds.
  return WholeVectors && isInt<4>(Off >> Log2_32(VecBytes));
}]>;
77
78
// A load for which isAlignedMemNode returns true.
def alignedload: PatFrag<(ops node:$addr), (load $addr), [{
  const auto *Mem = dyn_cast<MemSDNode>(N);
  return isAlignedMemNode(Mem);
}]>;

// A load for which isAlignedMemNode returns false.
def unalignedload: PatFrag<(ops node:$addr), (load $addr), [{
  const auto *Mem = dyn_cast<MemSDNode>(N);
  return !isAlignedMemNode(Mem);
}]>;
86
// A store for which isAlignedMemNode returns true.
def alignedstore: PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
  const auto *Mem = dyn_cast<MemSDNode>(N);
  return isAlignedMemNode(Mem);
}]>;

// A store for which isAlignedMemNode returns false.
def unalignedstore: PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
  const auto *Mem = dyn_cast<MemSDNode>(N);
  return !isAlignedMemNode(Mem);
}]>;
94
95
96// HVX loads
97
// Loads addressed through a frame index.
//   MI      - load instruction to select
//   Load    - load fragment to match
//   ResType - type of the loaded value
//   ImmPred - predicate the immediate offset has to satisfy
// Covers frame-index + offset (written as add or as IsOrAdd) and a bare
// frame index, which is selected with offset 0.
multiclass HvxLdfi_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                       PatFrag ImmPred> {
  def: Pat<(ResType (Load (add (i32 AddrFI:$Frame), ImmPred:$Imm))),
           (MI AddrFI:$Frame, imm:$Imm)>;
  def: Pat<(ResType (Load (IsOrAdd (i32 AddrFI:$Frame), ImmPred:$Imm))),
           (MI AddrFI:$Frame, imm:$Imm)>;
  def: Pat<(ResType (Load AddrFI:$Frame)),
           (ResType (MI AddrFI:$Frame, 0))>;
}
106
// Loads addressed through a general i32 register: register + offset, and a
// bare register, which is selected with offset 0. Parameters have the same
// roles as in HvxLdfi_pat.
multiclass HvxLdgi_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                     PatFrag ImmPred> {
  def: Pat<(ResType (Load (add I32:$Base, ImmPred:$Imm))),
           (MI I32:$Base, imm:$Imm)>;
  def: Pat<(ResType (Load I32:$Base)),
           (MI I32:$Base, 0)>;
}
114
// Loads from the constant pool. The address is materialized with A2_tfrsi
// for a HexagonCP address and with C4_addipc for a HexagonAtPcrel address;
// the load itself uses offset 0.
multiclass HvxLdc_pat<InstHexagon MI, PatFrag Load, ValueType ResType> {
  // The HVX selection code for shuffles can generate vector constants.
  // Calling "Select" on the resulting loads from CP fails without these
  // patterns.
  def: Pat<(ResType (Load (HexagonCP tconstpool:$CPAddr))),
           (MI (A2_tfrsi imm:$CPAddr), 0)>;
  def: Pat<(ResType (Load (HexagonAtPcrel tconstpool:$CPAddr))),
           (MI (C4_addipc imm:$CPAddr), 0)>;
}
124
// All basic load patterns for one (instruction, load fragment, type)
// combination: frame-index, register and constant-pool addressing, in that
// order.
multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                     PatFrag ImmPred> {
  defm: HvxLdfi_pat<MI, Load, ResType, ImmPred>;
  defm: HvxLdgi_pat<MI, Load, ResType, ImmPred>;
  defm: HvxLdc_pat<MI, Load, ResType>;
}
131
// Aligned loads: everything from HvxLd_pat, plus loads whose address is
// wrapped in a valignaddr node (with or without an added offset). The
// valignaddr patterns get extra complexity and use the wrapped register
// directly as the base.
multiclass HvxLda_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                      PatFrag ImmPred> {
  let AddedComplexity = 50 in {
    def: Pat<(ResType (Load (valignaddr I32:$Base))),
             (MI I32:$Base, 0)>;
    def: Pat<(ResType (Load (add (valignaddr I32:$Base), ImmPred:$Imm))),
             (MI I32:$Base, imm:$Imm)>;
  }
  defm: HvxLd_pat<MI, Load, ResType, ImmPred>;
}
143
// Instantiate the load patterns for the i8/i16/i32 single-vector types.
// The groups are emitted in the same order as before: non-temporal aligned,
// aligned, unaligned.
let Predicates = [UseHVX] in {
  // alignedload will match a non-temporal load as well, so try non-temporal
  // first.
  foreach Ty = [VecI8, VecI16, VecI32] in
    defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, Ty, IsVecOff>;
  foreach Ty = [VecI8, VecI16, VecI32] in
    defm: HvxLda_pat<V6_vL32b_ai, alignedload, Ty, IsVecOff>;

  foreach Ty = [VecI8, VecI16, VecI32] in
    defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, Ty, IsVecOff>;
}
158
159
160// HVX stores
161
// Stores addressed through a frame index.
//   MI      - store instruction to select
//   Store   - store fragment to match
//   Value   - fragment matching the stored value
//   ImmPred - predicate the immediate offset has to satisfy
// Covers frame-index + offset (written as add or as IsOrAdd) and a bare
// frame index, which is selected with offset 0.
multiclass HvxStfi_pat<InstHexagon MI, PatFrag Store, PatFrag Value,
                       PatFrag ImmPred> {
  def: Pat<(Store Value:$Src, (add (i32 AddrFI:$Frame), ImmPred:$Imm)),
           (MI AddrFI:$Frame, imm:$Imm, Value:$Src)>;
  def: Pat<(Store Value:$Src, (IsOrAdd (i32 AddrFI:$Frame), ImmPred:$Imm)),
           (MI AddrFI:$Frame, imm:$Imm, Value:$Src)>;
  def: Pat<(Store Value:$Src, AddrFI:$Frame),
           (MI AddrFI:$Frame, 0, Value:$Src)>;
}
171
// Stores addressed through a general i32 register: register + offset
// (written as add or as IsOrAdd) and a bare register, which is selected
// with offset 0. Parameters have the same roles as in HvxStfi_pat.
multiclass HvxStgi_pat<InstHexagon MI, PatFrag Store, PatFrag Value,
                       PatFrag ImmPred> {
  def: Pat<(Store Value:$Src, (add I32:$Base, ImmPred:$Imm)),
           (MI I32:$Base, imm:$Imm, Value:$Src)>;
  def: Pat<(Store Value:$Src, (IsOrAdd I32:$Base, ImmPred:$Imm)),
           (MI I32:$Base, imm:$Imm, Value:$Src)>;
  def: Pat<(Store Value:$Src, I32:$Base),
           (MI I32:$Base, 0, Value:$Src)>;
}
181
// All store patterns for one (instruction, store fragment, value fragment)
// combination: frame-index addressing first, then register addressing.
multiclass HvxSt_pat<InstHexagon MI, PatFrag Store, PatFrag Value,
                     PatFrag ImmPred> {
  defm: HvxStfi_pat<MI, Store, Value, ImmPred>;
  defm: HvxStgi_pat<MI, Store, Value, ImmPred>;
}
187
// Instantiate the store patterns for i8/i16/i32 single-vector values.
// The groups are emitted in the same order as before: non-temporal aligned,
// aligned, unaligned.
let Predicates = [UseHVX] in {
  // alignedstore will match a non-temporal store as well, so try non-temporal
  // first.
  foreach Val = [HVI8, HVI16, HVI32] in
    defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, Val, IsVecOff>;
  foreach Val = [HVI8, HVI16, HVI32] in
    defm: HvxSt_pat<V6_vS32b_ai, alignedstore, Val, IsVecOff>;
  foreach Val = [HVI8, HVI16, HVI32] in
    defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, Val, IsVecOff>;
}
201
// Bitcasts between same-size vector types are no-ops, except for the
// actual type change.
let Predicates = [UseHVX] in {
  // Single vectors (HvxVR).
  defm: NopCast_pat<VecI8, VecI16, HvxVR>;
  defm: NopCast_pat<VecI8, VecI32, HvxVR>;
  defm: NopCast_pat<VecI16, VecI32, HvxVR>;

  // Vector pairs (HvxWR).
  defm: NopCast_pat<VecPI8, VecPI16, HvxWR>;
  defm: NopCast_pat<VecPI8, VecPI32, HvxWR>;
  defm: NopCast_pat<VecPI16, VecPI32, HvxWR>;
}
213
// Zero vectors, vector/predicate concatenation, and word extract/insert.
let Predicates = [UseHVX] in {
  let AddedComplexity = 100 in {
    // These should be preferred over a vsplat of 0.
    foreach Ty = [VecI8, VecI16, VecI32] in
      def: Pat<(Ty vzero), (V6_vd0)>;
    foreach Ty = [VecPI8, VecPI16, VecPI32] in
      def: Pat<(Ty vzero), (PS_vdd0)>;

    // Concatenating two zero vectors gives a zero pair.
    foreach Ty = [VecI8, VecI16, VecI32] in
      def: Pat<(concat_vectors (Ty vzero), (Ty vzero)), (PS_vdd0)>;
  }

  // Combinev places its first operand in vsub_hi, so the second operand of
  // concat_vectors is passed first.
  def: Pat<(VecPI8 (concat_vectors HVI8:$Lo, HVI8:$Hi)),
           (Combinev HvxVR:$Hi, HvxVR:$Lo)>;
  def: Pat<(VecPI16 (concat_vectors HVI16:$Lo, HVI16:$Hi)),
           (Combinev HvxVR:$Hi, HvxVR:$Lo)>;
  def: Pat<(VecPI32 (concat_vectors HVI32:$Lo, HVI32:$Hi)),
           (Combinev HvxVR:$Hi, HvxVR:$Lo)>;

  // Predicate concatenation; the operands are likewise passed to Combineq
  // in swapped order.
  def: Pat<(VecQ8 (qcat HQ16:$Qa, HQ16:$Qb)),
           (Combineq $Qb, $Qa)>;
  def: Pat<(VecQ16 (qcat HQ32:$Qa, HQ32:$Qb)),
           (Combineq $Qb, $Qa)>;

  // VEXTRACTW maps to V6_extractw regardless of the vector element type.
  def: Pat<(HexagonVEXTRACTW HVI8:$Vec, I32:$Idx),
           (V6_extractw HvxVR:$Vec, I32:$Idx)>;
  def: Pat<(HexagonVEXTRACTW HVI16:$Vec, I32:$Idx),
           (V6_extractw HvxVR:$Vec, I32:$Idx)>;
  def: Pat<(HexagonVEXTRACTW HVI32:$Vec, I32:$Idx),
           (V6_extractw HvxVR:$Vec, I32:$Idx)>;

  // VINSERTW0 maps to V6_vinsertwr regardless of the vector element type.
  def: Pat<(HexagonVINSERTW0 HVI8:$Vec, I32:$Word),
           (V6_vinsertwr HvxVR:$Vec, I32:$Word)>;
  def: Pat<(HexagonVINSERTW0 HVI16:$Vec, I32:$Word),
           (V6_vinsertwr HvxVR:$Vec, I32:$Word)>;
  def: Pat<(HexagonVINSERTW0 HVI32:$Vec, I32:$Word),
           (V6_vinsertwr HvxVR:$Vec, I32:$Word)>;
}
253
// Splats for HvxV60
// Every variant ends in V6_lvsplatw, so byte and halfword values are first
// replicated into a 32-bit value (SplatB / SplatH for immediates,
// S2_vsplatrb / A2_combine_ll for registers).
def V60splatib: OutPatFrag<(ops node:$Imm),
                           (V6_lvsplatw (ToI32 (SplatB $Imm)))>;
def V60splatih: OutPatFrag<(ops node:$Imm),
                           (V6_lvsplatw (ToI32 (SplatH $Imm)))>;
def V60splatiw: OutPatFrag<(ops node:$Imm),
                           (V6_lvsplatw (ToI32 $Imm))>;
def V60splatrb: OutPatFrag<(ops node:$Reg),
                           (V6_lvsplatw (S2_vsplatrb $Reg))>;
def V60splatrh: OutPatFrag<(ops node:$Reg),
                           (V6_lvsplatw (A2_combine_ll $Reg, $Reg))>;
def V60splatrw: OutPatFrag<(ops node:$Reg),
                           (V6_lvsplatw $Reg)>;
262
// Splats for HvxV62+
// Byte and halfword splats use V6_lvsplatb / V6_lvsplath directly.
def V62splatib: OutPatFrag<(ops node:$Imm), (V6_lvsplatb (ToI32 $Imm))>;
def V62splatih: OutPatFrag<(ops node:$Imm), (V6_lvsplath (ToI32 $Imm))>;
def V62splatiw: OutPatFrag<(ops node:$Imm), (V6_lvsplatw (ToI32 $Imm))>;
def V62splatrb: OutPatFrag<(ops node:$Reg), (V6_lvsplatb $Reg)>;
def V62splatrh: OutPatFrag<(ops node:$Reg), (V6_lvsplath $Reg)>;
def V62splatrw: OutPatFrag<(ops node:$Reg), (V6_lvsplatw $Reg)>;
270
// Replicate a single vector into both halves of a vector pair.
def Rep: OutPatFrag<(ops node:$Vec), (Combinev $Vec, $Vec)>;
272
// splat_vector selection for HvxV60. Immediate splats get a higher
// AddedComplexity than register splats; pair results replicate the
// single-vector splat with Rep.
let Predicates = [UseHVX,UseHVXV60] in {
  let AddedComplexity = 10 in {
    def: Pat<(VecI8 (splat_vector u8_0ImmPred:$Imm)),
             (V60splatib $Imm)>;
    def: Pat<(VecI16 (splat_vector u16_0ImmPred:$Imm)),
             (V60splatih $Imm)>;
    def: Pat<(VecI32 (splat_vector anyimm:$Imm)),
             (V60splatiw $Imm)>;
    def: Pat<(VecPI8 (splat_vector u8_0ImmPred:$Imm)),
             (Rep (V60splatib $Imm))>;
    def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$Imm)),
             (Rep (V60splatih $Imm))>;
    def: Pat<(VecPI32 (splat_vector anyimm:$Imm)),
             (Rep (V60splatiw $Imm))>;
  }
  def: Pat<(VecI8 (splat_vector I32:$Reg)),
           (V60splatrb $Reg)>;
  def: Pat<(VecI16 (splat_vector I32:$Reg)),
           (V60splatrh $Reg)>;
  def: Pat<(VecI32 (splat_vector I32:$Reg)),
           (V60splatrw $Reg)>;
  def: Pat<(VecPI8 (splat_vector I32:$Reg)),
           (Rep (V60splatrb $Reg))>;
  def: Pat<(VecPI16 (splat_vector I32:$Reg)),
           (Rep (V60splatrh $Reg))>;
  def: Pat<(VecPI32 (splat_vector I32:$Reg)),
           (Rep (V60splatrw $Reg))>;
}
// splat_vector selection for HvxV62+. The AddedComplexity values (30 for
// immediates, 20 for registers) are higher than those of the HvxV60
// patterns (10 and the default).
let Predicates = [UseHVX,UseHVXV62] in {
  let AddedComplexity = 30 in {
    def: Pat<(VecI8 (splat_vector u8_0ImmPred:$Imm)),
             (V62splatib imm:$Imm)>;
    def: Pat<(VecI16 (splat_vector u16_0ImmPred:$Imm)),
             (V62splatih imm:$Imm)>;
    def: Pat<(VecI32 (splat_vector anyimm:$Imm)),
             (V62splatiw imm:$Imm)>;
    def: Pat<(VecPI8 (splat_vector u8_0ImmPred:$Imm)),
             (Rep (V62splatib imm:$Imm))>;
    def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$Imm)),
             (Rep (V62splatih imm:$Imm))>;
    def: Pat<(VecPI32 (splat_vector anyimm:$Imm)),
             (Rep (V62splatiw imm:$Imm))>;
  }
  let AddedComplexity = 20 in {
    def: Pat<(VecI8 (splat_vector I32:$Reg)),
             (V62splatrb $Reg)>;
    def: Pat<(VecI16 (splat_vector I32:$Reg)),
             (V62splatrh $Reg)>;
    def: Pat<(VecI32 (splat_vector I32:$Reg)),
             (V62splatrw $Reg)>;
    def: Pat<(VecPI8 (splat_vector I32:$Reg)),
             (Rep (V62splatrb $Reg))>;
    def: Pat<(VecPI16 (splat_vector I32:$Reg)),
             (Rep (V62splatrh $Reg))>;
    def: Pat<(VecPI32 (splat_vector I32:$Reg)),
             (Rep (V62splatrw $Reg))>;
  }
}
310
// A vector of type VecTy formed as a splat of i32 -1.
class Vneg1<ValueType VecTy>
  : PatFrag<(ops), (VecTy (splat_vector (i32 -1)))>;

// Vector negation, expressed as an xor with Vneg1<VecTy>.
class Vnot<ValueType VecTy>
  : PatFrag<(ops node:$Vec), (xor $Vec, Vneg1<VecTy>)>;
316
// Element-wise arithmetic, bitwise logic, min/max and vector select.
let Predicates = [UseHVX] in {
  // An xor with all-ones is selected as V6_vnot; the high complexity makes
  // it win over the generic xor patterns below.
  let AddedComplexity = 200 in {
    def: Pat<(Vnot<VecI8> HVI8:$Vec), (V6_vnot HvxVR:$Vec)>;
    def: Pat<(Vnot<VecI16> HVI16:$Vec), (V6_vnot HvxVR:$Vec)>;
    def: Pat<(Vnot<VecI32> HVI32:$Vec), (V6_vnot HvxVR:$Vec)>;
  }

  // Addition: single vectors, then vector pairs (_dv instructions).
  def: OpR_RR_pat<V6_vaddb, Add, VecI8, HVI8>;
  def: OpR_RR_pat<V6_vaddh, Add, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vaddw, Add, VecI32, HVI32>;
  def: OpR_RR_pat<V6_vaddb_dv, Add, VecPI8, HWI8>;
  def: OpR_RR_pat<V6_vaddh_dv, Add, VecPI16, HWI16>;
  def: OpR_RR_pat<V6_vaddw_dv, Add, VecPI32, HWI32>;
  // Subtraction: single vectors, then vector pairs.
  def: OpR_RR_pat<V6_vsubb, Sub, VecI8, HVI8>;
  def: OpR_RR_pat<V6_vsubh, Sub, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vsubw, Sub, VecI32, HVI32>;
  def: OpR_RR_pat<V6_vsubb_dv, Sub, VecPI8, HWI8>;
  def: OpR_RR_pat<V6_vsubh_dv, Sub, VecPI16, HWI16>;
  def: OpR_RR_pat<V6_vsubw_dv, Sub, VecPI32, HWI32>;
  // Bitwise operations use one instruction for every element type.
  def: OpR_RR_pat<V6_vand, And, VecI8, HVI8>;
  def: OpR_RR_pat<V6_vand, And, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vand, And, VecI32, HVI32>;
  def: OpR_RR_pat<V6_vor, Or, VecI8, HVI8>;
  def: OpR_RR_pat<V6_vor, Or, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vor, Or, VecI32, HVI32>;
  def: OpR_RR_pat<V6_vxor, Xor, VecI8, HVI8>;
  def: OpR_RR_pat<V6_vxor, Xor, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vxor, Xor, VecI32, HVI32>;

  // Min/max. Unsigned variants are listed for bytes and halfwords only.
  def: OpR_RR_pat<V6_vminb, Smin, VecI8, HVI8>;
  def: OpR_RR_pat<V6_vmaxb, Smax, VecI8, HVI8>;
  def: OpR_RR_pat<V6_vminub, Umin, VecI8, HVI8>;
  def: OpR_RR_pat<V6_vmaxub, Umax, VecI8, HVI8>;
  def: OpR_RR_pat<V6_vminh, Smin, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vmaxh, Smax, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vminuh, Umin, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vmaxuh, Umax, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vminw, Smin, VecI32, HVI32>;
  def: OpR_RR_pat<V6_vmaxw, Smax, VecI32, HVI32>;

  // vselect maps directly to V6_vmux.
  def: Pat<(vselect HQ8:$Pred, HVI8:$TVal, HVI8:$FVal),
           (V6_vmux HvxQR:$Pred, HvxVR:$TVal, HvxVR:$FVal)>;
  def: Pat<(vselect HQ16:$Pred, HVI16:$TVal, HVI16:$FVal),
           (V6_vmux HvxQR:$Pred, HvxVR:$TVal, HvxVR:$FVal)>;
  def: Pat<(vselect HQ32:$Pred, HVI32:$TVal, HVI32:$FVal),
           (V6_vmux HvxQR:$Pred, HvxVR:$TVal, HvxVR:$FVal)>;

  // vselect on a negated predicate: use the predicate as is and swap the
  // two value operands.
  def: Pat<(vselect (qnot HQ8:$Pred), HVI8:$TVal, HVI8:$FVal),
           (V6_vmux HvxQR:$Pred, HvxVR:$FVal, HvxVR:$TVal)>;
  def: Pat<(vselect (qnot HQ16:$Pred), HVI16:$TVal, HVI16:$FVal),
           (V6_vmux HvxQR:$Pred, HvxVR:$FVal, HvxVR:$TVal)>;
  def: Pat<(vselect (qnot HQ32:$Pred), HVI32:$TVal, HVI32:$FVal),
           (V6_vmux HvxQR:$Pred, HvxVR:$FVal, HvxVR:$TVal)>;
}
371
// Element-wise multiplication.
let Predicates = [UseHVX] in {
  // For i8 vectors Va = (a0, a1, ...), Vb = (b0, b1, ...),
  // V6_vmpybv Va, Vb produces a pair of i16 vectors Hi:Lo,
  // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
  // V6_vshuffeb then combines the two halves into one i8 vector.
  def: Pat<(mul HVI8:$Va, HVI8:$Vb),
           (V6_vshuffeb
             (HiVec (V6_vmpybv HvxVR:$Va, HvxVR:$Vb)),
             (LoVec (V6_vmpybv HvxVR:$Va, HvxVR:$Vb)))>;

  // Halfwords have a direct instruction.
  def: Pat<(mul HVI16:$Va, HVI16:$Vb),
           (V6_vmpyih HvxVR:$Va, HvxVR:$Vb)>;

  // Words: V6_vmpyieoh followed by the accumulating V6_vmpyiewuh_acc.
  def: Pat<(mul HVI32:$Va, HVI32:$Vb),
           (V6_vmpyiewuh_acc
             (V6_vmpyieoh HvxVR:$Va, HvxVR:$Vb),
             HvxVR:$Va, HvxVR:$Vb)>;
}
385
// Sign/zero extension, in-vector extension and truncation.
let Predicates = [UseHVX] in {
  // Full extension of a single vector yields a vector pair.
  def: Pat<(VecPI16 (sext HVI8:$Vec)), (VSxtb $Vec)>;
  def: Pat<(VecPI32 (sext HVI16:$Vec)), (VSxth $Vec)>;
  def: Pat<(VecPI16 (zext HVI8:$Vec)), (VZxtb $Vec)>;
  def: Pat<(VecPI32 (zext HVI16:$Vec)), (VZxth $Vec)>;

  // sext_invec with a single-vector result keeps the low half of the
  // unpacked pair; i8 to i32 takes two unpack steps.
  def: Pat<(VecI16 (sext_invec HVI8:$Vec)),
           (LoVec (VSxtb $Vec))>;
  def: Pat<(VecI32 (sext_invec HVI16:$Vec)),
           (LoVec (VSxth $Vec))>;
  def: Pat<(VecI32 (sext_invec HVI8:$Vec)),
           (LoVec (VSxth (LoVec (VSxtb $Vec))))>;
  // sext_invec on a pair unpacks the low vector of the input pair.
  def: Pat<(VecPI16 (sext_invec HWI8:$Pair)),
           (VSxtb (LoVec $Pair))>;
  def: Pat<(VecPI32 (sext_invec HWI16:$Pair)),
           (VSxth (LoVec $Pair))>;
  def: Pat<(VecPI32 (sext_invec HWI8:$Pair)),
           (VSxth (LoVec (VSxtb (LoVec $Pair))))>;

  // zext_invec: same structure with the zero-extending unpacks.
  def: Pat<(VecI16 (zext_invec HVI8:$Vec)),
           (LoVec (VZxtb $Vec))>;
  def: Pat<(VecI32 (zext_invec HVI16:$Vec)),
           (LoVec (VZxth $Vec))>;
  def: Pat<(VecI32 (zext_invec HVI8:$Vec)),
           (LoVec (VZxth (LoVec (VZxtb $Vec))))>;
  def: Pat<(VecPI16 (zext_invec HWI8:$Pair)),
           (VZxtb (LoVec $Pair))>;
  def: Pat<(VecPI32 (zext_invec HWI16:$Pair)),
           (VZxth (LoVec $Pair))>;
  def: Pat<(VecPI32 (zext_invec HWI8:$Pair)),
           (VZxth (LoVec (VZxtb (LoVec $Pair))))>;

  // Truncating a pair packs its high and low vectors into a single vector.
  def: Pat<(VecI8 (trunc HWI16:$Pair)),
           (V6_vpackeb (HiVec $Pair), (LoVec $Pair))>;
  def: Pat<(VecI16 (trunc HWI32:$Pair)),
           (V6_vpackeh (HiVec $Pair), (LoVec $Pair))>;

  // Truncating a vector to a predicate uses V6_vandvrt with the constant
  // 0x01010101 for every element width.
  def: Pat<(VecQ8 (trunc HVI8:$Vec)),
           (V6_vandvrt HvxVR:$Vec, (A2_tfrsi 0x01010101))>;
  def: Pat<(VecQ16 (trunc HVI16:$Vec)),
           (V6_vandvrt HvxVR:$Vec, (A2_tfrsi 0x01010101))>;
  def: Pat<(VecQ32 (trunc HVI32:$Vec)),
           (V6_vandvrt HvxVR:$Vec, (A2_tfrsi 0x01010101))>;
}
422
// sext_inreg, valign, shifts, vpackl/vunpack, bswap, ctpop and ctlz.
let Predicates = [UseHVX] in {
  // The "source" types are not legal, and there are no parameterized
  // definitions for them, but they are length-specific.
  // sext_inreg is done as a left shift followed by an arithmetic right
  // shift by (element width - source width) bits.
  let Predicates = [UseHVX,UseHVX64B] in {
    def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v32i8)),
             (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i8)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i16)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
  }
  let Predicates = [UseHVX,UseHVX128B] in {
    def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v64i8)),
             (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i8)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i16)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
  }

  // Take a pair of vectors Vt:Vs and shift them towards LSB by (Rt & HwLen).
  // NOTE(review): LoVec extracts vsub_lo, which presumes that V6_valignb
  // yields a vector pair; if it produces a single vector the LoVec wrapper
  // is wrong. Confirm against the instruction definition.
  def: Pat<(VecI8 (valign HVI8:$Vt, HVI8:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
  def: Pat<(VecI16 (valign HVI16:$Vt, HVI16:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
  def: Pat<(VecI32 (valign HVI32:$Vt, HVI32:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;

  // Byte shifts by a scalar: extend the bytes to halfwords (zero-extended
  // for VASL/VLSR, sign-extended for VASR), shift both halves of the pair
  // as halfwords, then pack the results back into bytes.
  def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vaslh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vaslh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
  def: Pat<(HexagonVASR HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vasrh (HiVec (VSxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vasrh (LoVec (VSxtb HvxVR:$Vs)), I32:$Rt))>;
  def: Pat<(HexagonVLSR HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vlsrh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vlsrh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;

  // Halfword and word shifts by a scalar have direct instructions.
  def: Pat<(HexagonVASL HVI16:$Vs, I32:$Rt), (V6_vaslh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASL HVI32:$Vs, I32:$Rt), (V6_vaslw HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASR HVI16:$Vs, I32:$Rt), (V6_vasrh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASR HVI32:$Vs, I32:$Rt), (V6_vasrw HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVLSR HVI16:$Vs, I32:$Rt), (V6_vlsrh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVLSR HVI32:$Vs, I32:$Rt), (V6_vlsrw HvxVR:$Vs, I32:$Rt)>;

  // Word shift followed by an add folds into the accumulating shift.
  def: Pat<(add HVI32:$Vx, (HexagonVASL HVI32:$Vu, I32:$Rt)),
           (V6_vaslw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(add HVI32:$Vx, (HexagonVASR HVI32:$Vu, I32:$Rt)),
           (V6_vasrw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;

  // Shifts by a vector of per-element amounts (halfwords and words only).
  def: Pat<(shl HVI16:$Vs, HVI16:$Vt), (V6_vaslhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(shl HVI32:$Vs, HVI32:$Vt), (V6_vaslwv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(sra HVI16:$Vs, HVI16:$Vt), (V6_vasrhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(sra HVI32:$Vs, HVI32:$Vt), (V6_vasrwv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>;

  // Vpackl is a pseudo-op that is used when legalizing widened truncates.
  // It should never be produced with a register pair in the output, but
  // it can happen to have a pair as an input.
  def: Pat<(VecI8  (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>;
  def: Pat<(VecI8  (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>;
  def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>;
  def: Pat<(VecI8  (vpackl HWI16:$Vs)), (V6_vpackeb (HiVec $Vs), (LoVec $Vs))>;
  def: Pat<(VecI8  (vpackl HWI32:$Vs)),
           (V6_vpackeb (IMPLICIT_DEF), (V6_vpackeh (HiVec $Vs), (LoVec $Vs)))>;
  def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>;

  // vunpack: sign-extending unpack. Single-vector results keep the low half
  // of the unpacked pair; i8 to i32 takes two unpack steps.
  // The halfword-to-word-pair pattern takes an HVI16 source: VSxth is a
  // halfword unpack and is applied to $Vs directly, exactly as in the
  // (VecI32 (vunpack HVI16)) pattern above it.
  def: Pat<(VecI16  (vunpack   HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
  def: Pat<(VecI32  (vunpack   HVI8:$Vs)), (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
  def: Pat<(VecI32  (vunpack  HVI16:$Vs)), (LoVec (VSxth $Vs))>;
  def: Pat<(VecPI16 (vunpack   HVI8:$Vs)), (VSxtb $Vs)>;
  def: Pat<(VecPI32 (vunpack   HVI8:$Vs)), (VSxth (LoVec (VSxtb $Vs)))>;
  def: Pat<(VecPI32 (vunpack  HVI16:$Vs)), (VSxth $Vs)>;

  // vunpacku: zero-extending counterpart of the patterns above.
  def: Pat<(VecI16  (vunpacku  HVI8:$Vs)), (LoVec (VZxtb $Vs))>;
  def: Pat<(VecI32  (vunpacku  HVI8:$Vs)), (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
  def: Pat<(VecI32  (vunpacku HVI16:$Vs)), (LoVec (VZxth $Vs))>;
  def: Pat<(VecPI16 (vunpacku  HVI8:$Vs)), (VZxtb $Vs)>;
  def: Pat<(VecPI32 (vunpacku  HVI8:$Vs)), (VZxth (LoVec (VZxtb $Vs)))>;
  def: Pat<(VecPI32 (vunpacku HVI16:$Vs)), (VZxth $Vs)>;

  // bswap via V6_vdelta with a byte-splat control vector (0x01 for
  // halfwords, 0x03 for words). The splat helper depends on the HVX
  // version; the V62 patterns carry extra complexity.
  let Predicates = [UseHVX,UseHVXV60] in {
    def: Pat<(VecI16 (bswap HVI16:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x01)))>;
    def: Pat<(VecI32 (bswap HVI32:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x03)))>;
  }
  let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in {
    def: Pat<(VecI16 (bswap HVI16:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x01)))>;
    def: Pat<(VecI32 (bswap HVI32:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x03)))>;
  }

  // ctpop: only a halfword popcount instruction is used. Bytes are
  // zero-extended to halfwords, counted and packed back; for words the
  // halfword counts are zero-extended to words and the two halves added.
  def: Pat<(VecI8 (ctpop HVI8:$Vs)),
           (V6_vpackeb (V6_vpopcounth (HiVec (V6_vunpackub HvxVR:$Vs))),
                       (V6_vpopcounth (LoVec (V6_vunpackub HvxVR:$Vs))))>;
  def: Pat<(VecI16 (ctpop HVI16:$Vs)), (V6_vpopcounth HvxVR:$Vs)>;
  def: Pat<(VecI32 (ctpop HVI32:$Vs)),
           (V6_vaddw (LoVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))),
                     (HiVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))))>;

  // ctlz on bytes: count leading zeros on the zero-extended halfwords,
  // pack back to bytes, and subtract the 8 extra leading zero bits
  // introduced by the extension.
  let Predicates = [UseHVX,UseHVXV60] in
  def: Pat<(VecI8 (ctlz HVI8:$Vs)),
           (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
                                 (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
                     (V60splatib (i32 0x08)))>;
  let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in
  def: Pat<(VecI8 (ctlz HVI8:$Vs)),
           (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
                                 (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
                     (V62splatib (i32 0x08)))>;

  def: Pat<(VecI16 (ctlz HVI16:$Vs)), (V6_vcl0h HvxVR:$Vs)>;
  def: Pat<(VecI32 (ctlz HVI32:$Vs)), (V6_vcl0w HvxVR:$Vs)>;
}
540
// Scalar-predicate select between two HVX values matched by RegPred,
// selected as MI with the same three operands in the same order.
class HvxSel_pat<InstHexagon MI, PatFrag RegPred>
  : Pat<(select I1:$Cond, RegPred:$TVal, RegPred:$FVal),
        (MI I1:$Cond, RegPred:$TVal, RegPred:$FVal)>;
544
// select on single vectors uses PS_vselect; on vector pairs, PS_wselect.
let Predicates = [UseHVX] in {
  foreach RegPred = [HVI8, HVI16, HVI32] in
    def: HvxSel_pat<PS_vselect, RegPred>;
  foreach RegPred = [HWI8, HWI16, HWI32] in
    def: HvxSel_pat<PS_wselect, RegPred>;
}
553
// Patterns producing or combining vector predicates (HvxQR).
let Predicates = [UseHVX] in {
  // Constant predicates.
  foreach QTy = [VecQ8, VecQ16, VecQ32] in
    def: Pat<(QTy (qtrue)), (PS_qtrue)>;
  foreach QTy = [VecQ8, VecQ16, VecQ32] in
    def: Pat<(QTy (qfalse)), (PS_qfalse)>;

  // Predicate negation, written either as vnot or as qnot.
  foreach QPred = [HQ8, HQ16, HQ32] in
    def: Pat<(vnot QPred:$Pred), (V6_pred_not HvxQR:$Pred)>;
  foreach QPred = [HQ8, HQ16, HQ32] in
    def: Pat<(qnot QPred:$Pred), (V6_pred_not HvxQR:$Pred)>;

  // Predicate and / or / xor.
  def: OpR_RR_pat<V6_pred_and, And, VecQ8, HQ8>;
  def: OpR_RR_pat<V6_pred_and, And, VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_and, And, VecQ32, HQ32>;
  def: OpR_RR_pat<V6_pred_or, Or, VecQ8, HQ8>;
  def: OpR_RR_pat<V6_pred_or, Or, VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_or, Or, VecQ32, HQ32>;
  def: OpR_RR_pat<V6_pred_xor, Xor, VecQ8, HQ8>;
  def: OpR_RR_pat<V6_pred_xor, Xor, VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_xor, Xor, VecQ32, HQ32>;

  // and / or with one operand negated through qnot (VNot2).
  def: OpR_RR_pat<V6_pred_and_n, VNot2<And, qnot>, VecQ8, HQ8>;
  def: OpR_RR_pat<V6_pred_and_n, VNot2<And, qnot>, VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_and_n, VNot2<And, qnot>, VecQ32, HQ32>;
  def: OpR_RR_pat<V6_pred_or_n, VNot2<Or, qnot>, VecQ8, HQ8>;
  def: OpR_RR_pat<V6_pred_or_n, VNot2<Or, qnot>, VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_or_n, VNot2<Or, qnot>, VecQ32, HQ32>;

  // Vector comparisons: equal, signed greater-than, unsigned greater-than.
  def: OpR_RR_pat<V6_veqb, seteq, VecQ8, HVI8>;
  def: OpR_RR_pat<V6_veqh, seteq, VecQ16, HVI16>;
  def: OpR_RR_pat<V6_veqw, seteq, VecQ32, HVI32>;
  def: OpR_RR_pat<V6_vgtb, setgt, VecQ8, HVI8>;
  def: OpR_RR_pat<V6_vgth, setgt, VecQ16, HVI16>;
  def: OpR_RR_pat<V6_vgtw, setgt, VecQ32, HVI32>;
  def: OpR_RR_pat<V6_vgtub, setugt, VecQ8, HVI8>;
  def: OpR_RR_pat<V6_vgtuh, setugt, VecQ16, HVI16>;
  def: OpR_RR_pat<V6_vgtuw, setugt, VecQ32, HVI32>;

  // Comparisons whose result is and-ed / or-ed / xor-ed into an existing
  // predicate: equality.
  def: AccRRR_pat<V6_veqb_and, And, seteq, HQ8, HVI8, HVI8>;
  def: AccRRR_pat<V6_veqb_or, Or, seteq, HQ8, HVI8, HVI8>;
  def: AccRRR_pat<V6_veqb_xor, Xor, seteq, HQ8, HVI8, HVI8>;
  def: AccRRR_pat<V6_veqh_and, And, seteq, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqh_or, Or, seteq, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqh_xor, Xor, seteq, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqw_and, And, seteq, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_veqw_or, Or, seteq, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_veqw_xor, Xor, seteq, HQ32, HVI32, HVI32>;

  // Signed greater-than.
  def: AccRRR_pat<V6_vgtb_and, And, setgt, HQ8, HVI8, HVI8>;
  def: AccRRR_pat<V6_vgtb_or, Or, setgt, HQ8, HVI8, HVI8>;
  def: AccRRR_pat<V6_vgtb_xor, Xor, setgt, HQ8, HVI8, HVI8>;
  def: AccRRR_pat<V6_vgth_and, And, setgt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgth_or, Or, setgt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgth_xor, Xor, setgt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtw_and, And, setgt, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtw_or, Or, setgt, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtw_xor, Xor, setgt, HQ32, HVI32, HVI32>;

  // Unsigned greater-than.
  def: AccRRR_pat<V6_vgtub_and, And, setugt, HQ8, HVI8, HVI8>;
  def: AccRRR_pat<V6_vgtub_or, Or, setugt, HQ8, HVI8, HVI8>;
  def: AccRRR_pat<V6_vgtub_xor, Xor, setugt, HQ8, HVI8, HVI8>;
  def: AccRRR_pat<V6_vgtuh_and, And, setugt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuh_or, Or, setugt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuh_xor, Xor, setugt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuw_and, And, setugt, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtuw_or, Or, setugt, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtuw_xor, Xor, setugt, HQ32, HVI32, HVI32>;
}
626