//===- HexagonPatternsHVX.td - Selection Patterns for HVX --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

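// Pattern leaves that pair each HVX vector type with its register class:
// HQ* are vector predicates (HvxQR), HV* are single vectors (HvxVR), and
// HW* are vector pairs (HvxWR).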
def HQ8:    PatLeaf<(VecQ8   HvxQR:$R)>;
def HQ16:   PatLeaf<(VecQ16  HvxQR:$R)>;
def HQ32:   PatLeaf<(VecQ32  HvxQR:$R)>;

def HVI8:   PatLeaf<(VecI8   HvxVR:$R)>;
def HVI16:  PatLeaf<(VecI16  HvxVR:$R)>;
def HVI32:  PatLeaf<(VecI32  HvxVR:$R)>;
def HVF16:  PatLeaf<(VecF16  HvxVR:$R)>;
def HVF32:  PatLeaf<(VecF32  HvxVR:$R)>;

def HWI8:   PatLeaf<(VecPI8  HvxWR:$R)>;
def HWI16:  PatLeaf<(VecPI16 HvxWR:$R)>;
def HWI32:  PatLeaf<(VecPI32 HvxWR:$R)>;
def HWF16:  PatLeaf<(VecPF16 HvxWR:$R)>;
def HWF32:  PatLeaf<(VecPF32 HvxWR:$R)>;

def SDTVecUnaryOp:
  SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDTVecBinOp:
  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>;

def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2,
  [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>;
def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>;

def SDTHexagonVINSERTW0: SDTypeProfile<1, 2,
  [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;

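// Returns half of the HVX vector length in bytes as an i32 target constant.
// Used by Combineq below as the byte rotate amount.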
def HwLen2: SDNodeXForm<imm, [{
  const auto &ST = CurDAG->getSubtarget<HexagonSubtarget>();
  return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32);
}]>;

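// Q2V materializes a vector predicate as a byte vector (vandqrt against -1);
// Combinev assembles two single vectors into a vector-pair register, and
// Combineq below packs two predicates into one by going through vector form.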
def Q2V: OutPatFrag<(ops node:$Qs), (V6_vandqrt $Qs, (A2_tfrsi -1))>;

def Combinev: OutPatFrag<(ops node:$Vs, node:$Vt),
  (REG_SEQUENCE HvxWR, $Vs, vsub_hi, $Vt, vsub_lo)>;

def Combineq: OutPatFrag<(ops node:$Qs, node:$Qt),
  (V6_vandvrt
    (V6_vor
      (V6_vror (V6_vpackeb (V6_vd0), (Q2V $Qs)),
               (A2_tfrsi (HwLen2 (i32 0)))),  // Half the vector length
      (V6_vpackeb (V6_vd0), (Q2V $Qt))),
    (A2_tfrsi -1))>;

def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>;
def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>;

def HexagonQCAT:       SDNode<"HexagonISD::QCAT",       SDTVecBinOp>;
def HexagonQTRUE:      SDNode<"HexagonISD::QTRUE",      SDTVecLeaf>;
def HexagonQFALSE:     SDNode<"HexagonISD::QFALSE",     SDTVecLeaf>;
def HexagonVPACKL:     SDNode<"HexagonISD::VPACKL",     SDTVecUnaryOp>;
def HexagonVUNPACK:    SDNode<"HexagonISD::VUNPACK",    SDTVecUnaryOp>;
def HexagonVUNPACKU:   SDNode<"HexagonISD::VUNPACKU",   SDTVecUnaryOp>;

def vzero:  PatFrags<(ops), [(splat_vector (i32 0)), (splat_vector (f32zero))]>;
def qtrue:  PatFrag<(ops), (HexagonQTRUE)>;
def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
def qcat:   PatFrag<(ops node:$Qs, node:$Qt),
                    (HexagonQCAT node:$Qs, node:$Qt)>;

def qnot:     PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
def vpackl:   PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>;
def vunpack:  PatFrag<(ops node:$Vs), (HexagonVUNPACK node:$Vs)>;
def vunpacku: PatFrag<(ops node:$Vs), (HexagonVUNPACKU node:$Vs)>;

def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb  $Vs)>;
def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh  $Vs)>;
def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>;
def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>;

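// Accepts offsets that are a multiple of the HVX vector size and whose
// vector-indexed value fits in the signed 4-bit immediate of the vmem
// addressing mode.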
def IsVecOff : PatLeaf<(i32 imm), [{
  int32_t V = N->getSExtValue();
  int32_t VecSize = HRI->getSpillSize(Hexagon::HvxVRRegClass);
  assert(isPowerOf2_32(VecSize));
  if ((uint32_t(V) & (uint32_t(VecSize)-1)) != 0)
    return false;
  int32_t L = Log2_32(VecSize);
  return isInt<4>(V >> L);
}]>;


def alignedload: PatFrag<(ops node:$a), (load $a), [{
  return isAlignedMemNode(cast<MemSDNode>(N));
}]>;

def unalignedload: PatFrag<(ops node:$a), (load $a), [{
  return !isAlignedMemNode(cast<MemSDNode>(N));
}]>;

def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
  return isAlignedMemNode(cast<MemSDNode>(N));
}]>;

def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
  return !isAlignedMemNode(cast<MemSDNode>(N));
}]>;


// HVX loads

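// The load multiclasses below cover the three address forms that reach
// instruction selection: frame index plus offset (HvxLdfi_pat), base register
// plus offset (HvxLdgi_pat), and constant-pool addresses (HvxLdc_pat).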
multiclass HvxLdfi_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                       PatFrag ImmPred> {
  def: Pat<(ResType (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
           (MI AddrFI:$fi, imm:$Off)>;
  def: Pat<(ResType (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))),
           (MI AddrFI:$fi, imm:$Off)>;
  def: Pat<(ResType (Load AddrFI:$fi)), (ResType (MI AddrFI:$fi, 0))>;
}

multiclass HvxLdgi_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                     PatFrag ImmPred> {
  def: Pat<(ResType (Load (add I32:$Rt, ImmPred:$Off))),
           (MI I32:$Rt, imm:$Off)>;
  def: Pat<(ResType (Load I32:$Rt)),
           (MI I32:$Rt, 0)>;
}

multiclass HvxLdc_pat<InstHexagon MI, PatFrag Load, ValueType ResType> {
  // The HVX selection code for shuffles can generate vector constants.
  // Calling "Select" on the resulting loads from CP fails without these
  // patterns.
  def: Pat<(ResType (Load (HexagonCP tconstpool:$Addr))),
           (MI (A2_tfrsi imm:$Addr), 0)>;
  def: Pat<(ResType (Load (HexagonAtPcrel tconstpool:$Addr))),
           (MI (C4_addipc imm:$Addr), 0)>;
}

multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                     PatFrag ImmPred> {
  defm: HvxLdfi_pat<MI, Load, ResType, ImmPred>;
  defm: HvxLdgi_pat<MI, Load, ResType, ImmPred>;
  defm: HvxLdc_pat <MI, Load, ResType>;
}

// Aligned loads: everything, plus loads with valignaddr node.
multiclass HvxLda_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                      PatFrag ImmPred> {
  let AddedComplexity = 50 in {
    def: Pat<(ResType (Load (valignaddr I32:$Rt))),
             (MI I32:$Rt, 0)>;
    def: Pat<(ResType (Load (add (valignaddr I32:$Rt), ImmPred:$Off))),
             (MI I32:$Rt, imm:$Off)>;
  }
  defm: HvxLd_pat<MI, Load, ResType, ImmPred>;
}

let Predicates = [UseHVX] in {
  // alignedload will match a non-temporal load as well, so try non-temporal
  // first.
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI8,  IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI16, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI32, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,               alignedload, VecI8,  IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,               alignedload, VecI16, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,               alignedload, VecI32, IsVecOff>;
  defm: HvxLd_pat<V6_vL32Ub_ai,             unalignedload, VecI8,  IsVecOff>;
  defm: HvxLd_pat<V6_vL32Ub_ai,             unalignedload, VecI16, IsVecOff>;
  defm: HvxLd_pat<V6_vL32Ub_ai,             unalignedload, VecI32, IsVecOff>;
}

let Predicates = [UseHVXV68] in {
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF16, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF32, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,               alignedload, VecF16, IsVecOff>;
  defm: HvxLda_pat<V6_vL32b_ai,               alignedload, VecF32, IsVecOff>;
  defm: HvxLd_pat<V6_vL32Ub_ai,             unalignedload, VecF16, IsVecOff>;
  defm: HvxLd_pat<V6_vL32Ub_ai,             unalignedload, VecF32, IsVecOff>;
}

// HVX stores

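// The store multiclasses mirror the load ones above: frame index plus offset
// and base register plus offset.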
multiclass HvxStfi_pat<InstHexagon MI, PatFrag Store, PatFrag Value,
                       PatFrag ImmPred> {
  def: Pat<(Store Value:$Vs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
           (MI AddrFI:$fi, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)),
           (MI AddrFI:$fi, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, AddrFI:$fi),
           (MI AddrFI:$fi, 0, Value:$Vs)>;
}

multiclass HvxStgi_pat<InstHexagon MI, PatFrag Store, PatFrag Value,
                       PatFrag ImmPred> {
  def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$Off)),
           (MI I32:$Rt, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, (IsOrAdd I32:$Rt, ImmPred:$Off)),
           (MI I32:$Rt, imm:$Off, Value:$Vs)>;
  def: Pat<(Store Value:$Vs, I32:$Rt),
           (MI I32:$Rt, 0, Value:$Vs)>;
}

multiclass HvxSt_pat<InstHexagon MI, PatFrag Store, PatFrag Value,
                     PatFrag ImmPred> {
  defm: HvxStfi_pat<MI, Store, Value, ImmPred>;
  defm: HvxStgi_pat<MI, Store, Value, ImmPred>;
}

let Predicates = [UseHVX] in {
  // alignedstore will match a non-temporal store as well, so try non-temporal
  // first.
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore,  HVI8, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVI16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVI32, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,               alignedstore,  HVI8, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,               alignedstore, HVI16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,               alignedstore, HVI32, IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,            unalignedstore,  HVI8, IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,            unalignedstore, HVI16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,            unalignedstore, HVI32, IsVecOff>;
}

let Predicates = [UseHVXV68] in {
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF32, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,               alignedstore, HVF16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32b_ai,               alignedstore, HVF32, IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,            unalignedstore, HVF16, IsVecOff>;
  defm: HvxSt_pat<V6_vS32Ub_ai,            unalignedstore, HVF32, IsVecOff>;
}

// Bitcasts between same-size vector types are no-ops, except for the
// actual type change.
let Predicates = [UseHVX] in {
  defm: NopCast_pat<VecI8,   VecI16,  HvxVR>;
  defm: NopCast_pat<VecI8,   VecI32,  HvxVR>;
  defm: NopCast_pat<VecI16,  VecI32,  HvxVR>;

  defm: NopCast_pat<VecPI8,  VecPI16, HvxWR>;
  defm: NopCast_pat<VecPI8,  VecPI32, HvxWR>;
  defm: NopCast_pat<VecPI16, VecPI32, HvxWR>;
}

let Predicates = [UseHVX, UseHVXFloatingPoint] in {
  defm: NopCast_pat<VecI8,   VecF16,  HvxVR>;
  defm: NopCast_pat<VecI8,   VecF32,  HvxVR>;
  defm: NopCast_pat<VecI16,  VecF16,  HvxVR>;
  defm: NopCast_pat<VecI16,  VecF32,  HvxVR>;
  defm: NopCast_pat<VecI32,  VecF16,  HvxVR>;
  defm: NopCast_pat<VecI32,  VecF32,  HvxVR>;
  defm: NopCast_pat<VecF16,  VecF32,  HvxVR>;

  defm: NopCast_pat<VecPI8,  VecPF16, HvxWR>;
  defm: NopCast_pat<VecPI8,  VecPF32, HvxWR>;
  defm: NopCast_pat<VecPI16, VecPF16, HvxWR>;
  defm: NopCast_pat<VecPI16, VecPF32, HvxWR>;
  defm: NopCast_pat<VecPI32, VecPF16, HvxWR>;
  defm: NopCast_pat<VecPI32, VecPF32, HvxWR>;
  defm: NopCast_pat<VecPF16, VecPF32, HvxWR>;
}

let Predicates = [UseHVX] in {
  let AddedComplexity = 100 in {
    // These should be preferred over a vsplat of 0.
    def: Pat<(VecI8   vzero), (V6_vd0)>;
    def: Pat<(VecI16  vzero), (V6_vd0)>;
    def: Pat<(VecI32  vzero), (V6_vd0)>;
    def: Pat<(VecPI8  vzero), (PS_vdd0)>;
    def: Pat<(VecPI16 vzero), (PS_vdd0)>;
    def: Pat<(VecPI32 vzero), (PS_vdd0)>;
    def: Pat<(VecPF32 vzero), (PS_vdd0)>;

    def: Pat<(concat_vectors  (VecI8 vzero),  (VecI8 vzero)), (PS_vdd0)>;
    def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>;
    def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>;
  }

  def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;

  def: Pat<(VecQ8  (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qt, $Qs)>;
  def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qt, $Qs)>;

  def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs),
           (V6_extractw HvxVR:$Vu, I32:$Rs)>;
  def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs),
           (V6_extractw HvxVR:$Vu, I32:$Rs)>;
  def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs),
           (V6_extractw HvxVR:$Vu, I32:$Rs)>;

  def: Pat<(HexagonVINSERTW0 HVI8:$Vu,  I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
}

let Predicates = [UseHVX, UseHVXFloatingPoint] in {
  let AddedComplexity = 100 in {
    def: Pat<(VecF16  vzero), (V6_vd0)>;
    def: Pat<(VecF32  vzero), (V6_vd0)>;
    def: Pat<(VecPF16 vzero), (PS_vdd0)>;
    def: Pat<(VecPF32 vzero), (PS_vdd0)>;

    def: Pat<(concat_vectors (VecF16 vzero), (VecF16 vzero)), (PS_vdd0)>;
    def: Pat<(concat_vectors (VecF32 vzero), (VecF32 vzero)), (PS_vdd0)>;
  }

  def: Pat<(VecPF16 (concat_vectors HVF16:$Vs, HVF16:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(VecPF32 (concat_vectors HVF32:$Vs, HVF32:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;

  def: Pat<(HexagonVINSERTW0 HVF16:$Vu, I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(HexagonVINSERTW0 HVF32:$Vu, I32:$Rt),
           (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
}

// Splats for HvxV60
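// V60 has only a word splat (V6_lvsplatw), so byte/halfword splats first
// replicate the scalar across a 32-bit word.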
def V60splatib: OutPatFrag<(ops node:$V),  (V6_lvsplatw (ToI32 (SplatB $V)))>;
def V60splatih: OutPatFrag<(ops node:$V),  (V6_lvsplatw (ToI32 (SplatH $V)))>;
def V60splatiw: OutPatFrag<(ops node:$V),  (V6_lvsplatw (ToI32 $V))>;
def V60splatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatw (S2_vsplatrb $Rs))>;
def V60splatrh: OutPatFrag<(ops node:$Rs),
                           (V6_lvsplatw (A2_combine_ll $Rs, $Rs))>;
def V60splatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;

// Splats for HvxV62+
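// V62 adds native byte and halfword splats (V6_lvsplatb, V6_lvsplath).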
def V62splatib: OutPatFrag<(ops node:$V),  (V6_lvsplatb (ToI32 $V))>;
def V62splatih: OutPatFrag<(ops node:$V),  (V6_lvsplath (ToI32 $V))>;
def V62splatiw: OutPatFrag<(ops node:$V),  (V6_lvsplatw (ToI32 $V))>;
def V62splatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatb $Rs)>;
def V62splatrh: OutPatFrag<(ops node:$Rs), (V6_lvsplath $Rs)>;
def V62splatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;

def Rep: OutPatFrag<(ops node:$N), (Combinev $N, $N)>;

let Predicates = [UseHVX,UseHVXV60] in {
  let AddedComplexity = 10 in {
    def: Pat<(VecI8   (splat_vector u8_0ImmPred:$V)),  (V60splatib $V)>;
    def: Pat<(VecI16  (splat_vector u16_0ImmPred:$V)), (V60splatih $V)>;
    def: Pat<(VecI32  (splat_vector anyimm:$V)),       (V60splatiw $V)>;
    def: Pat<(VecPI8  (splat_vector u8_0ImmPred:$V)),  (Rep (V60splatib $V))>;
    def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$V)), (Rep (V60splatih $V))>;
    def: Pat<(VecPI32 (splat_vector anyimm:$V)),       (Rep (V60splatiw $V))>;
  }
  def: Pat<(VecI8   (splat_vector I32:$Rs)), (V60splatrb $Rs)>;
  def: Pat<(VecI16  (splat_vector I32:$Rs)), (V60splatrh $Rs)>;
  def: Pat<(VecI32  (splat_vector I32:$Rs)), (V60splatrw $Rs)>;
  def: Pat<(VecPI8  (splat_vector I32:$Rs)), (Rep (V60splatrb $Rs))>;
  def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (V60splatrh $Rs))>;
  def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V60splatrw $Rs))>;
}
let Predicates = [UseHVX,UseHVXV62] in {
  let AddedComplexity = 30 in {
    def: Pat<(VecI8   (splat_vector u8_0ImmPred:$V)),  (V62splatib imm:$V)>;
    def: Pat<(VecI16  (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>;
    def: Pat<(VecI32  (splat_vector anyimm:$V)),       (V62splatiw imm:$V)>;
    def: Pat<(VecPI8  (splat_vector u8_0ImmPred:$V)),
             (Rep (V62splatib imm:$V))>;
    def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$V)),
             (Rep (V62splatih imm:$V))>;
    def: Pat<(VecPI32 (splat_vector anyimm:$V)),
             (Rep (V62splatiw imm:$V))>;
  }
  let AddedComplexity = 20 in {
    def: Pat<(VecI8   (splat_vector I32:$Rs)), (V62splatrb $Rs)>;
    def: Pat<(VecI16  (splat_vector I32:$Rs)), (V62splatrh $Rs)>;
    def: Pat<(VecI32  (splat_vector I32:$Rs)), (V62splatrw $Rs)>;
    def: Pat<(VecPI8  (splat_vector I32:$Rs)), (Rep (V62splatrb $Rs))>;
    def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (V62splatrh $Rs))>;
    def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V62splatrw $Rs))>;
  }
}
let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
  let AddedComplexity = 30 in {
    def: Pat<(VecF16  (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>;
    def: Pat<(VecF32  (splat_vector anyint:$V)),       (V62splatiw imm:$V)>;
    def: Pat<(VecF32  (splat_vector f32ImmPred:$V)),   (V62splatiw (ftoi $V))>;
  }
  let AddedComplexity = 20 in {
    def: Pat<(VecF16  (splat_vector I32:$Rs)), (V62splatrh $Rs)>;
    def: Pat<(VecF32  (splat_vector I32:$Rs)), (V62splatrw $Rs)>;
    def: Pat<(VecF32  (splat_vector F32:$Rs)), (V62splatrw $Rs)>;
  }
}

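// Vnot<T> matches an xor with an all-ones splat; it is mapped to V6_vnot
// below.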
class Vneg1<ValueType VecTy>
  : PatFrag<(ops), (VecTy (splat_vector (i32 -1)))>;

class Vnot<ValueType VecTy>
  : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;

let Predicates = [UseHVX] in {
  let AddedComplexity = 200 in {
    def: Pat<(Vnot<VecI8>   HVI8:$Vs), (V6_vnot HvxVR:$Vs)>;
    def: Pat<(Vnot<VecI16> HVI16:$Vs), (V6_vnot HvxVR:$Vs)>;
    def: Pat<(Vnot<VecI32> HVI32:$Vs), (V6_vnot HvxVR:$Vs)>;
  }

  def: OpR_RR_pat<V6_vaddb,    Add,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vaddh,    Add,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vaddw,    Add,  VecI32, HVI32>;
  def: OpR_RR_pat<V6_vaddb_dv, Add,  VecPI8,  HWI8>;
  def: OpR_RR_pat<V6_vaddh_dv, Add, VecPI16, HWI16>;
  def: OpR_RR_pat<V6_vaddw_dv, Add, VecPI32, HWI32>;
  def: OpR_RR_pat<V6_vsubb,    Sub,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vsubh,    Sub,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vsubw,    Sub,  VecI32, HVI32>;
  def: OpR_RR_pat<V6_vsubb_dv, Sub,  VecPI8,  HWI8>;
  def: OpR_RR_pat<V6_vsubh_dv, Sub, VecPI16, HWI16>;
  def: OpR_RR_pat<V6_vsubw_dv, Sub, VecPI32, HWI32>;
  def: OpR_RR_pat<V6_vand,     And,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vand,     And,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vand,     And,  VecI32, HVI32>;
  def: OpR_RR_pat<V6_vor,       Or,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vor,       Or,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vor,       Or,  VecI32, HVI32>;
  def: OpR_RR_pat<V6_vxor,     Xor,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vxor,     Xor,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vxor,     Xor,  VecI32, HVI32>;

  def: OpR_RR_pat<V6_vminb,   Smin,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vmaxb,   Smax,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vminub,  Umin,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vmaxub,  Umax,   VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vminh,   Smin,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vmaxh,   Smax,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vminuh,  Umin,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vmaxuh,  Umax,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vminw,   Smin,  VecI32, HVI32>;
  def: OpR_RR_pat<V6_vmaxw,   Smax,  VecI32, HVI32>;

  def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;

  def: Pat<(vselect (qnot HQ8:$Qu), HVI8:$Vs, HVI8:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(vselect (qnot HQ16:$Qu), HVI16:$Vs, HVI16:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(vselect (qnot HQ32:$Qu), HVI32:$Vs, HVI32:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}

// For now, we always deal with vector floating point in SF mode.
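// QFloat arithmetic (e.g. V6_vadd_hf, V6_vmpy_qf32_sf) yields results in the
// qf16/qf32 formats, so the *_conv pattern classes below convert the result
// back to IEEE hf/sf with V6_vconv_hf_qf16 / V6_vconv_sf_qf32.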
class OpR_RR_pat_conv<InstHexagon MI, PatFrag Op, ValueType ResType,
                      PatFrag RsPred, PatFrag RtPred = RsPred>
  : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
        (V6_vconv_sf_qf32 (VecF32 (MI RsPred:$Rs, RtPred:$Rt)))>;

class OpR_RR_pat_conv_hf<InstHexagon MI, PatFrag Op, ValueType ResType,
                      PatFrag RsPred, PatFrag RtPred = RsPred>
  : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
        (V6_vconv_hf_qf16 (VecF16 (MI RsPred:$Rs, RtPred:$Rt)))>;

let Predicates = [UseHVXV68, UseHVXQFloat] in {
  def: OpR_RR_pat_conv_hf<V6_vsub_hf,        pf2<fsub>,  VecF16, HVF16>;
  def: OpR_RR_pat_conv_hf<V6_vadd_hf,        pf2<fadd>,  VecF16, HVF16>;
  def: OpR_RR_pat_conv_hf<V6_vmpy_qf16_hf,   pf2<fmul>,  VecF16, HVF16>;
  def: OpR_RR_pat_conv<V6_vsub_sf,        pf2<fsub>,  VecF32, HVF32>;
  def: OpR_RR_pat_conv<V6_vadd_sf,        pf2<fadd>,  VecF32, HVF32>;
  def: OpR_RR_pat_conv<V6_vmpy_qf32_sf,   pf2<fmul>,  VecF32, HVF32>;

  // For now we assume that the fp32 register is always coming in as IEEE float
  // since the qfloat arithmetic instructions above always generate the
  // accompanying conversions as part of their pattern
  def: Pat<(VecF16 (pf1<fpround> HWF32:$Vuu)),
           (V6_vdealh (V6_vconv_hf_qf32
             (VecPF32 (Combinev (V6_vadd_sf (HiVec HvxWR:$Vuu), (V6_vd0)),
                                (V6_vadd_sf (LoVec HvxWR:$Vuu), (V6_vd0))
             ))))>;
  // fpextend for QFloat is handled manually in HexagonISelLoweringHVX.cpp.
}

// HVX IEEE arithmetic Instructions
let Predicates = [UseHVXV68, UseHVXIEEEFP] in {
  def: Pat<(fadd HVF16:$Rs, HVF16:$Rt),
           (V6_vadd_hf_hf HVF16:$Rs, HVF16:$Rt)>;
  def: Pat<(fadd HVF32:$Rs, HVF32:$Rt),
           (V6_vadd_sf_sf HVF32:$Rs, HVF32:$Rt)>;
  def: Pat<(fsub HVF16:$Rs, HVF16:$Rt),
           (V6_vsub_hf_hf HVF16:$Rs, HVF16:$Rt)>;
  def: Pat<(fsub HVF32:$Rs, HVF32:$Rt),
           (V6_vsub_sf_sf HVF32:$Rs, HVF32:$Rt)>;
  def: Pat<(fmul HVF16:$Rs, HVF16:$Rt),
           (V6_vmpy_hf_hf HVF16:$Rs, HVF16:$Rt)>;
  def: Pat<(fmul HVF32:$Rs, HVF32:$Rt),
           (V6_vmpy_sf_sf HVF32:$Rs, HVF32:$Rt)>;

  def: Pat<(VecF16 (pf1<fpround> HWF32:$Vuu)),
           (V6_vdealh (V6_vcvt_hf_sf (HiVec HvxWR:$Vuu), (LoVec HvxWR:$Vuu)))>;
  def: Pat<(VecPF32 (pf1<fpextend> HVF16:$Vu)),
           (V6_vcvt_sf_hf (V6_vshuffh HvxVR:$Vu))>;

  def: OpR_R_pat<V6_vcvt_h_hf,  Fptosi, VecI16, HVF16>;
  def: OpR_R_pat<V6_vcvt_uh_hf, Fptoui, VecI16, HVF16>;
  def: OpR_R_pat<V6_vcvt_hf_h,  Sitofp, VecF16, HVI16>;
  def: OpR_R_pat<V6_vcvt_hf_uh, Uitofp, VecF16, HVI16>;

  def: Pat<(VecI8 (Fptosi HWF16:$Vu)),
           (V6_vcvt_b_hf (HiVec $Vu), (LoVec $Vu))>;
  def: Pat<(VecI8 (Fptoui HWF16:$Vu)),
           (V6_vcvt_ub_hf (HiVec $Vu), (LoVec $Vu))>;
  def: Pat<(VecPF16 (Sitofp HVI8:$Vu)), (V6_vcvt_hf_b HvxVR:$Vu)>;
  def: Pat<(VecPF16 (Uitofp HVI8:$Vu)), (V6_vcvt_hf_ub HvxVR:$Vu)>;
}

let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
  def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(vselect (qnot HQ16:$Qu), HVF16:$Vs, HVF16:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;

  def: Pat<(vselect HQ32:$Qu, HVF32:$Vs, HVF32:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(vselect (qnot HQ32:$Qu), HVF32:$Vs, HVF32:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}

let Predicates = [UseHVXV68, UseHVX128B, UseHVXQFloat] in {
  let AddedComplexity = 220 in {
    defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect,  setgt, VecQ16, HVF16>;
    defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect, setogt, VecQ16, HVF16>;
    defm: MinMax_pats<V6_vmin_sf, V6_vmax_sf, vselect,  setgt, VecQ32, HVF32>;
    defm: MinMax_pats<V6_vmin_sf, V6_vmax_sf, vselect, setogt, VecQ32, HVF32>;
  }
  def: OpR_RR_pat<V6_vmin_hf, pf2<fminnum>, VecF16, HVF16>;
  def: OpR_RR_pat<V6_vmax_hf, pf2<fmaxnum>, VecF16, HVF16>;
  def: OpR_RR_pat<V6_vmin_sf, pf2<fminnum>, VecF32, HVF32>;
  def: OpR_RR_pat<V6_vmax_sf, pf2<fmaxnum>, VecF32, HVF32>;
}

let Predicates = [UseHVXV68, UseHVX128B, UseHVXIEEEFP] in {
  let AddedComplexity = 220 in {
    defm: MinMax_pats<V6_vfmin_hf, V6_vfmax_hf, vselect,  setgt, VecQ16, HVF16>;
    defm: MinMax_pats<V6_vfmin_hf, V6_vfmax_hf, vselect, setogt, VecQ16, HVF16>;
    defm: MinMax_pats<V6_vfmin_sf, V6_vfmax_sf, vselect,  setgt, VecQ32, HVF32>;
    defm: MinMax_pats<V6_vfmin_sf, V6_vfmax_sf, vselect, setogt, VecQ32, HVF32>;
  }
  def: OpR_RR_pat<V6_vfmin_hf, pf2<fminnum>, VecF16, HVF16>;
  def: OpR_RR_pat<V6_vfmax_hf, pf2<fmaxnum>, VecF16, HVF16>;
  def: OpR_RR_pat<V6_vfmin_sf, pf2<fminnum>, VecF32, HVF32>;
  def: OpR_RR_pat<V6_vfmax_sf, pf2<fmaxnum>, VecF32, HVF32>;
}

let Predicates = [UseHVX] in {
  // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
  // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
  // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
  def: Pat<(mul HVI8:$Vs, HVI8:$Vt),
           (V6_vshuffeb (HiVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)),
                        (LoVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)))>;
  def: Pat<(mul HVI16:$Vs, HVI16:$Vt),
           (V6_vmpyih HvxVR:$Vs, HvxVR:$Vt)>;
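  // The low 32 bits of a 32x32 product are composed from 16-bit partial
  // products: vmpyieoh, accumulated with vmpyiewuh_acc.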
  def: Pat<(mul HVI32:$Vs, HVI32:$Vt),
           (V6_vmpyiewuh_acc (V6_vmpyieoh HvxVR:$Vs, HvxVR:$Vt),
                             HvxVR:$Vs, HvxVR:$Vt)>;
}

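// Widening sign/zero extensions use the vunpack instructions (VSxtb/VSxth,
// VZxtb/VZxth above); the *_invec forms extend only the low elements of
// the input.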
let Predicates = [UseHVX] in {
  def: Pat<(VecPI16 (sext HVI8:$Vs)),  (VSxtb $Vs)>;
  def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;
  def: Pat<(VecPI16 (zext HVI8:$Vs)),  (VZxtb $Vs)>;
  def: Pat<(VecPI32 (zext HVI16:$Vs)), (VZxth $Vs)>;

  def: Pat<(VecI16 (sext_invec HVI8:$Vs)),  (LoVec (VSxtb $Vs))>;
  def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>;
  def: Pat<(VecI32 (sext_invec HVI8:$Vs)),
           (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
  def: Pat<(VecPI16 (sext_invec HWI8:$Vss)),  (VSxtb (LoVec $Vss))>;
  def: Pat<(VecPI32 (sext_invec HWI16:$Vss)), (VSxth (LoVec $Vss))>;
  def: Pat<(VecPI32 (sext_invec HWI8:$Vss)),
           (VSxth (LoVec (VSxtb (LoVec $Vss))))>;

  def: Pat<(VecI16 (zext_invec HVI8:$Vs)),  (LoVec (VZxtb $Vs))>;
  def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>;
  def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
           (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
  def: Pat<(VecPI16 (zext_invec HWI8:$Vss)),  (VZxtb (LoVec $Vss))>;
  def: Pat<(VecPI32 (zext_invec HWI16:$Vss)), (VZxth (LoVec $Vss))>;
  def: Pat<(VecPI32 (zext_invec HWI8:$Vss)),
           (VZxth (LoVec (VZxtb (LoVec $Vss))))>;

  def: Pat<(VecI8 (trunc HWI16:$Vss)),
           (V6_vpackeb (HiVec $Vss), (LoVec $Vss))>;
  def: Pat<(VecI16 (trunc HWI32:$Vss)),
           (V6_vpackeh (HiVec $Vss), (LoVec $Vss))>;
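
  // Truncation to a vector predicate tests bit 0 of each element
  // (vandvrt against a splat of 0x01 bytes).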
  def: Pat<(VecQ8 (trunc HVI8:$Vs)),
           (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
  def: Pat<(VecQ16 (trunc HVI16:$Vs)),
           (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
  def: Pat<(VecQ32 (trunc HVI32:$Vs)),
           (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
}

let Predicates = [UseHVX] in {
  // The "source" types are not legal, and there are no parameterized
  // definitions for them, but they are length-specific.
  let Predicates = [UseHVX,UseHVX64B] in {
    def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v32i8)),
             (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i8)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i16)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
  }
  let Predicates = [UseHVX,UseHVX128B] in {
    def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v64i8)),
             (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i8)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i16)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
  }

  // Take a pair of vectors Vt:Vs and shift them towards LSB by (Rt & HwLen).
  def: Pat<(VecI8 (valign HVI8:$Vt, HVI8:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
  def: Pat<(VecI16 (valign HVI16:$Vt, HVI16:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
  def: Pat<(VecI32 (valign HVI32:$Vt, HVI32:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;

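  // There are no byte-granular shift instructions: byte shifts widen to
  // halfwords, shift, and pack the even bytes of the result back together.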
  def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vaslh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vaslh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
  def: Pat<(HexagonVASR HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vasrh (HiVec (VSxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vasrh (LoVec (VSxtb HvxVR:$Vs)), I32:$Rt))>;
  def: Pat<(HexagonVLSR HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vlsrh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vlsrh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;

  def: Pat<(HexagonVASL HVI16:$Vs, I32:$Rt), (V6_vaslh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASL HVI32:$Vs, I32:$Rt), (V6_vaslw HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASR HVI16:$Vs, I32:$Rt), (V6_vasrh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASR HVI32:$Vs, I32:$Rt), (V6_vasrw HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVLSR HVI16:$Vs, I32:$Rt), (V6_vlsrh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVLSR HVI32:$Vs, I32:$Rt), (V6_vlsrw HvxVR:$Vs, I32:$Rt)>;

  def: Pat<(add HVI32:$Vx, (HexagonVASL HVI32:$Vu, I32:$Rt)),
           (V6_vaslw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(add HVI32:$Vx, (HexagonVASR HVI32:$Vu, I32:$Rt)),
           (V6_vasrw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;

  def: Pat<(shl HVI16:$Vs, HVI16:$Vt), (V6_vaslhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(shl HVI32:$Vs, HVI32:$Vt), (V6_vaslwv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(sra HVI16:$Vs, HVI16:$Vt), (V6_vasrhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(sra HVI32:$Vs, HVI32:$Vt), (V6_vasrwv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>;

  // Vpackl is a pseudo-op that is used when legalizing widened truncates.
  // It should never be produced with a register pair in the output, but
  // it can happen to have a pair as an input.
  def: Pat<(VecI8  (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>;
  def: Pat<(VecI8  (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>;
  def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>;
  def: Pat<(VecI8  (vpackl HWI16:$Vs)), (V6_vpackeb (HiVec $Vs), (LoVec $Vs))>;
  def: Pat<(VecI8  (vpackl HWI32:$Vs)),
           (V6_vpackeb (IMPLICIT_DEF), (V6_vpackeh (HiVec $Vs), (LoVec $Vs)))>;
  def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>;

  def: Pat<(VecI16  (vunpack   HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
  def: Pat<(VecI32  (vunpack   HVI8:$Vs)), (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
  def: Pat<(VecI32  (vunpack  HVI16:$Vs)), (LoVec (VSxth $Vs))>;
  def: Pat<(VecPI16 (vunpack   HVI8:$Vs)), (VSxtb $Vs)>;
  def: Pat<(VecPI32 (vunpack   HVI8:$Vs)), (VSxth (LoVec (VSxtb $Vs)))>;
  def: Pat<(VecPI32 (vunpack  HVI16:$Vs)), (VSxth $Vs)>;

  def: Pat<(VecI16  (vunpacku  HVI8:$Vs)), (LoVec (VZxtb $Vs))>;
  def: Pat<(VecI32  (vunpacku  HVI8:$Vs)), (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
  def: Pat<(VecI32  (vunpacku HVI16:$Vs)), (LoVec (VZxth $Vs))>;
  def: Pat<(VecPI16 (vunpacku  HVI8:$Vs)), (VZxtb $Vs)>;
  def: Pat<(VecPI32 (vunpacku  HVI8:$Vs)), (VZxth (LoVec (VZxtb $Vs)))>;
  def: Pat<(VecPI32 (vunpacku HVI16:$Vs)), (VZxth $Vs)>;

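  // bswap is a byte permutation done with vdelta: a control splat of 0x01
  // swaps the bytes of each halfword, 0x03 reverses the bytes of each word.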
  let Predicates = [UseHVX,UseHVXV60] in {
    def: Pat<(VecI16 (bswap HVI16:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x01)))>;
    def: Pat<(VecI32 (bswap HVI32:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x03)))>;
  }
  let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in {
    def: Pat<(VecI16 (bswap HVI16:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x01)))>;
    def: Pat<(VecI32 (bswap HVI32:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x03)))>;
  }

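  // Only halfword population count exists (vpopcounth); the byte and word
  // forms widen to halfwords and then narrow or sum the partial counts.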
  def: Pat<(VecI8 (ctpop HVI8:$Vs)),
           (V6_vpackeb (V6_vpopcounth (HiVec (V6_vunpackub HvxVR:$Vs))),
                       (V6_vpopcounth (LoVec (V6_vunpackub HvxVR:$Vs))))>;
  def: Pat<(VecI16 (ctpop HVI16:$Vs)), (V6_vpopcounth HvxVR:$Vs)>;
  def: Pat<(VecI32 (ctpop HVI32:$Vs)),
           (V6_vaddw (LoVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))),
                     (HiVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))))>;

  let Predicates = [UseHVX,UseHVXV60] in
  def: Pat<(VecI8 (ctlz HVI8:$Vs)),
           (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
                                 (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
                     (V60splatib (i32 0x08)))>;
  let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in
  def: Pat<(VecI8 (ctlz HVI8:$Vs)),
           (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
                                 (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
                     (V62splatib (i32 0x08)))>;

  def: Pat<(VecI16 (ctlz HVI16:$Vs)), (V6_vcl0h HvxVR:$Vs)>;
  def: Pat<(VecI32 (ctlz HVI32:$Vs)), (V6_vcl0w HvxVR:$Vs)>;
}

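// Selects on a scalar predicate (i1) use the PS_vselect/PS_wselect pseudo
// instructions for single vectors and vector pairs, respectively.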
class HvxSel_pat<InstHexagon MI, PatFrag RegPred>
  : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt),
        (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>;

let Predicates = [UseHVX] in {
  def: HvxSel_pat<PS_vselect, HVI8>;
  def: HvxSel_pat<PS_vselect, HVI16>;
  def: HvxSel_pat<PS_vselect, HVI32>;
  def: HvxSel_pat<PS_wselect, HWI8>;
  def: HvxSel_pat<PS_wselect, HWI16>;
  def: HvxSel_pat<PS_wselect, HWI32>;
}

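// V2Q is the inverse of Q2V above: it converts a byte vector back into a
// vector predicate. Selecting between vector predicates on a scalar i1 goes
// through vector form: convert, select, convert back.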
def V2Q: OutPatFrag<(ops node:$Vs), (V6_vandvrt $Vs, (A2_tfrsi -1))>;

let Predicates = [UseHVX] in {
  def: Pat<(select I1:$Pu, VecQ8:$Qs, VecQ8:$Qt),
           (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>;
  def: Pat<(select I1:$Pu, VecQ16:$Qs, VecQ16:$Qt),
           (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>;
  def: Pat<(select I1:$Pu, VecQ32:$Qs, VecQ32:$Qt),
           (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>;
}

let Predicates = [UseHVX] in {
  def: Pat<(VecQ8   (qtrue)), (PS_qtrue)>;
  def: Pat<(VecQ16  (qtrue)), (PS_qtrue)>;
  def: Pat<(VecQ32  (qtrue)), (PS_qtrue)>;
  def: Pat<(VecQ8  (qfalse)), (PS_qfalse)>;
  def: Pat<(VecQ16 (qfalse)), (PS_qfalse)>;
  def: Pat<(VecQ32 (qfalse)), (PS_qfalse)>;

  def: Pat<(vnot  HQ8:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(vnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(vnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(qnot  HQ8:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(qnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>;
  def: Pat<(qnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>;

  def: OpR_RR_pat<V6_pred_and,  And,  VecQ8,   HQ8>;
  def: OpR_RR_pat<V6_pred_and,  And, VecQ16,  HQ16>;
  def: OpR_RR_pat<V6_pred_and,  And, VecQ32,  HQ32>;
  def: OpR_RR_pat<V6_pred_or,    Or,  VecQ8,   HQ8>;
  def: OpR_RR_pat<V6_pred_or,    Or, VecQ16,  HQ16>;
  def: OpR_RR_pat<V6_pred_or,    Or, VecQ32,  HQ32>;
  def: OpR_RR_pat<V6_pred_xor,  Xor,  VecQ8,   HQ8>;
  def: OpR_RR_pat<V6_pred_xor,  Xor, VecQ16,  HQ16>;
  def: OpR_RR_pat<V6_pred_xor,  Xor, VecQ32,  HQ32>;

  def: OpR_RR_pat<V6_pred_and_n,  VNot2<And, qnot>,  VecQ8,   HQ8>;
  def: OpR_RR_pat<V6_pred_and_n,  VNot2<And, qnot>, VecQ16,  HQ16>;
  def: OpR_RR_pat<V6_pred_and_n,  VNot2<And, qnot>, VecQ32,  HQ32>;
  def: OpR_RR_pat<V6_pred_or_n,    VNot2<Or, qnot>,  VecQ8,   HQ8>;
  def: OpR_RR_pat<V6_pred_or_n,    VNot2<Or, qnot>, VecQ16,  HQ16>;
  def: OpR_RR_pat<V6_pred_or_n,    VNot2<Or, qnot>, VecQ32,  HQ32>;

  def: OpR_RR_pat<V6_veqb,      seteq,  VecQ8,  HVI8>;
  def: OpR_RR_pat<V6_veqh,      seteq, VecQ16, HVI16>;
  def: OpR_RR_pat<V6_veqw,      seteq, VecQ32, HVI32>;
  def: OpR_RR_pat<V6_vgtb,      setgt,  VecQ8,  HVI8>;
  def: OpR_RR_pat<V6_vgth,      setgt, VecQ16, HVI16>;
  def: OpR_RR_pat<V6_vgtw,      setgt, VecQ32, HVI32>;
  def: OpR_RR_pat<V6_vgtub,    setugt,  VecQ8,  HVI8>;
  def: OpR_RR_pat<V6_vgtuh,    setugt, VecQ16, HVI16>;
  def: OpR_RR_pat<V6_vgtuw,    setugt, VecQ32, HVI32>;

  def: AccRRR_pat<V6_veqb_and,    And,  seteq,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_veqb_or,      Or,  seteq,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_veqb_xor,    Xor,  seteq,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_veqh_and,    And,  seteq,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqh_or,      Or,  seteq,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqh_xor,    Xor,  seteq,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqw_and,    And,  seteq,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_veqw_or,      Or,  seteq,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_veqw_xor,    Xor,  seteq,   HQ32, HVI32, HVI32>;

  def: AccRRR_pat<V6_vgtb_and,    And,  setgt,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtb_or,      Or,  setgt,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtb_xor,    Xor,  setgt,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgth_and,    And,  setgt,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgth_or,      Or,  setgt,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgth_xor,    Xor,  setgt,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtw_and,    And,  setgt,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtw_or,      Or,  setgt,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtw_xor,    Xor,  setgt,   HQ32, HVI32, HVI32>;

  def: AccRRR_pat<V6_vgtub_and,   And, setugt,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtub_or,     Or, setugt,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtub_xor,   Xor, setugt,    HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtuh_and,   And, setugt,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuh_or,     Or, setugt,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuh_xor,   Xor, setugt,   HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuw_and,   And, setugt,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtuw_or,     Or, setugt,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtuw_xor,   Xor, setugt,   HQ32, HVI32, HVI32>;
}

let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
  def: OpR_RR_pat<V6_veqh,              seteq,  VecQ16, HVF16>;
  def: OpR_RR_pat<V6_veqh,             setoeq,  VecQ16, HVF16>;
  def: OpR_RR_pat<V6_veqh,             setueq,  VecQ16, HVF16>;
  def: OpR_RR_pat<V6_vgthf,             setgt,  VecQ16, HVF16>;
  def: OpR_RR_pat<V6_vgthf,            setogt,  VecQ16, HVF16>;
  def: OpR_RR_pat<V6_vgthf,            setugt,  VecQ16, HVF16>;

  def: OpR_RR_pat<V6_veqw,              seteq,  VecQ32, HVF32>;
  def: OpR_RR_pat<V6_veqw,             setoeq,  VecQ32, HVF32>;
  def: OpR_RR_pat<V6_veqw,             setueq,  VecQ32, HVF32>;
  def: OpR_RR_pat<V6_vgtsf,             setgt,  VecQ32, HVF32>;
  def: OpR_RR_pat<V6_vgtsf,            setogt,  VecQ32, HVF32>;
  def: OpR_RR_pat<V6_vgtsf,            setugt,  VecQ32, HVF32>;

  def: AccRRR_pat<V6_veqh_and,    And,          seteq,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_veqh_or,      Or,          seteq,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_veqh_xor,    Xor,          seteq,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_veqh_and,    And,         setoeq,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_veqh_or,      Or,         setoeq,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_veqh_xor,    Xor,         setoeq,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_veqh_and,    And,         setueq,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_veqh_or,      Or,         setueq,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_veqh_xor,    Xor,         setueq,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_vgthf_and,   And,          setgt,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_vgthf_or,     Or,          setgt,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_vgthf_xor,   Xor,          setgt,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_vgthf_and,   And,         setogt,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_vgthf_or,     Or,         setogt,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_vgthf_xor,   Xor,         setogt,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_vgthf_and,   And,         setugt,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_vgthf_or,     Or,         setugt,  HQ16, HVF16, HVF16>;
  def: AccRRR_pat<V6_vgthf_xor,   Xor,         setugt,  HQ16, HVF16, HVF16>;

  def: AccRRR_pat<V6_veqw_and,    And,          seteq,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_veqw_or,      Or,          seteq,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_veqw_xor,    Xor,          seteq,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_veqw_and,    And,         setoeq,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_veqw_or,      Or,         setoeq,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_veqw_xor,    Xor,         setoeq,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_veqw_and,    And,         setueq,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_veqw_or,      Or,         setueq,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_veqw_xor,    Xor,         setueq,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_vgtsf_and,   And,          setgt,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_vgtsf_or,     Or,          setgt,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_vgtsf_xor,   Xor,          setgt,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_vgtsf_and,   And,         setogt,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_vgtsf_or,     Or,         setogt,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_vgtsf_xor,   Xor,         setogt,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_vgtsf_and,   And,         setugt,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_vgtsf_or,     Or,         setugt,  HQ32, HVF32, HVF32>;
  def: AccRRR_pat<V6_vgtsf_xor,   Xor,         setugt,  HQ32, HVF32, HVF32>;

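  // setone (ordered not-equal) is selected as the complement of the
  // equality compare.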
  def: Pat<(VecQ16 (setone HVF16:$Vt, HVF16:$Vu)),
           (V6_pred_not (V6_veqh HvxVR:$Vt, HvxVR:$Vu))>;

  def: Pat<(VecQ32 (setone HVF32:$Vt, HVF32:$Vu)),
           (V6_pred_not (V6_veqw HvxVR:$Vt, HvxVR:$Vu))>;
}