xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
//=----- AArch64InstrGISel.td - AArch64 GISel target pseudos -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 GlobalISel target pseudo instruction definitions. This file is kept
// separate from the other tablegen files for organizational purposes, but
// shares the same infrastructure.
//
//===----------------------------------------------------------------------===//
14
15
// Base class for the AArch64-specific generic instructions defined in this
// file. It derives from GenericInstruction and only sets the record namespace
// to "AArch64".
let Namespace = "AArch64" in
class AArch64GenericInstruction : GenericInstruction;
19
// Pseudo for a relocatable add that forms part of an address computation.
// The result, the source and the immediate each carry their own type index
// (type0, type1 and type2 respectively). No side effects.
let hasSideEffects = 0 in
def G_ADD_LOW : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList  = (ins type1:$src, type2:$imm);
}
27
// Pseudos for the rev16 / rev32 / rev64 instructions. Each one is produced
// post-legalization from G_SHUFFLE_VECTORs with appropriate masks. The single
// source shares the destination's type index (type0), and none of them has
// side effects.
let hasSideEffects = 0 in {
  // rev16
  def G_REV16 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src);
  }

  // rev32
  def G_REV32 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src);
  }

  // rev64
  def G_REV64 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src);
  }
}
51
// Represent the uzp1 and uzp2 instructions. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks. Both vector inputs and the result
// use the same type index (type0); neither opcode has side effects.
let hasSideEffects = 0 in {
  // uzp1
  def G_UZP1 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$v1, type0:$v2);
  }

  // uzp2
  def G_UZP2 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$v1, type0:$v2);
  }
}
67
// Represent the zip1 and zip2 instructions. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks. Both vector inputs and the result
// use the same type index (type0); neither opcode has side effects.
let hasSideEffects = 0 in {
  // zip1
  def G_ZIP1 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$v1, type0:$v2);
  }

  // zip2
  def G_ZIP2 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$v1, type0:$v2);
  }
}
83
// Represents a dup instruction. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks. The single input ($lane) has its
// own type index (type1), distinct from the result's type0. No side effects.
let hasSideEffects = 0 in
def G_DUP : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList  = (ins type1:$lane);
}
91
// Lane duplicate operations, one opcode per suffix (8/16/32/64). Each takes a
// source vector with the same type index as the result (type0) plus a $lane
// operand with its own type index (type1). None has side effects.
let hasSideEffects = 0 in {
  def G_DUPLANE8 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src, type1:$lane);
  }
  def G_DUPLANE16 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src, type1:$lane);
  }
  def G_DUPLANE32 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src, type1:$lane);
  }
  def G_DUPLANE64 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src, type1:$lane);
  }
}
113
// Represent the trn1 and trn2 instructions. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks. Both vector inputs and the result
// use the same type index (type0); neither opcode has side effects.
let hasSideEffects = 0 in {
  // trn1
  def G_TRN1 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$v1, type0:$v2);
  }

  // trn2
  def G_TRN2 : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$v1, type0:$v2);
  }
}
129
// Represents an ext instruction. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks. Takes two vectors of the result
// type (type0) and an untyped immediate ($imm). No side effects.
let hasSideEffects = 0 in
def G_EXT : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList  = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
}
137
// Vector right shifts by an immediate. The shifted value shares the result's
// type index (type0); the shift amount is an untyped immediate. Neither
// opcode has side effects.
let hasSideEffects = 0 in {
  // Represents a vector G_ASHR with an immediate.
  def G_VASHR : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1, untyped_imm_0:$imm);
  }

  // Represents a vector G_LSHR with an immediate.
  def G_VLSHR : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1, untyped_imm_0:$imm);
  }
}
151
// Represent an integer to FP conversion on the FPR bank (G_SITOF and
// G_UITOF). Source and result are declared with the same type index (type0).
// No side effects.
let hasSideEffects = 0 in {
  def G_SITOF : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src);
  }
  def G_UITOF : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src);
  }
}
163
// Two-operand compare pseudos G_FCMEQ / G_FCMGE / G_FCMGT. The GINodeEquiv
// entries in this file pair them with the AArch64fcmeq / AArch64fcmge /
// AArch64fcmgt SelectionDAG nodes. The first source shares the result's type
// index (type0); the second source has its own (type1). No side effects.
let hasSideEffects = 0 in {
  def G_FCMEQ : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1, type1:$src2);
  }

  def G_FCMGE : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1, type1:$src2);
  }

  def G_FCMGT : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1, type1:$src2);
  }
}
181
// Single-operand compare pseudos G_FCMEQZ / G_FCMGEZ / G_FCMGTZ / G_FCMLEZ /
// G_FCMLTZ. The GINodeEquiv entries in this file pair them with the
// AArch64fcm*z SelectionDAG nodes of the same name. Source and result use the
// same type index (type0). No side effects.
// NOTE(review): the "Z" suffix presumably means "compare against zero";
// confirm against the AArch64fcm*z node definitions.
let hasSideEffects = 0 in {
  def G_FCMEQZ : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src);
  }

  def G_FCMGEZ : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src);
  }

  def G_FCMGTZ : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src);
  }

  def G_FCMLEZ : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src);
  }

  def G_FCMLTZ : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src);
  }
}
211
// Prefetch pseudo, paired with the AArch64Prefetch SelectionDAG node by the
// GINodeEquiv entry in this file. It defines no result, takes an $imm operand
// (type0) followed by a pointer operand (ptype0), and is the only opcode in
// this file marked as having side effects.
let hasSideEffects = 1 in
def G_AARCH64_PREFETCH : AArch64GenericInstruction {
  let OutOperandList = (outs);
  let InOperandList  = (ins type0:$imm, ptype0:$src1);
}
217
// G_UMULL / G_SMULL: two-input pseudos paired with the AArch64umull and
// AArch64smull SelectionDAG nodes by the GINodeEquiv entries in this file.
// Both sources and the result are declared with the same type index (type0).
// No side effects.
let hasSideEffects = 0 in {
  def G_UMULL : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1, type0:$src2);
  }

  def G_SMULL : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1, type0:$src2);
  }
}
229
// G_UADDLP / G_SADDLP: single-input pseudos paired with the AArch64uaddlp_n
// and AArch64saddlp_n SelectionDAG nodes by the GINodeEquiv entries in this
// file. Source and result are declared with the same type index (type0).
// No side effects.
let hasSideEffects = 0 in {
  def G_UADDLP : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1);
  }

  def G_SADDLP : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1);
  }
}
241
// G_UADDLV / G_SADDLV: single-input pseudos paired with the AArch64uaddlv and
// AArch64saddlv SelectionDAG nodes by the GINodeEquiv entries in this file.
// Source and result are declared with the same type index (type0).
// No side effects.
let hasSideEffects = 0 in {
  def G_UADDLV : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1);
  }

  def G_SADDLV : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1);
  }
}
253
// G_UDOT / G_SDOT: three-input pseudos paired with the AArch64udot and
// AArch64sdot SelectionDAG nodes by the GINodeEquiv entries in this file.
// All three sources and the result are declared with the same type index
// (type0). No side effects.
let hasSideEffects = 0 in {
  def G_UDOT : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1, type0:$src2, type0:$src3);
  }

  def G_SDOT : AArch64GenericInstruction {
    let OutOperandList = (outs type0:$dst);
    let InOperandList  = (ins type0:$src1, type0:$src2, type0:$src3);
  }
}
265
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation. Takes three sources of
// the result type (type0). No side effects.
let hasSideEffects = 0 in
def G_BSP : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList  = (ins type0:$src1, type0:$src2, type0:$src3);
}
273
// Pair each generic opcode with its SelectionDAG node so that patterns written
// against the SDNode can be used for GlobalISel via the importer.

// Shuffle-derived pseudos and lane duplicates.
def : GINodeEquiv<G_REV16,     AArch64rev16>;
def : GINodeEquiv<G_REV32,     AArch64rev32>;
def : GINodeEquiv<G_REV64,     AArch64rev64>;
def : GINodeEquiv<G_UZP1,      AArch64uzp1>;
def : GINodeEquiv<G_UZP2,      AArch64uzp2>;
def : GINodeEquiv<G_ZIP1,      AArch64zip1>;
def : GINodeEquiv<G_ZIP2,      AArch64zip2>;
def : GINodeEquiv<G_DUP,       AArch64dup>;
def : GINodeEquiv<G_DUPLANE8,  AArch64duplane8>;
def : GINodeEquiv<G_DUPLANE16, AArch64duplane16>;
def : GINodeEquiv<G_DUPLANE32, AArch64duplane32>;
def : GINodeEquiv<G_DUPLANE64, AArch64duplane64>;
def : GINodeEquiv<G_TRN1,      AArch64trn1>;
def : GINodeEquiv<G_TRN2,      AArch64trn2>;
def : GINodeEquiv<G_EXT,       AArch64ext>;
// Immediate vector shifts and int-to-FP conversions.
def : GINodeEquiv<G_VASHR,     AArch64vashr>;
def : GINodeEquiv<G_VLSHR,     AArch64vlshr>;
def : GINodeEquiv<G_SITOF,     AArch64sitof>;
def : GINodeEquiv<G_UITOF,     AArch64uitof>;

// Two-operand compares.
def : GINodeEquiv<G_FCMEQ, AArch64fcmeq>;
def : GINodeEquiv<G_FCMGE, AArch64fcmge>;
def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;

// Single-operand compares.
def : GINodeEquiv<G_FCMEQZ, AArch64fcmeqz>;
def : GINodeEquiv<G_FCMGEZ, AArch64fcmgez>;
def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;

def : GINodeEquiv<G_BSP, AArch64bsp>;

def : GINodeEquiv<G_UMULL, AArch64umull>;
def : GINodeEquiv<G_SMULL, AArch64smull>;

def : GINodeEquiv<G_SADDLP, AArch64saddlp_n>;
def : GINodeEquiv<G_UADDLP, AArch64uaddlp_n>;

def : GINodeEquiv<G_SADDLV, AArch64saddlv>;
def : GINodeEquiv<G_UADDLV, AArch64uaddlv>;

def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;

// Target-independent G_EXTRACT_VECTOR_ELT is paired with vector_extract here.
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;

def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
321
// These are patterns that we only use for GlobalISel via the importer.

// fadd of element 0 and element 1 of the same v2f32 register selects a single
// FADDPv2i32p on that register.
def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
                     (vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
          (f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>;
326
// Two-element int <-> fp conversions where source and destination element
// widths differ (32 <-> 64 bits). Only available with NEON.
let Predicates = [HasNEON] in {
  // v2i32 -> v2f64: lengthen with SSHLL/USHLL by 0, then convert at 64 bits.
  def : Pat<(v2f64 (sint_to_fp v2i32:$src)),
            (SCVTFv2f64 (SSHLLv2i32_shift V64:$src, 0))>;
  def : Pat<(v2f64 (uint_to_fp v2i32:$src)),
            (UCVTFv2f64 (USHLLv2i32_shift V64:$src, 0))>;

  // v2i64 -> v2f32: convert at 64 bits, then narrow the result with FCVTN.
  def : Pat<(v2f32 (sint_to_fp v2i64:$src)),
            (FCVTNv2i32 (SCVTFv2f64 V128:$src))>;
  def : Pat<(v2f32 (uint_to_fp v2i64:$src)),
            (FCVTNv2i32 (UCVTFv2f64 V128:$src))>;

  // v2f32 -> v2i64: lengthen with FCVTL, then convert at 64 bits.
  def : Pat<(v2i64 (fp_to_sint v2f32:$src)),
            (FCVTZSv2f64 (FCVTLv2i32 V64:$src))>;
  def : Pat<(v2i64 (fp_to_uint v2f32:$src)),
            (FCVTZUv2f64 (FCVTLv2i32 V64:$src))>;

  // v2f64 -> v2i32: convert at 64 bits, then narrow the result with XTN.
  def : Pat<(v2i32 (fp_to_sint v2f64:$src)),
            (XTNv2i32 (FCVTZSv2f64 V128:$src))>;
  def : Pat<(v2i32 (fp_to_uint v2f64:$src)),
            (XTNv2i32 (FCVTZUv2f64 V128:$src))>;
}
347
// Under the HasNoLSE predicate, an atomic compare-and-swap of each width
// selects the matching CMP_SWAP_<bits> instruction. The 8/16/32-bit forms take
// GPR32 data operands; the 64-bit form takes GPR64 data operands.
let Predicates = [HasNoLSE] in {
  def : Pat<(atomic_cmp_swap_i8 GPR64:$addr, GPR32:$desired, GPR32:$new),
            (CMP_SWAP_8 GPR64:$addr, GPR32:$desired, GPR32:$new)>;

  def : Pat<(atomic_cmp_swap_i16 GPR64:$addr, GPR32:$desired, GPR32:$new),
            (CMP_SWAP_16 GPR64:$addr, GPR32:$desired, GPR32:$new)>;

  def : Pat<(atomic_cmp_swap_i32 GPR64:$addr, GPR32:$desired, GPR32:$new),
            (CMP_SWAP_32 GPR64:$addr, GPR32:$desired, GPR32:$new)>;

  def : Pat<(atomic_cmp_swap_i64 GPR64:$addr, GPR64:$desired, GPR64:$new),
            (CMP_SWAP_64 GPR64:$addr, GPR64:$desired, GPR64:$new)>;
}
361
// The int_aarch64_stlxp / int_aarch64_stxp intrinsics, with two GPR64 data
// operands ($lo, $hi) and a GPR64 address, select STLXPX / STXPX with the
// operands in the same order.
def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
          (STLXPX            GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
def : Pat<(int_aarch64_stxp  GPR64:$lo, GPR64:$hi, GPR64:$addr),
          (STXPX             GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
366
// A Pat that also derives from GISelFlags and sets GIIgnoreCopies to 1.
//   pattern - source DAG to match (forwarded to Pat).
//   result  - replacement DAG (forwarded to Pat).
class PatIgnoreCopies<dag pattern, dag result>
    : Pat<pattern, result>, GISelFlags {
  let GIIgnoreCopies = 1;
}
369
// Selection patterns for a signed across-lanes intrinsic on byte, halfword and
// word element vectors.
//   baseOpc - name prefix of the instruction family; the concrete instruction
//             for each vector type is looked up as baseOpc # "v8i8v",
//             "v16i8v", "v4i16v", "v8i16v" and "v4i32v".
//   intOp   - the intrinsic being matched.
// For i8 and i16 elements two patterns are emitted per vector type:
//   * (sext (intOp ...)) to i32: the instruction result is inserted into an
//     IMPLICIT_DEF v16i8 at bsub/hsub and lane 0 is read back with
//     SMOVvi8to32 / SMOVvi16to32 (copies are ignored while matching);
//   * the bare i8/i16 result: the instruction alone.
// For v4i32 the result is inserted at ssub and extracted again from ssub.
multiclass SIMDAcrossLanesSignedIntrinsicBHS<string baseOpc, Intrinsic intOp> {
  // Resolve the per-type instructions once.
  defvar Inst8x8  = !cast<Instruction>(baseOpc # "v8i8v");
  defvar Inst8x16 = !cast<Instruction>(baseOpc # "v16i8v");
  defvar Inst16x4 = !cast<Instruction>(baseOpc # "v4i16v");
  defvar Inst16x8 = !cast<Instruction>(baseOpc # "v8i16v");
  defvar Inst32x4 = !cast<Instruction>(baseOpc # "v4i32v");

  // v8i8
  def : PatIgnoreCopies<(i32 (sext (i8 (intOp (v8i8 V64:$Rn))))),
                        (i32 (SMOVvi8to32
                               (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                              (Inst8x8 V64:$Rn), bsub),
                               (i64 0)))>;
  def : Pat<(i8 (intOp (v8i8 V64:$Rn))),
            (Inst8x8 V64:$Rn)>;

  // v16i8
  def : PatIgnoreCopies<(i32 (sext (i8 (intOp (v16i8 V128:$Rn))))),
                        (i32 (SMOVvi8to32
                               (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                              (Inst8x16 V128:$Rn), bsub),
                               (i64 0)))>;
  def : Pat<(i8 (intOp (v16i8 V128:$Rn))),
            (Inst8x16 V128:$Rn)>;

  // v4i16
  def : PatIgnoreCopies<(i32 (sext (i16 (intOp (v4i16 V64:$Rn))))),
                        (i32 (SMOVvi16to32
                               (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                              (Inst16x4 V64:$Rn), hsub),
                               (i64 0)))>;
  def : Pat<(i16 (intOp (v4i16 V64:$Rn))),
            (Inst16x4 V64:$Rn)>;

  // v8i16
  def : PatIgnoreCopies<(i32 (sext (i16 (intOp (v8i16 V128:$Rn))))),
                        (i32 (SMOVvi16to32
                               (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                              (Inst16x8 V128:$Rn), hsub),
                               (i64 0)))>;
  def : Pat<(i16 (intOp (v8i16 V128:$Rn))),
            (Inst16x8 V128:$Rn)>;

  // v4i32: already i32, no extension needed.
  def : PatIgnoreCopies<(i32 (intOp (v4i32 V128:$Rn))),
                        (i32 (EXTRACT_SUBREG
                               (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                              (Inst32x4 V128:$Rn), ssub),
                               ssub))>;
}
409
// Selection patterns for an unsigned across-lanes intrinsic on byte, halfword
// and word element vectors.
//   baseOpc - name prefix of the instruction family; the concrete instruction
//             for each vector type is looked up as baseOpc # "v8i8v",
//             "v16i8v", "v4i16v", "v8i16v" and "v4i32v".
//   intOp   - the intrinsic being matched.
// For i8 and i16 elements two patterns are emitted per vector type:
//   * (zext (intOp ...)) to i32: the instruction result is inserted into an
//     IMPLICIT_DEF v16i8 at bsub/hsub, the ssub sub-register is extracted and
//     copied to GPR32 (copies are ignored while matching);
//   * the bare i8/i16 result: the instruction alone.
// For v4i32 the result is inserted at ssub and extracted again from ssub.
// NOTE(review): the zext forms read the whole ssub without masking, so they
// presumably rely on the instruction clearing the bits above the element;
// confirm against the instruction definitions.
multiclass SIMDAcrossLanesUnsignedIntrinsicBHS<string baseOpc,
                                               Intrinsic intOp> {
  // Resolve the per-type instructions once.
  defvar Inst8x8  = !cast<Instruction>(baseOpc # "v8i8v");
  defvar Inst8x16 = !cast<Instruction>(baseOpc # "v16i8v");
  defvar Inst16x4 = !cast<Instruction>(baseOpc # "v4i16v");
  defvar Inst16x8 = !cast<Instruction>(baseOpc # "v8i16v");
  defvar Inst32x4 = !cast<Instruction>(baseOpc # "v4i32v");

  // v8i8
  def : PatIgnoreCopies<(i32 (zext (i8 (intOp (v8i8 V64:$Rn))))),
                        (COPY_TO_REGCLASS
                          (i32 (EXTRACT_SUBREG
                                 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                                (Inst8x8 V64:$Rn), bsub),
                                 ssub)),
                          GPR32)>;
  def : Pat<(i8 (intOp (v8i8 V64:$Rn))),
            (Inst8x8 V64:$Rn)>;

  // v16i8
  def : PatIgnoreCopies<(i32 (zext (i8 (intOp (v16i8 V128:$Rn))))),
                        (COPY_TO_REGCLASS
                          (i32 (EXTRACT_SUBREG
                                 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                                (Inst8x16 V128:$Rn), bsub),
                                 ssub)),
                          GPR32)>;
  def : Pat<(i8 (intOp (v16i8 V128:$Rn))),
            (Inst8x16 V128:$Rn)>;

  // v4i16
  def : PatIgnoreCopies<(i32 (zext (i16 (intOp (v4i16 V64:$Rn))))),
                        (COPY_TO_REGCLASS
                          (i32 (EXTRACT_SUBREG
                                 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                                (Inst16x4 V64:$Rn), hsub),
                                 ssub)),
                          GPR32)>;
  def : Pat<(i16 (intOp (v4i16 V64:$Rn))),
            (Inst16x4 V64:$Rn)>;

  // v8i16
  def : PatIgnoreCopies<(i32 (zext (i16 (intOp (v8i16 V128:$Rn))))),
                        (COPY_TO_REGCLASS
                          (i32 (EXTRACT_SUBREG
                                 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                                (Inst16x8 V128:$Rn), hsub),
                                 ssub)),
                          GPR32)>;
  def : Pat<(i16 (intOp (v8i16 V128:$Rn))),
            (Inst16x8 V128:$Rn)>;

  // v4i32: already i32, no extension needed.
  def : PatIgnoreCopies<(i32 (intOp (v4i32 V128:$Rn))),
                        (i32 (EXTRACT_SUBREG
                               (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                              (Inst32x4 V128:$Rn), ssub),
                               ssub))>;
}
459
460
// --- int_aarch64_neon_saddv ------------------------------------------------
// B/H/S element vectors go through the ADDV family.
defm : SIMDAcrossLanesSignedIntrinsicBHS<"ADDV", int_aarch64_neon_saddv>;

// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
                 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                (ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
                 ssub))>;

// v2i64: ADDPv2i64p on the source; the result is inserted at dsub and
// extracted again from dsub.
def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
          (i64 (EXTRACT_SUBREG
                 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                (ADDPv2i64p V128:$Rn), dsub),
                 dsub))>;

// --- int_aarch64_neon_uaddv ------------------------------------------------
// Same instruction selection as the signed intrinsic, with the unsigned
// (zext) multiclass for the B/H/S element vectors.
defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"ADDV", int_aarch64_neon_uaddv>;

def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
                 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                (ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
                 ssub))>;

def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
          (i64 (EXTRACT_SUBREG
                 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                (ADDPv2i64p V128:$Rn), dsub),
                 dsub))>;
486
// Across-lanes min/max intrinsics. B/H/S element vectors use the across-lanes
// multiclasses. The v2i32 case is handled separately with the pairwise
// instruction applied to the source register twice, taking the ssub
// sub-register of the result.

// --- int_aarch64_neon_smaxv ------------------------------------------------
defm : SIMDAcrossLanesSignedIntrinsicBHS<"SMAXV", int_aarch64_neon_smaxv>;
def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
                 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                (SMAXPv2i32 V64:$Rn, V64:$Rn), dsub),
                 ssub))>;

// --- int_aarch64_neon_sminv ------------------------------------------------
defm : SIMDAcrossLanesSignedIntrinsicBHS<"SMINV", int_aarch64_neon_sminv>;
def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
                 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                (SMINPv2i32 V64:$Rn, V64:$Rn), dsub),
                 ssub))>;

// --- int_aarch64_neon_umaxv ------------------------------------------------
defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"UMAXV", int_aarch64_neon_umaxv>;
def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
                 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                (UMAXPv2i32 V64:$Rn, V64:$Rn), dsub),
                 ssub))>;

// --- int_aarch64_neon_uminv ------------------------------------------------
defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"UMINV", int_aarch64_neon_uminv>;
def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
                 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                (UMINPv2i32 V64:$Rn, V64:$Rn), dsub),
                 ssub))>;
514
// Match stores from lane 0 to the appropriate subreg's store.
//   UIAddrMode - complex pattern producing (base, offset) for the address.
//   storeop    - store operator being matched.
//   VTy / STy  - vector type of the source / scalar type being stored.
//   SubRegIdx  - sub-register of the 64-bit vector register that is stored.
//   IndexType  - operand class of the offset.
//   STR        - store instruction to select.
multiclass VecStoreLane64_0Pat<ComplexPattern UIAddrMode,
                               SDPatternOperator storeop,
                               ValueType VTy, ValueType STy,
                               SubRegIndex SubRegIdx, Operand IndexType,
                               Instruction STR> {
  def : Pat<
      // Store of element 0 of a 64-bit vector register ...
      (storeop (STy (vector_extract (VTy VecListOne64:$Vt), (i64 0))),
               (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
      // ... becomes a scalar store of the matching sub-register.
      (STR (EXTRACT_SUBREG VecListOne64:$Vt, SubRegIdx),
           GPR64sp:$Rn, IndexType:$offset)>;
}
// Lane-0 store pattern for the unscaled addressing form: the address is
// matched with am_unscaled64 and the offset operand is simm9.
//   StoreOp    - store operator being matched.
//   VTy / STy  - vector type of the source / scalar type being stored.
//   SubRegIdx  - sub-register of the 64-bit vector register that is stored.
//   STR        - store instruction to select.
multiclass VecStoreULane64_0Pat<SDPatternOperator StoreOp,
                                ValueType VTy, ValueType STy,
                                SubRegIndex SubRegIdx, Instruction STR> {
  def : Pat<(StoreOp (STy (vector_extract (VTy VecListOne64:$Vt), (i64 0))),
                     (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STR (EXTRACT_SUBREG VecListOne64:$Vt, SubRegIdx),
                 GPR64sp:$Rn, simm9:$offset)>;
}
530
// Lane-0 store patterns for the register-offset addressing forms. Two
// patterns are emitted: one whose offset register is a GPR32 (matched with
// ro.Wpat / ro.Wext, selecting STRW) and one whose offset register is a GPR64
// (matched with ro.Xpat / ro.Xext, selecting STRX).
//   ro          - ROAddrMode bundle supplying the Wpat/Wext/Xpat/Xext members.
//   storeop     - store operator being matched.
//   VecTy / STy - vector type of the source / scalar type being stored.
//   SubRegIdx   - sub-register of the 64-bit vector register that is stored.
multiclass VecROStoreLane64_0Pat<ROAddrMode ro, SDPatternOperator storeop,
                                 ValueType VecTy, ValueType STy,
                                 SubRegIndex SubRegIdx,
                                 Instruction STRW, Instruction STRX> {
  // 32-bit offset register.
  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne64:$Vt), (i64 0))),
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG VecListOne64:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  // 64-bit offset register.
  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne64:$Vt), (i64 0))),
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG VecListOne64:$Vt, SubRegIdx),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
546
// Store-lane instantiations, all emitted with AddedComplexity = 19.
// NOTE(review): the reason for the specific value 19 is not visible in this
// file; confirm against the competing store patterns before changing it.
let AddedComplexity = 19 in {
  // Byte lanes use ST1i8 for both the 128-bit and the 64-bit vector type.
  def : St1Lane128Pat<store, VectorIndexB, v16i8, i8, ST1i8>;
  def : St1Lane64Pat <store, VectorIndexB, v8i8,  i8, ST1i8>;

  // Lane 0 of v4i16 / v2i32, scaled unsigned-immediate addressing.
  defm : VecStoreLane64_0Pat<am_indexed16, store, v4i16, i16, hsub,
                             uimm12s2, STRHui>;
  defm : VecStoreLane64_0Pat<am_indexed32, store, v2i32, i32, ssub,
                             uimm12s4, STRSui>;

  // Lane 0 of v4i16 / v2i32, unscaled addressing.
  defm : VecStoreULane64_0Pat<store, v4i16, i16, hsub, STURHi>;
  defm : VecStoreULane64_0Pat<store, v2i32, i32, ssub, STURSi>;

  // Lane 0 of v4i16 / v2i32, register-offset addressing.
  defm : VecROStoreLane64_0Pat<ro16, store, v4i16, i16, hsub,
                               STRHroW, STRHroX>;
  defm : VecROStoreLane64_0Pat<ro32, store, v2i32, i32, ssub,
                               STRSroW, STRSroX>;
}
559
// An AArch64dup of a scalar load selects the LD1R instruction for the
// destination vector type, addressed by the GPR64sp base register.
// NOTE(review): only the v8i8 pattern wraps the address in am_indexed8; every
// other pattern here matches the bare GPR64sp operand. Confirm whether that
// difference is intentional.

// 8-bit elements.
def : Pat<(v8i8 (AArch64dup (i8 (load (am_indexed8 GPR64sp:$Rn))))),
          (LD1Rv8b GPR64sp:$Rn)>;
def : Pat<(v16i8 (AArch64dup (i8 (load GPR64sp:$Rn)))),
          (LD1Rv16b GPR64sp:$Rn)>;

// 16-bit elements.
def : Pat<(v4i16 (AArch64dup (i16 (load GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8i16 (AArch64dup (i16 (load GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;

// 32-bit elements.
def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;

// 64-bit elements.
def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;
576