1//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the ARM NEON instruction set.
10//
11//===----------------------------------------------------------------------===//
12
13
14//===----------------------------------------------------------------------===//
15// NEON-specific Operands.
16//===----------------------------------------------------------------------===//
17def nModImm : Operand<i32> {
18  let PrintMethod = "printVMOVModImmOperand";
19}
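// Note (for reference): these immediate operands carry the NEON "modified
// immediate" encoding (cmode/op plus an 8-bit payload) rather than the
// literal value; printVMOVModImmOperand decodes it back to the written form,
// e.g. "vmov.i32 d0, #0x00ff0000".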
20
21def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
22def nImmSplatI8 : Operand<i32> {
23  let PrintMethod = "printVMOVModImmOperand";
24  let ParserMatchClass = nImmSplatI8AsmOperand;
25}
26def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
27def nImmSplatI16 : Operand<i32> {
28  let PrintMethod = "printVMOVModImmOperand";
29  let ParserMatchClass = nImmSplatI16AsmOperand;
30}
31def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
32def nImmSplatI32 : Operand<i32> {
33  let PrintMethod = "printVMOVModImmOperand";
34  let ParserMatchClass = nImmSplatI32AsmOperand;
35}
36def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
37def nImmSplatNotI16 : Operand<i32> {
38  let ParserMatchClass = nImmSplatNotI16AsmOperand;
39}
40def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
41def nImmSplatNotI32 : Operand<i32> {
42  let ParserMatchClass = nImmSplatNotI32AsmOperand;
43}
44def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
45def nImmVMOVI32 : Operand<i32> {
46  let PrintMethod = "printVMOVModImmOperand";
47  let ParserMatchClass = nImmVMOVI32AsmOperand;
48}
49
50class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
51  : AsmOperandClass {
52  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
53  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
54  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
55}
56
57class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
58  : AsmOperandClass {
59  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
60  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
61  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
62}
63
64class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
65  let PrintMethod = "printVMOVModImmOperand";
66  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
67}
68
69class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
70  let PrintMethod = "printVMOVModImmOperand";
71  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
72}
73
74def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
75def nImmVMOVI32Neg : Operand<i32> {
76  let PrintMethod = "printVMOVModImmOperand";
77  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
78}
79def nImmVMOVF32 : Operand<i32> {
80  let PrintMethod = "printFPImmOperand";
81  let ParserMatchClass = FPImmOperand;
82}
83def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
84def nImmSplatI64 : Operand<i32> {
85  let PrintMethod = "printVMOVModImmOperand";
86  let ParserMatchClass = nImmSplatI64AsmOperand;
87}
88
89def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
90def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
91def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
92def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
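// The ImmLeaf predicates below bound the lane index by the number of elements
// of each size in a 64-bit D register: 8 bytes, 4 half-words, 2 words, or
// 1 double-word (e.g. "vmov.8 d0[7], r1" uses the largest valid byte index).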
93def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
94  return ((uint64_t)Imm) < 8;
95}]> {
96  let ParserMatchClass = VectorIndex8Operand;
97  let PrintMethod = "printVectorIndex";
98  let MIOperandInfo = (ops i32imm);
99}
100def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
101  return ((uint64_t)Imm) < 4;
102}]> {
103  let ParserMatchClass = VectorIndex16Operand;
104  let PrintMethod = "printVectorIndex";
105  let MIOperandInfo = (ops i32imm);
106}
107def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
108  return ((uint64_t)Imm) < 2;
109}]> {
110  let ParserMatchClass = VectorIndex32Operand;
111  let PrintMethod = "printVectorIndex";
112  let MIOperandInfo = (ops i32imm);
113}
114def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
115  return ((uint64_t)Imm) < 1;
116}]> {
117  let ParserMatchClass = VectorIndex64Operand;
118  let PrintMethod = "printVectorIndex";
119  let MIOperandInfo = (ops i32imm);
120}
121
122// Register list of one D register.
123def VecListOneDAsmOperand : AsmOperandClass {
124  let Name = "VecListOneD";
125  let ParserMethod = "parseVectorList";
126  let RenderMethod = "addVecListOperands";
127}
128def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
129  let ParserMatchClass = VecListOneDAsmOperand;
130}
131// Register list of two sequential D registers.
132def VecListDPairAsmOperand : AsmOperandClass {
133  let Name = "VecListDPair";
134  let ParserMethod = "parseVectorList";
135  let RenderMethod = "addVecListOperands";
136}
137def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
138  let ParserMatchClass = VecListDPairAsmOperand;
139}
140// Register list of three sequential D registers.
141def VecListThreeDAsmOperand : AsmOperandClass {
142  let Name = "VecListThreeD";
143  let ParserMethod = "parseVectorList";
144  let RenderMethod = "addVecListOperands";
145}
146def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
147  let ParserMatchClass = VecListThreeDAsmOperand;
148}
149// Register list of four sequential D registers.
150def VecListFourDAsmOperand : AsmOperandClass {
151  let Name = "VecListFourD";
152  let ParserMethod = "parseVectorList";
153  let RenderMethod = "addVecListOperands";
154}
155def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
156  let ParserMatchClass = VecListFourDAsmOperand;
157}
158// Register list of two D registers spaced by 2 (two sequential Q registers).
159def VecListDPairSpacedAsmOperand : AsmOperandClass {
160  let Name = "VecListDPairSpaced";
161  let ParserMethod = "parseVectorList";
162  let RenderMethod = "addVecListOperands";
163}
164def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
165  let ParserMatchClass = VecListDPairSpacedAsmOperand;
166}
167// Register list of three D registers spaced by 2 (three Q registers).
168def VecListThreeQAsmOperand : AsmOperandClass {
169  let Name = "VecListThreeQ";
170  let ParserMethod = "parseVectorList";
171  let RenderMethod = "addVecListOperands";
172}
173def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
174  let ParserMatchClass = VecListThreeQAsmOperand;
175}
176// Register list of four D registers spaced by 2 (four Q registers).
177def VecListFourQAsmOperand : AsmOperandClass {
178  let Name = "VecListFourQ";
179  let ParserMethod = "parseVectorList";
180  let RenderMethod = "addVecListOperands";
181}
182def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
183  let ParserMatchClass = VecListFourQAsmOperand;
184}
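// For reference: a plain pair such as {d0, d1} covers one Q register, while a
// "spaced" list such as {d0, d2} names every other D register; e.g.
// "vld2.16 {d0, d2}, [r0]" parses with VecListDPairSpaced and
// "vld3.8 {d0, d2, d4}, [r0]" with VecListThreeQ.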
185
186// Register list of one D register, with "all lanes" subscripting.
187def VecListOneDAllLanesAsmOperand : AsmOperandClass {
188  let Name = "VecListOneDAllLanes";
189  let ParserMethod = "parseVectorList";
190  let RenderMethod = "addVecListOperands";
191}
192def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
193  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
194}
195// Register list of two D registers, with "all lanes" subscripting.
196def VecListDPairAllLanesAsmOperand : AsmOperandClass {
197  let Name = "VecListDPairAllLanes";
198  let ParserMethod = "parseVectorList";
199  let RenderMethod = "addVecListOperands";
200}
201def VecListDPairAllLanes : RegisterOperand<DPair,
202                                           "printVectorListTwoAllLanes"> {
203  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
204}
205// Register list of two D registers spaced by 2, with "all lanes" subscripting.
206def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
207  let Name = "VecListDPairSpacedAllLanes";
208  let ParserMethod = "parseVectorList";
209  let RenderMethod = "addVecListOperands";
210}
211def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
212                                         "printVectorListTwoSpacedAllLanes"> {
213  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
214}
215// Register list of three D registers, with "all lanes" subscripting.
216def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
217  let Name = "VecListThreeDAllLanes";
218  let ParserMethod = "parseVectorList";
219  let RenderMethod = "addVecListOperands";
220}
221def VecListThreeDAllLanes : RegisterOperand<DPR,
222                                            "printVectorListThreeAllLanes"> {
223  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
224}
225// Register list of three D registers spaced by 2 (three sequential Q regs).
226def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
227  let Name = "VecListThreeQAllLanes";
228  let ParserMethod = "parseVectorList";
229  let RenderMethod = "addVecListOperands";
230}
231def VecListThreeQAllLanes : RegisterOperand<DPR,
232                                         "printVectorListThreeSpacedAllLanes"> {
233  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
234}
235// Register list of four D registers, with "all lanes" subscripting.
236def VecListFourDAllLanesAsmOperand : AsmOperandClass {
237  let Name = "VecListFourDAllLanes";
238  let ParserMethod = "parseVectorList";
239  let RenderMethod = "addVecListOperands";
240}
241def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
242  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
243}
244// Register list of four D registers spaced by 2 (four sequential Q regs).
245def VecListFourQAllLanesAsmOperand : AsmOperandClass {
246  let Name = "VecListFourQAllLanes";
247  let ParserMethod = "parseVectorList";
248  let RenderMethod = "addVecListOperands";
249}
250def VecListFourQAllLanes : RegisterOperand<DPR,
251                                         "printVectorListFourSpacedAllLanes"> {
252  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
253}
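// For reference: "all lanes" lists use the [] subscript, as in
// "vld2.8 {d0[], d1[]}, [r0]", which loads one 2-element structure and
// replicates it to every lane of the destination registers (the VLD*DUP
// forms defined later in this file).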
254
255
256// Register list of one D register, with byte lane subscripting.
257def VecListOneDByteIndexAsmOperand : AsmOperandClass {
258  let Name = "VecListOneDByteIndexed";
259  let ParserMethod = "parseVectorList";
260  let RenderMethod = "addVecListIndexedOperands";
261}
262def VecListOneDByteIndexed : Operand<i32> {
263  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
264  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
265}
266// ...with half-word lane subscripting.
267def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
268  let Name = "VecListOneDHWordIndexed";
269  let ParserMethod = "parseVectorList";
270  let RenderMethod = "addVecListIndexedOperands";
271}
272def VecListOneDHWordIndexed : Operand<i32> {
273  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
274  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
275}
276// ...with word lane subscripting.
277def VecListOneDWordIndexAsmOperand : AsmOperandClass {
278  let Name = "VecListOneDWordIndexed";
279  let ParserMethod = "parseVectorList";
280  let RenderMethod = "addVecListIndexedOperands";
281}
282def VecListOneDWordIndexed : Operand<i32> {
283  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
284  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
285}
286
287// Register list of two D registers with byte lane subscripting.
288def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
289  let Name = "VecListTwoDByteIndexed";
290  let ParserMethod = "parseVectorList";
291  let RenderMethod = "addVecListIndexedOperands";
292}
293def VecListTwoDByteIndexed : Operand<i32> {
294  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
295  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
296}
297// ...with half-word lane subscripting.
298def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
299  let Name = "VecListTwoDHWordIndexed";
300  let ParserMethod = "parseVectorList";
301  let RenderMethod = "addVecListIndexedOperands";
302}
303def VecListTwoDHWordIndexed : Operand<i32> {
304  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
305  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
306}
307// ...with word lane subscripting.
308def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
309  let Name = "VecListTwoDWordIndexed";
310  let ParserMethod = "parseVectorList";
311  let RenderMethod = "addVecListIndexedOperands";
312}
313def VecListTwoDWordIndexed : Operand<i32> {
314  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
315  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
316}
317// Register list of two Q registers with half-word lane subscripting.
318def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
319  let Name = "VecListTwoQHWordIndexed";
320  let ParserMethod = "parseVectorList";
321  let RenderMethod = "addVecListIndexedOperands";
322}
323def VecListTwoQHWordIndexed : Operand<i32> {
324  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
325  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
326}
327// ...with word lane subscripting.
328def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
329  let Name = "VecListTwoQWordIndexed";
330  let ParserMethod = "parseVectorList";
331  let RenderMethod = "addVecListIndexedOperands";
332}
333def VecListTwoQWordIndexed : Operand<i32> {
334  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
335  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
336}
337
338
339// Register list of three D registers with byte lane subscripting.
340def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
341  let Name = "VecListThreeDByteIndexed";
342  let ParserMethod = "parseVectorList";
343  let RenderMethod = "addVecListIndexedOperands";
344}
345def VecListThreeDByteIndexed : Operand<i32> {
346  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
347  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
348}
349// ...with half-word lane subscripting.
350def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
351  let Name = "VecListThreeDHWordIndexed";
352  let ParserMethod = "parseVectorList";
353  let RenderMethod = "addVecListIndexedOperands";
354}
355def VecListThreeDHWordIndexed : Operand<i32> {
356  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
357  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
358}
359// ...with word lane subscripting.
360def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
361  let Name = "VecListThreeDWordIndexed";
362  let ParserMethod = "parseVectorList";
363  let RenderMethod = "addVecListIndexedOperands";
364}
365def VecListThreeDWordIndexed : Operand<i32> {
366  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
367  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
368}
369// Register list of three Q registers with half-word lane subscripting.
370def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
371  let Name = "VecListThreeQHWordIndexed";
372  let ParserMethod = "parseVectorList";
373  let RenderMethod = "addVecListIndexedOperands";
374}
375def VecListThreeQHWordIndexed : Operand<i32> {
376  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
377  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
378}
379// ...with word lane subscripting.
380def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
381  let Name = "VecListThreeQWordIndexed";
382  let ParserMethod = "parseVectorList";
383  let RenderMethod = "addVecListIndexedOperands";
384}
385def VecListThreeQWordIndexed : Operand<i32> {
386  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
387  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
388}
389
390// Register list of four D registers with byte lane subscripting.
391def VecListFourDByteIndexAsmOperand : AsmOperandClass {
392  let Name = "VecListFourDByteIndexed";
393  let ParserMethod = "parseVectorList";
394  let RenderMethod = "addVecListIndexedOperands";
395}
396def VecListFourDByteIndexed : Operand<i32> {
397  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
398  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
399}
400// ...with half-word lane subscripting.
401def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
402  let Name = "VecListFourDHWordIndexed";
403  let ParserMethod = "parseVectorList";
404  let RenderMethod = "addVecListIndexedOperands";
405}
406def VecListFourDHWordIndexed : Operand<i32> {
407  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
408  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
409}
410// ...with word lane subscripting.
411def VecListFourDWordIndexAsmOperand : AsmOperandClass {
412  let Name = "VecListFourDWordIndexed";
413  let ParserMethod = "parseVectorList";
414  let RenderMethod = "addVecListIndexedOperands";
415}
416def VecListFourDWordIndexed : Operand<i32> {
417  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
418  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
419}
420// Register list of four Q registers with half-word lane subscripting.
421def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
422  let Name = "VecListFourQHWordIndexed";
423  let ParserMethod = "parseVectorList";
424  let RenderMethod = "addVecListIndexedOperands";
425}
426def VecListFourQHWordIndexed : Operand<i32> {
427  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
428  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
429}
430// ...with word lane subscripting.
431def VecListFourQWordIndexAsmOperand : AsmOperandClass {
432  let Name = "VecListFourQWordIndexed";
433  let ParserMethod = "parseVectorList";
434  let RenderMethod = "addVecListIndexedOperands";
435}
436def VecListFourQWordIndexed : Operand<i32> {
437  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
438  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
439}
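// For reference: lane-indexed lists look like "vld2.32 {d0[1], d1[1]}, [r0]".
// Each operand above flattens to a D register plus an immediate lane index
// (see the MIOperandInfo fields).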
440
441def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
442  return cast<LoadSDNode>(N)->getAlignment() >= 8;
443}]>;
444def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
445                                 (store node:$val, node:$ptr), [{
446  return cast<StoreSDNode>(N)->getAlignment() >= 8;
447}]>;
448def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
449  return cast<LoadSDNode>(N)->getAlignment() == 4;
450}]>;
451def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
452                                 (store node:$val, node:$ptr), [{
453  return cast<StoreSDNode>(N)->getAlignment() == 4;
454}]>;
455def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
456  return cast<LoadSDNode>(N)->getAlignment() == 2;
457}]>;
458def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
459                                 (store node:$val, node:$ptr), [{
460  return cast<StoreSDNode>(N)->getAlignment() == 2;
461}]>;
462def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
463  return cast<LoadSDNode>(N)->getAlignment() == 1;
464}]>;
465def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
466                             (store node:$val, node:$ptr), [{
467  return cast<StoreSDNode>(N)->getAlignment() == 1;
468}]>;
469def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
470  return cast<LoadSDNode>(N)->getAlignment() < 4;
471}]>;
472def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
473                                    (store node:$val, node:$ptr), [{
474  return cast<StoreSDNode>(N)->getAlignment() < 4;
475}]>;
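// These fragments gate instruction selection on the alignment of the memory
// access; for example, VLDMQIA/VSTMQIA below only match word-aligned (== 4)
// v2f64 accesses, while the dword_aligned* fragments (>= 8) are used further
// down to prefer the NEON VLD1/VST1 forms.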
476
477//===----------------------------------------------------------------------===//
478// NEON-specific DAG Nodes.
479//===----------------------------------------------------------------------===//
480
481def SDTARMVTST    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
482def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVTST>;
483
484// Types for vector shift by immediates.  The "SHX" version is for long and
485// narrow operations where the source and destination vectors have different
486// types.  The "SHINS" version is for shift and insert operations.
def SDTARMVSHIMM     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                            SDTCisVT<2, i32>]>;
487def SDTARMVSHXIMM    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
488                                            SDTCisVT<2, i32>]>;
489def SDTARMVSHINSIMM  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
490                                            SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
491
492def NEONvshrnImm     : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;
493
494def NEONvrshrsImm    : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
495def NEONvrshruImm    : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
496def NEONvrshrnImm    : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;
497
498def NEONvqshlsImm    : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
499def NEONvqshluImm    : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
500def NEONvqshlsuImm   : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
501def NEONvqshrnsImm   : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
502def NEONvqshrnuImm   : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
503def NEONvqshrnsuImm  : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;
504
505def NEONvqrshrnsImm  : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
506def NEONvqrshrnuImm  : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
507def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
508
509def NEONvsliImm      : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
510def NEONvsriImm      : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
511
512def NEONvbsp      : SDNode<"ARMISD::VBSP",
513                           SDTypeProfile<1, 3, [SDTCisVec<0>,
514                                                SDTCisSameAs<0, 1>,
515                                                SDTCisSameAs<0, 2>,
516                                                SDTCisSameAs<0, 3>]>>;
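// VBSP ("bitwise select pseudo") has no fixed tie between its sources; it is
// turned into VBSL, VBIT, or VBIF once register allocation decides which
// source register is reused as the destination.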
517
518def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
519                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
520def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
521
522def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
523                                         SDTCisSameAs<0, 2>,
524                                         SDTCisSameAs<0, 3>]>;
525def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
526def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
527def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
528
529def SDTARMVTBL1   : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
530                                         SDTCisVT<2, v8i8>]>;
531def SDTARMVTBL2   : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
532                                         SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
533def NEONvtbl1     : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
534def NEONvtbl2     : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
535
536
537//===----------------------------------------------------------------------===//
538// NEON load / store instructions
539//===----------------------------------------------------------------------===//
540
541// Use VLDM to load a Q register as a D register pair.
542// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
543def VLDMQIA
544  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
545                    IIC_fpLoad_m, "",
546                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;
547
548// Use VSTM to store a Q register as a D register pair.
549// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
550def VSTMQIA
551  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
552                    IIC_fpStore_m, "",
553                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
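// Roughly, after register allocation a VLDMQIA of the pair d0/d1 from r0
// becomes "vldmia r0, {d0, d1}", and the matching VSTMQIA becomes
// "vstmia r0, {d0, d1}".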
554
555// Classes for VLD* pseudo-instructions with multi-register operands.
556// These are expanded to real instructions after register allocation.
557class VLDQPseudo<InstrItinClass itin>
558  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
559class VLDQWBPseudo<InstrItinClass itin>
560  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
561                (ins addrmode6:$addr, am6offset:$offset), itin,
562                "$addr.addr = $wb">;
563class VLDQWBfixedPseudo<InstrItinClass itin>
564  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
565                (ins addrmode6:$addr), itin,
566                "$addr.addr = $wb">;
567class VLDQWBregisterPseudo<InstrItinClass itin>
568  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
569                (ins addrmode6:$addr, rGPR:$offset), itin,
570                "$addr.addr = $wb">;
571
572class VLDQQPseudo<InstrItinClass itin>
573  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
574class VLDQQWBPseudo<InstrItinClass itin>
575  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
576                (ins addrmode6:$addr, am6offset:$offset), itin,
577                "$addr.addr = $wb">;
578class VLDQQWBfixedPseudo<InstrItinClass itin>
579  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
580                (ins addrmode6:$addr), itin,
581                "$addr.addr = $wb">;
582class VLDQQWBregisterPseudo<InstrItinClass itin>
583  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
584                (ins addrmode6:$addr, rGPR:$offset), itin,
585                "$addr.addr = $wb">;
586
587
588class VLDQQQQPseudo<InstrItinClass itin>
589  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
590                "$src = $dst">;
591class VLDQQQQWBPseudo<InstrItinClass itin>
592  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
593                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
594                "$addr.addr = $wb, $src = $dst">;
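// In the constraint strings above, "$addr.addr = $wb" constrains the $wb
// result to the same register as the base-register sub-operand of $addr (the
// updated base is written back in place), and "$src = $dst" ties the input
// tuple to the output so only the loaded parts change.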
595
596let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
597
598//   VLD1     : Vector Load (multiple single elements)
599class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
600  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
601          (ins AddrMode:$Rn), IIC_VLD1,
602          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
603  let Rm = 0b1111;
604  let Inst{4} = Rn{4};
605  let DecoderMethod = "DecodeVLDST1Instruction";
606}
607class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
608  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
609          (ins AddrMode:$Rn), IIC_VLD1x2,
610          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
611  let Rm = 0b1111;
612  let Inst{5-4} = Rn{5-4};
613  let DecoderMethod = "DecodeVLDST1Instruction";
614}
615
616def  VLD1d8   : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
617def  VLD1d16  : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
618def  VLD1d32  : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
619def  VLD1d64  : VLD1D<{1,1,0,?}, "64", addrmode6align64>;
620
621def  VLD1q8   : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
622def  VLD1q16  : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
623def  VLD1q32  : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
624def  VLD1q64  : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
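// For reference: these match assembly such as "vld1.8 {d0}, [r0:64]" (one D
// register) and "vld1.16 {d0, d1}, [r1]" (a D-register pair covering q0).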
625
626// ...with address register writeback:
627multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
628  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
629                     (ins AddrMode:$Rn), IIC_VLD1u,
630                     "vld1", Dt, "$Vd, $Rn!",
631                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
632    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
633    let Inst{4} = Rn{4};
634    let DecoderMethod = "DecodeVLDST1Instruction";
635  }
636  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
637                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
638                        "vld1", Dt, "$Vd, $Rn, $Rm",
639                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
640    let Inst{4} = Rn{4};
641    let DecoderMethod = "DecodeVLDST1Instruction";
642  }
643}
644multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
645  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
646                    (ins AddrMode:$Rn), IIC_VLD1x2u,
647                     "vld1", Dt, "$Vd, $Rn!",
648                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
649    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
650    let Inst{5-4} = Rn{5-4};
651    let DecoderMethod = "DecodeVLDST1Instruction";
652  }
653  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
654                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
655                        "vld1", Dt, "$Vd, $Rn, $Rm",
656                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
657    let Inst{5-4} = Rn{5-4};
658    let DecoderMethod = "DecodeVLDST1Instruction";
659  }
660}
661
662defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
663defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
664defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
665defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
666defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
667defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
668defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
669defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
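// The "_fixed" variants are post-increment by the transfer size,
// "vld1.8 {d0}, [r0]!", encoded with Rm = 0b1101; the "_register" variants
// add a register offset, "vld1.8 {d0}, [r0], r2". Rm = 0b1111 (above) means
// no writeback at all.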
670
671// ...with 3 registers
672class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
673  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
674          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
675          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
676  let Rm = 0b1111;
677  let Inst{4} = Rn{4};
678  let DecoderMethod = "DecodeVLDST1Instruction";
679}
680multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
681  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
682                    (ins AddrMode:$Rn), IIC_VLD1x2u,
683                     "vld1", Dt, "$Vd, $Rn!",
684                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
685    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
686    let Inst{4} = Rn{4};
687    let DecoderMethod = "DecodeVLDST1Instruction";
688  }
689  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
690                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
691                        "vld1", Dt, "$Vd, $Rn, $Rm",
692                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
693    let Inst{4} = Rn{4};
694    let DecoderMethod = "DecodeVLDST1Instruction";
695  }
696}
697
698def VLD1d8T      : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
699def VLD1d16T     : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
700def VLD1d32T     : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
701def VLD1d64T     : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;
702
703defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
704defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
705defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
706defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
707
708def VLD1d8TPseudo  : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
709def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
710def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
711def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
712def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
713def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
714
715def VLD1q8HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
716def VLD1q8LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
717def VLD1q16HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
718def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
719def VLD1q32HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
720def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
721def VLD1q64HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
722def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
723
724// ...with 4 registers
725class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
726  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
727          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
728          "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
729  let Rm = 0b1111;
730  let Inst{5-4} = Rn{5-4};
731  let DecoderMethod = "DecodeVLDST1Instruction";
732}
733multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
734  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
735                    (ins AddrMode:$Rn), IIC_VLD1x2u,
736                     "vld1", Dt, "$Vd, $Rn!",
737                     "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
738    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
739    let Inst{5-4} = Rn{5-4};
740    let DecoderMethod = "DecodeVLDST1Instruction";
741  }
742  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
743                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
744                        "vld1", Dt, "$Vd, $Rn, $Rm",
745                        "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
746    let Inst{5-4} = Rn{5-4};
747    let DecoderMethod = "DecodeVLDST1Instruction";
748  }
749}
750
751def VLD1d8Q      : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
752def VLD1d16Q     : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
753def VLD1d32Q     : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
754def VLD1d64Q     : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
755
756defm VLD1d8Qwb   : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
757defm VLD1d16Qwb  : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
758defm VLD1d32Qwb  : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
759defm VLD1d64Qwb  : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
760
761def VLD1d8QPseudo  : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
762def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
763def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
764def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
765def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
766def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
767
768def VLD1q8LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
769def VLD1q8HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
770def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
771def VLD1q16HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
772def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
773def VLD1q32HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
774def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
775def VLD1q64HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
776
777//   VLD2     : Vector Load (multiple 2-element structures)
778class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
779           InstrItinClass itin, Operand AddrMode>
780  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
781          (ins AddrMode:$Rn), itin,
782          "vld2", Dt, "$Vd, $Rn", "", []> {
783  let Rm = 0b1111;
784  let Inst{5-4} = Rn{5-4};
785  let DecoderMethod = "DecodeVLDST2Instruction";
786}
787
788def  VLD2d8   : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
789                     addrmode6align64or128>, Sched<[WriteVLD2]>;
790def  VLD2d16  : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
791                     addrmode6align64or128>, Sched<[WriteVLD2]>;
792def  VLD2d32  : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
793                     addrmode6align64or128>, Sched<[WriteVLD2]>;
794
795def  VLD2q8   : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
796                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
797def  VLD2q16  : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
798                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
799def  VLD2q32  : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
800                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
801
802def  VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
803def  VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
804def  VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
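// For reference: "vld2.32 {d0, d1}, [r0]" de-interleaves the data, placing
// even-numbered elements in d0 and odd-numbered elements in d1; the q forms
// do the same across four D registers.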
805
806// ...with address register writeback:
807multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
808                  RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
809  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
810                     (ins AddrMode:$Rn), itin,
811                     "vld2", Dt, "$Vd, $Rn!",
812                     "$Rn.addr = $wb", []> {
813    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
814    let Inst{5-4} = Rn{5-4};
815    let DecoderMethod = "DecodeVLDST2Instruction";
816  }
817  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
818                        (ins AddrMode:$Rn, rGPR:$Rm), itin,
819                        "vld2", Dt, "$Vd, $Rn, $Rm",
820                        "$Rn.addr = $wb", []> {
821    let Inst{5-4} = Rn{5-4};
822    let DecoderMethod = "DecodeVLDST2Instruction";
823  }
824}
825
826defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
827                        addrmode6align64or128>, Sched<[WriteVLD2]>;
828defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
829                        addrmode6align64or128>, Sched<[WriteVLD2]>;
830defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
831                        addrmode6align64or128>, Sched<[WriteVLD2]>;
832
833defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
834                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
835defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
836                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
837defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
838                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
839
840def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
841def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
842def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
843def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
844def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
845def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
846
847// ...with double-spaced registers
848def  VLD2b8    : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
849                      addrmode6align64or128>, Sched<[WriteVLD2]>;
850def  VLD2b16   : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
851                      addrmode6align64or128>, Sched<[WriteVLD2]>;
852def  VLD2b32   : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
853                      addrmode6align64or128>, Sched<[WriteVLD2]>;
854defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
855                        addrmode6align64or128>, Sched<[WriteVLD2]>;
856defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
857                        addrmode6align64or128>, Sched<[WriteVLD2]>;
858defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
859                        addrmode6align64or128>, Sched<[WriteVLD2]>;
860
861//   VLD3     : Vector Load (multiple 3-element structures)
862class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
863  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
864          (ins addrmode6:$Rn), IIC_VLD3,
865          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
866  let Rm = 0b1111;
867  let Inst{4} = Rn{4};
868  let DecoderMethod = "DecodeVLDST3Instruction";
869}
870
871def  VLD3d8   : VLD3D<0b0100, {0,0,0,?}, "8">;
872def  VLD3d16  : VLD3D<0b0100, {0,1,0,?}, "16">;
873def  VLD3d32  : VLD3D<0b0100, {1,0,0,?}, "32">;
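// For reference: "vld3.8 {d0, d1, d2}, [r0]" loads eight 3-byte structures,
// with the first, second, and third component of each structure landing in
// d0, d1, and d2 respectively.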
874
875def  VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
876def  VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
877def  VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
878
879// ...with address register writeback:
880class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
881  : NLdSt<0, 0b10, op11_8, op7_4,
882          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
883          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
884          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
885          "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
886  let Inst{4} = Rn{4};
887  let DecoderMethod = "DecodeVLDST3Instruction";
888}
889
890def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
891def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
892def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
893
894def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
895def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
896def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
897
898// ...with double-spaced registers:
899def VLD3q8      : VLD3D<0b0101, {0,0,0,?}, "8">;
900def VLD3q16     : VLD3D<0b0101, {0,1,0,?}, "16">;
901def VLD3q32     : VLD3D<0b0101, {1,0,0,?}, "32">;
902def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
903def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
904def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
905
906def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
907def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
908def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
909
910// ...alternate versions to be allocated odd register numbers:
911def VLD3q8oddPseudo   : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
912def VLD3q16oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
913def VLD3q32oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
914
915def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
916def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
917def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
918
919//   VLD4     : Vector Load (multiple 4-element structures)
920class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
921  : NLdSt<0, 0b10, op11_8, op7_4,
922          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
923          (ins addrmode6:$Rn), IIC_VLD4,
924          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
925    Sched<[WriteVLD4]> {
926  let Rm = 0b1111;
927  let Inst{5-4} = Rn{5-4};
928  let DecoderMethod = "DecodeVLDST4Instruction";
929}
930
931def  VLD4d8   : VLD4D<0b0000, {0,0,?,?}, "8">;
932def  VLD4d16  : VLD4D<0b0000, {0,1,?,?}, "16">;
933def  VLD4d32  : VLD4D<0b0000, {1,0,?,?}, "32">;
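// For reference: "vld4.16 {d0, d1, d2, d3}, [r0]" loads four 4-element
// structures of half-words, one component per destination register.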
934
935def  VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
936def  VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
937def  VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
938
939// ...with address register writeback:
940class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
941  : NLdSt<0, 0b10, op11_8, op7_4,
942          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
943          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
944          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
945          "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
946  let Inst{5-4} = Rn{5-4};
947  let DecoderMethod = "DecodeVLDST4Instruction";
948}
949
950def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
951def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
952def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
953
954def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
955def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
956def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
957
958// ...with double-spaced registers:
959def VLD4q8      : VLD4D<0b0001, {0,0,?,?}, "8">;
960def VLD4q16     : VLD4D<0b0001, {0,1,?,?}, "16">;
961def VLD4q32     : VLD4D<0b0001, {1,0,?,?}, "32">;
962def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
963def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
964def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
965
966def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
967def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
968def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
969
970// ...alternate versions to be allocated odd register numbers:
971def VLD4q8oddPseudo   : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
972def VLD4q16oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
973def VLD4q32oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
974
975def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
976def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
977def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
978
979} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
980
981// Classes for VLD*LN pseudo-instructions with multi-register operands.
982// These are expanded to real instructions after register allocation.
983class VLDQLNPseudo<InstrItinClass itin>
984  : PseudoNLdSt<(outs QPR:$dst),
985                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
986                itin, "$src = $dst">;
987class VLDQLNWBPseudo<InstrItinClass itin>
988  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
989                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
990                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
991class VLDQQLNPseudo<InstrItinClass itin>
992  : PseudoNLdSt<(outs QQPR:$dst),
993                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
994                itin, "$src = $dst">;
995class VLDQQLNWBPseudo<InstrItinClass itin>
996  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
997                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
998                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
999class VLDQQQQLNPseudo<InstrItinClass itin>
1000  : PseudoNLdSt<(outs QQQQPR:$dst),
1001                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
1002                itin, "$src = $dst">;
1003class VLDQQQQLNWBPseudo<InstrItinClass itin>
1004  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
1005                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
1006                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1007
1008//   VLD1LN   : Vector Load (single element to one lane)
1009class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1010             PatFrag LoadOp>
1011  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
1012          (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
1013          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1014          "$src = $Vd",
1015          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1016                                         (i32 (LoadOp addrmode6:$Rn)),
1017                                         imm:$lane))]> {
1018  let Rm = 0b1111;
1019  let DecoderMethod = "DecodeVLD1LN";
1020}
1021class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1022             PatFrag LoadOp>
1023  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
1024          (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
1025          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1026          "$src = $Vd",
1027          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1028                                         (i32 (LoadOp addrmode6oneL32:$Rn)),
1029                                         imm:$lane))]>, Sched<[WriteVLD1]> {
1030  let Rm = 0b1111;
1031  let DecoderMethod = "DecodeVLD1LN";
1032}
1033class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
1034                                                    Sched<[WriteVLD1]> {
1035  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
1036                                               (i32 (LoadOp addrmode6:$addr)),
1037                                               imm:$lane))];
1038}
1039
1040def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
1041  let Inst{7-5} = lane{2-0};
1042}
1043def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
1044  let Inst{7-6} = lane{1-0};
1045  let Inst{5-4} = Rn{5-4};
1046}
1047def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
1048  let Inst{7} = lane{0};
1049  let Inst{5-4} = Rn{5-4};
1050}
1051
1052def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
1053def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
1054def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
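// For reference: "vld1.32 {d0[1]}, [r0]" loads a single word into lane 1 and
// leaves the other lane untouched, which is why the patterns tie $src to $Vd.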
1055
1056let Predicates = [HasNEON] in {
1057def : Pat<(vector_insert (v4f16 DPR:$src),
1058                         (f16 (load addrmode6:$addr)), imm:$lane),
1059          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
1060def : Pat<(vector_insert (v8f16 QPR:$src),
1061                         (f16 (load addrmode6:$addr)), imm:$lane),
1062          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1063def : Pat<(vector_insert (v4bf16 DPR:$src),
1064                         (bf16 (load addrmode6:$addr)), imm:$lane),
1065          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
1066def : Pat<(vector_insert (v8bf16 QPR:$src),
1067                         (bf16 (load addrmode6:$addr)), imm:$lane),
1068          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1069def : Pat<(vector_insert (v2f32 DPR:$src),
1070                         (f32 (load addrmode6:$addr)), imm:$lane),
1071          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
1072def : Pat<(vector_insert (v4f32 QPR:$src),
1073                         (f32 (load addrmode6:$addr)), imm:$lane),
1074          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1075
1076// A 64-bit subvector insert to the first 128-bit vector position
1077// is a subregister copy that needs no instruction.
1078def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
1079          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1080def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
1081          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1082def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
1083          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1084def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
1085          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1086def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
1087          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1088def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
1089          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1090}
1091
1092
1093let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1094
1095// ...with address register writeback:
1096class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1097  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
1098          (ins addrmode6:$Rn, am6offset:$Rm,
1099           DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
1100          "\\{$Vd[$lane]\\}, $Rn$Rm",
1101          "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1102  let DecoderMethod = "DecodeVLD1LN";
1103}
1104
1105def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
1106  let Inst{7-5} = lane{2-0};
1107}
1108def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
1109  let Inst{7-6} = lane{1-0};
1110  let Inst{4}   = Rn{4};
1111}
1112def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
1113  let Inst{7} = lane{0};
1114  let Inst{5} = Rn{5};
1115  let Inst{4} = Rn{4};
1116}
1117
1118def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1119def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1120def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1121
1122//   VLD2LN   : Vector Load (single 2-element structure to one lane)
1123class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1124  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
1125          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
1126          IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
1127          "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
1128  let Rm = 0b1111;
1129  let Inst{4}   = Rn{4};
1130  let DecoderMethod = "DecodeVLD2LN";
1131}
1132
1133def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
1134  let Inst{7-5} = lane{2-0};
1135}
1136def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
1137  let Inst{7-6} = lane{1-0};
1138}
1139def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
1140  let Inst{7} = lane{0};
1141}
1142
1143def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1144def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1145def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1146
1147// ...with double-spaced registers:
1148def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
1149  let Inst{7-6} = lane{1-0};
1150}
1151def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
1152  let Inst{7} = lane{0};
1153}
1154
1155def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1156def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1157
1158// ...with address register writeback:
1159class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1160  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
1161          (ins addrmode6:$Rn, am6offset:$Rm,
1162           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
1163          "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
1164          "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
1165  let Inst{4}   = Rn{4};
1166  let DecoderMethod = "DecodeVLD2LN";
1167}
1168
1169def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
1170  let Inst{7-5} = lane{2-0};
1171}
1172def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
1173  let Inst{7-6} = lane{1-0};
1174}
1175def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
1176  let Inst{7} = lane{0};
1177}
1178
1179def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1180def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1181def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1182
1183def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
1184  let Inst{7-6} = lane{1-0};
1185}
1186def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
1187  let Inst{7} = lane{0};
1188}
1189
1190def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1191def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1192
1193//   VLD3LN   : Vector Load (single 3-element structure to one lane)
1194class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1195  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1196          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
1197          nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
1198          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
1199          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
1200  let Rm = 0b1111;
1201  let DecoderMethod = "DecodeVLD3LN";
1202}
1203
1204def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
1205  let Inst{7-5} = lane{2-0};
1206}
1207def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
1208  let Inst{7-6} = lane{1-0};
1209}
1210def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
1211  let Inst{7}   = lane{0};
1212}
1213
1214def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1215def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1216def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1217
1218// ...with double-spaced registers:
1219def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
1220  let Inst{7-6} = lane{1-0};
1221}
1222def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
1223  let Inst{7}   = lane{0};
1224}
1225
1226def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1227def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1228
1229// ...with address register writeback:
1230class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1231  : NLdStLn<1, 0b10, op11_8, op7_4,
1232          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1233          (ins addrmode6:$Rn, am6offset:$Rm,
1234           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
1235          IIC_VLD3lnu, "vld3", Dt,
1236          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
1237          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
1238          []>, Sched<[WriteVLD2]> {
1239  let DecoderMethod = "DecodeVLD3LN";
1240}
1241
1242def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
1243  let Inst{7-5} = lane{2-0};
1244}
1245def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
1246  let Inst{7-6} = lane{1-0};
1247}
1248def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
1249  let Inst{7} = lane{0};
1250}
1251
1252def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1253def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1254def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1255
1256def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
1257  let Inst{7-6} = lane{1-0};
1258}
1259def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
1260  let Inst{7} = lane{0};
1261}
1262
1263def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1264def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1265
1266//   VLD4LN   : Vector Load (single 4-element structure to one lane)
1267class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1268  : NLdStLn<1, 0b10, op11_8, op7_4,
1269          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1270          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
1271          nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
1272          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
1273          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
1274    Sched<[WriteVLD2]> {
1275  let Rm = 0b1111;
1276  let Inst{4} = Rn{4};
1277  let DecoderMethod = "DecodeVLD4LN";
1278}
1279
1280def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
1281  let Inst{7-5} = lane{2-0};
1282}
1283def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
1284  let Inst{7-6} = lane{1-0};
1285}
1286def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
1287  let Inst{7} = lane{0};
1288  let Inst{5} = Rn{5};
1289}
1290
1291def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1292def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1293def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1294
1295// ...with double-spaced registers:
1296def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
1297  let Inst{7-6} = lane{1-0};
1298}
1299def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
1300  let Inst{7} = lane{0};
1301  let Inst{5} = Rn{5};
1302}
1303
1304def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1305def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1306
1307// ...with address register writeback:
1308class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1309  : NLdStLn<1, 0b10, op11_8, op7_4,
1310          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1311          (ins addrmode6:$Rn, am6offset:$Rm,
1312           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
1313          IIC_VLD4lnu, "vld4", Dt,
1314"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
1315"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
1316          []> {
1317  let Inst{4}   = Rn{4};
1318  let DecoderMethod = "DecodeVLD4LN";
1319}
1320
1321def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
1322  let Inst{7-5} = lane{2-0};
1323}
1324def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
1325  let Inst{7-6} = lane{1-0};
1326}
1327def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
1328  let Inst{7} = lane{0};
1329  let Inst{5} = Rn{5};
1330}
1331
1332def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1333def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1334def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1335
1336def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
1337  let Inst{7-6} = lane{1-0};
1338}
1339def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
1340  let Inst{7} = lane{0};
1341  let Inst{5} = Rn{5};
1342}
1343
1344def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1345def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1346
1347} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1348
1349//   VLD1DUP  : Vector Load (single element to all lanes)
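// Loads a single element and replicates it into all lanes of the destination,
// e.g. "vld1.32 {d0[]}, [r0]".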
1350class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1351              Operand AddrMode>
1352  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
1353          (ins AddrMode:$Rn),
1354          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
1355          [(set VecListOneDAllLanes:$Vd,
1356                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
1357   Sched<[WriteVLD2]> {
1358  let Rm = 0b1111;
1359  let Inst{4} = Rn{4};
1360  let DecoderMethod = "DecodeVLD1DupInstruction";
1361}
1362def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
1363                         addrmode6dupalignNone>;
1364def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
1365                         addrmode6dupalign16>;
1366def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
1367                         addrmode6dupalign32>;
1368
1369let Predicates = [HasNEON] in {
1370def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
1371          (VLD1DUPd32 addrmode6:$addr)>;
1372}
1373
1374class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1375               Operand AddrMode>
1376  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
1377          (ins AddrMode:$Rn), IIC_VLD1dup,
1378          "vld1", Dt, "$Vd, $Rn", "",
1379          [(set VecListDPairAllLanes:$Vd,
1380                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
1381  let Rm = 0b1111;
1382  let Inst{4} = Rn{4};
1383  let DecoderMethod = "DecodeVLD1DupInstruction";
1384}
1385
1386def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
1387                          addrmode6dupalignNone>;
1388def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
1389                          addrmode6dupalign16>;
1390def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
1391                          addrmode6dupalign32>;
1392
1393let Predicates = [HasNEON] in {
1394def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
1395          (VLD1DUPq32 addrmode6:$addr)>;
1396}
1397
1398let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1399// ...with address register writeback:
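// The "_fixed" form encodes Rm = 0b1101 and post-increments the base address
// by the transfer size (the "$Rn!" syntax); the "_register" form takes an
// explicit Rm operand and post-increments by that register.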
1400multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1401  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1402                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1403                     (ins AddrMode:$Rn), IIC_VLD1dupu,
1404                     "vld1", Dt, "$Vd, $Rn!",
1405                     "$Rn.addr = $wb", []> {
1406    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1407    let Inst{4} = Rn{4};
1408    let DecoderMethod = "DecodeVLD1DupInstruction";
1409  }
1410  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1411                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1412                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1413                        "vld1", Dt, "$Vd, $Rn, $Rm",
1414                        "$Rn.addr = $wb", []> {
1415    let Inst{4} = Rn{4};
1416    let DecoderMethod = "DecodeVLD1DupInstruction";
1417  }
1418}
1419multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1420  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1421                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1422                     (ins AddrMode:$Rn), IIC_VLD1dupu,
1423                     "vld1", Dt, "$Vd, $Rn!",
1424                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1425    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1426    let Inst{4} = Rn{4};
1427    let DecoderMethod = "DecodeVLD1DupInstruction";
1428  }
1429  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1430                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1431                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1432                        "vld1", Dt, "$Vd, $Rn, $Rm",
1433                        "$Rn.addr = $wb", []> {
1434    let Inst{4} = Rn{4};
1435    let DecoderMethod = "DecodeVLD1DupInstruction";
1436  }
1437}
1438
1439defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
1440defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
1441defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;
1442
1443defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
1444defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
1445defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
1446
1447//   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
1448class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
1449  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
1450          (ins AddrMode:$Rn), IIC_VLD2dup,
1451          "vld2", Dt, "$Vd, $Rn", "", []> {
1452  let Rm = 0b1111;
1453  let Inst{4} = Rn{4};
1454  let DecoderMethod = "DecodeVLD2DupInstruction";
1455}
1456
1457def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes,
1458                         addrmode6dupalign16>;
1459def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
1460                         addrmode6dupalign32>;
1461def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
1462                         addrmode6dupalign64>;
1463
1464// HACK: VLD2DUPd8x2 must be changed at the same time as VLD2b8, or
1465// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
1466// ...with double-spaced registers
1467def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes,
1468                           addrmode6dupalign16>;
1469def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1470                           addrmode6dupalign32>;
1471def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1472                           addrmode6dupalign64>;
1473
1474def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1475def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1476def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1477def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1478def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1479def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1480
1481// ...with address register writeback:
1482multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
1483                     Operand AddrMode> {
1484  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
1485                     (outs VdTy:$Vd, GPR:$wb),
1486                     (ins AddrMode:$Rn), IIC_VLD2dupu,
1487                     "vld2", Dt, "$Vd, $Rn!",
1488                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1489    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1490    let Inst{4} = Rn{4};
1491    let DecoderMethod = "DecodeVLD2DupInstruction";
1492  }
1493  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
1494                        (outs VdTy:$Vd, GPR:$wb),
1495                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
1496                        "vld2", Dt, "$Vd, $Rn, $Rm",
1497                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1498    let Inst{4} = Rn{4};
1499    let DecoderMethod = "DecodeVLD2DupInstruction";
1500  }
1501}
1502
1503defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes,
1504                                addrmode6dupalign16>;
1505defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
1506                                addrmode6dupalign32>;
1507defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
1508                                addrmode6dupalign64>;
1509
1510defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes,
1511                                addrmode6dupalign16>;
1512defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1513                                addrmode6dupalign32>;
1514defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1515                                addrmode6dupalign64>;
1516
1517//   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
1518class VLD3DUP<bits<4> op7_4, string Dt>
1519  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1520          (ins addrmode6dup:$Rn), IIC_VLD3dup,
1521          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
1522    Sched<[WriteVLD2]> {
1523  let Rm = 0b1111;
1524  let Inst{4} = 0;
1525  let DecoderMethod = "DecodeVLD3DupInstruction";
1526}
1527
1528def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
1529def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
1530def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
1531
1532def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1533def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1534def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1535
1536// ...with double-spaced registers (not used for codegen):
1537def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
1538def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
1539def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
1540
1541def VLD3DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1542def VLD3DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1543def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1544def VLD3DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1545def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1546def VLD3DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1547
1548// ...with address register writeback:
1549class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
1550  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1551          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
1552          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
1553          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1554  let Inst{4} = 0;
1555  let DecoderMethod = "DecodeVLD3DupInstruction";
1556}
1557
1558def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8",  addrmode6dupalign64>;
1559def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
1560def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;
1561
1562def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8",  addrmode6dupalign64>;
1563def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
1564def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
1565
1566def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1567def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1568def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1569
1570//   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
1571class VLD4DUP<bits<4> op7_4, string Dt>
1572  : NLdSt<1, 0b10, 0b1111, op7_4,
1573          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1574          (ins addrmode6dup:$Rn), IIC_VLD4dup,
1575          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
1576  let Rm = 0b1111;
1577  let Inst{4} = Rn{4};
1578  let DecoderMethod = "DecodeVLD4DupInstruction";
1579}
1580
1581def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
1582def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
1583def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1584
1585def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1586def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1587def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1588
1589// ...with double-spaced registers (not used for codegen):
1590def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
1591def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
1592def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1593
1594def VLD4DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1595def VLD4DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1596def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1597def VLD4DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1598def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1599def VLD4DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1600
1601// ...with address register writeback:
1602class VLD4DUPWB<bits<4> op7_4, string Dt>
1603  : NLdSt<1, 0b10, 0b1111, op7_4,
1604          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1605          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
1606          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
1607          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1608  let Inst{4} = Rn{4};
1609  let DecoderMethod = "DecodeVLD4DupInstruction";
1610}
1611
1612def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
1613def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
1614def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1615
1616def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
1617def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
1618def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1619
1620def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1621def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1622def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1623
1624} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1625
1626let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
1627
1628// Classes for VST* pseudo-instructions with multi-register operands.
1629// These are expanded to real instructions after register allocation.
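// For example, a VSTQQPseudo holds its four D registers as one QQPR operand;
// once registers are assigned, the ARM pseudo-instruction expansion pass
// rewrites it into the corresponding real VST instruction on the constituent
// D registers.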
1630class VSTQPseudo<InstrItinClass itin>
1631  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
1632class VSTQWBPseudo<InstrItinClass itin>
1633  : PseudoNLdSt<(outs GPR:$wb),
1634                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
1635                "$addr.addr = $wb">;
1636class VSTQWBfixedPseudo<InstrItinClass itin>
1637  : PseudoNLdSt<(outs GPR:$wb),
1638                (ins addrmode6:$addr, QPR:$src), itin,
1639                "$addr.addr = $wb">;
1640class VSTQWBregisterPseudo<InstrItinClass itin>
1641  : PseudoNLdSt<(outs GPR:$wb),
1642                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
1643                "$addr.addr = $wb">;
1644class VSTQQPseudo<InstrItinClass itin>
1645  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
1646class VSTQQWBPseudo<InstrItinClass itin>
1647  : PseudoNLdSt<(outs GPR:$wb),
1648                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
1649                "$addr.addr = $wb">;
1650class VSTQQWBfixedPseudo<InstrItinClass itin>
1651  : PseudoNLdSt<(outs GPR:$wb),
1652                (ins addrmode6:$addr, QQPR:$src), itin,
1653                "$addr.addr = $wb">;
1654class VSTQQWBregisterPseudo<InstrItinClass itin>
1655  : PseudoNLdSt<(outs GPR:$wb),
1656                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
1657                "$addr.addr = $wb">;
1658
1659class VSTQQQQPseudo<InstrItinClass itin>
1660  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
1661class VSTQQQQWBPseudo<InstrItinClass itin>
1662  : PseudoNLdSt<(outs GPR:$wb),
1663                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
1664                "$addr.addr = $wb">;
1665
1666//   VST1     : Vector Store (multiple single elements)
1667class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
1668  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
1669          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
1670  let Rm = 0b1111;
1671  let Inst{4} = Rn{4};
1672  let DecoderMethod = "DecodeVLDST1Instruction";
1673}
1674class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
1675  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
1676          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
1677  let Rm = 0b1111;
1678  let Inst{5-4} = Rn{5-4};
1679  let DecoderMethod = "DecodeVLDST1Instruction";
1680}
1681
1682def  VST1d8   : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
1683def  VST1d16  : VST1D<{0,1,0,?}, "16", addrmode6align64>;
1684def  VST1d32  : VST1D<{1,0,0,?}, "32", addrmode6align64>;
1685def  VST1d64  : VST1D<{1,1,0,?}, "64", addrmode6align64>;
1686
1687def  VST1q8   : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
1688def  VST1q16  : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
1689def  VST1q32  : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
1690def  VST1q64  : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
1691
1692// ...with address register writeback:
1693multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1694  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
1695                     (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
1696                     "vst1", Dt, "$Vd, $Rn!",
1697                     "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1698    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1699    let Inst{4} = Rn{4};
1700    let DecoderMethod = "DecodeVLDST1Instruction";
1701  }
1702  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
1703                        (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
1704                        IIC_VLD1u,
1705                        "vst1", Dt, "$Vd, $Rn, $Rm",
1706                        "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1707    let Inst{4} = Rn{4};
1708    let DecoderMethod = "DecodeVLDST1Instruction";
1709  }
1710}
1711multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1712  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1713                    (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
1714                     "vst1", Dt, "$Vd, $Rn!",
1715                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1716    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1717    let Inst{5-4} = Rn{5-4};
1718    let DecoderMethod = "DecodeVLDST1Instruction";
1719  }
1720  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1721                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
1722                        IIC_VLD1x2u,
1723                        "vst1", Dt, "$Vd, $Rn, $Rm",
1724                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1725    let Inst{5-4} = Rn{5-4};
1726    let DecoderMethod = "DecodeVLDST1Instruction";
1727  }
1728}
1729
1730defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
1731defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
1732defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
1733defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;
1734
1735defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
1736defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
1737defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
1738defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
1739
1740// ...with 3 registers
1741class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
1742  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
1743          (ins AddrMode:$Rn, VecListThreeD:$Vd),
1744          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
1745  let Rm = 0b1111;
1746  let Inst{4} = Rn{4};
1747  let DecoderMethod = "DecodeVLDST1Instruction";
1748}
1749multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1750  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1751                    (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
1752                     "vst1", Dt, "$Vd, $Rn!",
1753                     "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1754    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1755    let Inst{5-4} = Rn{5-4};
1756    let DecoderMethod = "DecodeVLDST1Instruction";
1757  }
1758  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1759                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
1760                        IIC_VLD1x3u,
1761                        "vst1", Dt, "$Vd, $Rn, $Rm",
1762                        "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1763    let Inst{5-4} = Rn{5-4};
1764    let DecoderMethod = "DecodeVLDST1Instruction";
1765  }
1766}
1767
1768def VST1d8T     : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
1769def VST1d16T    : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
1770def VST1d32T    : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
1771def VST1d64T    : VST1D3<{1,1,0,?}, "64", addrmode6align64>;
1772
1773defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
1774defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
1775defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
1776defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
1777
1778def VST1d8TPseudo             : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1779def VST1d16TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1780def VST1d32TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1781def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1782def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1783def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1784
1785def VST1q8HighTPseudo     : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1786def VST1q8LowTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1787def VST1q16HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1788def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1789def VST1q32HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1790def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1791def VST1q64HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1792def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1793
1794// ...with 4 registers
1795class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
1796  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
1797          (ins AddrMode:$Rn, VecListFourD:$Vd),
1798          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
1799          []>, Sched<[WriteVST4]> {
1800  let Rm = 0b1111;
1801  let Inst{5-4} = Rn{5-4};
1802  let DecoderMethod = "DecodeVLDST1Instruction";
1803}
1804multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1805  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1806                    (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
1807                     "vst1", Dt, "$Vd, $Rn!",
1808                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1809    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1810    let Inst{5-4} = Rn{5-4};
1811    let DecoderMethod = "DecodeVLDST1Instruction";
1812  }
1813  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1814                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1815                        IIC_VLD1x4u,
1816                        "vst1", Dt, "$Vd, $Rn, $Rm",
1817                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1818    let Inst{5-4} = Rn{5-4};
1819    let DecoderMethod = "DecodeVLDST1Instruction";
1820  }
1821}
1822
1823def VST1d8Q     : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
1824def VST1d16Q    : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
1825def VST1d32Q    : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
1826def VST1d64Q    : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
1827
1828defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
1829defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1830defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1831defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
1832
1833def VST1d8QPseudo             : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1834def VST1d16QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1835def VST1d32QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1836def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1837def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1838def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1839
1840def VST1q8HighQPseudo     : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1841def VST1q8LowQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1842def VST1q16HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1843def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1844def VST1q32HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1845def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1846def VST1q64HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1847def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1848
1849//   VST2     : Vector Store (multiple 2-element structures)
1850class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
1851            InstrItinClass itin, Operand AddrMode>
1852  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
1853          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
1854  let Rm = 0b1111;
1855  let Inst{5-4} = Rn{5-4};
1856  let DecoderMethod = "DecodeVLDST2Instruction";
1857}
1858
1859def  VST2d8   : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
1860                     addrmode6align64or128>, Sched<[WriteVST2]>;
1861def  VST2d16  : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
1862                     addrmode6align64or128>, Sched<[WriteVST2]>;
1863def  VST2d32  : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
1864                     addrmode6align64or128>, Sched<[WriteVST2]>;
1865
1866def  VST2q8   : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
1867                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
1868def  VST2q16  : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
1869                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
1870def  VST2q32  : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
1871                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
1872
1873def  VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1874def  VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1875def  VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1876
1877// ...with address register writeback:
1878multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
1879                   RegisterOperand VdTy, Operand AddrMode> {
1880  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1881                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
1882                     "vst2", Dt, "$Vd, $Rn!",
1883                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1884    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1885    let Inst{5-4} = Rn{5-4};
1886    let DecoderMethod = "DecodeVLDST2Instruction";
1887  }
1888  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1889                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
1890                        "vst2", Dt, "$Vd, $Rn, $Rm",
1891                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1892    let Inst{5-4} = Rn{5-4};
1893    let DecoderMethod = "DecodeVLDST2Instruction";
1894  }
1895}
1896multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1897  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1898                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
1899                     "vst2", Dt, "$Vd, $Rn!",
1900                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1901    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1902    let Inst{5-4} = Rn{5-4};
1903    let DecoderMethod = "DecodeVLDST2Instruction";
1904  }
1905  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1906                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1907                        IIC_VLD1u,
1908                        "vst2", Dt, "$Vd, $Rn, $Rm",
1909                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1910    let Inst{5-4} = Rn{5-4};
1911    let DecoderMethod = "DecodeVLDST2Instruction";
1912  }
1913}
1914
1915defm VST2d8wb    : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
1916                           addrmode6align64or128>;
1917defm VST2d16wb   : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
1918                           addrmode6align64or128>;
1919defm VST2d32wb   : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
1920                           addrmode6align64or128>;
1921
1922defm VST2q8wb    : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
1923defm VST2q16wb   : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1924defm VST2q32wb   : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1925
1926def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1927def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1928def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1929def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1930def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1931def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1932
1933// ...with double-spaced registers
1934def VST2b8      : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
1935                      addrmode6align64or128>;
1936def VST2b16     : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
1937                      addrmode6align64or128>;
1938def VST2b32     : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
1939                      addrmode6align64or128>;
1940defm VST2b8wb   : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
1941                          addrmode6align64or128>;
1942defm VST2b16wb  : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
1943                          addrmode6align64or128>;
1944defm VST2b32wb  : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
1945                          addrmode6align64or128>;
1946
1947//   VST3     : Vector Store (multiple 3-element structures)
1948class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
1949  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1950          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
1951          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
1952  let Rm = 0b1111;
1953  let Inst{4} = Rn{4};
1954  let DecoderMethod = "DecodeVLDST3Instruction";
1955}
1956
1957def  VST3d8   : VST3D<0b0100, {0,0,0,?}, "8">;
1958def  VST3d16  : VST3D<0b0100, {0,1,0,?}, "16">;
1959def  VST3d32  : VST3D<0b0100, {1,0,0,?}, "32">;
1960
1961def  VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1962def  VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1963def  VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1964
1965// ...with address register writeback:
1966class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1967  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1968          (ins addrmode6:$Rn, am6offset:$Rm,
1969           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
1970          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
1971          "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1972  let Inst{4} = Rn{4};
1973  let DecoderMethod = "DecodeVLDST3Instruction";
1974}
1975
1976def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
1977def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
1978def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
1979
1980def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1981def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1982def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1983
1984// ...with double-spaced registers:
1985def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
1986def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
1987def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
1988def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
1989def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
1990def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
1991
1992def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1993def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1994def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1995
1996// ...alternate versions to be allocated odd register numbers:
1997def VST3q8oddPseudo   : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1998def VST3q16oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1999def VST3q32oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2000
2001def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2002def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2003def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2004
2005//   VST4     : Vector Store (multiple 4-element structures)
2006class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
2007  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
2008          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
2009          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
2010          "", []>, Sched<[WriteVST4]> {
2011  let Rm = 0b1111;
2012  let Inst{5-4} = Rn{5-4};
2013  let DecoderMethod = "DecodeVLDST4Instruction";
2014}
2015
2016def  VST4d8   : VST4D<0b0000, {0,0,?,?}, "8">;
2017def  VST4d16  : VST4D<0b0000, {0,1,?,?}, "16">;
2018def  VST4d32  : VST4D<0b0000, {1,0,?,?}, "32">;
2019
2020def  VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2021def  VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2022def  VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2023
2024// ...with address register writeback:
2025class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2026  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
2027          (ins addrmode6:$Rn, am6offset:$Rm,
2028           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
2029           "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
2030          "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
2031  let Inst{5-4} = Rn{5-4};
2032  let DecoderMethod = "DecodeVLDST4Instruction";
2033}
2034
2035def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
2036def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
2037def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
2038
2039def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2040def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2041def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2042
2043// ...with double-spaced registers:
2044def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
2045def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
2046def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
2047def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
2048def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
2049def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
2050
2051def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2052def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2053def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2054
2055// ...alternate versions to be allocated odd register numbers:
2056def VST4q8oddPseudo   : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2057def VST4q16oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2058def VST4q32oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2059
2060def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2061def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2062def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2063
2064} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2065
2066// Classes for VST*LN pseudo-instructions with multi-register operands.
2067// These are expanded to real instructions after register allocation.
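// In addition to the multi-register source, these carry the lane index
// (nohash_imm:$lane) so the expanded VST*LN instruction can select the lane.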
2068class VSTQLNPseudo<InstrItinClass itin>
2069  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
2070                itin, "">;
2071class VSTQLNWBPseudo<InstrItinClass itin>
2072  : PseudoNLdSt<(outs GPR:$wb),
2073                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
2074                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2075class VSTQQLNPseudo<InstrItinClass itin>
2076  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
2077                itin, "">;
2078class VSTQQLNWBPseudo<InstrItinClass itin>
2079  : PseudoNLdSt<(outs GPR:$wb),
2080                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
2081                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2082class VSTQQQQLNPseudo<InstrItinClass itin>
2083  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
2084                itin, "">;
2085class VSTQQQQLNWBPseudo<InstrItinClass itin>
2086  : PseudoNLdSt<(outs GPR:$wb),
2087                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
2088                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2089
2090//   VST1LN   : Vector Store (single element from one lane)
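// Matches a store of one extracted vector lane; for example, storing lane 1
// of a v2i32 D register selects "vst1.32 {d0[1]}, [r0]".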
2091class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2092             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
2093  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2094          (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
2095          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
2096          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
2097     Sched<[WriteVST1]> {
2098  let Rm = 0b1111;
2099  let DecoderMethod = "DecodeVST1LN";
2100}
2101class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2102  : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
2103  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2104                          addrmode6:$addr)];
2105}
2106
2107def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
2108                       ARMvgetlaneu, addrmode6> {
2109  let Inst{7-5} = lane{2-0};
2110}
2111def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
2112                       ARMvgetlaneu, addrmode6> {
2113  let Inst{7-6} = lane{1-0};
2114  let Inst{4}   = Rn{4};
2115}
2116
2117def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
2118                       addrmode6oneL32> {
2119  let Inst{7}   = lane{0};
2120  let Inst{5-4} = Rn{5-4};
2121}
2122
2123def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
2124def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
2125def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
2126
2127let Predicates = [HasNEON] in {
2128def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
2129          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
2130def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
2131          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2132
2133def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
2134          (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
2135def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
2136          (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2137}
2138
2139// ...with address register writeback:
2140class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2141               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
2142  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2143          (ins AdrMode:$Rn, am6offset:$Rm,
2144           DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
2145          "\\{$Vd[$lane]\\}, $Rn$Rm",
2146          "$Rn.addr = $wb",
2147          [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
2148                                  AdrMode:$Rn, am6offset:$Rm))]>,
2149    Sched<[WriteVST1]> {
2150  let DecoderMethod = "DecodeVST1LN";
2151}
2152class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2153  : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
2154  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2155                                        addrmode6:$addr, am6offset:$offset))];
2156}
2157
2158def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
2159                             ARMvgetlaneu, addrmode6> {
2160  let Inst{7-5} = lane{2-0};
2161}
2162def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
2163                             ARMvgetlaneu, addrmode6> {
2164  let Inst{7-6} = lane{1-0};
2165  let Inst{4}   = Rn{4};
2166}
2167def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
2168                             extractelt, addrmode6oneL32> {
2169  let Inst{7}   = lane{0};
2170  let Inst{5-4} = Rn{5-4};
2171}
2172
2173def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
2174def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16, ARMvgetlaneu>;
2175def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
2176
2177let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
2178
2179//   VST2LN   : Vector Store (single 2-element structure from one lane)
2180class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2181  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2182          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
2183          IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
2184          "", []>, Sched<[WriteVST1]> {
2185  let Rm = 0b1111;
2186  let Inst{4}   = Rn{4};
2187  let DecoderMethod = "DecodeVST2LN";
2188}
2189
2190def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
2191  let Inst{7-5} = lane{2-0};
2192}
2193def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
2194  let Inst{7-6} = lane{1-0};
2195}
2196def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
2197  let Inst{7}   = lane{0};
2198}
2199
2200def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2201def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2202def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2203
2204// ...with double-spaced registers:
2205def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
2206  let Inst{7-6} = lane{1-0};
2207  let Inst{4}   = Rn{4};
2208}
2209def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
2210  let Inst{7}   = lane{0};
2211  let Inst{4}   = Rn{4};
2212}
2213
2214def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2215def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2216
2217// ...with address register writeback:
2218class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2219  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2220          (ins addrmode6:$Rn, am6offset:$Rm,
2221           DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
2222          "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
2223          "$Rn.addr = $wb", []> {
2224  let Inst{4}   = Rn{4};
2225  let DecoderMethod = "DecodeVST2LN";
2226}
2227
2228def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
2229  let Inst{7-5} = lane{2-0};
2230}
2231def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
2232  let Inst{7-6} = lane{1-0};
2233}
2234def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
2235  let Inst{7}   = lane{0};
2236}
2237
2238def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2239def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2240def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2241
2242def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
2243  let Inst{7-6} = lane{1-0};
2244}
2245def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
2246  let Inst{7}   = lane{0};
2247}
2248
2249def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2250def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2251
2252//   VST3LN   : Vector Store (single 3-element structure from one lane)
2253class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2254  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2255          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
2256           nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
2257          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
2258    Sched<[WriteVST2]> {
2259  let Rm = 0b1111;
2260  let DecoderMethod = "DecodeVST3LN";
2261}
2262
2263def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
2264  let Inst{7-5} = lane{2-0};
2265}
2266def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
2267  let Inst{7-6} = lane{1-0};
2268}
2269def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
2270  let Inst{7}   = lane{0};
2271}
2272
2273def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2274def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2275def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2276
2277// ...with double-spaced registers:
2278def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
2279  let Inst{7-6} = lane{1-0};
2280}
2281def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
2282  let Inst{7}   = lane{0};
2283}
2284
2285def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2286def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2287
2288// ...with address register writeback:
2289class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2290  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2291          (ins addrmode6:$Rn, am6offset:$Rm,
2292           DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
2293          IIC_VST3lnu, "vst3", Dt,
2294          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
2295          "$Rn.addr = $wb", []> {
2296  let DecoderMethod = "DecodeVST3LN";
2297}
2298
2299def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
2300  let Inst{7-5} = lane{2-0};
2301}
2302def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
2303  let Inst{7-6} = lane{1-0};
2304}
2305def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
2306  let Inst{7}   = lane{0};
2307}
2308
2309def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2310def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2311def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2312
2313def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
2314  let Inst{7-6} = lane{1-0};
2315}
2316def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
2317  let Inst{7}   = lane{0};
2318}
2319
2320def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2321def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2322
2323//   VST4LN   : Vector Store (single 4-element structure from one lane)
2324class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2325  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2326          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
2327           nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
2328          "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
2329          "", []>, Sched<[WriteVST2]> {
2330  let Rm = 0b1111;
2331  let Inst{4} = Rn{4};
2332  let DecoderMethod = "DecodeVST4LN";
2333}
2334
2335def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
2336  let Inst{7-5} = lane{2-0};
2337}
2338def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
2339  let Inst{7-6} = lane{1-0};
2340}
2341def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
2342  let Inst{7}   = lane{0};
2343  let Inst{5} = Rn{5};
2344}
2345
2346def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2347def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2348def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2349
2350// ...with double-spaced registers:
2351def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
2352  let Inst{7-6} = lane{1-0};
2353}
2354def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
2355  let Inst{7}   = lane{0};
2356  let Inst{5} = Rn{5};
2357}
2358
2359def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2360def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2361
2362// ...with address register writeback:
2363class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2364  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2365          (ins addrmode6:$Rn, am6offset:$Rm,
2366           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
2367          IIC_VST4lnu, "vst4", Dt,
2368  "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
2369          "$Rn.addr = $wb", []> {
2370  let Inst{4} = Rn{4};
2371  let DecoderMethod = "DecodeVST4LN";
2372}
2373
2374def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
2375  let Inst{7-5} = lane{2-0};
2376}
2377def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
2378  let Inst{7-6} = lane{1-0};
2379}
2380def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
2381  let Inst{7}   = lane{0};
2382  let Inst{5} = Rn{5};
2383}
2384
2385def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2386def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2387def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2388
2389def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
2390  let Inst{7-6} = lane{1-0};
2391}
2392def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
2393  let Inst{7}   = lane{0};
2394  let Inst{5} = Rn{5};
2395}
2396
2397def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2398def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2399
2400} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2401
2402// Use vld1/vst1 for unaligned f64 load / store
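// On little-endian targets the in-register byte order does not depend on the
// element size, so the 8- and 16-bit element forms can be used to relax the
// alignment requirement; on big-endian targets only the 64-bit element form
// preserves the f64 value.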
2403let Predicates = [IsLE,HasNEON] in {
2404def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
2405          (VLD1d16 addrmode6:$addr)>;
2406def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
2407          (VST1d16 addrmode6:$addr, DPR:$value)>;
2408def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
2409          (VLD1d8 addrmode6:$addr)>;
2410def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
2411          (VST1d8 addrmode6:$addr, DPR:$value)>;
2412}
2413let Predicates = [IsBE,HasNEON] in {
2414def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
2415          (VLD1d64 addrmode6:$addr)>;
2416def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
2417          (VST1d64 addrmode6:$addr, DPR:$value)>;
2418}
2419
2420// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
2421// load / store if it's legal.
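// The dword-aligned patterns use the 64-bit element form; the word-,
// halfword- and byte-aligned patterns are little-endian only, for the same
// reason as the f64 patterns above.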
2422let Predicates = [HasNEON] in {
2423def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
2424          (VLD1q64 addrmode6:$addr)>;
2425def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2426          (VST1q64 addrmode6:$addr, QPR:$value)>;
2427}
2428let Predicates = [IsLE,HasNEON] in {
2429def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
2430          (VLD1q32 addrmode6:$addr)>;
2431def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2432          (VST1q32 addrmode6:$addr, QPR:$value)>;
2433def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
2434          (VLD1q16 addrmode6:$addr)>;
2435def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2436          (VST1q16 addrmode6:$addr, QPR:$value)>;
2437def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
2438          (VLD1q8 addrmode6:$addr)>;
2439def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2440          (VST1q8 addrmode6:$addr, QPR:$value)>;
2441}
2442
2443//===----------------------------------------------------------------------===//
2444// Instruction Classes
2445//===----------------------------------------------------------------------===//
2446
2447// Basic 2-register operations: double- and quad-register.
2448class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2449           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2450           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2451  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2452        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
2453        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
2454class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2455           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2456           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2457  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2458        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
2459        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
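
// Illustrative sketch only (not a definition used by this file): N2VD/N2VQ tie
// a unary SDNode to a single-source NEON instruction.  A floating-point negate
// over a D register could be written along the lines of
//   def VNEGf32d_sketch : N2VD<0b11, 0b11, 0b10, 0b01, 0b00111, 0,
//                              "vneg", "f32", v2f32, v2f32, fneg>;
// where the encoding-bit arguments are placeholders, not the real VNEG encoding.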
2460
2461// Basic 2-register intrinsics, both double- and quad-register.
2462class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2463              bits<2> op17_16, bits<5> op11_7, bit op4,
2464              InstrItinClass itin, string OpcodeStr, string Dt,
2465              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2466  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2467        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2468        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2469class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2470              bits<2> op17_16, bits<5> op11_7, bit op4,
2471              InstrItinClass itin, string OpcodeStr, string Dt,
2472              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2473  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2474        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2475        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
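
// Illustrative sketch only: the *Int classes have the same shape but match a
// target intrinsic rather than a generic SDNode, e.g. a saturating absolute
// value could be expressed as
//   def VQABSv2i32_sketch : N2VDInt<0b11, 0b11, 0b10, 0b00, 0b01110, 0,
//                                   IIC_VUNAD, "vqabs", "s32",
//                                   v2i32, v2i32, int_arm_neon_vqabs>;
// with placeholder encoding bits; only the intrinsic and the types are meaningful.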
2476
2477// Same as above, but not predicated.
2478class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
2479              InstrItinClass itin, string OpcodeStr, string Dt,
2480              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2481  : N2Vnp<op19_18, op17_16, op10_8, op7, 0,  (outs DPR:$Vd), (ins DPR:$Vm),
2482          itin, OpcodeStr, Dt,
2483          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2484
2485class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
2486              InstrItinClass itin, string OpcodeStr, string Dt,
2487              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2488  : N2Vnp<op19_18, op17_16, op10_8, op7, 1,  (outs QPR:$Vd), (ins QPR:$Vm),
2489          itin, OpcodeStr, Dt,
2490          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2491
2492// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
2493class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2494              bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2495              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2496  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,  (outs QPR:$Vd), (ins QPR:$Vm),
2497          itin, OpcodeStr, Dt,
2498          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2499
2500// Same as N2VQIntXnp but with Vd as a src register.
2501class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2502              bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2503              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2504  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
2505          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
2506          itin, OpcodeStr, Dt,
2507          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
2508  let Constraints = "$src = $Vd";
2509}
2510
2511// Narrow 2-register operations.
2512class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2513           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2514           InstrItinClass itin, string OpcodeStr, string Dt,
2515           ValueType TyD, ValueType TyQ, SDNode OpNode>
2516  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2517        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2518        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
2519
2520// Narrow 2-register intrinsics.
2521class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2522              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2523              InstrItinClass itin, string OpcodeStr, string Dt,
2524              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
2525  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2526        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2527        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
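
// Illustrative sketch only: narrowing classes read a Q register and write a D
// register, so a plain narrowing move (VMOVN-style) could look like
//   def VMOVNv8i8_sketch : N2VN<0b11, 0b11, 0b10, 0b00, 0b01000, 0, 0,
//                               IIC_VUNAD, "vmovn", "i16", v8i8, v8i16, trunc>;
// where the encoding bits and itinerary are placeholders.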
2528
2529// Long 2-register operations (currently only used for VMOVL).
2530class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2531           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2532           InstrItinClass itin, string OpcodeStr, string Dt,
2533           ValueType TyQ, ValueType TyD, SDNode OpNode>
2534  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2535        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2536        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
2537
2538// Long 2-register intrinsics.
2539class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2540              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2541              InstrItinClass itin, string OpcodeStr, string Dt,
2542              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
2543  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2544        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2545        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
2546
2547// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
2548class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
2549  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
2550        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
2551        OpcodeStr, Dt, "$Vd, $Vm",
2552        "$src1 = $Vd, $src2 = $Vm", []>;
2553class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
2554                  InstrItinClass itin, string OpcodeStr, string Dt>
2555  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
2556        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
2557        "$src1 = $Vd, $src2 = $Vm", []>;
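
// Illustrative sketch only: the shuffle classes emit two tied register outputs,
// matching the in-place semantics of VTRN/VZIP/VUZP, e.g.
//   def VTRNd16_sketch : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
// with op19_18 selecting the element size and op11_7 the particular shuffle
// (bit values here are placeholders).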
2558
2559// Basic 3-register operations: double- and quad-register.
2560class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2561           InstrItinClass itin, string OpcodeStr, string Dt,
2562           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2563  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2564        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2565        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2566        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2567  // All of these have a two-operand InstAlias.
2568  let TwoOperandAliasConstraint = "$Vn = $Vd";
2569  let isCommutable = Commutable;
2570}
2571// Same as N3VD but no data type.
2572class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2573           InstrItinClass itin, string OpcodeStr,
2574           ValueType ResTy, ValueType OpTy,
2575           SDNode OpNode, bit Commutable>
2576  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
2577         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2578         OpcodeStr, "$Vd, $Vn, $Vm", "",
2579         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
2580  // All of these have a two-operand InstAlias.
2581  let TwoOperandAliasConstraint = "$Vn = $Vd";
2582  let isCommutable = Commutable;
2583}
2584
2585class N3VDSL<bits<2> op21_20, bits<4> op11_8,
2586             InstrItinClass itin, string OpcodeStr, string Dt,
2587             ValueType Ty, SDNode ShOp>
2588  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2589        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2590        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2591        [(set (Ty DPR:$Vd),
2592              (Ty (ShOp (Ty DPR:$Vn),
2593                        (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
2594  // All of these have a two-operand InstAlias.
2595  let TwoOperandAliasConstraint = "$Vn = $Vd";
2596  let isCommutable = 0;
2597}
2598class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
2599               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
2600  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2601        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2602        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
2603        [(set (Ty DPR:$Vd),
2604              (Ty (ShOp (Ty DPR:$Vn),
2605                        (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2606  // All of these have a two-operand InstAlias.
2607  let TwoOperandAliasConstraint = "$Vn = $Vd";
2608  let isCommutable = 0;
2609}
2610
2611class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2612           InstrItinClass itin, string OpcodeStr, string Dt,
2613           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2614  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2615        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2616        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2617        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2618  // All of these have a two-operand InstAlias.
2619  let TwoOperandAliasConstraint = "$Vn = $Vd";
2620  let isCommutable = Commutable;
2621}
2622class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2623           InstrItinClass itin, string OpcodeStr,
2624           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2625  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
2626         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2627         OpcodeStr, "$Vd, $Vn, $Vm", "",
2628         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
2629  // All of these have a two-operand InstAlias.
2630  let TwoOperandAliasConstraint = "$Vn = $Vd";
2631  let isCommutable = Commutable;
2632}
2633class N3VQSL<bits<2> op21_20, bits<4> op11_8,
2634             InstrItinClass itin, string OpcodeStr, string Dt,
2635             ValueType ResTy, ValueType OpTy, SDNode ShOp>
2636  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2637        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2638        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2639        [(set (ResTy QPR:$Vd),
2640              (ResTy (ShOp (ResTy QPR:$Vn),
2641                           (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2642                                                imm:$lane)))))]> {
2643  // All of these have a two-operand InstAlias.
2644  let TwoOperandAliasConstraint = "$Vn = $Vd";
2645  let isCommutable = 0;
2646}
2647class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
2648               ValueType ResTy, ValueType OpTy, SDNode ShOp>
2649  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2650        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2651        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
2652        [(set (ResTy QPR:$Vd),
2653              (ResTy (ShOp (ResTy QPR:$Vn),
2654                           (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
2655                                                imm:$lane)))))]> {
2656  // All of these have a two-operand InstAlias.
2657  let TwoOperandAliasConstraint = "$Vn = $Vd";
2658  let isCommutable = 0;
2659}
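
// Illustrative sketch only: N3VD/N3VQ are the workhorses for plain three-operand
// vector arithmetic.  A commutable integer add over 64-bit vectors could be
// instantiated roughly as
//   def VADDv2i32_sketch : N3VD<0, 0, 0b10, 0b1000, 0, IIC_VBINi4D,
//                               "vadd", "i32", v2i32, v2i32, add, 1>;
// (placeholder encoding bits and itinerary); N3VQ is the same shape over QPR.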
2660
2661// Basic 3-register intrinsics, both double- and quad-register.
2662class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2663              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2664              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2665  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2666        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
2667        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2668        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2669  // All of these have a two-operand InstAlias.
2670  let TwoOperandAliasConstraint = "$Vn = $Vd";
2671  let isCommutable = Commutable;
2672}
2673
2674class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2675                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2676                string Dt, ValueType ResTy, ValueType OpTy,
2677                SDPatternOperator IntOp, bit Commutable>
2678  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2679          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
2680          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
2681
2682class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2683                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2684  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2685        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2686        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2687        [(set (Ty DPR:$Vd),
2688              (Ty (IntOp (Ty DPR:$Vn),
2689                         (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
2690                                           imm:$lane)))))]> {
2691  let isCommutable = 0;
2692}
2693
2694class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2695                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2696  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2697        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2698        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2699        [(set (Ty DPR:$Vd),
2700              (Ty (IntOp (Ty DPR:$Vn),
2701                         (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2702  let isCommutable = 0;
2703}
2704class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2705              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2706              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2707  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2708        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
2709        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2710        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
2711  let TwoOperandAliasConstraint = "$Vm = $Vd";
2712  let isCommutable = 0;
2713}
2714
2715class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2716              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2717              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2718  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2719        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
2720        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2721        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2722  // All of these have a two-operand InstAlias.
2723  let TwoOperandAliasConstraint = "$Vn = $Vd";
2724  let isCommutable = Commutable;
2725}
2726
2727class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2728                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2729                string Dt, ValueType ResTy, ValueType OpTy,
2730                SDPatternOperator IntOp, bit Commutable>
2731  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2732          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
2733          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2734
2735// Same as N3VQIntnp but with Vd as a src register.
2736class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2737                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2738                string Dt, ValueType ResTy, ValueType OpTy,
2739                SDPatternOperator IntOp, bit Commutable>
2740  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2741          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
2742          f, itin, OpcodeStr, Dt,
2743          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
2744                                       (OpTy QPR:$Vm))))]> {
2745  let Constraints = "$src = $Vd";
2746}
2747
2748class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2749                string OpcodeStr, string Dt,
2750                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2751  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2752        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2753        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2754        [(set (ResTy QPR:$Vd),
2755              (ResTy (IntOp (ResTy QPR:$Vn),
2756                            (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2757                                                 imm:$lane)))))]> {
2758  let isCommutable = 0;
2759}
2760class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2761                  string OpcodeStr, string Dt,
2762                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2763  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2764        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2765        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2766        [(set (ResTy QPR:$Vd),
2767              (ResTy (IntOp (ResTy QPR:$Vn),
2768                            (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
2769                                                 imm:$lane)))))]> {
2770  let isCommutable = 0;
2771}
2772class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2773              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2774              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2775  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2776        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
2777        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2778        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
2779  let TwoOperandAliasConstraint = "$Vm = $Vd";
2780  let isCommutable = 0;
2781}
2782
2783// Multiply-Add/Sub operations: double- and quad-register.
2784class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2785                InstrItinClass itin, string OpcodeStr, string Dt,
2786                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
2787  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2788        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2789        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2790        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2791                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
2792
2793class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2794                  string OpcodeStr, string Dt,
2795                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2796  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2797        (outs DPR:$Vd),
2798        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2799        NVMulSLFrm, itin,
2800        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2801        [(set (Ty DPR:$Vd),
2802              (Ty (ShOp (Ty DPR:$src1),
2803                        (Ty (MulOp DPR:$Vn,
2804                                   (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
2805                                                     imm:$lane)))))))]>;
2806class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2807                    string OpcodeStr, string Dt,
2808                    ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2809  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2810        (outs DPR:$Vd),
2811        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2812        NVMulSLFrm, itin,
2813        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2814        [(set (Ty DPR:$Vd),
2815              (Ty (ShOp (Ty DPR:$src1),
2816                        (Ty (MulOp DPR:$Vn,
2817                                   (Ty (ARMvduplane (Ty DPR_8:$Vm),
2818                                                     imm:$lane)))))))]>;
2819
2820class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2821                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
2822                SDPatternOperator MulOp, SDPatternOperator OpNode>
2823  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2824        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2825        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2826        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2827                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
2828class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2829                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
2830                  SDPatternOperator MulOp, SDPatternOperator ShOp>
2831  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2832        (outs QPR:$Vd),
2833        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2834        NVMulSLFrm, itin,
2835        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2836        [(set (ResTy QPR:$Vd),
2837              (ResTy (ShOp (ResTy QPR:$src1),
2838                           (ResTy (MulOp QPR:$Vn,
2839                                   (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2840                                                        imm:$lane)))))))]>;
2841class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2842                    string OpcodeStr, string Dt,
2843                    ValueType ResTy, ValueType OpTy,
2844                    SDPatternOperator MulOp, SDPatternOperator ShOp>
2845  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2846        (outs QPR:$Vd),
2847        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2848        NVMulSLFrm, itin,
2849        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2850        [(set (ResTy QPR:$Vd),
2851              (ResTy (ShOp (ResTy QPR:$src1),
2852                           (ResTy (MulOp QPR:$Vn,
2853                                   (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
2854                                                        imm:$lane)))))))]>;
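
// Illustrative sketch only: the MulOp classes fuse a multiply node with an
// accumulate (or subtract) node into one tied-destination instruction, e.g.
//   def VMLAv2i32_sketch : N3VDMulOp<0, 0, 0b10, 0b1001, 0, IIC_VMULi32D,
//                                    "vmla", "i32", v2i32, mul, add>;
// where the encoding bits and itinerary are placeholders; the SL/SL16 variants
// do the same with one operand duplicated from a vector lane.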
2855
2856// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
2857class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2858                InstrItinClass itin, string OpcodeStr, string Dt,
2859                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
2860  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2861        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2862        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2863        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2864                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
2865class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2866                InstrItinClass itin, string OpcodeStr, string Dt,
2867                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
2868  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2869        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2870        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2871        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2872                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
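
// Illustrative sketch only: the IntOp classes accumulate the result of an
// intrinsic into the tied destination, which is how VABA can be modelled, e.g.
//   def VABAsv2i32_sketch : N3VDIntOp<0, 0, 0b10, 0b0111, 1, IIC_VPALiD,
//                                     "vaba", "s32", v2i32,
//                                     int_arm_neon_vabds, add>;
// with placeholder encoding bits and itinerary.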
2873
2874// Neon 3-argument intrinsics, both double- and quad-register.
2875// The destination register is also used as the first source operand register.
2876class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2877               InstrItinClass itin, string OpcodeStr, string Dt,
2878               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2879  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2880        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2881        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2882        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
2883                                      (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
2884class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2885               InstrItinClass itin, string OpcodeStr, string Dt,
2886               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2887  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2888        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2889        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2890        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
2891                                      (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2892
2893// Long Multiply-Add/Sub operations.
2894class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2895                InstrItinClass itin, string OpcodeStr, string Dt,
2896                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2897  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2898        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2899        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2900        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
2901                                (TyQ (MulOp (TyD DPR:$Vn),
2902                                            (TyD DPR:$Vm)))))]>;
2903class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
2904                  InstrItinClass itin, string OpcodeStr, string Dt,
2905                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2906  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2907        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2908        NVMulSLFrm, itin,
2909        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2910        [(set QPR:$Vd,
2911          (OpNode (TyQ QPR:$src1),
2912                  (TyQ (MulOp (TyD DPR:$Vn),
2913                              (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
2914                                                 imm:$lane))))))]>;
2915class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2916                    InstrItinClass itin, string OpcodeStr, string Dt,
2917                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2918  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2919        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2920        NVMulSLFrm, itin,
2921        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2922        [(set QPR:$Vd,
2923          (OpNode (TyQ QPR:$src1),
2924                  (TyQ (MulOp (TyD DPR:$Vn),
2925                              (TyD (ARMvduplane (TyD DPR_8:$Vm),
2926                                                 imm:$lane))))))]>;
2927
2928// Long Intrinsic-Op vector operations with explicit extend (VABAL).
2929class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2930                   InstrItinClass itin, string OpcodeStr, string Dt,
2931                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
2932                   SDNode OpNode>
2933  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2934        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2935        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2936        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
2937                                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
2938                                                        (TyD DPR:$Vm)))))))]>;
2939
2940// Neon Long 3-argument intrinsic.  The destination register is
2941// a quad-register and is also used as the first source operand register.
2942class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2943               InstrItinClass itin, string OpcodeStr, string Dt,
2944               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
2945  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2946        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2947        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2948        [(set QPR:$Vd,
2949          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
2950class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2951                 string OpcodeStr, string Dt,
2952                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2953  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
2954        (outs QPR:$Vd),
2955        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2956        NVMulSLFrm, itin,
2957        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2958        [(set (ResTy QPR:$Vd),
2959              (ResTy (IntOp (ResTy QPR:$src1),
2960                            (OpTy DPR:$Vn),
2961                            (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2962                                                imm:$lane)))))]>;
2963class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2964                   InstrItinClass itin, string OpcodeStr, string Dt,
2965                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2966  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
2967        (outs QPR:$Vd),
2968        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2969        NVMulSLFrm, itin,
2970        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2971        [(set (ResTy QPR:$Vd),
2972              (ResTy (IntOp (ResTy QPR:$src1),
2973                            (OpTy DPR:$Vn),
2974                            (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
2975                                                imm:$lane)))))]>;
2976
2977// Narrowing 3-register intrinsics.
2978class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2979              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
2980              SDPatternOperator IntOp, bit Commutable>
2981  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2982        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
2983        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2984        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
2985  let isCommutable = Commutable;
2986}
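
// Illustrative sketch only: a narrowing 3-register intrinsic takes two Q-register
// inputs and produces a D-register result, e.g. a rounding add-high-narrow:
//   def VRADDHNv8i8_sketch : N3VNInt<1, 0, 0b00, 0b0100, 0, "vraddhn", "i16",
//                                    v8i8, v8i16, int_arm_neon_vraddhn, 1>;
// (placeholder encoding bits).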
2987
2988// Long 3-register operations.
2989class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2990           InstrItinClass itin, string OpcodeStr, string Dt,
2991           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
2992  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2993        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2994        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2995        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
2996  let isCommutable = Commutable;
2997}
2998
2999class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
3000             InstrItinClass itin, string OpcodeStr, string Dt,
3001             ValueType TyQ, ValueType TyD, SDNode OpNode>
3002  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3003        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3004        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3005        [(set QPR:$Vd,
3006          (TyQ (OpNode (TyD DPR:$Vn),
3007                       (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
3008class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3009               InstrItinClass itin, string OpcodeStr, string Dt,
3010               ValueType TyQ, ValueType TyD, SDNode OpNode>
3011  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3012        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3013        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3014        [(set QPR:$Vd,
3015          (TyQ (OpNode (TyD DPR:$Vn),
3016                       (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
3017
3018// Long 3-register operations with explicitly extended operands.
3019class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3020              InstrItinClass itin, string OpcodeStr, string Dt,
3021              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
3022              bit Commutable>
3023  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3024        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3025        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3026        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
3027                                (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
3028  let isCommutable = Commutable;
3029}
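
// Illustrative sketch only: N3VLExt widens both D-register operands with ExtOp
// before applying OpNode, which is how VADDL/VSUBL can be modelled, e.g.
//   def VADDLsv4i32_sketch : N3VLExt<0, 1, 0b01, 0b0000, 0, IIC_VSHLiD,
//                                    "vaddl", "s16", v4i32, v4i16,
//                                    add, sext, 1>;
// with placeholder encoding bits and itinerary.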
3030
3031// Long 3-register intrinsics with explicit extend (VABDL).
3032class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3033                 InstrItinClass itin, string OpcodeStr, string Dt,
3034                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
3035                 bit Commutable>
3036  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3037        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3038        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3039        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
3040                                                (TyD DPR:$Vm))))))]> {
3041  let isCommutable = Commutable;
3042}
3043
3044// Long 3-register intrinsics.
3045class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3046              InstrItinClass itin, string OpcodeStr, string Dt,
3047              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
3048  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3049        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3050        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3051        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
3052  let isCommutable = Commutable;
3053}
3054
3055// Same as above, but not predicated.
3056class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
3057                bit op4, InstrItinClass itin, string OpcodeStr,
3058                string Dt, ValueType ResTy, ValueType OpTy,
3059                SDPatternOperator IntOp, bit Commutable>
3060  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
3061          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
3062          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
3063
3064class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
3065                string OpcodeStr, string Dt,
3066                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3067  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3068        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3069        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3070        [(set (ResTy QPR:$Vd),
3071              (ResTy (IntOp (OpTy DPR:$Vn),
3072                            (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
3073                                                imm:$lane)))))]>;
3074class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3075                  InstrItinClass itin, string OpcodeStr, string Dt,
3076                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3077  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3078        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3079        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3080        [(set (ResTy QPR:$Vd),
3081              (ResTy (IntOp (OpTy DPR:$Vn),
3082                            (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
3083                                                imm:$lane)))))]>;
3084
3085// Wide 3-register operations.
3086class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3087           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
3088           SDNode OpNode, SDNode ExtOp, bit Commutable>
3089  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3090        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
3091        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3092        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
3093                                (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
3094  // All of these have a two-operand InstAlias.
3095  let TwoOperandAliasConstraint = "$Vn = $Vd";
3096  let isCommutable = Commutable;
3097}
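
// Illustrative sketch only: wide operations keep one Q-register operand and
// extend only the D-register operand, e.g. a VADDW-style definition:
//   def VADDWsv4i32_sketch : N3VW<0, 1, 0b01, 0b0001, 0, "vaddw", "s16",
//                                 v4i32, v4i16, add, sext, 0>;
// (placeholder encoding bits).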
3098
3099// Pairwise long 2-register intrinsics, both double- and quad-register.
3100class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3101                bits<2> op17_16, bits<5> op11_7, bit op4,
3102                string OpcodeStr, string Dt,
3103                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3104  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
3105        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
3106        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
3107class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3108                bits<2> op17_16, bits<5> op11_7, bit op4,
3109                string OpcodeStr, string Dt,
3110                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3111  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
3112        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
3113        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
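
// Illustrative sketch only: pairwise long intrinsics halve the lane count while
// doubling the element width, e.g. VPADDL over two 32-bit lanes:
//   def VPADDLsv2i32_sketch : N2VDPLInt<0b11, 0b11, 0b10, 0b00, 0b00100, 0,
//                                       "vpaddl", "s32", v1i64, v2i32,
//                                       int_arm_neon_vpaddls>;
// (placeholder encoding bits).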
3114
3115// Pairwise long 2-register accumulate intrinsics,
3116// both double- and quad-register.
3117// The destination register is also used as the first source operand register.
3118class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3119                 bits<2> op17_16, bits<5> op11_7, bit op4,
3120                 string OpcodeStr, string Dt,
3121                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3122  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
3123        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
3124        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
3125        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
3126class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3127                 bits<2> op17_16, bits<5> op11_7, bit op4,
3128                 string OpcodeStr, string Dt,
3129                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3130  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
3131        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
3132        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
3133        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
3134
3135// Shift by immediate,
3136// both double- and quad-register.
3137let TwoOperandAliasConstraint = "$Vm = $Vd" in {
3138class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3139             Format f, InstrItinClass itin, Operand ImmTy,
3140             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
3141  : N2VImm<op24, op23, op11_8, op7, 0, op4,
3142           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
3143           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3144           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
3145class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3146             Format f, InstrItinClass itin, Operand ImmTy,
3147             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
3148  : N2VImm<op24, op23, op11_8, op7, 1, op4,
3149           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
3150           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3151           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
3152}
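
// Illustrative sketch only, assuming an immediate-shift pattern fragment named
// ARMvshlImm and an imm0_31 operand are available: a left shift by immediate
// over a D register could be written as
//   def VSHLiv2i32_sketch : N2VDSh<0, 1, 0b0101, 1, 1, N2RegVShLFrm,
//                                  IIC_VSHLiD, imm0_31, "vshl", "i32",
//                                  v2i32, ARMvshlImm>;
// with placeholder encoding bits.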
3153
3154// Long shift by immediate.
3155class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
3156             string OpcodeStr, string Dt,
3157             ValueType ResTy, ValueType OpTy, Operand ImmTy,
3158             SDPatternOperator OpNode>
3159  : N2VImm<op24, op23, op11_8, op7, op6, op4,
3160           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
3161           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3162           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
3163
3164// Narrow shift by immediate.
3165class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
3166             InstrItinClass itin, string OpcodeStr, string Dt,
3167             ValueType ResTy, ValueType OpTy, Operand ImmTy,
3168             SDPatternOperator OpNode>
3169  : N2VImm<op24, op23, op11_8, op7, op6, op4,
3170           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
3171           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3172           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
3173                                          (i32 ImmTy:$SIMM))))]>;
3174
3175// Shift right by immediate and accumulate,
3176// both double- and quad-register.
3177let TwoOperandAliasConstraint = "$Vm = $Vd" in {
3178class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3179                Operand ImmTy, string OpcodeStr, string Dt,
3180                ValueType Ty, SDNode ShOp>
3181  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
3182           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
3183           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3184           [(set DPR:$Vd, (Ty (add DPR:$src1,
3185                                (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
3186class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3187                Operand ImmTy, string OpcodeStr, string Dt,
3188                ValueType Ty, SDNode ShOp>
3189  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
3190           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
3191           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3192           [(set QPR:$Vd, (Ty (add QPR:$src1,
3193                                (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
3194}
3195
3196// Shift by immediate and insert,
3197// both double- and quad-register.
3198let TwoOperandAliasConstraint = "$Vm = $Vd" in {
3199class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3200                Operand ImmTy, Format f, string OpcodeStr, string Dt,
3201                ValueType Ty,SDNode ShOp>
3202  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
3203           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
3204           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3205           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
3206class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3207                Operand ImmTy, Format f, string OpcodeStr, string Dt,
3208                ValueType Ty,SDNode ShOp>
3209  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
3210           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
3211           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3212           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
3213}
3214
3215// Convert, with fractional bits immediate,
3216// both double- and quad-register.
3217class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3218              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
3219              SDPatternOperator IntOp>
3220  : N2VImm<op24, op23, op11_8, op7, 0, op4,
3221           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3222           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3223           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
3224class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3225              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
3226              SDPatternOperator IntOp>
3227  : N2VImm<op24, op23, op11_8, op7, 1, op4,
3228           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3229           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3230           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
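
// Illustrative sketch only: the fixed-point convert classes pair a fractional-bits
// immediate with a conversion intrinsic, e.g. float to signed fixed-point:
//   def VCVTf2xsd_sketch : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
//                                  v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
// (encoding bits shown are placeholders).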
3231
3232//===----------------------------------------------------------------------===//
3233// Multiclasses
3234//===----------------------------------------------------------------------===//
3235
3236// Abbreviations used in multiclass suffixes:
3237//   Q = quarter int (8 bit) elements
3238//   H = half int (16 bit) elements
3239//   S = single int (32 bit) elements
3240//   D = double int (64 bit) elements
3241
3242// Neon 2-register vector operations and intrinsics.
3243
3244// Neon 2-register comparisons.
3245//   source operand element sizes of 8, 16 and 32 bits:
3246multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3247                       bits<5> op11_7, bit op4, string opc, string Dt,
3248                       string asm, PatFrag fc> {
3249  // 64-bit vector types.
3250  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
3251                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3252                  opc, !strconcat(Dt, "8"), asm, "",
3253                  [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
3254  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3255                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3256                  opc, !strconcat(Dt, "16"), asm, "",
3257                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
3258  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3259                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3260                  opc, !strconcat(Dt, "32"), asm, "",
3261                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
3262  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3263                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3264                  opc, "f32", asm, "",
3265                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
3266    let Inst{10} = 1; // overwrite F = 1
3267  }
3268  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3269                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3270                  opc, "f16", asm, "",
3271                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
3272              Requires<[HasNEON,HasFullFP16]> {
3273    let Inst{10} = 1; // overwrite F = 1
3274  }
3275
3276  // 128-bit vector types.
3277  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
3278                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3279                  opc, !strconcat(Dt, "8"), asm, "",
3280                  [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
3281  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3282                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3283                  opc, !strconcat(Dt, "16"), asm, "",
3284                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
3285  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3286                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3287                  opc, !strconcat(Dt, "32"), asm, "",
3288                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
3289  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3290                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3291                  opc, "f32", asm, "",
3292                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
3293    let Inst{10} = 1; // overwrite F = 1
3294  }
3295  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3296                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3297                  opc, "f16", asm, "",
3298                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
3299              Requires<[HasNEON,HasFullFP16]> {
3300    let Inst{10} = 1; // overwrite F = 1
3301  }
3302}
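
// Illustrative usage sketch only, assuming a condition-code fragment named
// ARMCCeq: the compare-against-zero multiclass above would be instantiated as
//   defm VCEQz_sketch : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
//                                   "$Vd, $Vm, #0", ARMCCeq>;
// expanding to one ARMvcmpz pattern per element size (bits are placeholders).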
3303
3304// Neon 3-register comparisons.
3305class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3306               InstrItinClass itin, string OpcodeStr, string Dt,
3307               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
3308  : N3V<op24, op23, op21_20, op11_8, 1, op4,
3309        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
3310        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3311        [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
3312  // All of these have a two-operand InstAlias.
3313  let TwoOperandAliasConstraint = "$Vn = $Vd";
3314  let isCommutable = Commutable;
3315}
3316
3317class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3318               InstrItinClass itin, string OpcodeStr, string Dt,
3319               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
3320  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3321        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3322        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3323        [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
3324  // All of these have a two-operand InstAlias.
3325  let TwoOperandAliasConstraint = "$Vn = $Vd";
3326  let isCommutable = Commutable;
3327}
3328
3329multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
3330                       InstrItinClass itinD16, InstrItinClass itinD32,
3331                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3332                       string OpcodeStr, string Dt,
3333                       PatFrag fc, bit Commutable = 0> {
3334  // 64-bit vector types.
3335  def v8i8  : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
3336                       OpcodeStr, !strconcat(Dt, "8"),
3337                       v8i8, v8i8, fc, Commutable>;
3338  def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
3339                       OpcodeStr, !strconcat(Dt, "16"),
3340                       v4i16, v4i16, fc, Commutable>;
3341  def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
3342                       OpcodeStr, !strconcat(Dt, "32"),
3343                       v2i32, v2i32, fc, Commutable>;
3344
3345  // 128-bit vector types.
3346  def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
3347                       OpcodeStr, !strconcat(Dt, "8"),
3348                       v16i8, v16i8, fc, Commutable>;
3349  def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
3350                       OpcodeStr, !strconcat(Dt, "16"),
3351                       v8i16, v8i16, fc, Commutable>;
3352  def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
3353                       OpcodeStr, !strconcat(Dt, "32"),
3354                       v4i32, v4i32, fc, Commutable>;
3355}
3356
3357
3358// Neon 2-register vector intrinsics,
3359//   element sizes of 8, 16 and 32 bits:
3360multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3361                      bits<5> op11_7, bit op4,
3362                      InstrItinClass itinD, InstrItinClass itinQ,
3363                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3364  // 64-bit vector types.
3365  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3366                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3367  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3368                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
3369  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3370                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
3371
3372  // 128-bit vector types.
3373  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3374                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
3375  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3376                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
3377  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3378                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
3379}
3380
3381
3382// Neon Narrowing 2-register vector operations,
3383//   source operand element sizes of 16, 32 and 64 bits:
3384multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3385                    bits<5> op11_7, bit op6, bit op4,
3386                    InstrItinClass itin, string OpcodeStr, string Dt,
3387                    SDNode OpNode> {
3388  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3389                   itin, OpcodeStr, !strconcat(Dt, "16"),
3390                   v8i8, v8i16, OpNode>;
3391  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3392                   itin, OpcodeStr, !strconcat(Dt, "32"),
3393                   v4i16, v4i32, OpNode>;
3394  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3395                   itin, OpcodeStr, !strconcat(Dt, "64"),
3396                   v2i32, v2i64, OpNode>;
3397}
3398
3399// Neon Narrowing 2-register vector intrinsics,
3400//   source operand element sizes of 16, 32 and 64 bits:
3401multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3402                       bits<5> op11_7, bit op6, bit op4,
3403                       InstrItinClass itin, string OpcodeStr, string Dt,
3404                       SDPatternOperator IntOp> {
3405  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3406                      itin, OpcodeStr, !strconcat(Dt, "16"),
3407                      v8i8, v8i16, IntOp>;
3408  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3409                      itin, OpcodeStr, !strconcat(Dt, "32"),
3410                      v4i16, v4i32, IntOp>;
3411  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3412                      itin, OpcodeStr, !strconcat(Dt, "64"),
3413                      v2i32, v2i64, IntOp>;
3414}
3415
3416
3417// Neon Lengthening 2-register vector operations (currently specific to VMOVL),
3418//   source operand element sizes of 8, 16 and 32 bits:
3419multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
3420                    string OpcodeStr, string Dt, SDNode OpNode> {
3421  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3422                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
3423  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3424                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3425  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3426                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
3427}
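
// Illustrative usage sketch only: a signed lengthening move would be a single
// line built from this multiclass, e.g.
//   defm VMOVLs_sketch : N2VL_QHS<0b01, 0b10100, 0, 1, "vmovl", "s", sext>;
// which expands to the v8i16, v4i32 and v2i64 widening forms above.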
3428
3429
3430// Neon 3-register vector operations.
3431
3432// First with only element sizes of 8, 16 and 32 bits:
3433multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3434                   InstrItinClass itinD16, InstrItinClass itinD32,
3435                   InstrItinClass itinQ16, InstrItinClass itinQ32,
3436                   string OpcodeStr, string Dt,
3437                   SDNode OpNode, bit Commutable = 0> {
3438  // 64-bit vector types.
3439  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
3440                   OpcodeStr, !strconcat(Dt, "8"),
3441                   v8i8, v8i8, OpNode, Commutable>;
3442  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
3443                   OpcodeStr, !strconcat(Dt, "16"),
3444                   v4i16, v4i16, OpNode, Commutable>;
3445  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
3446                   OpcodeStr, !strconcat(Dt, "32"),
3447                   v2i32, v2i32, OpNode, Commutable>;
3448
3449  // 128-bit vector types.
3450  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
3451                   OpcodeStr, !strconcat(Dt, "8"),
3452                   v16i8, v16i8, OpNode, Commutable>;
3453  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
3454                   OpcodeStr, !strconcat(Dt, "16"),
3455                   v8i16, v8i16, OpNode, Commutable>;
3456  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
3457                   OpcodeStr, !strconcat(Dt, "32"),
3458                   v4i32, v4i32, OpNode, Commutable>;
3459}
3460
3461multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
3462  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
3463  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
3464  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
3465  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
3466                     v4i32, v2i32, ShOp>;
3467}
3468
3469// ....then also with element size of 64 bits:
3470multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3471                    InstrItinClass itinD, InstrItinClass itinQ,
3472                    string OpcodeStr, string Dt,
3473                    SDNode OpNode, bit Commutable = 0>
3474  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
3475            OpcodeStr, Dt, OpNode, Commutable> {
3476  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
3477                   OpcodeStr, !strconcat(Dt, "64"),
3478                   v1i64, v1i64, OpNode, Commutable>;
3479  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
3480                   OpcodeStr, !strconcat(Dt, "64"),
3481                   v2i64, v2i64, OpNode, Commutable>;
3482}
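
// As a concrete use, the integer VADD definition later in this file
// instantiates this multiclass:
//   defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ,
//                        "vadd", "i", add, 1>;
// expanding to VADDv8i8/v4i16/v2i32/v1i64 for D registers and
// VADDv16i8/v8i16/v4i32/v2i64 for Q registers, each matching an "add" of the
// corresponding vector type (Commutable = 1 marks the expansions commutable).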
3483
3484
3485// Neon 3-register vector intrinsics.
3486
3487// First with only element sizes of 16 and 32 bits:
3488multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3489                     InstrItinClass itinD16, InstrItinClass itinD32,
3490                     InstrItinClass itinQ16, InstrItinClass itinQ32,
3491                     string OpcodeStr, string Dt,
3492                     SDPatternOperator IntOp, bit Commutable = 0> {
3493  // 64-bit vector types.
3494  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
3495                      OpcodeStr, !strconcat(Dt, "16"),
3496                      v4i16, v4i16, IntOp, Commutable>;
3497  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
3498                      OpcodeStr, !strconcat(Dt, "32"),
3499                      v2i32, v2i32, IntOp, Commutable>;
3500
3501  // 128-bit vector types.
3502  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
3503                      OpcodeStr, !strconcat(Dt, "16"),
3504                      v8i16, v8i16, IntOp, Commutable>;
3505  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
3506                      OpcodeStr, !strconcat(Dt, "32"),
3507                      v4i32, v4i32, IntOp, Commutable>;
3508}
3509multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3510                     InstrItinClass itinD16, InstrItinClass itinD32,
3511                     InstrItinClass itinQ16, InstrItinClass itinQ32,
3512                     string OpcodeStr, string Dt,
3513                     SDPatternOperator IntOp> {
3514  // 64-bit vector types.
3515  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
3516                      OpcodeStr, !strconcat(Dt, "16"),
3517                      v4i16, v4i16, IntOp>;
3518  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
3519                      OpcodeStr, !strconcat(Dt, "32"),
3520                      v2i32, v2i32, IntOp>;
3521
3522  // 128-bit vector types.
3523  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
3524                      OpcodeStr, !strconcat(Dt, "16"),
3525                      v8i16, v8i16, IntOp>;
3526  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
3527                      OpcodeStr, !strconcat(Dt, "32"),
3528                      v4i32, v4i32, IntOp>;
3529}
3530
3531multiclass N3VIntSL_HS<bits<4> op11_8,
3532                       InstrItinClass itinD16, InstrItinClass itinD32,
3533                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3534                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3535  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
3536                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
3537  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
3538                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
3539  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
3540                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
3541  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
3542                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
3543}
3544
3545// ....then also with element size of 8 bits:
3546multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3547                      InstrItinClass itinD16, InstrItinClass itinD32,
3548                      InstrItinClass itinQ16, InstrItinClass itinQ32,
3549                      string OpcodeStr, string Dt,
3550                      SDPatternOperator IntOp, bit Commutable = 0>
3551  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3552              OpcodeStr, Dt, IntOp, Commutable> {
3553  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
3554                      OpcodeStr, !strconcat(Dt, "8"),
3555                      v8i8, v8i8, IntOp, Commutable>;
3556  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
3557                      OpcodeStr, !strconcat(Dt, "8"),
3558                      v16i8, v16i8, IntOp, Commutable>;
3559}
3560multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3561                      InstrItinClass itinD16, InstrItinClass itinD32,
3562                      InstrItinClass itinQ16, InstrItinClass itinQ32,
3563                      string OpcodeStr, string Dt,
3564                      SDPatternOperator IntOp>
3565  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3566              OpcodeStr, Dt, IntOp> {
3567  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
3568                      OpcodeStr, !strconcat(Dt, "8"),
3569                      v8i8, v8i8, IntOp>;
3570  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
3571                      OpcodeStr, !strconcat(Dt, "8"),
3572                      v16i8, v16i8, IntOp>;
3573}
3574
3575
3576// ....then also with element size of 64 bits:
3577multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3578                       InstrItinClass itinD16, InstrItinClass itinD32,
3579                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3580                       string OpcodeStr, string Dt,
3581                       SDPatternOperator IntOp, bit Commutable = 0>
3582  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3583               OpcodeStr, Dt, IntOp, Commutable> {
3584  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
3585                      OpcodeStr, !strconcat(Dt, "64"),
3586                      v1i64, v1i64, IntOp, Commutable>;
3587  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
3588                      OpcodeStr, !strconcat(Dt, "64"),
3589                      v2i64, v2i64, IntOp, Commutable>;
3590}
3591multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3592                       InstrItinClass itinD16, InstrItinClass itinD32,
3593                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3594                       string OpcodeStr, string Dt,
3595                       SDPatternOperator IntOp>
3596  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3597               OpcodeStr, Dt, IntOp> {
3598  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
3599                      OpcodeStr, !strconcat(Dt, "64"),
3600                      v1i64, v1i64, IntOp>;
3601  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
3602                      OpcodeStr, !strconcat(Dt, "64"),
3603                      v2i64, v2i64, IntOp>;
3604}
3605
3606// Neon Narrowing 3-register vector intrinsics,
3607//   source operand element sizes of 16, 32 and 64 bits:
3608multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3609                       string OpcodeStr, string Dt,
3610                       SDPatternOperator IntOp, bit Commutable = 0> {
3611  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
3612                      OpcodeStr, !strconcat(Dt, "16"),
3613                      v8i8, v8i16, IntOp, Commutable>;
3614  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
3615                      OpcodeStr, !strconcat(Dt, "32"),
3616                      v4i16, v4i32, IntOp, Commutable>;
3617  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
3618                      OpcodeStr, !strconcat(Dt, "64"),
3619                      v2i32, v2i64, IntOp, Commutable>;
3620}
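
// VADDHN and VRADDHN below are built from this narrowing multiclass:
//   defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VADDHN passes null_frag and is instead selected by the explicit
// trunc-of-shifted-add patterns that follow its definition, while VRADDHN
// maps directly to int_arm_neon_vraddhn.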
3621
3622
3623// Neon Long 3-register vector operations.
3624
3625multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3626                    InstrItinClass itin16, InstrItinClass itin32,
3627                    string OpcodeStr, string Dt,
3628                    SDNode OpNode, bit Commutable = 0> {
3629  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
3630                   OpcodeStr, !strconcat(Dt, "8"),
3631                   v8i16, v8i8, OpNode, Commutable>;
3632  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
3633                   OpcodeStr, !strconcat(Dt, "16"),
3634                   v4i32, v4i16, OpNode, Commutable>;
3635  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
3636                   OpcodeStr, !strconcat(Dt, "32"),
3637                   v2i64, v2i32, OpNode, Commutable>;
3638}
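
// The widening integer multiplies below use this multiclass, e.g.:
//   defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
//                          "vmull", "s", ARMvmulls, 1>;
// producing VMULLsv8i16, VMULLsv4i32 and VMULLsv2i64 (Q = D * D).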
3639
3640multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
3641                     InstrItinClass itin, string OpcodeStr, string Dt,
3642                     SDNode OpNode> {
3643  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
3644                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3645  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
3646                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
3647}
3648
3649multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3650                       InstrItinClass itin16, InstrItinClass itin32,
3651                       string OpcodeStr, string Dt,
3652                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
3653  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
3654                      OpcodeStr, !strconcat(Dt, "8"),
3655                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
3656  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
3657                      OpcodeStr, !strconcat(Dt, "16"),
3658                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
3659  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
3660                      OpcodeStr, !strconcat(Dt, "32"),
3661                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
3662}
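
// N3VLExt is used when the long operation is expressed as ordinary arithmetic
// on explicitly extended operands rather than as an intrinsic; VADDL below is
// the canonical example:
//   defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
//                             "vaddl", "s", add, sext, 1>;
// i.e. the selected pattern is roughly (add (sext Dn), (sext Dm)) -> Qd.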
3663
3664// Neon Long 3-register vector intrinsics.
3665
3666// First with only element sizes of 16 and 32 bits:
3667multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3668                      InstrItinClass itin16, InstrItinClass itin32,
3669                      string OpcodeStr, string Dt,
3670                      SDPatternOperator IntOp, bit Commutable = 0> {
3671  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
3672                      OpcodeStr, !strconcat(Dt, "16"),
3673                      v4i32, v4i16, IntOp, Commutable>;
3674  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
3675                      OpcodeStr, !strconcat(Dt, "32"),
3676                      v2i64, v2i32, IntOp, Commutable>;
3677}
3678
3679multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
3680                        InstrItinClass itin, string OpcodeStr, string Dt,
3681                        SDPatternOperator IntOp> {
3682  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
3683                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3684  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
3685                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3686}
3687
3688// ....then also with element size of 8 bits:
3689multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3690                       InstrItinClass itin16, InstrItinClass itin32,
3691                       string OpcodeStr, string Dt,
3692                       SDPatternOperator IntOp, bit Commutable = 0>
3693  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
3694               IntOp, Commutable> {
3695  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
3696                      OpcodeStr, !strconcat(Dt, "8"),
3697                      v8i16, v8i8, IntOp, Commutable>;
3698}
3699
3700// ....with explicit extend (VABDL).
3701multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3702                       InstrItinClass itin, string OpcodeStr, string Dt,
3703                       SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
3704  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
3705                         OpcodeStr, !strconcat(Dt, "8"),
3706                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
3707  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
3708                         OpcodeStr, !strconcat(Dt, "16"),
3709                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
3710  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
3711                         OpcodeStr, !strconcat(Dt, "32"),
3712                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
3713}
3714
3715
3716// Neon Wide 3-register vector operations,
3717//   source operand element sizes of 8, 16 and 32 bits:
3718multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3719                    string OpcodeStr, string Dt,
3720                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
3721  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
3722                   OpcodeStr, !strconcat(Dt, "8"),
3723                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
3724  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
3725                   OpcodeStr, !strconcat(Dt, "16"),
3726                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
3727  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
3728                   OpcodeStr, !strconcat(Dt, "32"),
3729                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
3730}
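
// The wide forms take one Q-register operand and one extended D-register
// operand; VADDW below instantiates this multiclass:
//   defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
// matching roughly (add Qn, (sext Dm)) -> Qd.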
3731
3732
3733// Neon Multiply-Op vector operations,
3734//   element sizes of 8, 16 and 32 bits:
3735multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3736                        InstrItinClass itinD16, InstrItinClass itinD32,
3737                        InstrItinClass itinQ16, InstrItinClass itinQ32,
3738                        string OpcodeStr, string Dt, SDNode OpNode> {
3739  // 64-bit vector types.
3740  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
3741                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
3742  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
3743                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
3744  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
3745                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
3746
3747  // 128-bit vector types.
3748  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
3749                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
3750  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
3751                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
3752  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
3753                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
3754}
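
// The multiply-op form fuses a multiply with a second operation into one
// accumulating instruction; the integer VMLA below is defined this way:
//   defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
//                            IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// so each expansion matches roughly (add Vd, (mul Vn, Vm)).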
3755
3756multiclass N3VMulOpSL_HS<bits<4> op11_8,
3757                         InstrItinClass itinD16, InstrItinClass itinD32,
3758                         InstrItinClass itinQ16, InstrItinClass itinQ32,
3759                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
3760  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
3761                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
3762  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
3763                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
3764  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
3765                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
3766                            mul, ShOp>;
3767  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
3768                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
3769                          mul, ShOp>;
3770}
3771
3772// Neon Intrinsic-Op vector operations,
3773//   element sizes of 8, 16 and 32 bits:
3774multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3775                        InstrItinClass itinD, InstrItinClass itinQ,
3776                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
3777                        SDNode OpNode> {
3778  // 64-bit vector types.
3779  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
3780                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
3781  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
3782                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
3783  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
3784                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
3785
3786  // 128-bit vector types.
3787  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
3788                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
3789  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
3790                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
3791  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
3792                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
3793}
3794
3795// Neon 3-argument intrinsics,
3796//   element sizes of 16 and 32 bits:
3797multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3798                       InstrItinClass itinD16, InstrItinClass itinD32,
3799                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3800                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3801  // 64-bit vector types.
3802  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
3803                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
3804  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
3805                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
3806
3807  // 128-bit vector types.
3808  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
3809                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
3810  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
3811                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
3812}
3813
3814//   element sizes of 8, 16 and 32 bits:
3815multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3816                       InstrItinClass itinD16, InstrItinClass itinD32,
3817                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3818                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
3819  : N3VInt3_HS<op24, op23, op11_8, op4, itinD16, itinD32,
3820               itinQ16, itinQ32, OpcodeStr, Dt, IntOp> {
3821  // 64-bit vector types.
3822  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
3823                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3824  // 128-bit vector types.
3825  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
3826                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
3827}
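
// The v8.1-A VQRDMLAH/VQRDMLSH definitions later in this file use the _HS
// variant of these 3-argument multiclasses with null_frag, relying on explicit
// saddsat/ssubsat + int_arm_neon_vqrdmulh patterns for selection:
//   defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
//                              IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
//                              null_frag>;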
3828
3829// Neon Long Multiply-Op vector operations,
3830//   element sizes of 8, 16 and 32 bits:
3831multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3832                         InstrItinClass itin16, InstrItinClass itin32,
3833                         string OpcodeStr, string Dt, SDNode MulOp,
3834                         SDNode OpNode> {
3835  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
3836                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
3837  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
3838                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
3839  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
3840                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3841}
3842
3843multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
3844                          string Dt, SDNode MulOp, SDNode OpNode> {
3845  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
3846                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
3847  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
3848                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3849}
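
// VMLAL below is the long multiply-accumulate client of these multiclasses:
//   defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
//                                 "vmlal", "s", ARMvmulls, add>;
//   defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>;
// i.e. Qd += Dn * Dm, with the "sl" (scalar) forms multiplying by a single
// duplicated lane.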
3850
3851
3852// Neon Long 3-argument intrinsics.
3853
3854// First with only element sizes of 16 and 32 bits:
3855multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3856                       InstrItinClass itin16, InstrItinClass itin32,
3857                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3858  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
3859                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3860  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
3861                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3862}
3863
3864multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
3865                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3866  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
3867                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
3868  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
3869                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3870}
3871
3872// ....then also with element size of 8 bits:
3873multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3874                        InstrItinClass itin16, InstrItinClass itin32,
3875                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
3876  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
3877  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
3878                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
3879}
3880
3881// ....with explicit extend (VABAL).
3882multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3883                            InstrItinClass itin, string OpcodeStr, string Dt,
3884                            SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
3885  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
3886                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
3887                           IntOp, ExtOp, OpNode>;
3888  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
3889                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
3890                           IntOp, ExtOp, OpNode>;
3891  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
3892                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
3893                           IntOp, ExtOp, OpNode>;
3894}
3895
3896
3897// Neon Pairwise long 2-register intrinsics,
3898//   element sizes of 8, 16 and 32 bits:
3899multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3900                        bits<5> op11_7, bit op4,
3901                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3902  // 64-bit vector types.
3903  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3904                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3905  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3906                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3907  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3908                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3909
3910  // 128-bit vector types.
3911  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3912                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3913  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3914                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3915  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3916                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3917}
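
// This pairwise-lengthening multiclass backs the VPADDL definitions further
// down in this file, schematically (encoding operands elided):
//   defm VPADDLs : N2VPLInt_QHS<..., "vpaddl", "s", int_arm_neon_vpaddls>;
// each destination lane holds the sum of two adjacent source lanes at twice
// the source element width.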
3918
3919
3920// Neon Pairwise long 2-register accumulate intrinsics,
3921//   element sizes of 8, 16 and 32 bits:
3922multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3923                         bits<5> op11_7, bit op4,
3924                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3925  // 64-bit vector types.
3926  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3927                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3928  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3929                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3930  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3931                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3932
3933  // 128-bit vector types.
3934  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3935                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3936  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3937                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3938  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3939                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3940}
3941
3942
3943// Neon 2-register vector shift by immediate,
3944//   with f of either N2RegVShLFrm or N2RegVShRFrm
3945//   element sizes of 8, 16, 32 and 64 bits:
3946multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3947                       InstrItinClass itin, string OpcodeStr, string Dt,
3948                       SDNode OpNode> {
3949  // 64-bit vector types.
3950  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3951                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
3952    let Inst{21-19} = 0b001; // imm6 = 001xxx
3953  }
3954  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3955                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
3956    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
3957  }
3958  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3959                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
3960    let Inst{21} = 0b1;      // imm6 = 1xxxxx
3961  }
3962  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
3963                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
3964                             // imm6 = xxxxxx
3965
3966  // 128-bit vector types.
3967  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3968                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
3969    let Inst{21-19} = 0b001; // imm6 = 001xxx
3970  }
3971  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3972                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
3973    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
3974  }
3975  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3976                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
3977    let Inst{21} = 0b1;      // imm6 = 1xxxxx
3978  }
3979  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
3980                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
3981                             // imm6 = xxxxxx
3982}
3983multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3984                       InstrItinClass itin, string OpcodeStr, string Dt,
3985                       string baseOpc, SDNode OpNode> {
3986  // 64-bit vector types.
3987  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
3988                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
3989    let Inst{21-19} = 0b001; // imm6 = 001xxx
3990  }
3991  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
3992                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
3993    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
3994  }
3995  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
3996                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
3997    let Inst{21} = 0b1;      // imm6 = 1xxxxx
3998  }
3999  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
4000                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
4001                             // imm6 = xxxxxx
4002
4003  // 128-bit vector types.
4004  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
4005                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
4006    let Inst{21-19} = 0b001; // imm6 = 001xxx
4007  }
4008  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
4009                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
4010    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4011  }
4012  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
4013                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
4014    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4015  }
4016  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
4017                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
4018                             // imm6 = xxxxxx
4019}
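
// The immediate shift instructions later in this file (the VSHL and VSHR
// immediate forms) are built from these two multiclasses, schematically
// (opcode fields elided):
//   defm VSHLi : N2VShL_QHSD<..., IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
//   defm VSHRs : N2VShR_QHSD<..., IIC_VSHLiD, "vshr", "s", "VSHRs", ARMvshrsImm>;
// The per-size "let Inst{...}" overrides above encode the element size in the
// imm6 field, and the shr_imm* operands restrict right-shift amounts to
// 1..element-size.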
4020
4021// Neon Shift-Accumulate vector operations,
4022//   element sizes of 8, 16, 32 and 64 bits:
4023multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4024                         string OpcodeStr, string Dt, SDNode ShOp> {
4025  // 64-bit vector types.
4026  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
4027                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
4028    let Inst{21-19} = 0b001; // imm6 = 001xxx
4029  }
4030  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
4031                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
4032    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4033  }
4034  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4035                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
4036    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4037  }
4038  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4039                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
4040                             // imm6 = xxxxxx
4041
4042  // 128-bit vector types.
4043  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
4044                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
4045    let Inst{21-19} = 0b001; // imm6 = 001xxx
4046  }
4047  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
4048                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
4049    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4050  }
4051  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4052                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
4053    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4054  }
4055  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4056                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
4057                             // imm6 = xxxxxx
4058}
4059
4060// Neon Shift-Insert vector operations,
4061//   with f of either N2RegVShLFrm or N2RegVShRFrm
4062//   element sizes of 8, 16, 32 and 64 bits:
4063multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4064                          string OpcodeStr> {
4065  // 64-bit vector types.
4066  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4067                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
4068    let Inst{21-19} = 0b001; // imm6 = 001xxx
4069  }
4070  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4071                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
4072    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4073  }
4074  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4075                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
4076    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4077  }
4078  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
4079                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
4080                             // imm6 = xxxxxx
4081
4082  // 128-bit vector types.
4083  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4084                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
4085    let Inst{21-19} = 0b001; // imm6 = 001xxx
4086  }
4087  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4088                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
4089    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4090  }
4091  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4092                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
4093    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4094  }
4095  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
4096                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
4097                             // imm6 = xxxxxx
4098}
4099multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4100                          string OpcodeStr> {
4101  // 64-bit vector types.
4102  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4103                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
4104    let Inst{21-19} = 0b001; // imm6 = 001xxx
4105  }
4106  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4107                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
4108    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4109  }
4110  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4111                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
4112    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4113  }
4114  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4115                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
4116                             // imm6 = xxxxxx
4117
4118  // 128-bit vector types.
4119  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4120                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
4121    let Inst{21-19} = 0b001; // imm6 = 001xxx
4122  }
4123  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4124                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
4125    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4126  }
4127  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4128                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
4129    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4130  }
4131  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4132                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
4133                             // imm6 = xxxxxx
4134}
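
// These two multiclasses provide the shift-and-insert instructions: the left
// form (NEONvsliImm) is used for VSLI and the right form (NEONvsriImm) for
// VSRI, schematically (opcode fields elided):
//   defm VSLI : N2VShInsL_QHSD<..., "vsli">;
//   defm VSRI : N2VShInsR_QHSD<..., "vsri">;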
4135
4136// Neon Shift Long operations,
4137//   element sizes of 8, 16, 32 bits:
4138multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4139                      bit op4, string OpcodeStr, string Dt,
4140                      SDPatternOperator OpNode> {
4141  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4142              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
4143    let Inst{21-19} = 0b001; // imm6 = 001xxx
4144  }
4145  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4146               OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
4147    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4148  }
4149  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4150               OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
4151    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4152  }
4153}
4154
4155// Neon Shift Narrow operations,
4156//   element sizes of 16, 32, 64 bits:
4157multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4158                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
4159                      SDPatternOperator OpNode> {
4160  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4161                    OpcodeStr, !strconcat(Dt, "16"),
4162                    v8i8, v8i16, shr_imm8, OpNode> {
4163    let Inst{21-19} = 0b001; // imm6 = 001xxx
4164  }
4165  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4166                     OpcodeStr, !strconcat(Dt, "32"),
4167                     v4i16, v4i32, shr_imm16, OpNode> {
4168    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4169  }
4170  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4171                     OpcodeStr, !strconcat(Dt, "64"),
4172                     v2i32, v2i64, shr_imm32, OpNode> {
4173    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4174  }
4175}
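
// The narrowing shift-right instructions (VSHRN and its rounding/saturating
// variants later in this file) are built from this multiclass, schematically
// (opcode fields elided):
//   defm VSHRN : N2VNSh_HSD<..., IIC_VSHLiD, "vshrn", "i", ...>;
// The shr_imm8/16/32 operands limit the shift amount to the destination
// element width.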
4176
4177//===----------------------------------------------------------------------===//
4178// Instruction Definitions.
4179//===----------------------------------------------------------------------===//
4180
4181// Vector Add Operations.
4182
4183//   VADD     : Vector Add (integer and floating-point)
4184defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
4185                         add, 1>;
4186def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
4187                     v2f32, v2f32, fadd, 1>;
4188def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
4189                     v4f32, v4f32, fadd, 1>;
4190def  VADDhd   : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
4191                     v4f16, v4f16, fadd, 1>,
4192                Requires<[HasNEON,HasFullFP16]>;
4193def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
4194                     v8f16, v8f16, fadd, 1>,
4195                Requires<[HasNEON,HasFullFP16]>;
4196//   VADDL    : Vector Add Long (Q = D + D)
4197defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4198                            "vaddl", "s", add, sext, 1>;
4199defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4200                            "vaddl", "u", add, zanyext, 1>;
4201//   VADDW    : Vector Add Wide (Q = Q + D)
4202defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
4203defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
4204//   VHADD    : Vector Halving Add
4205defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
4206                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4207                           "vhadd", "s", int_arm_neon_vhadds, 1>;
4208defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
4209                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4210                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
4211//   VRHADD   : Vector Rounding Halving Add
4212defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
4213                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4214                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
4215defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
4216                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4217                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
4218//   VQADD    : Vector Saturating Add
4219defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
4220                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4221                            "vqadd", "s", saddsat, 1>;
4222defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
4223                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4224                            "vqadd", "u", uaddsat, 1>;
4225//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
4226defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
4227//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
4228defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
4229                            int_arm_neon_vraddhn, 1>;
4230
4231let Predicates = [HasNEON] in {
4232def : Pat<(v8i8  (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
4233          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
4234def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
4235          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
4236def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
4237          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
4238}
4239
4240// Vector Multiply Operations.
4241
4242//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
4243defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
4244                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
4245def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
4246                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
4247def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
4248                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
4249def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
4250                     v2f32, v2f32, fmul, 1>;
4251def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
4252                     v4f32, v4f32, fmul, 1>;
4253def  VMULhd   : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
4254                     v4f16, v4f16, fmul, 1>,
4255                Requires<[HasNEON,HasFullFP16]>;
4256def  VMULhq   : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
4257                     v8f16, v8f16, fmul, 1>,
4258                Requires<[HasNEON,HasFullFP16]>;
4259defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
4260def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
4261def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
4262                       v2f32, fmul>;
4263def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
4264                Requires<[HasNEON,HasFullFP16]>;
4265def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
4266                       v4f16, fmul>,
4267                Requires<[HasNEON,HasFullFP16]>;
4268
4269let Predicates = [HasNEON] in {
4270def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
4271                      (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
4272          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
4273                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
4274                                      (DSubReg_i16_reg imm:$lane))),
4275                              (SubReg_i16_lane imm:$lane)))>;
4276def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
4277                      (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
4278          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
4279                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
4280                                      (DSubReg_i32_reg imm:$lane))),
4281                              (SubReg_i32_lane imm:$lane)))>;
4282def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
4283                       (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
4284          (v4f32 (VMULslfq (v4f32 QPR:$src1),
4285                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
4286                                   (DSubReg_i32_reg imm:$lane))),
4287                           (SubReg_i32_lane imm:$lane)))>;
4288def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
4289                       (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
4290          (v8f16 (VMULslhq (v8f16 QPR:$src1),
4291                           (v4f16 (EXTRACT_SUBREG QPR:$src2,
4292                                   (DSubReg_i16_reg imm:$lane))),
4293                           (SubReg_i16_lane imm:$lane)))>;
4294
4295def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
4296          (VMULslfd DPR:$Rn,
4297            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4298            (i32 0))>;
4299def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
4300          (VMULslhd DPR:$Rn,
4301            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
4302            (i32 0))>;
4303def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
4304          (VMULslfq QPR:$Rn,
4305            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4306            (i32 0))>;
4307def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
4308          (VMULslhq QPR:$Rn,
4309            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
4310            (i32 0))>;
4311}
4312
4313//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
4314defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
4315                          IIC_VMULi16Q, IIC_VMULi32Q,
4316                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
4317defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
4318                            IIC_VMULi16Q, IIC_VMULi32Q,
4319                            "vqdmulh", "s",  int_arm_neon_vqdmulh>;
4320
4321let Predicates = [HasNEON] in {
4322def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
4323                                       (v8i16 (ARMvduplane (v8i16 QPR:$src2),
4324                                                            imm:$lane)))),
4325          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
4326                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4327                                         (DSubReg_i16_reg imm:$lane))),
4328                                 (SubReg_i16_lane imm:$lane)))>;
4329def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
4330                                       (v4i32 (ARMvduplane (v4i32 QPR:$src2),
4331                                                            imm:$lane)))),
4332          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
4333                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4334                                         (DSubReg_i32_reg imm:$lane))),
4335                                 (SubReg_i32_lane imm:$lane)))>;
4336}
4337
4338//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
4339defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
4340                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
4341                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
4342defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
4343                              IIC_VMULi16Q, IIC_VMULi32Q,
4344                              "vqrdmulh", "s",  int_arm_neon_vqrdmulh>;
4345
4346let Predicates = [HasNEON] in {
4347def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
4348                                        (v8i16 (ARMvduplane (v8i16 QPR:$src2),
4349                                                             imm:$lane)))),
4350          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
4351                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
4352                                          (DSubReg_i16_reg imm:$lane))),
4353                                  (SubReg_i16_lane imm:$lane)))>;
4354def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
4355                                        (v4i32 (ARMvduplane (v4i32 QPR:$src2),
4356                                                             imm:$lane)))),
4357          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
4358                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
4359                                          (DSubReg_i32_reg imm:$lane))),
4360                                  (SubReg_i32_lane imm:$lane)))>;
4361}
4362
4363//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
4364let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
4365    DecoderNamespace = "NEONData" in {
4366  defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4367                           "vmull", "s", ARMvmulls, 1>;
4368  defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4369                           "vmull", "u", ARMvmullu, 1>;
4370  def  VMULLp8   :  N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
4371                            v8i16, v8i8, int_arm_neon_vmullp, 1>;
4372  def  VMULLp64  : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
4373                          "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
4374                    Requires<[HasV8, HasCrypto]>;
4375}
4376defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", ARMvmulls>;
4377defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", ARMvmullu>;
4378
4379//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
4380defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
4381                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
4382defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
4383                             "vqdmull", "s", int_arm_neon_vqdmull>;
4384
4385// Vector Multiply-Accumulate and Multiply-Subtract Operations.
4386
4387//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
4388defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4389                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
4390def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
4391                          v2f32, fmul_su, fadd_mlx>,
4392                Requires<[HasNEON, UseFPVMLx]>;
4393def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
4394                          v4f32, fmul_su, fadd_mlx>,
4395                Requires<[HasNEON, UseFPVMLx]>;
4396def  VMLAhd   : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
4397                          v4f16, fmul_su, fadd_mlx>,
4398                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4399def  VMLAhq   : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
4400                          v8f16, fmul_su, fadd_mlx>,
4401                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4402defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
4403                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
4404def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
4405                            v2f32, fmul_su, fadd_mlx>,
4406                Requires<[HasNEON, UseFPVMLx]>;
4407def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
4408                            v4f32, v2f32, fmul_su, fadd_mlx>,
4409                Requires<[HasNEON, UseFPVMLx]>;
4410def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
4411                            v4f16, fmul, fadd>,
4412                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4413def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
4414                            v8f16, v4f16, fmul, fadd>,
4415                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4416
4417let Predicates = [HasNEON] in {
4418def : Pat<(v8i16 (add (v8i16 QPR:$src1),
4419                  (mul (v8i16 QPR:$src2),
4420                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
4421          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4422                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
4423                                      (DSubReg_i16_reg imm:$lane))),
4424                              (SubReg_i16_lane imm:$lane)))>;
4425
4426def : Pat<(v4i32 (add (v4i32 QPR:$src1),
4427                  (mul (v4i32 QPR:$src2),
4428                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
4429          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4430                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
4431                                      (DSubReg_i32_reg imm:$lane))),
4432                              (SubReg_i32_lane imm:$lane)))>;
4433}
4434
4435def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
4436                  (fmul_su (v4f32 QPR:$src2),
4437                        (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
4438          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
4439                           (v4f32 QPR:$src2),
4440                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
4441                                   (DSubReg_i32_reg imm:$lane))),
4442                           (SubReg_i32_lane imm:$lane)))>,
4443          Requires<[HasNEON, UseFPVMLx]>;
4444
4445//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
4446defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4447                              "vmlal", "s", ARMvmulls, add>;
4448defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4449                              "vmlal", "u", ARMvmullu, add>;
4450
4451defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>;
4452defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", ARMvmullu, add>;
4453
4454let Predicates = [HasNEON, HasV8_1a] in {
4455  // v8.1a Neon Rounding Doubling Multiply-Op vector operations,
4456  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
4457  //            Returning High Half
4458  defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
4459                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4460                             null_frag>;
4461  def : Pat<(v4i16 (saddsat
4462                     (v4i16 DPR:$src1),
4463                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4464                                                   (v4i16 DPR:$Vm))))),
4465            (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4466  def : Pat<(v2i32 (saddsat
4467                     (v2i32 DPR:$src1),
4468                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4469                                                   (v2i32 DPR:$Vm))))),
4470            (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4471  def : Pat<(v8i16 (saddsat
4472                     (v8i16 QPR:$src1),
4473                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4474                                                   (v8i16 QPR:$Vm))))),
4475            (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4476  def : Pat<(v4i32 (saddsat
4477                     (v4i32 QPR:$src1),
4478                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4479                                                   (v4i32 QPR:$Vm))))),
4480            (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4481
4482  defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
4483                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4484                                  null_frag>;
4485  def : Pat<(v4i16 (saddsat
4486                     (v4i16 DPR:$src1),
4487                     (v4i16 (int_arm_neon_vqrdmulh
4488                              (v4i16 DPR:$Vn),
4489                              (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4490                                                   imm:$lane)))))),
4491            (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
4492                                    imm:$lane))>;
4493  def : Pat<(v2i32 (saddsat
4494                     (v2i32 DPR:$src1),
4495                     (v2i32 (int_arm_neon_vqrdmulh
4496                              (v2i32 DPR:$Vn),
4497                              (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4498                                                   imm:$lane)))))),
4499            (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4500                                    imm:$lane))>;
4501  def : Pat<(v8i16 (saddsat
4502                     (v8i16 QPR:$src1),
4503                     (v8i16 (int_arm_neon_vqrdmulh
4504                              (v8i16 QPR:$src2),
4505                              (v8i16 (ARMvduplane (v8i16 QPR:$src3),
4506                                                   imm:$lane)))))),
4507            (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
4508                                    (v8i16 QPR:$src2),
4509                                    (v4i16 (EXTRACT_SUBREG
4510                                             QPR:$src3,
4511                                             (DSubReg_i16_reg imm:$lane))),
4512                                    (SubReg_i16_lane imm:$lane)))>;
4513  def : Pat<(v4i32 (saddsat
4514                     (v4i32 QPR:$src1),
4515                     (v4i32 (int_arm_neon_vqrdmulh
4516                              (v4i32 QPR:$src2),
4517                              (v4i32 (ARMvduplane (v4i32 QPR:$src3),
4518                                                   imm:$lane)))))),
4519            (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
4520                                    (v4i32 QPR:$src2),
4521                                    (v2i32 (EXTRACT_SUBREG
4522                                             QPR:$src3,
4523                                             (DSubReg_i32_reg imm:$lane))),
4524                                    (SubReg_i32_lane imm:$lane)))>;
4525
4526  //   VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
4527  //              Returning High Half (D -= D * D and Q -= Q * Q)
4528  defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
4529                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4530                             null_frag>;
4531  def : Pat<(v4i16 (ssubsat
4532                     (v4i16 DPR:$src1),
4533                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4534                                                   (v4i16 DPR:$Vm))))),
4535            (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4536  def : Pat<(v2i32 (ssubsat
4537                     (v2i32 DPR:$src1),
4538                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4539                                                   (v2i32 DPR:$Vm))))),
4540            (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4541  def : Pat<(v8i16 (ssubsat
4542                     (v8i16 QPR:$src1),
4543                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4544                                                   (v8i16 QPR:$Vm))))),
4545            (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4546  def : Pat<(v4i32 (ssubsat
4547                     (v4i32 QPR:$src1),
4548                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4549                                                   (v4i32 QPR:$Vm))))),
4550            (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4551
4552  defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
4553                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4554                                  null_frag>;
4555  def : Pat<(v4i16 (ssubsat
4556                     (v4i16 DPR:$src1),
4557                     (v4i16 (int_arm_neon_vqrdmulh
4558                              (v4i16 DPR:$Vn),
4559                              (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4560                                                   imm:$lane)))))),
4561            (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
4562  def : Pat<(v2i32 (ssubsat
4563                     (v2i32 DPR:$src1),
4564                     (v2i32 (int_arm_neon_vqrdmulh
4565                              (v2i32 DPR:$Vn),
4566                              (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4567                                                   imm:$lane)))))),
4568            (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4569                                    imm:$lane))>;
4570  def : Pat<(v8i16 (ssubsat
4571                     (v8i16 QPR:$src1),
4572                     (v8i16 (int_arm_neon_vqrdmulh
4573                              (v8i16 QPR:$src2),
4574                              (v8i16 (ARMvduplane (v8i16 QPR:$src3),
4575                                                   imm:$lane)))))),
4576            (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
4577                                    (v8i16 QPR:$src2),
4578                                    (v4i16 (EXTRACT_SUBREG
4579                                             QPR:$src3,
4580                                             (DSubReg_i16_reg imm:$lane))),
4581                                    (SubReg_i16_lane imm:$lane)))>;
4582  def : Pat<(v4i32 (ssubsat
4583                     (v4i32 QPR:$src1),
4584                     (v4i32 (int_arm_neon_vqrdmulh
4585                              (v4i32 QPR:$src2),
4586                              (v4i32 (ARMvduplane (v4i32 QPR:$src3),
4587                                                    imm:$lane)))))),
4588            (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
4589                                    (v4i32 QPR:$src2),
4590                                    (v2i32 (EXTRACT_SUBREG
4591                                             QPR:$src3,
4592                                             (DSubReg_i32_reg imm:$lane))),
4593                                    (SubReg_i32_lane imm:$lane)))>;
4594}
4595//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
4596defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4597                            "vqdmlal", "s", null_frag>;
4598defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
4599
4600let Predicates = [HasNEON] in {
4601def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
4602                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4603                                                  (v4i16 DPR:$Vm))))),
4604          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4605def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
4606                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4607                                                  (v2i32 DPR:$Vm))))),
4608          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4609def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
4610                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4611                                (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4612                                                     imm:$lane)))))),
4613          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4614def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
4615                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4616                                (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4617                                                     imm:$lane)))))),
4618          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4619}
4620
4621//   VMLS     : Vector Multiply Subtract (integer and floating-point)
4622defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4623                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4624def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
4625                          v2f32, fmul_su, fsub_mlx>,
4626                Requires<[HasNEON, UseFPVMLx]>;
4627def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
4628                          v4f32, fmul_su, fsub_mlx>,
4629                Requires<[HasNEON, UseFPVMLx]>;
4630def  VMLShd   : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
4631                          v4f16, fmul, fsub>,
4632                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4633def  VMLShq   : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
4634                          v8f16, fmul, fsub>,
4635                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4636defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
4637                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4638def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
4639                            v2f32, fmul_su, fsub_mlx>,
4640                Requires<[HasNEON, UseFPVMLx]>;
4641def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
4642                            v4f32, v2f32, fmul_su, fsub_mlx>,
4643                Requires<[HasNEON, UseFPVMLx]>;
4644def  VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
4645                            v4f16, fmul, fsub>,
4646                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4647def  VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
4648                            v8f16, v4f16, fmul, fsub>,
4649                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4650
4651let Predicates = [HasNEON] in {
4652def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
4653                  (mul (v8i16 QPR:$src2),
4654                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
4655          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4656                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
4657                                      (DSubReg_i16_reg imm:$lane))),
4658                              (SubReg_i16_lane imm:$lane)))>;
4659
4660def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
4661                  (mul (v4i32 QPR:$src2),
4662                     (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
4663          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4664                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
4665                                      (DSubReg_i32_reg imm:$lane))),
4666                              (SubReg_i32_lane imm:$lane)))>;
4667}
4668
4669def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
4670                  (fmul_su (v4f32 QPR:$src2),
4671                        (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
4672          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
4673                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
4674                                   (DSubReg_i32_reg imm:$lane))),
4675                           (SubReg_i32_lane imm:$lane)))>,
4676          Requires<[HasNEON, UseFPVMLx]>;
4677
4678//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
4679defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4680                              "vmlsl", "s", ARMvmulls, sub>;
4681defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4682                              "vmlsl", "u", ARMvmullu, sub>;
4683
4684defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", ARMvmulls, sub>;
4685defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", ARMvmullu, sub>;
4686
4687//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
4688defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
4689                            "vqdmlsl", "s", null_frag>;
4690defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
4691
4692let Predicates = [HasNEON] in {
4693def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
4694                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4695                                                  (v4i16 DPR:$Vm))))),
4696          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4697def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
4698                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4699                                                  (v2i32 DPR:$Vm))))),
4700          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4701def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
4702                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4703                                (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4704                                                     imm:$lane)))))),
4705          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4706def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
4707                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4708                                (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4709                                                     imm:$lane)))))),
4710          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4711}
4712
4713// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
4714def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
4715                          v2f32, fmul_su, fadd_mlx>,
4716                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4717
4718def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
4719                          v4f32, fmul_su, fadd_mlx>,
4720                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4721def  VFMAhd   : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
4722                          v4f16, fmul, fadd>,
4723                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4724
4725def  VFMAhq   : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
4726                          v8f16, fmul, fadd>,
4727                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4728
4729//   Fused Vector Multiply Subtract (floating-point)
4730def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
4731                          v2f32, fmul_su, fsub_mlx>,
4732                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4733def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
4734                          v4f32, fmul_su, fsub_mlx>,
4735                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4736def  VFMShd   : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
4737                          v4f16, fmul, fsub>,
4738                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4739def  VFMShq   : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
4740                          v8f16, fmul, fsub>,
4741                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4742
4743// Match @llvm.fma.* intrinsics
4744def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4745          (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4746          Requires<[HasNEON,HasFullFP16]>;
4747def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4748          (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4749          Requires<[HasNEON,HasFullFP16]>;
4750def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4751          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4752          Requires<[HasNEON,HasVFP4]>;
4753def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4754          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4755          Requires<[HasNEON,HasVFP4]>;
4756def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
4757          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4758      Requires<[HasNEON,HasVFP4]>;
4759def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
4760          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4761      Requires<[HasNEON,HasVFP4]>;
4762
4763// ARMv8.2a dot product instructions.
4764// We put them in the VFPV8 decoder namespace because the ARM and Thumb
4765// encodings are the same and thus no further bit twiddling is necessary
4766// in the disassembler.
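// As a rough sketch of the semantics (per the usual dot-product definition),
// for each 32-bit accumulator lane i:
//   Vd[i] += Vn[4*i+0]*Vm[4*i+0] + Vn[4*i+1]*Vm[4*i+1]
//          + Vn[4*i+2]*Vm[4*i+2] + Vn[4*i+3]*Vm[4*i+3]
// with the u8/s8 suffix selecting unsigned or signed byte multiplies.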
4767class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm,
4768           string AsmTy, ValueType AccumTy, ValueType InputTy,
4769           SDPatternOperator OpNode> :
4770      N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
4771            (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
4772            Asm, AsmTy,
4773            [(set (AccumTy RegTy:$dst),
4774                  (OpNode (AccumTy RegTy:$Vd),
4775                          (InputTy RegTy:$Vn),
4776                          (InputTy RegTy:$Vm)))]> {
4777  let Predicates = [HasDotProd];
4778  let DecoderNamespace = "VFPV8";
4779  let Constraints = "$dst = $Vd";
4780}
4781
4782def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8", v2i32, v8i8,  int_arm_neon_udot>;
4783def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8", v2i32, v8i8,  int_arm_neon_sdot>;
4784def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
4785def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;
4786
4787// Indexed dot product instructions:
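// In the indexed (by-element) form, the same 32-bit group of Vm, selected by
// $lane, is reused for every accumulator lane; roughly:
//   Vd[i] += Vn[4*i+0]*Vm[4*lane+0] + ... + Vn[4*i+3]*Vm[4*lane+3]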
4788multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
4789           ValueType AccumType, ValueType InputType, SDPatternOperator OpNode,
4790           dag RHS> {
4791  def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
4792                 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
4793                 N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
4794    bit lane;
4795    let Inst{5} = lane;
4796    let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
4797    let Constraints = "$dst = $Vd";
4798    let Predicates = [HasDotProd];
4799    let DecoderNamespace = "VFPV8";
4800  }
4801
4802  def : Pat<
4803    (AccumType (OpNode (AccumType Ty:$Vd),
4804                       (InputType Ty:$Vn),
4805                       (InputType (bitconvert (AccumType
4806                                  (ARMvduplane (AccumType Ty:$Vm),
4807                                                 VectorIndex32:$lane)))))),
4808    (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
4809}
4810
4811defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
4812                    int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
4813defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
4814                    int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;
4815defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
4816                    int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4817defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
4818                    int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4819
4820// v8.6A matrix multiplication extension
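// As a sketch of the semantics: each MMLA instruction treats Vn as a 2x8 and
// Vm as an 8x2 matrix of 8-bit elements and accumulates their 2x2 product
// into the four 32-bit lanes of Vd (signed, unsigned, or mixed signedness
// depending on the mnemonic).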
4821let Predicates = [HasMatMulInt8] in {
4822  class N3VMatMul<bit B, bit U, string Asm, string AsmTy,
4823                  SDPatternOperator OpNode>
4824        : N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U, (outs QPR:$dst),
4825                (ins QPR:$Vd, QPR:$Vn, QPR:$Vm), N3RegFrm, NoItinerary,
4826                Asm, AsmTy,
4827                [(set (v4i32 QPR:$dst), (OpNode (v4i32 QPR:$Vd),
4828                                                (v16i8 QPR:$Vn),
4829                                                (v16i8 QPR:$Vm)))]> {
4830    let DecoderNamespace = "VFPV8";
4831    let Constraints = "$dst = $Vd";
4832  }
4833
4834  multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy, RegisterClass RegTy,
4835                        ValueType AccumTy, ValueType InputTy, SDPatternOperator OpNode,
4836                        dag RHS> {
4837
4838    def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U, (outs RegTy:$dst),
4839                (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), N3RegFrm,
4840                 NoItinerary, Asm, AsmTy, []> {
4841      bit lane;
4842      let Inst{5} = lane;
4843      let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane");
4844      let DecoderNamespace = "VFPV8";
4845      let Constraints = "$dst = $Vd";
4846    }
4847
4848    def : Pat<
4849      (AccumTy (OpNode (AccumTy RegTy:$Vd),
4850                       (InputTy RegTy:$Vn),
4851                       (InputTy (bitconvert (AccumTy
4852                                (ARMvduplane (AccumTy RegTy:$Vm),
4853                                              VectorIndex32:$lane)))))),
4854      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
4855
4856  }
4857
4858  multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy, ValueType InputTy, dag RHS>
4859        : N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy, null_frag, null_frag> {
4860    def : Pat<
4861      (AccumTy (int_arm_neon_usdot (AccumTy RegTy:$Vd),
4862                                   (InputTy (bitconvert (AccumTy
4863                                            (ARMvduplane (AccumTy RegTy:$Vm),
4864                                                          VectorIndex32:$lane)))),
4865                                   (InputTy RegTy:$Vn))),
4866      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
4867  }
4868
4869  def VSMMLA  : N3VMatMul<0, 0, "vsmmla",  "s8", int_arm_neon_smmla>;
4870  def VUMMLA  : N3VMatMul<0, 1, "vummla",  "u8", int_arm_neon_ummla>;
4871  def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>;
4872  def VUSDOTD : VDOT<0, 0, 1, DPR, "vusdot", "s8", v2i32, v8i8,  int_arm_neon_usdot>;
4873  def VUSDOTQ : VDOT<1, 0, 1, QPR, "vusdot", "s8", v4i32, v16i8, int_arm_neon_usdot>;
4874
4875  defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8,
4876                                  int_arm_neon_usdot, (v2i32 DPR_VFP2:$Vm)>;
4877  defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8,
4878                                  int_arm_neon_usdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4879  defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8, (v2i32 DPR_VFP2:$Vm)>;
4880  defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4881}
4882
4883// ARMv8.3 complex operations
4884class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
4885                            InstrItinClass itin, dag oops, dag iops,
4886                            string opc, string dt, list<dag> pattern>
4887  : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
4888           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
4889  bits<2> rot;
4890  let Inst{24-23} = rot;
4891}
4892
4893class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
4894                           InstrItinClass itin, dag oops, dag iops, string opc,
4895                            string dt, list<dag> pattern>
4896  : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
4897           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
4898  bits<1> rot;
4899  let Inst{24} = rot;
4900}
4901
4902class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
4903                                  dag oops, dag iops, string opc, string dt,
4904                                  list<dag> pattern>
4905  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4906               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4907  bits<2> rot;
4908  bit lane;
4909
4910  let Inst{21-20} = rot;
4911  let Inst{5} = lane;
4912}
4913
4914class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
4915                            dag oops, dag iops, string opc, string dt,
4916                            list<dag> pattern>
4917  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4918               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4919  bits<2> rot;
4920  bit lane;
4921
4922  let Inst{21-20} = rot;
4923  let Inst{5} = Vm{4};
4924  // This is needed because the lane operand does not have any bits in the
4925  // encoding (it only has one possible value), so we need to manually set it
4926  // to its default value.
4927  let DecoderMethod = "DecodeNEONComplexLane64Instruction";
4928}
4929
4930multiclass N3VCP8ComplexTied<bit op21, bit op4,
4931                       string OpcodeStr, SDPatternOperator Op> {
4932  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4933  def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
4934              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4935              OpcodeStr, "f16", []>;
4936  def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
4937              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4938              OpcodeStr, "f16", []>;
4939  }
4940  let Predicates = [HasNEON,HasV8_3a] in {
4941  def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
4942              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4943              OpcodeStr, "f32", []>;
4944  def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
4945              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4946              OpcodeStr, "f32", []>;
4947  }
4948}
4949
4950multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
4951                       string OpcodeStr, SDPatternOperator Op> {
4952  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4953  def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
4954              (outs DPR:$Vd),
4955              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4956              OpcodeStr, "f16", []>;
4957  def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
4958              (outs QPR:$Vd),
4959              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4960              OpcodeStr, "f16", []>;
4961  }
4962  let Predicates = [HasNEON,HasV8_3a] in {
4963  def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
4964              (outs DPR:$Vd),
4965              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4966              OpcodeStr, "f32", []>;
4967  def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
4968              (outs QPR:$Vd),
4969              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4970              OpcodeStr, "f32", []>;
4971  }
4972}
4973
4974// These instructions index by pairs of lanes, so the VectorIndexes are twice
4975// as wide as the data types.
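// For example, the f16 variants below take a VectorIndex32, so a single index
// selects one complex value, i.e. a real/imaginary pair of f16 lanes.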
4976multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
4977                                 SDPatternOperator Op> {
4978  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4979  def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
4980                      (outs DPR:$Vd),
4981                      (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4982                      VectorIndex32:$lane, complexrotateop:$rot),
4983                      OpcodeStr, "f16", []>;
4984  def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
4985                      (outs QPR:$Vd),
4986                      (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
4987                      VectorIndex32:$lane, complexrotateop:$rot),
4988                      OpcodeStr, "f16", []>;
4989  }
4990  let Predicates = [HasNEON,HasV8_3a] in {
4991  def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
4992                      (outs DPR:$Vd),
4993                      (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
4994                      complexrotateop:$rot),
4995                      OpcodeStr, "f32", []>;
4996  def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
4997                      (outs QPR:$Vd),
4998                      (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
4999                      complexrotateop:$rot),
5000                      OpcodeStr, "f32", []>;
5001  }
5002}
5003
5004defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
5005defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
5006defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
5007
5008let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
5009  def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
5010            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
5011  def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
5012            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
5013  def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
5014            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
5015  def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
5016            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
5017}
5018let Predicates = [HasNEON,HasV8_3a] in {
5019  def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
5020            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>;
5021  def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
5022            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>;
5023  def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
5024            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>;
5025  def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
5026            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>;
5027}
5028
5029// Vector Subtract Operations.
5030
5031//   VSUB     : Vector Subtract (integer and floating-point)
5032defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
5033                         "vsub", "i", sub, 0>;
5034def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
5035                     v2f32, v2f32, fsub, 0>;
5036def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
5037                     v4f32, v4f32, fsub, 0>;
5038def  VSUBhd   : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
5039                     v4f16, v4f16, fsub, 0>,
5040                Requires<[HasNEON,HasFullFP16]>;
5041def  VSUBhq   : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
5042                     v8f16, v8f16, fsub, 0>,
5043                Requires<[HasNEON,HasFullFP16]>;
5044//   VSUBL    : Vector Subtract Long (Q = D - D)
5045defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
5046                            "vsubl", "s", sub, sext, 0>;
5047defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
5048                            "vsubl", "u", sub, zanyext, 0>;
5049//   VSUBW    : Vector Subtract Wide (Q = Q - D)
5050defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
5051defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>;
5052//   VHSUB    : Vector Halving Subtract
5053defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
5054                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5055                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
5056defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
5057                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5058                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
5059//   VQSUB    : Vector Saturating Subtract
5060defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
5061                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5062                            "vqsub", "s", ssubsat, 0>;
5063defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
5064                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5065                            "vqsub", "u", usubsat, 0>;
5066//   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
5067defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
5068//   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
5069defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
5070                            int_arm_neon_vrsubhn, 0>;
5071
5072let Predicates = [HasNEON] in {
5073def : Pat<(v8i8  (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
5074          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
5075def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
5076          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
5077def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
5078          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
5079}
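// The patterns above fold "truncate (logical shift right (sub Qn, Qm), by the
// element half-width)" into VSUBHN, i.e. subtract and keep the high half of
// each wide element.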
5080
5081// Vector Comparisons.
5082
5083//   VCEQ     : Vector Compare Equal
5084defm VCEQ     : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5085                            IIC_VSUBi4Q, "vceq", "i", ARMCCeq, 1>;
5086def  VCEQfd   : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
5087                         ARMCCeq, 1>;
5088def  VCEQfq   : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
5089                         ARMCCeq, 1>;
5090def  VCEQhd   : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
5091                         ARMCCeq, 1>,
5092                Requires<[HasNEON, HasFullFP16]>;
5093def  VCEQhq   : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
5094                         ARMCCeq, 1>,
5095                Requires<[HasNEON, HasFullFP16]>;
5096
5097let TwoOperandAliasConstraint = "$Vm = $Vd" in
5098defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
5099                            "$Vd, $Vm, #0", ARMCCeq>;
5100
5101//   VCGE     : Vector Compare Greater Than or Equal
5102defm VCGEs    : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5103                            IIC_VSUBi4Q, "vcge", "s", ARMCCge, 0>;
5104defm VCGEu    : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5105                            IIC_VSUBi4Q, "vcge", "u", ARMCChs, 0>;
5106def  VCGEfd   : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
5107                         ARMCCge, 0>;
5108def  VCGEfq   : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
5109                         ARMCCge, 0>;
5110def  VCGEhd   : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
5111                         ARMCCge, 0>,
5112                Requires<[HasNEON, HasFullFP16]>;
5113def  VCGEhq   : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
5114                         ARMCCge, 0>,
5115                Requires<[HasNEON, HasFullFP16]>;
5116
5117let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5118defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
5119                            "$Vd, $Vm, #0", ARMCCge>;
5120defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
5121                            "$Vd, $Vm, #0", ARMCCle>;
5122}
5123
5124//   VCGT     : Vector Compare Greater Than
5125defm VCGTs    : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5126                            IIC_VSUBi4Q, "vcgt", "s", ARMCCgt, 0>;
5127defm VCGTu    : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5128                            IIC_VSUBi4Q, "vcgt", "u", ARMCChi, 0>;
5129def  VCGTfd   : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
5130                         ARMCCgt, 0>;
5131def  VCGTfq   : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
5132                         ARMCCgt, 0>;
5133def  VCGThd   : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
5134                         ARMCCgt, 0>,
5135                Requires<[HasNEON, HasFullFP16]>;
5136def  VCGThq   : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
5137                         ARMCCgt, 0>,
5138                Requires<[HasNEON, HasFullFP16]>;
5139
5140let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5141defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
5142                            "$Vd, $Vm, #0", ARMCCgt>;
5143defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
5144                            "$Vd, $Vm, #0", ARMCClt>;
5145}
5146
5147//   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
5148def  VACGEfd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5149                        "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
5150def  VACGEfq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5151                        "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
5152def  VACGEhd   : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5153                        "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
5154                 Requires<[HasNEON, HasFullFP16]>;
5155def  VACGEhq   : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5156                        "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
5157                 Requires<[HasNEON, HasFullFP16]>;
5158//   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
5159def  VACGTfd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5160                        "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
5161def  VACGTfq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5162                        "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
5163def  VACGThd   : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5164                        "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
5165                 Requires<[HasNEON, HasFullFP16]>;
5166def  VACGThq   : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5167                        "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
5168                 Requires<[HasNEON, HasFullFP16]>;
5169//   VTST     : Vector Test Bits
5170defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
5171                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
5172
5173def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5174                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5175def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5176                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5177def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5178                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5179def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5180                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5181let Predicates = [HasNEON, HasFullFP16] in {
5182def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5183                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5184def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5185                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5186def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5187                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5188def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5189                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5190}
5191
5192// +fp16fml Floating Point Multiplication Variants
5193let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {
5194
5195class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
5196                RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
5197  : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
5198           asm, "f16", "$Vd, $Vn, $Vm", "", []>;
5199
5200class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
5201                RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
5202  : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
5203           asm, "f16", "$Vd, $Vn, $Vm", "", []>;
5204
5205// Vd, Vs, Vs[0-15], Idx[0-1]
5206class VFMD<string opc, string type, bits<2> S>
5207  : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
5208               (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
5209               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
5210  bit idx;
5211  let Inst{3} = idx;
5212  let Inst{19-16} = Vn{4-1};
5213  let Inst{7}     = Vn{0};
5214  let Inst{5}     = Vm{0};
5215  let Inst{2-0}   = Vm{3-1};
5216}
5217
5218// Vq, Vd, Vd[0-7], Idx[0-3]
5219class VFMQ<string opc, string type, bits<2> S>
5220  : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
5221               (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
5222               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
5223  bits<2> idx;
5224  let Inst{5} = idx{1};
5225  let Inst{3} = idx{0};
5226}
5227
5228//                                                op1   op2   op3
5229def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
5230def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
5231def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
5232def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
5233def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
5234def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
5235def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
5236def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
5237} // HasNEON, HasFP16FML
5238
5239
5240def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5241                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5242def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5243                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5244def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5245                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5246def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5247                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5248let Predicates = [HasNEON, HasFullFP16] in {
5249def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5250                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5251def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5252                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5253def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5254                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5255def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5256                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5257}
5258
5259// Vector Bitwise Operations.
5260
5261def vnotd : PatFrag<(ops node:$in),
5262                    (xor node:$in, ARMimmAllOnesD)>;
5263def vnotq : PatFrag<(ops node:$in),
5264                    (xor node:$in, ARMimmAllOnesV)>;
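// These fragments match "xor x, all-ones", i.e. a bitwise NOT, so the
// VBIC/VORN/VMVN/VBSP patterns below can be phrased in terms of vnot.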
5265
5266
5267//   VAND     : Vector Bitwise AND
5268def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
5269                      v2i32, v2i32, and, 1>;
5270def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
5271                      v4i32, v4i32, and, 1>;
5272
5273//   VEOR     : Vector Bitwise Exclusive OR
5274def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
5275                      v2i32, v2i32, xor, 1>;
5276def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
5277                      v4i32, v4i32, xor, 1>;
5278
5279//   VORR     : Vector Bitwise OR
5280def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
5281                      v2i32, v2i32, or, 1>;
5282def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
5283                      v4i32, v4i32, or, 1>;
5284
5285def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
5286                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5287                          IIC_VMOVImm,
5288                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5289                          [(set DPR:$Vd,
5290                            (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
5291  let Inst{9} = SIMM{9};
5292}
5293
5294def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
5295                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5296                          IIC_VMOVImm,
5297                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5298                          [(set DPR:$Vd,
5299                            (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
5300  let Inst{10-9} = SIMM{10-9};
5301}
5302
5303def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
5304                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5305                          IIC_VMOVImm,
5306                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5307                          [(set QPR:$Vd,
5308                            (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
5309  let Inst{9} = SIMM{9};
5310}
5311
5312def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
5313                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5314                          IIC_VMOVImm,
5315                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5316                          [(set QPR:$Vd,
5317                            (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
5318  let Inst{10-9} = SIMM{10-9};
5319}
5320
5321
5322//   VBIC     : Vector Bitwise Bit Clear (AND NOT)
5323let TwoOperandAliasConstraint = "$Vn = $Vd" in {
5324def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5325                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5326                     "vbic", "$Vd, $Vn, $Vm", "",
5327                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
5328                                                 (vnotd DPR:$Vm))))]>;
5329def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5330                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5331                     "vbic", "$Vd, $Vn, $Vm", "",
5332                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
5333                                                 (vnotq QPR:$Vm))))]>;
5334}
5335
5336def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
5337                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5338                          IIC_VMOVImm,
5339                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5340                          [(set DPR:$Vd,
5341                            (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
5342  let Inst{9} = SIMM{9};
5343}
5344
5345def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
5346                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5347                          IIC_VMOVImm,
5348                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5349                          [(set DPR:$Vd,
5350                            (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
5351  let Inst{10-9} = SIMM{10-9};
5352}
5353
5354def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
5355                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5356                          IIC_VMOVImm,
5357                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5358                          [(set QPR:$Vd,
5359                            (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
5360  let Inst{9} = SIMM{9};
5361}
5362
5363def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
5364                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5365                          IIC_VMOVImm,
5366                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5367                          [(set QPR:$Vd,
5368                            (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
5369  let Inst{10-9} = SIMM{10-9};
5370}
5371
5372//   VORN     : Vector Bitwise OR NOT
5373def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
5374                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5375                     "vorn", "$Vd, $Vn, $Vm", "",
5376                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
5377                                                (vnotd DPR:$Vm))))]>;
5378def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
5379                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5380                     "vorn", "$Vd, $Vn, $Vm", "",
5381                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
5382                                                (vnotq QPR:$Vm))))]>;
5383
5384//   VMVN     : Vector Bitwise NOT (Immediate)
5385
5386let isReMaterializable = 1 in {
5387
5388def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
5389                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5390                         "vmvn", "i16", "$Vd, $SIMM", "",
5391                         [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
5392  let Inst{9} = SIMM{9};
5393}
5394
5395def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
5396                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5397                         "vmvn", "i16", "$Vd, $SIMM", "",
5398                         [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
5399  let Inst{9} = SIMM{9};
5400}
5401
5402def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
5403                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5404                         "vmvn", "i32", "$Vd, $SIMM", "",
5405                         [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
5406  let Inst{11-8} = SIMM{11-8};
5407}
5408
5409def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
5410                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5411                         "vmvn", "i32", "$Vd, $SIMM", "",
5412                         [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
5413  let Inst{11-8} = SIMM{11-8};
5414}
5415}
5416
5417//   VMVN     : Vector Bitwise NOT
5418def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
5419                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
5420                     "vmvn", "$Vd, $Vm", "",
5421                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
5422def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
5423                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
5424                     "vmvn", "$Vd, $Vm", "",
5425                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
5426let Predicates = [HasNEON] in {
5427def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
5428def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
5429}
5430
5431// The TwoAddress pass will not go looking for equivalent operations
5432// with different register constraints; it just inserts copies.
5433// That is why the pseudo VBSP is implemented. It is expanded later into
5434// VBIT/VBIF/VBSL, taking register constraints into account to avoid copies.
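// As the patterns below show, VBSP is a plain bitwise select; roughly:
//   Vd = (Vn & mask) | (Vm & ~mask), where mask is the first source operand.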
5435def  VBSPd
5436  : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5437                IIC_VBINiD, "",
5438                [(set DPR:$Vd,
5439                      (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
5440let Predicates = [HasNEON] in {
5441def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
5442                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
5443          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5444def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
5445                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
5446          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5447def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
5448                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
5449          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5450def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
5451                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
5452          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5453def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
5454                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
5455          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5456
5457def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
5458                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
5459          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5460
5461def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
5462                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
5463          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5464}
5465
5466def  VBSPq
5467  : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5468                IIC_VBINiQ, "",
5469                [(set QPR:$Vd,
5470                      (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
5471let Predicates = [HasNEON] in {
5472def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
5473                                   (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
5474          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5475def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
5476                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
5477          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5478def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
5479                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
5480          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5481def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
5482                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
5483          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5484def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
5485                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
5486          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5487
5488def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
5489                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
5490          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5491def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
5492                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
5493          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5494}
5495
5496//   VBSL     : Vector Bitwise Select
5497def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5498                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5499                     N3RegFrm, IIC_VBINiD,
5500                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5501                     []>;
5502
5503def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5504                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5505                     N3RegFrm, IIC_VBINiQ,
5506                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5507                     []>;
5508
5509//   VBIF     : Vector Bitwise Insert if False
5510//              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
5511def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
5512                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5513                     N3RegFrm, IIC_VBINiD,
5514                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5515                     []>;
5516def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
5517                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5518                     N3RegFrm, IIC_VBINiQ,
5519                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5520                     []>;
5521
5522//   VBIT     : Vector Bitwise Insert if True
5523//              like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
5524def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
5525                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5526                     N3RegFrm, IIC_VBINiD,
5527                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5528                     []>;
5529def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
5530                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5531                     N3RegFrm, IIC_VBINiQ,
5532                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5533                     []>;
5534
5535// Vector Absolute Differences.
5536
5537//   VABD     : Vector Absolute Difference
5538defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
5539                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5540                           "vabd", "s", int_arm_neon_vabds, 1>;
5541defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
5542                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5543                           "vabd", "u", int_arm_neon_vabdu, 1>;
5544def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
5545                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
5546def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
5547                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
5548def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
5549                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
5550                Requires<[HasNEON, HasFullFP16]>;
5551def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
5552                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
5553                Requires<[HasNEON, HasFullFP16]>;
5554
5555//   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
5556defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
5557                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
5558defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
5559                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
5560
5561let Predicates = [HasNEON] in {
5562def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
5563          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
5564def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
5565          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
5566}
5567
5568// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
5569// shift/xor pattern for ABS.
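// The pattern below uses the branchless form abs(x) = (x + (x >> 63)) ^ (x >> 63)
// (arithmetic shift), with x = zext($opA) - zext($opB) computed as v2i64.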
5570
5571def abd_shr :
5572    PatFrag<(ops node:$in1, node:$in2, node:$shift),
5573            (ARMvshrsImm (sub (zext node:$in1),
5574                            (zext node:$in2)), (i32 $shift))>;
5575
5576let Predicates = [HasNEON] in {
5577def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
5578               (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
5579                                                   (zext (v2i32 DPR:$opB))),
5580                                         (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
5581          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
5582}
5583
5584//   VABA     : Vector Absolute Difference and Accumulate
5585defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5586                             "vaba", "s", int_arm_neon_vabds, add>;
5587defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5588                             "vaba", "u", int_arm_neon_vabdu, add>;
5589
5590//   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
5591defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
5592                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
5593defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
5594                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;
5595
5596// Vector Maximum and Minimum.
5597
5598//   VMAX     : Vector Maximum
5599defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
5600                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5601                           "vmax", "s", smax, 1>;
5602defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
5603                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5604                           "vmax", "u", umax, 1>;
5605def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
5606                        "vmax", "f32",
5607                        v2f32, v2f32, fmaximum, 1>;
5608def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5609                        "vmax", "f32",
5610                        v4f32, v4f32, fmaximum, 1>;
5611def  VMAXhd   : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
5612                        "vmax", "f16",
5613                        v4f16, v4f16, fmaximum, 1>,
5614                Requires<[HasNEON, HasFullFP16]>;
5615def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5616                        "vmax", "f16",
5617                        v8f16, v8f16, fmaximum, 1>,
5618                Requires<[HasNEON, HasFullFP16]>;
5619
5620// VMAXNM
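// Unlike VMAX above (fmaximum, which propagates NaNs), VMAXNM maps to fmaxnum:
// IEEE 754-2008 maxNum semantics, returning the numeric operand when exactly
// one operand is a quiet NaN.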
5621let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
5622  def NEON_VMAXNMNDf  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
5623                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
5624                                  v2f32, v2f32, fmaxnum, 1>,
5625                                  Requires<[HasV8, HasNEON]>;
5626  def NEON_VMAXNMNQf  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
5627                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
5628                                  v4f32, v4f32, fmaxnum, 1>,
5629                                  Requires<[HasV8, HasNEON]>;
5630  def NEON_VMAXNMNDh  : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
5631                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
5632                                  v4f16, v4f16, fmaxnum, 1>,
5633                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5634  def NEON_VMAXNMNQh  : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
5635                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
5636                                  v8f16, v8f16, fmaxnum, 1>,
5637                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5638}
5639
5640//   VMIN     : Vector Minimum
5641defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
5642                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5643                           "vmin", "s", smin, 1>;
5644defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
5645                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5646                           "vmin", "u", umin, 1>;
5647def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
5648                        "vmin", "f32",
5649                        v2f32, v2f32, fminimum, 1>;
5650def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5651                        "vmin", "f32",
5652                        v4f32, v4f32, fminimum, 1>;
5653def  VMINhd   : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
5654                        "vmin", "f16",
5655                        v4f16, v4f16, fminimum, 1>,
5656                Requires<[HasNEON, HasFullFP16]>;
5657def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5658                        "vmin", "f16",
5659                        v8f16, v8f16, fminimum, 1>,
5660                Requires<[HasNEON, HasFullFP16]>;
5661
5662// VMINNM
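// As with VMAXNM, fminnum gives IEEE 754-2008 minNum NaN handling, in contrast
// to the NaN-propagating fminimum used for VMIN above.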
5663let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
5664  def NEON_VMINNMNDf  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
5665                                  N3RegFrm, NoItinerary, "vminnm", "f32",
5666                                  v2f32, v2f32, fminnum, 1>,
5667                                  Requires<[HasV8, HasNEON]>;
5668  def NEON_VMINNMNQf  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
5669                                  N3RegFrm, NoItinerary, "vminnm", "f32",
5670                                  v4f32, v4f32, fminnum, 1>,
5671                                  Requires<[HasV8, HasNEON]>;
5672  def NEON_VMINNMNDh  : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
5673                                  N3RegFrm, NoItinerary, "vminnm", "f16",
5674                                  v4f16, v4f16, fminnum, 1>,
5675                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5676  def NEON_VMINNMNQh  : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
5677                                  N3RegFrm, NoItinerary, "vminnm", "f16",
5678                                  v8f16, v8f16, fminnum, 1>,
5679                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5680}
5681
5682// Vector Pairwise Operations.
5683
5684//   VPADD    : Vector Pairwise Add
5685def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5686                        "vpadd", "i8",
5687                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
5688def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5689                        "vpadd", "i16",
5690                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
5691def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5692                        "vpadd", "i32",
5693                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
5694def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
5695                        IIC_VPBIND, "vpadd", "f32",
5696                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
5697def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
5698                        IIC_VPBIND, "vpadd", "f16",
5699                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
5700                Requires<[HasNEON, HasFullFP16]>;
5701
5702//   VPADDL   : Vector Pairwise Add Long
5703defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
5704                             int_arm_neon_vpaddls>;
5705defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
5706                             int_arm_neon_vpaddlu>;
5707
5708//   VPADAL   : Vector Pairwise Add and Accumulate Long
5709defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
5710                              int_arm_neon_vpadals>;
5711defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
5712                              int_arm_neon_vpadalu>;
5713
5714//   VPMAX    : Vector Pairwise Maximum
5715def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5716                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
5717def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5718                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
5719def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5720                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
5721def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5722                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
5723def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5724                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
5725def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5726                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
5727def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
5728                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
5729def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
5730                        "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
5731                Requires<[HasNEON, HasFullFP16]>;
5732
5733//   VPMIN    : Vector Pairwise Minimum
5734def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5735                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
5736def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5737                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
5738def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5739                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
5740def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5741                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
5742def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5743                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
5744def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5745                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
5746def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5747                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
5748def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5749                        "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
5750                Requires<[HasNEON, HasFullFP16]>;
5751
5752// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
5753
5754//   VRECPE   : Vector Reciprocal Estimate
5755def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
5756                        IIC_VUNAD, "vrecpe", "u32",
5757                        v2i32, v2i32, int_arm_neon_vrecpe>;
5758def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
5759                        IIC_VUNAQ, "vrecpe", "u32",
5760                        v4i32, v4i32, int_arm_neon_vrecpe>;
5761def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
5762                        IIC_VUNAD, "vrecpe", "f32",
5763                        v2f32, v2f32, int_arm_neon_vrecpe>;
5764def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
5765                        IIC_VUNAQ, "vrecpe", "f32",
5766                        v4f32, v4f32, int_arm_neon_vrecpe>;
5767def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
5768                        IIC_VUNAD, "vrecpe", "f16",
5769                        v4f16, v4f16, int_arm_neon_vrecpe>,
5770                Requires<[HasNEON, HasFullFP16]>;
5771def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
5772                        IIC_VUNAQ, "vrecpe", "f16",
5773                        v8f16, v8f16, int_arm_neon_vrecpe>,
5774                Requires<[HasNEON, HasFullFP16]>;
5775
5776//   VRECPS   : Vector Reciprocal Step
5777def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
5778                        IIC_VRECSD, "vrecps", "f32",
5779                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
5780def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
5781                        IIC_VRECSQ, "vrecps", "f32",
5782                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
5783def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
5784                        IIC_VRECSD, "vrecps", "f16",
5785                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
5786                Requires<[HasNEON, HasFullFP16]>;
5787def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
5788                        IIC_VRECSQ, "vrecps", "f16",
5789                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
5790                Requires<[HasNEON, HasFullFP16]>;
5791
5792//   VRSQRTE  : Vector Reciprocal Square Root Estimate
5793def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
5794                         IIC_VUNAD, "vrsqrte", "u32",
5795                         v2i32, v2i32, int_arm_neon_vrsqrte>;
5796def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
5797                         IIC_VUNAQ, "vrsqrte", "u32",
5798                         v4i32, v4i32, int_arm_neon_vrsqrte>;
5799def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
5800                         IIC_VUNAD, "vrsqrte", "f32",
5801                         v2f32, v2f32, int_arm_neon_vrsqrte>;
5802def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
5803                         IIC_VUNAQ, "vrsqrte", "f32",
5804                         v4f32, v4f32, int_arm_neon_vrsqrte>;
5805def  VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
5806                         IIC_VUNAD, "vrsqrte", "f16",
5807                         v4f16, v4f16, int_arm_neon_vrsqrte>,
5808                Requires<[HasNEON, HasFullFP16]>;
5809def  VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
5810                         IIC_VUNAQ, "vrsqrte", "f16",
5811                         v8f16, v8f16, int_arm_neon_vrsqrte>,
5812                Requires<[HasNEON, HasFullFP16]>;
5813
5814//   VRSQRTS  : Vector Reciprocal Square Root Step
5815def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
5816                        IIC_VRECSD, "vrsqrts", "f32",
5817                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
5818def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
5819                        IIC_VRECSQ, "vrsqrts", "f32",
5820                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
5821def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
5822                        IIC_VRECSD, "vrsqrts", "f16",
5823                        v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
5824                Requires<[HasNEON, HasFullFP16]>;
5825def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
5826                        IIC_VRECSQ, "vrsqrts", "f16",
5827                        v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
5828                Requires<[HasNEON, HasFullFP16]>;
5829
5830// Vector Shifts.
5831
5832//   VSHL     : Vector Shift
5833defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
5834                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
5835                            "vshl", "s", int_arm_neon_vshifts>;
5836defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
5837                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
5838                            "vshl", "u", int_arm_neon_vshiftu>;
5839
5840let Predicates = [HasNEON] in {
5841def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
5842          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
5843def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
5844          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
5845def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
5846          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
5847def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
5848          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
5849def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
5850          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
5851def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
5852          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
5853def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
5854          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
5855def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
5856          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;
5857
5858def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
5859          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
5860def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
5861          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
5862def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
5863          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
5864def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
5865          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
5866def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
5867          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
5868def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
5869          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
5870def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
5871          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
5872def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
5873          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
5874
5875}
5876
5877//   VSHL     : Vector Shift Left (Immediate)
5878defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
5879
5880//   VSHR     : Vector Shift Right (Immediate)
5881defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
5882                            ARMvshrsImm>;
5883defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
5884                            ARMvshruImm>;
5885
5886//   VSHLL    : Vector Shift Left Long
5887defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
5888  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
5889defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
5890  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;
5891
5892//   VSHLL    : Vector Shift Left Long (with maximum shift count)
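// A shift by exactly the element size does not fit the generic immediate
// encoding, so these forms use a separate encoding in which Inst{21-16} is a
// fixed per-size value and the shift amount is implicit.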
5893class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
5894                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
5895                ValueType OpTy, Operand ImmTy>
5896  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
5897           ResTy, OpTy, ImmTy, null_frag> {
5898  let Inst{21-16} = op21_16;
5899  let DecoderMethod = "DecodeVSHLMaxInstruction";
5900}
5901def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
5902                          v8i16, v8i8, imm8>;
5903def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
5904                          v4i32, v4i16, imm16>;
5905def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
5906                          v2i64, v2i32, imm32>;
5907
5908let Predicates = [HasNEON] in {
5909def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
5910          (VSHLLi8 DPR:$Rn, 8)>;
5911def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
5912          (VSHLLi16 DPR:$Rn, 16)>;
5913def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
5914          (VSHLLi32 DPR:$Rn, 32)>;
5915def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
5916          (VSHLLi8 DPR:$Rn, 8)>;
5917def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
5918          (VSHLLi16 DPR:$Rn, 16)>;
5919def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
5920          (VSHLLi32 DPR:$Rn, 32)>;
5921def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
5922          (VSHLLi8 DPR:$Rn, 8)>;
5923def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
5924          (VSHLLi16 DPR:$Rn, 16)>;
5925def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
5926          (VSHLLi32 DPR:$Rn, 32)>;
5927}
5928
5929//   VSHRN    : Vector Shift Right and Narrow
5930defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
5931                           PatFrag<(ops node:$Rn, node:$amt),
5932                                   (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;
5933
5934let Predicates = [HasNEON] in {
5935def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
5936          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
5937def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
5938          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
5939def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
5940          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
5941}
5942
5943//   VRSHL    : Vector Rounding Shift
5944defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
5945                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5946                            "vrshl", "s", int_arm_neon_vrshifts>;
5947defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
5948                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5949                            "vrshl", "u", int_arm_neon_vrshiftu>;
5950//   VRSHR    : Vector Rounding Shift Right
5951defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
5952                            NEONvrshrsImm>;
5953defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
5954                            NEONvrshruImm>;
5955
5956//   VRSHRN   : Vector Rounding Shift Right and Narrow
5957defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
5958                           NEONvrshrnImm>;
5959
5960//   VQSHL    : Vector Saturating Shift
5961defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
5962                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5963                            "vqshl", "s", int_arm_neon_vqshifts>;
5964defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
5965                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5966                            "vqshl", "u", int_arm_neon_vqshiftu>;
5967//   VQSHL    : Vector Saturating Shift Left (Immediate)
5968defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
5969defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;
5970
5971//   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
5972defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;
5973
5974//   VQSHRN   : Vector Saturating Shift Right and Narrow
5975defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
5976                           NEONvqshrnsImm>;
5977defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
5978                           NEONvqshrnuImm>;
5979
5980//   VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
5981defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
5982                           NEONvqshrnsuImm>;
5983
5984//   VQRSHL   : Vector Saturating Rounding Shift
5985defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
5986                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5987                            "vqrshl", "s", int_arm_neon_vqrshifts>;
5988defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
5989                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5990                            "vqrshl", "u", int_arm_neon_vqrshiftu>;
5991
5992//   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
5993defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
5994                           NEONvqrshrnsImm>;
5995defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
5996                           NEONvqrshrnuImm>;
5997
5998//   VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
5999defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
6000                           NEONvqrshrnsuImm>;
6001
6002//   VSRA     : Vector Shift Right and Accumulate
6003defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
6004defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
6005//   VRSRA    : Vector Rounding Shift Right and Accumulate
6006defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
6007defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
6008
6009//   VSLI     : Vector Shift Left and Insert
6010defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
6011
6012//   VSRI     : Vector Shift Right and Insert
6013defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
6014
6015// Vector Absolute and Saturating Absolute.
6016
6017//   VABS     : Vector Absolute Value
6018defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
6019                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
6020def  VABSfd   : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
6021                     "vabs", "f32",
6022                     v2f32, v2f32, fabs>;
6023def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
6024                     "vabs", "f32",
6025                      v4f32, v4f32, fabs>;
6026def  VABShd   : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
6027                     "vabs", "f16",
6028                     v4f16, v4f16, fabs>,
6029                Requires<[HasNEON, HasFullFP16]>;
6030def  VABShq   : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
6031                     "vabs", "f16",
6032                      v8f16, v8f16, fabs>,
6033                Requires<[HasNEON, HasFullFP16]>;
6034
6035//   VQABS    : Vector Saturating Absolute Value
6036defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
6037                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
6038                           int_arm_neon_vqabs>;
6039
6040// Vector Negate.
6041
6042def vnegd  : PatFrag<(ops node:$in),
6043                     (sub ARMimmAllZerosD, node:$in)>;
6044def vnegq  : PatFrag<(ops node:$in),
6045                     (sub ARMimmAllZerosV, node:$in)>;
6046
6047class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
6048  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
6049        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
6050        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
6051class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
6052  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
6053        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
6054        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
6055
6056//   VNEG     : Vector Negate (integer)
6057def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
6058def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
6059def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
6060def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
6061def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
6062def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
6063
6064//   VNEG     : Vector Negate (floating-point)
6065def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
6066                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
6067                    "vneg", "f32", "$Vd, $Vm", "",
6068                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
6069def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
6070                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
6071                    "vneg", "f32", "$Vd, $Vm", "",
6072                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
6073def  VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
6074                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
6075                    "vneg", "f16", "$Vd, $Vm", "",
6076                    [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
6077                Requires<[HasNEON, HasFullFP16]>;
6078def  VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
6079                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
6080                    "vneg", "f16", "$Vd, $Vm", "",
6081                    [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
6082                Requires<[HasNEON, HasFullFP16]>;
6083
6084let Predicates = [HasNEON] in {
6085def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
6086def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
6087def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
6088def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
6089def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
6090def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
6091}
6092
6093//   VQNEG    : Vector Saturating Negate
6094defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
6095                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
6096                           int_arm_neon_vqneg>;
6097
6098// Vector Bit Counting Operations.
6099
6100//   VCLS     : Vector Count Leading Sign Bits
6101defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
6102                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
6103                           int_arm_neon_vcls>;
6104//   VCLZ     : Vector Count Leading Zeros
6105defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
6106                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
6107                           ctlz>;
6108//   VCNT     : Vector Count One Bits
6109def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
6110                        IIC_VCNTiD, "vcnt", "8",
6111                        v8i8, v8i8, ctpop>;
6112def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
6113                        IIC_VCNTiQ, "vcnt", "8",
6114                        v16i8, v16i8, ctpop>;
6115
6116// Vector Swap
6117def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
6118                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
6119                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
6120                     []>;
6121def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
6122                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
6123                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
6124                     []>;
6125
6126// Vector Move Operations.
6127
6128//   VMOV     : Vector Move (Register)
6129def : NEONInstAlias<"vmov${p} $Vd, $Vm",
6130                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
6131def : NEONInstAlias<"vmov${p} $Vd, $Vm",
6132                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
6133
6134//   VMOV     : Vector Move (Immediate)
6135
6136// Although VMOVs are not strictly speaking cheap, they are as expensive
6137// as their copy counterpart (VORR), so we should prefer rematerialization
6138// over splitting when it applies.
6139let isReMaterializable = 1, isAsCheapAsAMove=1 in {
6140def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
6141                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
6142                         "vmov", "i8", "$Vd, $SIMM", "",
6143                         [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
6144def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
6145                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
6146                         "vmov", "i8", "$Vd, $SIMM", "",
6147                         [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;
6148
6149def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
6150                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
6151                         "vmov", "i16", "$Vd, $SIMM", "",
6152                         [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
6153  let Inst{9} = SIMM{9};
6154}
6155
6156def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
6157                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
6158                         "vmov", "i16", "$Vd, $SIMM", "",
6159                         [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
6160 let Inst{9} = SIMM{9};
6161}
6162
6163def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
6164                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
6165                         "vmov", "i32", "$Vd, $SIMM", "",
6166                         [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
6167  let Inst{11-8} = SIMM{11-8};
6168}
6169
6170def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
6171                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
6172                         "vmov", "i32", "$Vd, $SIMM", "",
6173                         [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
6174  let Inst{11-8} = SIMM{11-8};
6175}
6176
6177def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
6178                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
6179                         "vmov", "i64", "$Vd, $SIMM", "",
6180                         [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
6181def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
6182                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
6183                         "vmov", "i64", "$Vd, $SIMM", "",
6184                         [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;
6185
6186def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
6187                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
6188                         "vmov", "f32", "$Vd, $SIMM", "",
6189                         [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
6190def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
6191                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
6192                         "vmov", "f32", "$Vd, $SIMM", "",
6193                         [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
6194} // isReMaterializable, isAsCheapAsAMove
6195
6196// Add support for the byte replication feature, so that it is GAS compatible.
6197multiclass NEONImmReplicateI8InstAlias<ValueType To> {
6198  // E.g. the instructions below:
6199  // "vmov.i32 d0, #0xffffffff"
6200  // "vmov.i32 d0, #0xabababab"
6201  // "vmov.i16 d0, #0xabab"
6202  // are not encodable with the given element size, but we can handle such cases.
6203  // For the last two instructions, for example, it should emit:
6204  // "vmov.i8 d0, #0xab"
6205  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6206                      (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
6207  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6208                      (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
6209  // Also add the same support for VMVN instructions, so the instruction:
6210  // "vmvn.i32 d0, #0xabababab"
6211  // actually means:
6212  // "vmov.i8 d0, #0x54"
6213  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6214                      (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
6215  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6216                      (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
6217}
6218
6219defm : NEONImmReplicateI8InstAlias<i16>;
6220defm : NEONImmReplicateI8InstAlias<i32>;
6221defm : NEONImmReplicateI8InstAlias<i64>;
6222
6223// Similar to above for types other than i8, e.g.:
6224// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
6225// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
6226// In this case we do not canonicalize VMVN to VMOV
6227multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
6228                                     NeonI NV8, NeonI NV16, ValueType To> {
6229  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6230                      (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6231  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6232                      (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6233  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6234                      (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6235  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6236                      (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6237}
6238
6239defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
6240                                      VMVNv4i16, VMVNv8i16, i32>;
6241defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
6242                                      VMVNv4i16, VMVNv8i16, i64>;
6243defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
6244                                      VMVNv2i32, VMVNv4i32, i64>;
6245// TODO: add "VMOV <-> VMVN" conversion for cases like
6246// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
6247// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"
6248
6249// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
6250// require zero cycles to execute, so they should be used wherever possible for
6251// setting a register to zero.
6252
6253// Even without these pseudo-insts we would probably end up with the correct
6254// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
6255// since they are sometimes rather expensive.
6256
6257let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
6258  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
6259                               [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
6260                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
6261               Requires<[HasZCZ]>;
6262  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
6263                               [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
6264                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
6265               Requires<[HasZCZ]>;
6266}
6267
6268//   VMOV     : Vector Get Lane (move scalar to ARM core register)
6269
6270def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
6271                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6272                          IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
6273                          [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
6274                                           imm:$lane))]> {
6275  let Inst{21}  = lane{2};
6276  let Inst{6-5} = lane{1-0};
6277}
6278def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
6279                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6280                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
6281                          [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
6282                                           imm:$lane))]> {
6283  let Inst{21} = lane{1};
6284  let Inst{6}  = lane{0};
6285}
6286def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
6287                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6288                          IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
6289                          [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
6290                                           imm:$lane))]> {
6291  let Inst{21}  = lane{2};
6292  let Inst{6-5} = lane{1-0};
6293}
6294def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
6295                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6296                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
6297                          [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
6298                                           imm:$lane))]> {
6299  let Inst{21} = lane{1};
6300  let Inst{6}  = lane{0};
6301}
6302def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
6303                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
6304                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
6305                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
6306                                           imm:$lane))]>,
6307                Requires<[HasFPRegs, HasFastVGETLNi32]> {
6308  let Inst{21} = lane{0};
6309}
6310let Predicates = [HasNEON] in {
6311// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
6312def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
6313          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6314                           (DSubReg_i8_reg imm:$lane))),
6315                     (SubReg_i8_lane imm:$lane))>;
6316def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
6317          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6318                             (DSubReg_i16_reg imm:$lane))),
6319                     (SubReg_i16_lane imm:$lane))>;
6320def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
6321          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6322                           (DSubReg_i8_reg imm:$lane))),
6323                     (SubReg_i8_lane imm:$lane))>;
6324def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
6325          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6326                             (DSubReg_i16_reg imm:$lane))),
6327                     (SubReg_i16_lane imm:$lane))>;
6328}
6329def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6330          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
6331                             (DSubReg_i32_reg imm:$lane))),
6332                     (SubReg_i32_lane imm:$lane))>,
6333      Requires<[HasNEON, HasFastVGETLNi32]>;
6334def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
6335          (COPY_TO_REGCLASS
6336            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6337      Requires<[HasNEON, HasSlowVGETLNi32]>;
6338def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6339          (COPY_TO_REGCLASS
6340            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6341      Requires<[HasNEON, HasSlowVGETLNi32]>;
6342let Predicates = [HasNEON] in {
6343def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
6344          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
6345                          (SSubReg_f32_reg imm:$src2))>;
6346def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
6347          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
6348                          (SSubReg_f32_reg imm:$src2))>;
6349//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
6350//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6351def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
6352          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6353}
6354
6355multiclass ExtractEltEvenF16<ValueType VT4, ValueType VT8> {
6356  def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane),
6357              (EXTRACT_SUBREG
6358                  (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
6359                  (SSubReg_f16_reg imm_even:$lane))>;
6360  def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane),
6361              (EXTRACT_SUBREG
6362                  (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
6363                  (SSubReg_f16_reg imm_even:$lane))>;
6364}
6365
6366multiclass ExtractEltOddF16VMOVH<ValueType VT4, ValueType VT8> {
6367  def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane),
6368            (COPY_TO_REGCLASS
6369              (VMOVH (EXTRACT_SUBREG
6370                        (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
6371                        (SSubReg_f16_reg imm_odd:$lane))),
6372              HPR)>;
6373  def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane),
6374            (COPY_TO_REGCLASS
6375              (VMOVH (EXTRACT_SUBREG
6376                        (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
6377                        (SSubReg_f16_reg imm_odd:$lane))),
6378              HPR)>;
6379}
6380
6381let Predicates = [HasNEON] in {
6382  defm : ExtractEltEvenF16<v4f16, v8f16>;
6383  defm : ExtractEltOddF16VMOVH<v4f16, v8f16>;
6384}
6385
6386let AddedComplexity = 1, Predicates = [HasNEON, HasBF16, HasFullFP16] in {
6387  // If VMOVH (vmovx.f16) is available, use it to extract BF16 from the odd lanes.
6388  defm : ExtractEltOddF16VMOVH<v4bf16, v8bf16>;
6389}
6390
6391let Predicates = [HasBF16, HasNEON] in {
6392  defm : ExtractEltEvenF16<v4bf16, v8bf16>;
6393
6394  // Otherwise, if VMOVH is not available, resort to extracting the odd lane
6395  // into a GPR and then moving it to HPR.
6396  def : Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane),
6397            (COPY_TO_REGCLASS
6398              (VGETLNu16 (v4bf16 DPR:$src), imm:$lane),
6399              HPR)>;
6400
6401  def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane),
6402            (COPY_TO_REGCLASS
6403              (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6404                                                (DSubReg_i16_reg imm:$lane))),
6405                         (SubReg_i16_lane imm:$lane)),
6406              HPR)>;
6407}
6408
6409//   VMOV     : Vector Set Lane (move ARM core register to scalar)
6410
6411let Constraints = "$src1 = $V" in {
6412def VSETLNi8  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
6413                          (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
6414                          IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
6415                          [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
6416                                           GPR:$R, imm:$lane))]> {
6417  let Inst{21}  = lane{2};
6418  let Inst{6-5} = lane{1-0};
6419}
6420def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
6421                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
6422                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
6423                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
6424                                           GPR:$R, imm:$lane))]> {
6425  let Inst{21} = lane{1};
6426  let Inst{6}  = lane{0};
6427}
6428def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
6429                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
6430                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
6431                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
6432                                           GPR:$R, imm:$lane))]>,
6433                Requires<[HasVFP2]> {
6434  let Inst{21} = lane{0};
6435  // This instruction is equivalent to
6436  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
6437  let isInsertSubreg = 1;
6438}
6439}
6440
6441// TODO: for odd lanes we could optimize this a bit by using the VINS
6442// FullFP16 instruction when it is available
6443multiclass InsertEltF16<ValueType VTScalar, ValueType VT4, ValueType VT8> {
6444  def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane),
6445            (VT4 (VSETLNi16 DPR:$src1,
6446                 (COPY_TO_REGCLASS HPR:$src2, GPR), imm:$lane))>;
6447  def : Pat<(insertelt (VT8 QPR:$src1), (VTScalar HPR:$src2), imm:$lane),
6448            (VT8 (INSERT_SUBREG QPR:$src1,
6449                    (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
6450                                        (DSubReg_i16_reg imm:$lane))),
6451                              (COPY_TO_REGCLASS HPR:$src2, GPR),
6452                              (SubReg_i16_lane imm:$lane))),
6453                    (DSubReg_i16_reg imm:$lane)))>;
6454}
6455
6456let Predicates = [HasNEON] in {
6457def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
6458          (v16i8 (INSERT_SUBREG QPR:$src1,
6459                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
6460                                   (DSubReg_i8_reg imm:$lane))),
6461                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
6462                  (DSubReg_i8_reg imm:$lane)))>;
6463def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
6464          (v8i16 (INSERT_SUBREG QPR:$src1,
6465                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
6466                                     (DSubReg_i16_reg imm:$lane))),
6467                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
6468                  (DSubReg_i16_reg imm:$lane)))>;
6469def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
6470          (v4i32 (INSERT_SUBREG QPR:$src1,
6471                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
6472                                     (DSubReg_i32_reg imm:$lane))),
6473                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
6474                  (DSubReg_i32_reg imm:$lane)))>;
6475
6476def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
6477          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
6478                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
6479def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
6480          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
6481                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
6482
6483defm : InsertEltF16<f16, v4f16, v8f16>;
6484
6485//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6486//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
6487def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6488          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
6489
6490def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
6491          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6492def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
6493          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
6494def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
6495          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6496
6497def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
6498          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6499def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
6500          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6501def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
6502          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6503
6504def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
6505          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6506                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6507                         dsub_0)>;
6508def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
6509          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6510                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6511                         dsub_0)>;
6512def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
6513          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6514                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6515                         dsub_0)>;
6516}
6517
6518let Predicates = [HasNEON, HasBF16] in
6519defm : InsertEltF16<bf16, v4bf16, v8bf16>;
6520
6521//   VDUP     : Vector Duplicate (from ARM core register to all elements)
6522
6523class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6524  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
6525          IIC_VMOVIS, "vdup", Dt, "$V, $R",
6526          [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
6527class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6528  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
6529          IIC_VMOVIS, "vdup", Dt, "$V, $R",
6530          [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
6531
6532def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
6533def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
6534def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
6535                Requires<[HasNEON, HasFastVDUP32]>;
6536def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
6537def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
6538def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;
6539
6540// ARMvdup patterns for uarchs with fast VDUP.32.
6541def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
6542      Requires<[HasNEON,HasFastVDUP32]>;
6543def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
6544      Requires<[HasNEON]>;
6545
6546// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
6547def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
6548      Requires<[HasNEON,HasSlowVDUP32]>;
6549def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
6550      Requires<[HasNEON,HasSlowVDUP32]>;
6551
6552//   VDUP     : Vector Duplicate Lane (from scalar to all elements)
6553
6554class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
6555              ValueType Ty, Operand IdxTy>
6556  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6557              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
6558              [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;
6559
6560class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
6561              ValueType ResTy, ValueType OpTy, Operand IdxTy>
6562  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6563              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
6564              [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
6565                                      VectorIndex32:$lane)))]>;
6566
6567// Inst{19-16} is partially specified depending on the element size.
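// The position of the lowest set bit in Inst{19-16} encodes the element size,
// and the bits above it hold the lane index: xxx1 = 8-bit (lane in Inst{19-17}),
// xx10 = 16-bit (lane in Inst{19-18}), x100 = 32-bit (lane in Inst{19}).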
6568
6569def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
6570  bits<3> lane;
6571  let Inst{19-17} = lane{2-0};
6572}
6573def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
6574  bits<2> lane;
6575  let Inst{19-18} = lane{1-0};
6576}
6577def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
6578  bits<1> lane;
6579  let Inst{19} = lane{0};
6580}
6581def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
6582  bits<3> lane;
6583  let Inst{19-17} = lane{2-0};
6584}
6585def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
6586  bits<2> lane;
6587  let Inst{19-18} = lane{1-0};
6588}
6589def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
6590  bits<1> lane;
6591  let Inst{19} = lane{0};
6592}
6593
6594let Predicates = [HasNEON] in {
6595def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
6596          (VDUPLN32d DPR:$Vm, imm:$lane)>;
6597
6598def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
6599          (VDUPLN32d DPR:$Vm, imm:$lane)>;
6600
6601def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
6602          (VDUPLN32q DPR:$Vm, imm:$lane)>;
6603
6604def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
6605          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
6606                                  (DSubReg_i8_reg imm:$lane))),
6607                           (SubReg_i8_lane imm:$lane)))>;
6608def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
6609          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
6610                                    (DSubReg_i16_reg imm:$lane))),
6611                            (SubReg_i16_lane imm:$lane)))>;
6612def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
6613          (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
6614                                    (DSubReg_i16_reg imm:$lane))),
6615                            (SubReg_i16_lane imm:$lane)))>;
6616def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
6617          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
6618                                    (DSubReg_i32_reg imm:$lane))),
6619                            (SubReg_i32_lane imm:$lane)))>;
6620def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
6621          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
6622                                   (DSubReg_i32_reg imm:$lane))),
6623                           (SubReg_i32_lane imm:$lane)))>;
6624
6625def : Pat<(v4f16 (ARMvdup (f16 HPR:$src))),
6626          (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
6627                             (f16 HPR:$src), ssub_0), (i32 0)))>;
6628def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
6629          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6630                             SPR:$src, ssub_0), (i32 0)))>;
6631def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
6632          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6633                             SPR:$src, ssub_0), (i32 0)))>;
6634def : Pat<(v8f16 (ARMvdup (f16 HPR:$src))),
6635          (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
6636                             (f16 HPR:$src), ssub_0), (i32 0)))>;
6637}
6638
6639let Predicates = [HasNEON, HasBF16] in {
6640def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)),
6641          (VDUPLN16d DPR:$Vm, imm:$lane)>;
6642
6643def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)),
6644          (v8bf16 (VDUPLN16q (v4bf16 (EXTRACT_SUBREG QPR:$src,
6645                                    (DSubReg_i16_reg imm:$lane))),
6646                            (SubReg_i16_lane imm:$lane)))>;
6647
6648def : Pat<(v4bf16 (ARMvdup (bf16 HPR:$src))),
6649          (v4bf16 (VDUPLN16d (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
6650                             (bf16 HPR:$src), ssub_0), (i32 0)))>;
6651def : Pat<(v8bf16 (ARMvdup (bf16 HPR:$src))),
6652          (v8bf16 (VDUPLN16q (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
6653                             (bf16 HPR:$src), ssub_0), (i32 0)))>;
6654}
6655
6656//   VMOVN    : Vector Narrowing Move
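// For example, "vmovn.i16 d0, q1" keeps the low 8 bits of each 16-bit element
// of q1 (a plain truncation); the VQMOVN/VQMOVUN variants below saturate
// instead of truncating (illustrative).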
6657defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
6658                         "vmovn", "i", trunc>;
6659//   VQMOVN   : Vector Saturating Narrowing Move
6660defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
6661                            "vqmovn", "s", int_arm_neon_vqmovns>;
6662defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
6663                            "vqmovn", "u", int_arm_neon_vqmovnu>;
6664defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
6665                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
6666//   VMOVL    : Vector Lengthening Move
6667defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
6668defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
6669
6670let Predicates = [HasNEON] in {
6671def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
6672def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
6673def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
6674}
6675
6676// Vector Conversions.
6677
6678//   VCVT     : Vector Convert Between Floating-Point and Integers
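// These encodings use the Advanced SIMD default rounding: float-to-integer
// conversion rounds towards zero (matching fp_to_sint/fp_to_uint), and
// integer-to-float rounds to nearest. For example, "vcvt.s32.f32 d0, d1"
// truncates each f32 lane of d1 to a signed 32-bit integer (illustrative).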
6679def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6680                     v2i32, v2f32, fp_to_sint>;
6681def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6682                     v2i32, v2f32, fp_to_uint>;
6683def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6684                     v2f32, v2i32, sint_to_fp>;
6685def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6686                     v2f32, v2i32, uint_to_fp>;
6687
6688def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6689                     v4i32, v4f32, fp_to_sint>;
6690def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6691                     v4i32, v4f32, fp_to_uint>;
6692def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6693                     v4f32, v4i32, sint_to_fp>;
6694def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6695                     v4f32, v4i32, uint_to_fp>;
6696
6697def  VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6698                     v4i16, v4f16, fp_to_sint>,
6699                Requires<[HasNEON, HasFullFP16]>;
6700def  VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6701                     v4i16, v4f16, fp_to_uint>,
6702                Requires<[HasNEON, HasFullFP16]>;
6703def  VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6704                     v4f16, v4i16, sint_to_fp>,
6705                Requires<[HasNEON, HasFullFP16]>;
6706def  VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6707                     v4f16, v4i16, uint_to_fp>,
6708                Requires<[HasNEON, HasFullFP16]>;
6709
6710def  VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6711                     v8i16, v8f16, fp_to_sint>,
6712                Requires<[HasNEON, HasFullFP16]>;
6713def  VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6714                     v8i16, v8f16, fp_to_uint>,
6715                Requires<[HasNEON, HasFullFP16]>;
6716def  VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6717                     v8f16, v8i16, sint_to_fp>,
6718                Requires<[HasNEON, HasFullFP16]>;
6719def  VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6720                     v8f16, v8i16, uint_to_fp>,
6721                Requires<[HasNEON, HasFullFP16]>;
6722
6723// VCVT{A, N, P, M}
6724multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
6725                    SDPatternOperator IntU> {
6726  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
6727    def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6728                       "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
6729    def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6730                       "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
6731    def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6732                       "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
6733    def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6734                       "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
6735    def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6736                       "s16.f16", v4i16, v4f16, IntS>,
6737              Requires<[HasV8, HasNEON, HasFullFP16]>;
6738    def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6739                       "s16.f16", v8i16, v8f16, IntS>,
6740              Requires<[HasV8, HasNEON, HasFullFP16]>;
6741    def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6742                       "u16.f16", v4i16, v4f16, IntU>,
6743              Requires<[HasV8, HasNEON, HasFullFP16]>;
6744    def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6745                       "u16.f16", v8i16, v8f16, IntU>,
6746              Requires<[HasV8, HasNEON, HasFullFP16]>;
6747  }
6748}
6749
6750defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
6751defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
6752defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
6753defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
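// Each defm above expands, per the multiclass, into signed/unsigned, D/Q and
// f32/f16 variants, e.g. "defm VCVTAN" yields VCVTANSDf, VCVTANUQh, and so on.
// The suffix letter selects the rounding mode: a = to nearest, ties away from
// zero; n = to nearest, ties to even; p = towards +infinity; m = towards
// -infinity.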
6754
6755//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
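// The extra immediate operand is the number of fraction bits. For example,
// "vcvt.s32.f32 d0, d1, #16" converts each f32 lane of d1 to a signed 32-bit
// fixed-point value with 16 fraction bits (illustrative).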
6756let DecoderMethod = "DecodeVCVTD" in {
6757def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6758                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
6759def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6760                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
6761def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6762                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
6763def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6764                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
6765let Predicates = [HasNEON, HasFullFP16] in {
6766def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6767                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
6768def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6769                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
6770def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6771                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
6772def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6773                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
6774} // Predicates = [HasNEON, HasFullFP16]
6775}
6776
6777let DecoderMethod = "DecodeVCVTQ" in {
6778def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6779                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
6780def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6781                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
6782def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6783                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
6784def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6785                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
6786let Predicates = [HasNEON, HasFullFP16] in {
6787def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6788                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
6789def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6790                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
6791def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6792                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
6793def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6794                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
6795} // Predicates = [HasNEON, HasFullFP16]
6796}
6797
6798def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
6799                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6800def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
6801                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6802def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
6803                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6804def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
6805                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6806
6807def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
6808                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6809def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
6810                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6811def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
6812                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6813def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
6814                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6815
6816def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
6817                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6818def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
6819                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6820def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
6821                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6822def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
6823                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6824
6825def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
6826                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6827def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
6828                    (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6829def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
6830                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6831def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
6832                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6833
6834
6835//   VCVT     : Vector Convert Between Half-Precision and Single-Precision.
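// For example, "vcvt.f16.f32 d0, q1" narrows the four f32 lanes of q1 into
// four f16 lanes of d0, and "vcvt.f32.f16 q0, d1" widens in the other
// direction (illustrative).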
6836def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
6837                        IIC_VUNAQ, "vcvt", "f16.f32",
6838                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
6839                Requires<[HasNEON, HasFP16]>;
6840def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
6841                        IIC_VUNAQ, "vcvt", "f32.f16",
6842                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
6843                Requires<[HasNEON, HasFP16]>;
6844
6845// Vector Reverse.
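// VREV<n>.<size> reverses the order of <size>-bit elements within each n-bit
// group. For example, "vrev64.8 d0, d1" reverses all eight bytes of d1, while
// "vrev32.16 d0, d1" swaps the halfwords within each 32-bit word (illustrative).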
6846
6847//   VREV64   : Vector Reverse elements within 64-bit doublewords
6848
6849class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6850  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
6851        (ins DPR:$Vm), IIC_VMOVD,
6852        OpcodeStr, Dt, "$Vd, $Vm", "",
6853        [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
6854class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6855  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
6856        (ins QPR:$Vm), IIC_VMOVQ,
6857        OpcodeStr, Dt, "$Vd, $Vm", "",
6858        [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;
6859
6860def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
6861def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
6862def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
6863let Predicates = [HasNEON] in {
6864def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
6865}
6866
6867def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
6868def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
6869def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
6870
6871let Predicates = [HasNEON] in {
6872  def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))),
6873            (VREV64q32 QPR:$Vm)>;
6874  def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))),
6875            (VREV64q16 QPR:$Vm)>;
6876  def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))),
6877            (VREV64d16 DPR:$Vm)>;
6878}
6879
6880//   VREV32   : Vector Reverse elements within 32-bit words
6881
6882class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6883  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
6884        (ins DPR:$Vm), IIC_VMOVD,
6885        OpcodeStr, Dt, "$Vd, $Vm", "",
6886        [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
6887class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6888  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
6889        (ins QPR:$Vm), IIC_VMOVQ,
6890        OpcodeStr, Dt, "$Vd, $Vm", "",
6891        [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;
6892
6893def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
6894def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
6895
6896def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
6897def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
6898
6899let Predicates = [HasNEON] in {
6900  def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))),
6901            (VREV32q16 QPR:$Vm)>;
6902  def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))),
6903            (VREV32d16 DPR:$Vm)>;
6904}
6905
6906//   VREV16   : Vector Reverse elements within 16-bit halfwords
6907
6908class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6909  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
6910        (ins DPR:$Vm), IIC_VMOVD,
6911        OpcodeStr, Dt, "$Vd, $Vm", "",
6912        [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
6913class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6914  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
6915        (ins QPR:$Vm), IIC_VMOVQ,
6916        OpcodeStr, Dt, "$Vd, $Vm", "",
6917        [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;
6918
6919def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
6920def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;
6921
6922// Other Vector Shuffles.
6923
6924//  Aligned extractions: really just dropping registers
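// Extracting an aligned D-sized half of a Q register is free: the result is
// just the corresponding D subregister, so these patterns lower
// vector_extract_subvec to EXTRACT_SUBREG and no instruction is emitted.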
6925
6926class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
6927      : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
6928             (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
6929        Requires<[HasNEON]>;
6930
6931def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
6932
6933def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
6934
6935def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
6936
6937def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
6938
6939def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
6940
6941def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16
6942
6943//   VEXT     : Vector Extract
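// VEXT concatenates the two source registers and extracts a contiguous run of
// elements starting at the given index. For example, "vext.8 d0, d1, d2, #3"
// sets d0 to bytes 3..7 of d1 followed by bytes 0..2 of d2 (illustrative).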
6944
6945
6946// All of these have a two-operand InstAlias.
6947let TwoOperandAliasConstraint = "$Vn = $Vd" in {
6948class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6949  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
6950        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
6951        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6952        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
6953                                     (Ty DPR:$Vm), imm:$index)))]> {
6954  bits<3> index;
6955  let Inst{11} = 0b0;
6956  let Inst{10-8} = index{2-0};
6957}
6958
6959class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6960  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
6961        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
6962        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6963        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
6964                                     (Ty QPR:$Vm), imm:$index)))]> {
6965  bits<4> index;
6966  let Inst{11-8} = index{3-0};
6967}
6968}
6969
6970def VEXTd8  : VEXTd<"vext", "8",  v8i8, imm0_7> {
6971  let Inst{10-8} = index{2-0};
6972}
6973def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
6974  let Inst{10-9} = index{1-0};
6975  let Inst{8}    = 0b0;
6976}
6977let Predicates = [HasNEON] in {
6978def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
6979          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
6980}
6981
6982def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
6983  let Inst{10}     = index{0};
6984  let Inst{9-8}    = 0b00;
6985}
6986let Predicates = [HasNEON] in {
6987def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
6988          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
6989}
6990
6991def VEXTq8  : VEXTq<"vext", "8",  v16i8, imm0_15> {
6992  let Inst{11-8} = index{3-0};
6993}
6994def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
6995  let Inst{11-9} = index{2-0};
6996  let Inst{8}    = 0b0;
6997}
6998let Predicates = [HasNEON] in {
6999def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
7000          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
7001}
7002
7003def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
7004  let Inst{11-10} = index{1-0};
7005  let Inst{9-8}    = 0b00;
7006}
7007def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
7008  let Inst{11} = index{0};
7009  let Inst{10-8}    = 0b000;
7010}
7011let Predicates = [HasNEON] in {
7012def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
7013          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
7014}
7015
7016//   VTRN     : Vector Transpose
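// VTRN treats corresponding pairs of elements in the two operands as 2x2
// matrices and transposes them, modifying both registers. For example,
// "vtrn.32 d0, d1" swaps d0[1] with d1[0] (illustrative).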
7017
7018def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
7019def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
7020def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
7021
7022def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
7023def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
7024def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
7025
7026//   VUZP     : Vector Unzip (Deinterleave)
7027
7028def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
7029def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
7030// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
7031def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
7032                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
7033
7034def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
7035def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
7036def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
7037
7038//   VZIP     : Vector Zip (Interleave)
7039
7040def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
7041def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
7042// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
7043def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
7044                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
7045
7046def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
7047def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
7048def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
7049
7050// Vector Table Lookup and Table Extension.
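// Each byte of the index register Vm selects a byte from the table formed by
// the D-register list Vn. For VTBL an out-of-range index produces 0; for VTBX
// it leaves the corresponding destination byte unchanged, which is why the
// VTBX definitions below carry the "$orig = $Vd" constraint.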
7051
7052//   VTBL     : Vector Table Lookup
7053let DecoderMethod = "DecodeTBLInstruction" in {
7054def  VTBL1
7055  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
7056        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
7057        "vtbl", "8", "$Vd, $Vn, $Vm", "",
7058        [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
7059
7060let hasExtraSrcRegAllocReq = 1 in {
7061def  VTBL2
7062  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
7063        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
7064        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
7065def  VTBL3
7066  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
7067        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
7068        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
7069def  VTBL4
7070  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
7071        (ins VecListFourD:$Vn, DPR:$Vm),
7072        NVTBLFrm, IIC_VTB4,
7073        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
7074} // hasExtraSrcRegAllocReq = 1
7075
7076def  VTBL3Pseudo
7077  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
7078def  VTBL4Pseudo
7079  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
7080
7081//   VTBX     : Vector Table Extension
7082def  VTBX1
7083  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
7084        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
7085        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
7086        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
7087                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
7088let hasExtraSrcRegAllocReq = 1 in {
7089def  VTBX2
7090  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
7091        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
7092        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
7093def  VTBX3
7094  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
7095        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
7096        NVTBLFrm, IIC_VTBX3,
7097        "vtbx", "8", "$Vd, $Vn, $Vm",
7098        "$orig = $Vd", []>;
7099def  VTBX4
7100  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
7101        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
7102        "vtbx", "8", "$Vd, $Vn, $Vm",
7103        "$orig = $Vd", []>;
7104} // hasExtraSrcRegAllocReq = 1
7105
7106def  VTBX3Pseudo
7107  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
7108                IIC_VTBX3, "$orig = $dst", []>;
7109def  VTBX4Pseudo
7110  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
7111                IIC_VTBX4, "$orig = $dst", []>;
7112} // DecoderMethod = "DecodeTBLInstruction"
7113
7114let Predicates = [HasNEON] in {
7115def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
7116          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
7117                                            v8i8:$Vn1, dsub_1),
7118                       v8i8:$Vm))>;
7119def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
7120                                    v8i8:$Vm)),
7121          (v8i8 (VTBX2 v8i8:$orig,
7122                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
7123                                            v8i8:$Vn1, dsub_1),
7124                       v8i8:$Vm))>;
7125
7126def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
7127                                    v8i8:$Vn2, v8i8:$Vm)),
7128          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7129                                                 v8i8:$Vn1, dsub_1,
7130                                                 v8i8:$Vn2, dsub_2,
7131                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
7132                             v8i8:$Vm))>;
7133def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
7134                                    v8i8:$Vn2, v8i8:$Vm)),
7135          (v8i8 (VTBX3Pseudo v8i8:$orig,
7136                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7137                                                 v8i8:$Vn1, dsub_1,
7138                                                 v8i8:$Vn2, dsub_2,
7139                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
7140                             v8i8:$Vm))>;
7141
7142def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
7143                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
7144          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7145                                                 v8i8:$Vn1, dsub_1,
7146                                                 v8i8:$Vn2, dsub_2,
7147                                                 v8i8:$Vn3, dsub_3),
7148                             v8i8:$Vm))>;
7149def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
7150                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
7151          (v8i8 (VTBX4Pseudo v8i8:$orig,
7152                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7153                                                 v8i8:$Vn1, dsub_1,
7154                                                 v8i8:$Vn2, dsub_2,
7155                                                 v8i8:$Vn3, dsub_3),
7156                             v8i8:$Vm))>;
7157}
7158
7159// VRINT      : Vector Rounding
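// The suffix letter selects the rounding mode: n = to nearest, ties to even;
// a = to nearest, ties away from zero; z = towards zero; m = towards -infinity;
// p = towards +infinity; x rounds and raises the Inexact exception when the
// result differs from the input.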
7160multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
7161  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
7162    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
7163                      !strconcat("vrint", op), "f32",
7164                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
7165      let Inst{9-7} = op9_7;
7166    }
7167    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
7168                      !strconcat("vrint", op), "f32",
7169                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
7170      let Inst{9-7} = op9_7;
7171    }
7172    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
7173                      !strconcat("vrint", op), "f16",
7174                      v4f16, v4f16, Int>,
7175             Requires<[HasV8, HasNEON, HasFullFP16]> {
7176      let Inst{9-7} = op9_7;
7177    }
7178    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
7179                      !strconcat("vrint", op), "f16",
7180                      v8f16, v8f16, Int>,
7181             Requires<[HasV8, HasNEON, HasFullFP16]> {
7182      let Inst{9-7} = op9_7;
7183    }
7184  }
7185
7186  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
7187                  (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
7188  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
7189                  (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
7190  let Predicates = [HasNEON, HasFullFP16] in {
7191  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
7192                  (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
7193  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
7194                  (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
7195  }
7196}
7197
7198defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
7199defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
7200defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
7201defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
7202defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
7203defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
7204
7205// Cryptography instructions
7206let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
7207    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
7208  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
7209    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
7210                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
7211      Requires<[HasV8, HasCrypto]>;
7212  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
7213    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
7214                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
7215      Requires<[HasV8, HasCrypto]>;
7216  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
7217              SDPatternOperator Int>
7218    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
7219                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
7220      Requires<[HasV8, HasCrypto]>;
7221  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
7222              SDPatternOperator Int>
7223    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
7224                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
7225      Requires<[HasV8, HasCrypto]>;
7226  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
7227    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
7228                !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
7229      Requires<[HasV8, HasCrypto]>;
7230}
7231
7232def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
7233def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
7234def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
7235def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
7236
7237def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
7238def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
7239def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
7240def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
7241def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
7242def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
7243def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
7244def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
7245def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
7246def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
7247
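// The SHA1 intrinsics pass the scalar hash element as an i32, while the
// instructions operate on vector registers, so the patterns below move the
// scalar into lane 0 with COPY_TO_REGCLASS/SUBREG_TO_REG (and extract it back
// out again for sha1h).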
7248let Predicates = [HasNEON] in {
7249def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
7250          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
7251              (SHA1H (SUBREG_TO_REG (i64 0),
7252                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
7253                                    ssub_0)),
7254              ssub_0)), GPR)>;
7255
7256def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7257          (SHA1C v4i32:$hash_abcd,
7258                 (SUBREG_TO_REG (i64 0),
7259                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7260                                ssub_0),
7261                 v4i32:$wk)>;
7262
7263def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7264          (SHA1M v4i32:$hash_abcd,
7265                 (SUBREG_TO_REG (i64 0),
7266                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7267                                ssub_0),
7268                 v4i32:$wk)>;
7269
7270def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7271          (SHA1P v4i32:$hash_abcd,
7272                 (SUBREG_TO_REG (i64 0),
7273                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7274                                ssub_0),
7275                 v4i32:$wk)>;
7276}
7277
7278//===----------------------------------------------------------------------===//
7279// NEON instructions for single-precision FP math
7280//===----------------------------------------------------------------------===//
7281
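// These patterns allow scalar f32 (and f16) operations to be selected as the
// corresponding NEON D-register instructions when the subtarget prefers NEON
// for scalar FP (UseNEONForFP): the scalar is inserted into lane 0 of an
// undefined vector, the vector instruction is run, and lane 0 is extracted
// back out.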
7282class N2VSPat<SDNode OpNode, NeonI Inst>
7283  : NEONFPPat<(f32 (OpNode SPR:$a)),
7284              (EXTRACT_SUBREG
7285               (v2f32 (COPY_TO_REGCLASS (Inst
7286                (INSERT_SUBREG
7287                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7288                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
7289
7290class N3VSPat<SDNode OpNode, NeonI Inst>
7291  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
7292              (EXTRACT_SUBREG
7293               (v2f32 (COPY_TO_REGCLASS (Inst
7294                (INSERT_SUBREG
7295                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7296                 SPR:$a, ssub_0),
7297                (INSERT_SUBREG
7298                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7299                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
7300
7301class N3VSPatFP16<SDNode OpNode, NeonI Inst>
7302  : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
7303              (EXTRACT_SUBREG
7304               (v4f16 (COPY_TO_REGCLASS (Inst
7305                (INSERT_SUBREG
7306                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
7307                 HPR:$a, ssub_0),
7308                (INSERT_SUBREG
7309                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
7310                 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
7311
7312class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
7313  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
7314              (EXTRACT_SUBREG
7315               (v2f32 (COPY_TO_REGCLASS (Inst
7316                (INSERT_SUBREG
7317                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7318                 SPR:$acc, ssub_0),
7319                (INSERT_SUBREG
7320                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7321                 SPR:$a, ssub_0),
7322                (INSERT_SUBREG
7323                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7324                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
7325
7326class NVCVTIFPat<SDNode OpNode, NeonI Inst>
7327  : NEONFPPat<(f32 (OpNode GPR:$a)),
7328              (f32 (EXTRACT_SUBREG
7329                     (v2f32 (Inst
7330                       (INSERT_SUBREG
7331                         (v2f32 (IMPLICIT_DEF)),
7332                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
7333                     ssub_0))>;
7334class NVCVTFIPat<SDNode OpNode, NeonI Inst>
7335  : NEONFPPat<(i32 (OpNode SPR:$a)),
7336              (i32 (EXTRACT_SUBREG
7337                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
7338                                                 SPR:$a, ssub_0))),
7339                     ssub_0))>;
7340
7341def : N3VSPat<fadd, VADDfd>;
7342def : N3VSPat<fsub, VSUBfd>;
7343def : N3VSPat<fmul, VMULfd>;
7344def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
7345      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
7346def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
7347      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
7348def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
7349      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7350def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
7351      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7352def : N2VSPat<fabs, VABSfd>;
7353def : N2VSPat<fneg, VNEGfd>;
7354def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
7355def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
7356def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
7357def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
7358def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
7359def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
7360def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
7361def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
7362
7363// NEON doesn't have any f64 conversions, so provide patterns to make
7364// sure the VFP conversions match when extracting from a vector.
7365def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7366             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7367def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7368             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7369def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7370             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7371def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7372             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7373
7374
7375// Prefer VMOVDRR for i32 -> f32 bitcasts; it can write all DPR registers.
7376def : Pat<(f32 (bitconvert GPR:$a)),
7377          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7378        Requires<[HasNEON, DontUseVMOVSR]>;
7379def : Pat<(arm_vmovsr GPR:$a),
7380          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7381        Requires<[HasNEON, DontUseVMOVSR]>;
7382
7383//===----------------------------------------------------------------------===//
7384// Non-Instruction Patterns or Endianness - Revert Patterns
7385//===----------------------------------------------------------------------===//
7386
7387// bit_convert
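// Bitconverts between types with the same element size are no-ops everywhere.
// When the element sizes differ, the in-register lane order depends on
// endianness: on little-endian targets the conversion is still the identity
// (the IsLE block below), while on big-endian targets a VREV of the
// appropriate granularity is required (the IsBE block further down).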
7388// 64 bit conversions
7389let Predicates = [HasNEON] in {
7390def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
7391def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
7392
7393def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
7394def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
7395
7396def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16  DPR:$src)>;
7397def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16  DPR:$src)>;
7398
7399def : Pat<(v4i16 (bitconvert (v4bf16 DPR:$src))), (v4i16  DPR:$src)>;
7400def : Pat<(v4bf16 (bitconvert (v4i16 DPR:$src))), (v4bf16  DPR:$src)>;
7401
7402// 128 bit conversions
7403def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
7404def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
7405
7406def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
7407def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
7408
7409def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16  QPR:$src)>;
7410def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16  QPR:$src)>;
7411
7412def : Pat<(v8i16 (bitconvert (v8bf16 QPR:$src))), (v8i16  QPR:$src)>;
7413def : Pat<(v8bf16 (bitconvert (v8i16 QPR:$src))), (v8bf16  QPR:$src)>;
7414}
7415
7416let Predicates = [IsLE,HasNEON] in {
7417  // 64 bit conversions
7418  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
7419  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
7420  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (f64   DPR:$src)>;
7421  def : Pat<(f64   (bitconvert (v4bf16 DPR:$src))), (f64   DPR:$src)>;
7422  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
7423  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
7424
7425  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
7426  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
7427  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
7428  def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (v1i64 DPR:$src)>;
7429  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
7430  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
7431
7432  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
7433  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
7434  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
7435  def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (v2f32 DPR:$src)>;
7436  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
7437  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;
7438
7439  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
7440  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
7441  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
7442  def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (v2i32 DPR:$src)>;
7443  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
7444  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
7445
7446  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (v4f16 DPR:$src)>;
7447  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
7448  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
7449  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
7450  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (v4f16 DPR:$src)>;
7451
7452  def : Pat<(v4bf16 (bitconvert (f64   DPR:$src))), (v4bf16 DPR:$src)>;
7453  def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (v4bf16 DPR:$src)>;
7454  def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (v4bf16 DPR:$src)>;
7455  def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (v4bf16 DPR:$src)>;
7456  def : Pat<(v4bf16 (bitconvert (v8i8  DPR:$src))), (v4bf16 DPR:$src)>;
7457
7458  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
7459  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
7460  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
7461  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
7462  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
7463
7464  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
7465  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
7466  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
7467  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
7468  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (v8i8  DPR:$src)>;
7469  def : Pat<(v8i8  (bitconvert (v4bf16 DPR:$src))), (v8i8  DPR:$src)>;
7470  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
7471
7472  // 128 bit conversions
7473  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
7474  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
7475  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
7476  def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (v2f64 QPR:$src)>;
7477  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
7478  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
7479
7480  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
7481  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
7482  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
7483  def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (v2i64 QPR:$src)>;
7484  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
7485  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
7486
7487  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
7488  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
7489  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
7490  def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (v4f32 QPR:$src)>;
7491  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
7492  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
7493
7494  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
7495  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
7496  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
7497  def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (v4i32 QPR:$src)>;
7498  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
7499  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
7500
7501  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
7502  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
7503  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
7504  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
7505  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
7506
7507  def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (v8bf16 QPR:$src)>;
7508  def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (v8bf16 QPR:$src)>;
7509  def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (v8bf16 QPR:$src)>;
7510  def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (v8bf16 QPR:$src)>;
7511  def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (v8bf16 QPR:$src)>;
7512
7513  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
7514  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
7515  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
7516  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
7517  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
7518
7519  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
7520  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
7521  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
7522  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
7523  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
7524  def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (v16i8 QPR:$src)>;
7525  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
7526}
7527
7528let Predicates = [IsBE,HasNEON] in {
7529  // 64 bit conversions
7530  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7531  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7532  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
7533  def : Pat<(f64   (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
7534  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7535  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
7536
7537  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7538  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7539  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
7540  def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
7541  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7542  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
7543
7544  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
7545  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7546  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
7547  def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
7548  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7549  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
7550
7551  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
7552  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7553  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
7554  def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
7555  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7556  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
7557
7558  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
7559  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7560  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7561  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7562  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
7563
7564  def : Pat<(v4bf16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
7565  def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7566  def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7567  def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7568  def : Pat<(v4bf16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
7569
7570  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
7571  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7572  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7573  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7574  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
7575
7576  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (VREV64d8  DPR:$src)>;
7577  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (VREV64d8  DPR:$src)>;
7578  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (VREV32d8  DPR:$src)>;
7579  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (VREV32d8  DPR:$src)>;
7580  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (VREV16d8  DPR:$src)>;
7581  def : Pat<(v8i8  (bitconvert (v4bf16 DPR:$src))), (VREV16d8  DPR:$src)>;
7582  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (VREV16d8  DPR:$src)>;
7583
7584  // 128 bit conversions
7585  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7586  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7587  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
7588  def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>;
7589  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7590  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
7591
7592  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7593  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7594  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
7595  def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>;
7596  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7597  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
7598
7599  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7600  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7601  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
7602  def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>;
7603  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7604  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
7605
7606  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7607  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7608  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
7609  def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>;
7610  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7611  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
7612
7613  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7614  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7615  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7616  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7617  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
7618
7619  def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7620  def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7621  def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7622  def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7623  def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
7624
7625  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7626  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7627  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7628  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7629  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
7630
7631  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8  QPR:$src)>;
7632  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8  QPR:$src)>;
7633  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8  QPR:$src)>;
7634  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8  QPR:$src)>;
7635  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8  QPR:$src)>;
7636  def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (VREV16q8  QPR:$src)>;
7637  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8  QPR:$src)>;
7638}
7639
7640let Predicates = [HasNEON] in {
7641  // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
7642// rather than the more general 'ARMVectorRegCast', which would also
7643  // match some bitconverts. If we use the latter in cases where the
7644  // input and output types are the same, the bitconvert gets elided
7645  // and we end up generating a nonsense match of nothing.
7646
7647  foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
7648    foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
7649      def : Pat<(VT (ARMVectorRegCastImpl (VT2 QPR:$src))), (VT QPR:$src)>;
7650
7651  foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
7652    foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
7653      def : Pat<(VT (ARMVectorRegCastImpl (VT2 DPR:$src))), (VT DPR:$src)>;
7654}
7655
7656// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
7657let Predicates = [IsBE,HasNEON] in {
7658def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
7659          (VREV64q8 (VLD1q8 addrmode6:$addr))>;
7660def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7661          (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
7662def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
7663          (VREV64q16 (VLD1q16 addrmode6:$addr))>;
7664def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7665          (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
7666}
7667
7668// Fold extracting an element out of a v2i32 into a vfp register.
7669def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
7670          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
7671      Requires<[HasNEON]>;
7672
7673// Vector lengthening move with load, matching extending loads.
7674
7675// extload, zextload and sextload for a standard lengthening load. Example:
7676// Lengthen_Single<"8", "i16", "8"> =
7677//     Pat<(v8i16 (extloadvi8 addrmode6:$addr))
7678//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
7679//                              (f64 (IMPLICIT_DEF)), (i32 0)))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  let AddedComplexity = 10 in {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                  (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
             Requires<[HasNEON]>;

  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
                (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
           Requires<[HasNEON]>;

  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
                (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
           Requires<[HasNEON]>;
  }
}
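// For illustration (this example is not in the original source): the signed
// variant produced by the same instantiation, Lengthen_Single<"8", "i16", "8">,
// differs only in using the signed lengthening move:
//     def _S : Pat<(v8i16 (sextloadvi8 addrmode6:$addr)),
//                  (VMOVLsv8i16 (VLD1d8 addrmode6:$addr))>;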

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                      (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>,
             Requires<[HasNEON]>;
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>,
             Requires<[HasNEON]>;
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>,
             Requires<[HasNEON]>;
}
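// For illustration (this example is not in the original source): the other
// instantiation used below, Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">,
// expands its _Z pattern to:
//     Pat<(v2i32 (zextloadvi16 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv4i32 (VLD1LNd32 addrmode6oneL32:$addr,
//                                      (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;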

// The following class definition is basically a copy of the
// Lengthen_HalfSingle definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy, string RevLanes> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>,
             Requires<[HasNEON]>;
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>,
             Requires<[HasNEON]>;
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>,
             Requires<[HasNEON]>;
}
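// For illustration (this example is not in the original source): with the
// big-endian instantiation Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8",
// "8", "i16", "8"> used below, the _Any pattern becomes:
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VREV32d8
//           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
//           dsub_0)>;
// i.e. the only difference from the little-endian pattern is the VREV32d8 that
// restores lane order after the 32-bit lane load.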

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0))>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>,
             Requires<[HasNEON]>;
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>,
             Requires<[HasNEON]>;
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>,
             Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty, string RevLanes> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
            (!cast<Instruction>("VREV32d" # RevLanes)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>,
             Requires<[HasNEON]>;
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
            (!cast<Instruction>("VREV32d" # RevLanes)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>,
             Requires<[HasNEON]>;
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
            (!cast<Instruction>("VREV32d" # RevLanes)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>,
             Requires<[HasNEON]>;
}
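// For illustration (this example is not in the original source): with the
// instantiation Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4",
// "i32", "8"> used below, the _Z pattern becomes:
//     Pat<(v4i32 (zextloadvi8 addrmode6oneL32:$addr)),
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VREV32d8
//             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
//             dsub_0))>;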

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
// Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
//     (EXTRACT_SUBREG (VMOVLuv4i32
//       (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                               (f64 (IMPLICIT_DEF)), (i32 0))),
//                       dsub_0)),
//       dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
          dsub_0)>,
             Requires<[HasNEON]>;
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
          dsub_0)>,
              Requires<[HasNEON]>;
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
          dsub_0)>,
             Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
            (!cast<Instruction>("VREV16d8")
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
          dsub_0)>,
             Requires<[HasNEON]>;
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
            (!cast<Instruction>("VREV16d8")
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
          dsub_0)>,
             Requires<[HasNEON]>;
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
            (!cast<Instruction>("VREV16d8")
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
          dsub_0)>,
             Requires<[HasNEON]>;
}
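// For illustration (this example is not in the original source): with the
// instantiation Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16",
// "4", "i32"> used below, the _S pattern becomes:
//     Pat<(v2i32 (sextloadvi8 addrmode6:$addr)),
//         (EXTRACT_SUBREG (VMOVLsv4i32
//           (EXTRACT_SUBREG (VMOVLsv8i16 (VREV16d8
//             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
//             dsub_0)),
//          dsub_0)>;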

defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

let Predicates = [HasNEON,IsLE] in {
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

let Predicates = [HasNEON,IsBE] in {
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
}

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
let Predicates = [HasNEON,IsLE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
           (VLD1LNd16 addrmode6:$addr,
                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
           (VLD1LNd16 addrmode6:$addr,
                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
           (VLD1LNd16 addrmode6:$addr,
                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
let Predicates = [HasNEON,IsBE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
           (!cast<Instruction>("VREV16d8")
             (VLD1LNd16 addrmode6:$addr,
                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
           (!cast<Instruction>("VREV16d8")
             (VLD1LNd16 addrmode6:$addr,
                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
           (!cast<Instruction>("VREV16d8")
             (VLD1LNd16 addrmode6:$addr,
                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}

let Predicates = [HasNEON] in {
def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8bf16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
}
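// For illustration (this note is not in the original source): these patterns
// model the concatenation of two 64-bit halves (e.g. a "vcombine"-style
// operation) purely as a REG_SEQUENCE that builds a Q register from dsub_0 and
// dsub_1, so when the register allocator can place $Dn and $Dm in a
// consecutive D-register pair, no copy instruction is emitted at all.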

//===----------------------------------------------------------------------===//
// Assembler aliases
//

def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ... immediates
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
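// For illustration (this note is not in the original source): the immediate
// aliases above let the assembler accept e.g. "vand.i16 d0, #0xff00" even
// though VAND has no immediate form; the "Not" splat operand classes indicate
// that the parser renders the bitwise-complemented immediate, so the line is
// encoded as the equivalent "vbic.i16 d0, #0x00ff".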


// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                      pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;

def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                      pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
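// For illustration (this note is not in the original source): an input line
// such as
//   vld1.8 {d2[1]}, [r0]
// is first matched to VLD1LNdAsm_8 above; the assembly parser then inspects
// the lane index inside the $list operand and rewrites the pseudo to the real
// single-lane VLD1 instruction, something an InstAlias cannot express.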


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                      pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;

def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                      pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                  pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;

def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;

def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;

def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                       rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;

def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeDHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
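// For illustration (this note is not in the original source): a "multiple
// structure" form such as
//   vld3.16 {d16, d17, d18}, [r1]!
// is matched to VLD3dWB_fixed_Asm_16 above; the assembly parser then rewrites
// the pseudo to the corresponding real VLD3 instruction once the listed D
// registers have been checked to form a valid group, which the current
// operand classes do not express directly (hence the FIXME).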
8418
8419// VST3 single-lane pseudo-instructions. These need special handling for
8420// the lane index that an InstAlias can't handle, so we use these instead.
8421def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8422               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8423                    pred:$p)>;
8424def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8425               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8426                    pred:$p)>;
8427def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8428               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8429                    pred:$p)>;
8430def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8431               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8432                    pred:$p)>;
8433def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8434               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8435                    pred:$p)>;
8436
8437def VST3LNdWB_fixed_Asm_8 :
8438        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8439               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8440                    pred:$p)>;
8441def VST3LNdWB_fixed_Asm_16 :
8442        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8443               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8444                    pred:$p)>;
8445def VST3LNdWB_fixed_Asm_32 :
8446        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8447               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8448                    pred:$p)>;
8449def VST3LNqWB_fixed_Asm_16 :
8450        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8451               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8452                    pred:$p)>;
8453def VST3LNqWB_fixed_Asm_32 :
8454        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8455               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8456                    pred:$p)>;
8457def VST3LNdWB_register_Asm_8 :
8458        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8459                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8460                       rGPR:$Rm, pred:$p)>;
8461def VST3LNdWB_register_Asm_16 :
8462        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8463                  (ins VecListThreeDHWordIndexed:$list,
8464                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8465def VST3LNdWB_register_Asm_32 :
8466        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8467                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8468                       rGPR:$Rm, pred:$p)>;
8469def VST3LNqWB_register_Asm_16 :
8470        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8471                  (ins VecListThreeQHWordIndexed:$list,
8472                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8473def VST3LNqWB_register_Asm_32 :
8474        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8475                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8476                       rGPR:$Rm, pred:$p)>;
8477
8478
8479// VST3 multiple structure pseudo-instructions. These need special handling for
8480// the vector operands that the normal instructions don't yet model.
8481// FIXME: Remove these when the register classes and instructions are updated.
8482def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8483               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8484def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8485               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8486def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8487               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8488def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8489               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8490def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8491               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8492def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8493               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8494
8495def VST3dWB_fixed_Asm_8 :
8496        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8497               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8498def VST3dWB_fixed_Asm_16 :
8499        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8500               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8501def VST3dWB_fixed_Asm_32 :
8502        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8503               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8504def VST3qWB_fixed_Asm_8 :
8505        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8506               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8507def VST3qWB_fixed_Asm_16 :
8508        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8509               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8510def VST3qWB_fixed_Asm_32 :
8511        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8512               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8513def VST3dWB_register_Asm_8 :
8514        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8515                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8516                       rGPR:$Rm, pred:$p)>;
8517def VST3dWB_register_Asm_16 :
8518        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8519                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8520                       rGPR:$Rm, pred:$p)>;
8521def VST3dWB_register_Asm_32 :
8522        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8523                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8524                       rGPR:$Rm, pred:$p)>;
8525def VST3qWB_register_Asm_8 :
8526        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8527                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8528                       rGPR:$Rm, pred:$p)>;
8529def VST3qWB_register_Asm_16 :
8530        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8531                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8532                       rGPR:$Rm, pred:$p)>;
8533def VST3qWB_register_Asm_32 :
8534        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8535                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8536                       rGPR:$Rm, pred:$p)>;
8537
8538// VLD4 all-lanes pseudo-instructions. These need special handling for
8539// the lane index that an InstAlias can't handle, so we use these instead.
8540def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
8541               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
8542                    pred:$p)>;
8543def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
8544               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
8545                    pred:$p)>;
8546def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
8547               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
8548                    pred:$p)>;
8549def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
8550               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
8551                    pred:$p)>;
8552def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
8553               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
8554                    pred:$p)>;
8555def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
8556               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
8557                    pred:$p)>;
8558
8559def VLD4DUPdWB_fixed_Asm_8 :
8560        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
8561               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
8562                    pred:$p)>;
8563def VLD4DUPdWB_fixed_Asm_16 :
8564        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
8565               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
8566                    pred:$p)>;
8567def VLD4DUPdWB_fixed_Asm_32 :
8568        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
8569               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
8570                    pred:$p)>;
8571def VLD4DUPqWB_fixed_Asm_8 :
8572        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
8573               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
8574                    pred:$p)>;
8575def VLD4DUPqWB_fixed_Asm_16 :
8576        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
8577               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
8578                    pred:$p)>;
8579def VLD4DUPqWB_fixed_Asm_32 :
8580        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
8581               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
8582                    pred:$p)>;
8583def VLD4DUPdWB_register_Asm_8 :
8584        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
8585                  (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
8586                       rGPR:$Rm, pred:$p)>;
8587def VLD4DUPdWB_register_Asm_16 :
8588        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
8589                  (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
8590                       rGPR:$Rm, pred:$p)>;
8591def VLD4DUPdWB_register_Asm_32 :
8592        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
8593                  (ins VecListFourDAllLanes:$list,
8594                       addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
8595def VLD4DUPqWB_register_Asm_8 :
8596        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
8597                  (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
8598                       rGPR:$Rm, pred:$p)>;
8599def VLD4DUPqWB_register_Asm_16 :
8600        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
8601                  (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
8602                       rGPR:$Rm, pred:$p)>;
8603def VLD4DUPqWB_register_Asm_32 :
8604        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
8605                  (ins VecListFourQAllLanes:$list,
8606                       addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
8607
8608
8609// VLD4 single-lane pseudo-instructions. These need special handling for
8610// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
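// For example (illustrative only), both register-list spacings are accepted:
//   vld4.8 {d0, d1, d2, d3}, [r0]    (VecListFourD, consecutive registers)
//   vld4.8 {d0, d2, d4, d6}, [r0]    (VecListFourQ, even-spaced registers)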
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;

def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;

// VMOV/VMVN takes an optional datatype suffix
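// For example (illustrative only), "vmov.i32 d0, d1" is accepted with any
// datatype suffix (or none) and assembles to the same encoding as
// "vorr d0, d1, d1", since a register-to-register VMOV is just a VORR with
// both source operands equal.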
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
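// For example (illustrative only), "vcle.s8 d0, d1, d2" is accepted and
// encoded as "vcge.s8 d0, d2, d1"; the VCLT aliases further below map onto
// VCGT in the same way.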
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

8979// "vmov Rd, #-imm" can be handled via "vmvn".
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
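// For example (illustrative only), "vandq q0, q1, q2" is accepted as a
// synonym for "vand q0, q1, q2".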
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
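// For example (illustrative only), #0.0 has no 8-bit vmov.f32 immediate
// encoding, but the 0x00000000 bit pattern is a valid .i32 modified
// immediate, so "vmov.f32 d0, #0.0" can still be assembled via VMOVv2i32.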
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                     (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                     (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;

// ARMv8.6a BFloat16 instructions.
let Predicates = [HasBF16, HasNEON] in {
class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
               dag oops, dag iops, list<dag> pattern>
   : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
           N3RegFrm, IIC_VDOTPROD, "", "", pattern>
{
  let DecoderNamespace = "VFPV8";
}

class BF16VDOTS<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy, ValueType InputTy>
   : BF16VDOT<0b11000, 0b00,  Q, (outs RegTy:$dst),
              (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
            [(set (AccumTy RegTy:$dst),
                  (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                      (InputTy RegTy:$Vn),
                                      (InputTy RegTy:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
  let DecoderNamespace = "VFPV8";
}

multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
                     ValueType InputTy, dag RHS> {

  def "" : BF16VDOT<0b11100, 0b00, Q, (outs RegTy:$dst),
                    (ins RegTy:$Vd, RegTy:$Vn,
                    DPR_VFP2:$Vm, VectorIndex32:$lane), []> {
    bit lane;
    let Inst{5} = lane;
    let Constraints = "$dst = $Vd";
    let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane");
    let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (AccumTy (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                 (InputTy RegTy:$Vn),
                                 (InputTy (bitconvert (AccumTy
                                          (ARMvduplane (AccumTy RegTy:$Vm),
                                                        VectorIndex32:$lane)))))),
    (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
}

def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;

defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>;
defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
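// Semantics sketch (per the Armv8.6-A BFDOT description): each f32 lane of
// the destination accumulates the product-sum of the corresponding pair of
// bf16 elements from the two sources, roughly
//   Vd.f32[i] += Vn.bf16[2i]*Vm.bf16[2i] + Vn.bf16[2i+1]*Vm.bf16[2i+1]
// The indexed forms take the bf16 pair of Vm from the selected 32-bit lane.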

class BF16MM<bit Q, RegisterClass RegTy,
             string opc>
   : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
           (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
           N3RegFrm, IIC_VDOTPROD, "", "",
                [(set (v4f32 QPR:$dst), (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
                                                (v8bf16 QPR:$Vn),
                                                (v8bf16 QPR:$Vm)))]> {
   let Constraints = "$dst = $Vd";
   let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
   let DecoderNamespace = "VFPV8";
}

def VMMLA : BF16MM<1, QPR, "vmmla">;
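// Semantics sketch: vmmla.bf16 treats Vn and Vm as 2x4 and 4x2 matrices of
// bf16 elements and accumulates their 2x2 single-precision matrix product
// into the four f32 lanes of Vd (BFMMLA).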

class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
  : N3VCP8<0b00, 0b11, T, 1,
           (outs QPR:$dst), (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
           NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
                [(set (v4f32 QPR:$dst),
                      (OpNode (v4f32 QPR:$Vd),
                              (v8bf16 QPR:$Vn),
                              (v8bf16 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let DecoderNamespace = "VFPV8";
}

def VBF16MALTQ : VBF16MALQ<1, "t", int_arm_neon_bfmlalt>;
def VBF16MALBQ : VBF16MALQ<0, "b", int_arm_neon_bfmlalb>;
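// Semantics sketch: the "b"/"t" forms widen the even- (bottom) or odd-
// numbered (top) bf16 elements of Vn and Vm to f32, multiply them, and
// accumulate into the f32 lanes of Vd (BFMLALB/BFMLALT).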

multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
  def "" : N3VLaneCP8<0, 0b11, T, 1, (outs QPR:$dst),
              (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
               IIC_VMACD, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm$idx", "", []> {
  bits<2> idx;
  let Inst{5} = idx{1};
  let Inst{3} = idx{0};
  let Constraints = "$dst = $Vd";
  let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (v4f32 (OpNode (v4f32 QPR:$Vd),
                   (v8bf16 QPR:$Vn),
                   (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
                            VectorIndex16:$lane)))),
    (!cast<Instruction>(NAME) QPR:$Vd,
                              QPR:$Vn,
                              (EXTRACT_SUBREG QPR:$Vm,
                                (DSubReg_i16_reg VectorIndex16:$lane)),
                              (SubReg_i16_lane VectorIndex16:$lane))>;
}

defm VBF16MALTQI : VBF16MALQI<1, "t", int_arm_neon_bfmlalt>;
defm VBF16MALBQI : VBF16MALQI<0, "b", int_arm_neon_bfmlalb>;

def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
                    (outs DPR:$Vd), (ins QPR:$Vm),
                    NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "", []>;
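// Semantics sketch: converts the four f32 elements of Qm to bf16 and writes
// the results to Dd.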
}
// End of BFloat16 instructions
