xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td (revision ec4deee4e4f2aef1b97d9424f25d04e91fd7dc10)
1//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the ARM NEON instruction set.
10//
11//===----------------------------------------------------------------------===//
12
13
14//===----------------------------------------------------------------------===//
15// NEON-specific Operands.
16//===----------------------------------------------------------------------===//
17def nModImm : Operand<i32> {
18  let PrintMethod = "printVMOVModImmOperand";
19}
20
21def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
22def nImmSplatI8 : Operand<i32> {
23  let PrintMethod = "printVMOVModImmOperand";
24  let ParserMatchClass = nImmSplatI8AsmOperand;
25}
26def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
27def nImmSplatI16 : Operand<i32> {
28  let PrintMethod = "printVMOVModImmOperand";
29  let ParserMatchClass = nImmSplatI16AsmOperand;
30}
31def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
32def nImmSplatI32 : Operand<i32> {
33  let PrintMethod = "printVMOVModImmOperand";
34  let ParserMatchClass = nImmSplatI32AsmOperand;
35}
36def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
37def nImmSplatNotI16 : Operand<i32> {
38  let ParserMatchClass = nImmSplatNotI16AsmOperand;
39}
40def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
41def nImmSplatNotI32 : Operand<i32> {
42  let ParserMatchClass = nImmSplatNotI32AsmOperand;
43}
44def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
45def nImmVMOVI32 : Operand<i32> {
46  let PrintMethod = "printVMOVModImmOperand";
47  let ParserMatchClass = nImmVMOVI32AsmOperand;
48}
49
50class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
51  : AsmOperandClass {
52  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
53  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
54  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
55}
56
57class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
58  : AsmOperandClass {
59  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
60  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
61  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
62}
63
64class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
65  let PrintMethod = "printVMOVModImmOperand";
66  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
67}
68
69class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
70  let PrintMethod = "printVMOVModImmOperand";
71  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
72}
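// For illustration, a hypothetical instantiation nImmVMOVIReplicate<i8, i64>
// resolves the string concatenations above to:
//   Name            = "NEONi64vmovi8Replicate"
//   PredicateMethod = "isNEONmovReplicate<8, 64>"
//   RenderMethod    = "addNEONvmovi8ReplicateOperands"
// i.e. a 64-bit immediate formed by replicating an 8-bit pattern.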
73
74def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
75def nImmVMOVI32Neg : Operand<i32> {
76  let PrintMethod = "printVMOVModImmOperand";
77  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
78}
79def nImmVMOVF32 : Operand<i32> {
80  let PrintMethod = "printFPImmOperand";
81  let ParserMatchClass = FPImmOperand;
82}
83def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
84def nImmSplatI64 : Operand<i32> {
85  let PrintMethod = "printVMOVModImmOperand";
86  let ParserMatchClass = nImmSplatI64AsmOperand;
87}
88
89def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
90def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
91def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
92def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
93def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
94  return ((uint64_t)Imm) < 8;
95}]> {
96  let ParserMatchClass = VectorIndex8Operand;
97  let PrintMethod = "printVectorIndex";
98  let MIOperandInfo = (ops i32imm);
99}
100def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
101  return ((uint64_t)Imm) < 4;
102}]> {
103  let ParserMatchClass = VectorIndex16Operand;
104  let PrintMethod = "printVectorIndex";
105  let MIOperandInfo = (ops i32imm);
106}
107def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
108  return ((uint64_t)Imm) < 2;
109}]> {
110  let ParserMatchClass = VectorIndex32Operand;
111  let PrintMethod = "printVectorIndex";
112  let MIOperandInfo = (ops i32imm);
113}
114def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
115  return ((uint64_t)Imm) < 1;
116}]> {
117  let ParserMatchClass = VectorIndex64Operand;
118  let PrintMethod = "printVectorIndex";
119  let MIOperandInfo = (ops i32imm);
120}
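// A D register is 64 bits wide, so each index above is bounded by the number
// of elements of that size in a D register (64 / element bits): e.g.
// VectorIndex16 accepts lanes 0-3 (64/16 = 4) and VectorIndex64 only lane 0.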
121
122// Register list of one D register.
123def VecListOneDAsmOperand : AsmOperandClass {
124  let Name = "VecListOneD";
125  let ParserMethod = "parseVectorList";
126  let RenderMethod = "addVecListOperands";
127}
128def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
129  let ParserMatchClass = VecListOneDAsmOperand;
130}
131// Register list of two sequential D registers.
132def VecListDPairAsmOperand : AsmOperandClass {
133  let Name = "VecListDPair";
134  let ParserMethod = "parseVectorList";
135  let RenderMethod = "addVecListOperands";
136}
137def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
138  let ParserMatchClass = VecListDPairAsmOperand;
139}
140// Register list of three sequential D registers.
141def VecListThreeDAsmOperand : AsmOperandClass {
142  let Name = "VecListThreeD";
143  let ParserMethod = "parseVectorList";
144  let RenderMethod = "addVecListOperands";
145}
146def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
147  let ParserMatchClass = VecListThreeDAsmOperand;
148}
149// Register list of four sequential D registers.
150def VecListFourDAsmOperand : AsmOperandClass {
151  let Name = "VecListFourD";
152  let ParserMethod = "parseVectorList";
153  let RenderMethod = "addVecListOperands";
154}
155def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
156  let ParserMatchClass = VecListFourDAsmOperand;
157}
158// Register list of two D registers spaced by 2 (two sequential Q registers).
159def VecListDPairSpacedAsmOperand : AsmOperandClass {
160  let Name = "VecListDPairSpaced";
161  let ParserMethod = "parseVectorList";
162  let RenderMethod = "addVecListOperands";
163}
164def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
165  let ParserMatchClass = VecListDPairSpacedAsmOperand;
166}
167// Register list of three D registers spaced by 2 (three Q registers).
168def VecListThreeQAsmOperand : AsmOperandClass {
169  let Name = "VecListThreeQ";
170  let ParserMethod = "parseVectorList";
171  let RenderMethod = "addVecListOperands";
172}
173def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
174  let ParserMatchClass = VecListThreeQAsmOperand;
175}
176// Register list of four D registers spaced by 2 (four Q registers).
177def VecListFourQAsmOperand : AsmOperandClass {
178  let Name = "VecListFourQ";
179  let ParserMethod = "parseVectorList";
180  let RenderMethod = "addVecListOperands";
181}
182def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
183  let ParserMatchClass = VecListFourQAsmOperand;
184}
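// In assembly these operands print as NEON register lists, e.g. (register
// numbers are illustrative):
//   VecListDPair        {d0, d1}
//   VecListDPairSpaced  {d0, d2}
//   VecListThreeQ       {d0, d2, d4}
//   VecListFourQ        {d0, d2, d4, d6}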
185
186// Register list of one D register, with "all lanes" subscripting.
187def VecListOneDAllLanesAsmOperand : AsmOperandClass {
188  let Name = "VecListOneDAllLanes";
189  let ParserMethod = "parseVectorList";
190  let RenderMethod = "addVecListOperands";
191}
192def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
193  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
194}
195// Register list of two D registers, with "all lanes" subscripting.
196def VecListDPairAllLanesAsmOperand : AsmOperandClass {
197  let Name = "VecListDPairAllLanes";
198  let ParserMethod = "parseVectorList";
199  let RenderMethod = "addVecListOperands";
200}
201def VecListDPairAllLanes : RegisterOperand<DPair,
202                                           "printVectorListTwoAllLanes"> {
203  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
204}
205// Register list of two D registers spaced by 2, with "all lanes" subscripting.
206def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
207  let Name = "VecListDPairSpacedAllLanes";
208  let ParserMethod = "parseVectorList";
209  let RenderMethod = "addVecListOperands";
210}
211def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
212                                         "printVectorListTwoSpacedAllLanes"> {
213  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
214}
215// Register list of three D registers, with "all lanes" subscripting.
216def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
217  let Name = "VecListThreeDAllLanes";
218  let ParserMethod = "parseVectorList";
219  let RenderMethod = "addVecListOperands";
220}
221def VecListThreeDAllLanes : RegisterOperand<DPR,
222                                            "printVectorListThreeAllLanes"> {
223  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
224}
225// Register list of three D registers spaced by 2 (three sequential Q regs).
226def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
227  let Name = "VecListThreeQAllLanes";
228  let ParserMethod = "parseVectorList";
229  let RenderMethod = "addVecListOperands";
230}
231def VecListThreeQAllLanes : RegisterOperand<DPR,
232                                         "printVectorListThreeSpacedAllLanes"> {
233  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
234}
235// Register list of four D registers, with "all lanes" subscripting.
236def VecListFourDAllLanesAsmOperand : AsmOperandClass {
237  let Name = "VecListFourDAllLanes";
238  let ParserMethod = "parseVectorList";
239  let RenderMethod = "addVecListOperands";
240}
241def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
242  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
243}
244// Register list of four D registers spaced by 2 (four sequential Q regs).
245def VecListFourQAllLanesAsmOperand : AsmOperandClass {
246  let Name = "VecListFourQAllLanes";
247  let ParserMethod = "parseVectorList";
248  let RenderMethod = "addVecListOperands";
249}
250def VecListFourQAllLanes : RegisterOperand<DPR,
251                                         "printVectorListFourSpacedAllLanes"> {
252  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
253}
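// The "all lanes" forms correspond to the "[]" subscript in the assembly
// syntax of the duplicating loads, e.g. "vld2.8 {d0[], d1[]}, [r0]" (register
// numbers are illustrative).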
254
255
256// Register list of one D register, with byte lane subscripting.
257def VecListOneDByteIndexAsmOperand : AsmOperandClass {
258  let Name = "VecListOneDByteIndexed";
259  let ParserMethod = "parseVectorList";
260  let RenderMethod = "addVecListIndexedOperands";
261}
262def VecListOneDByteIndexed : Operand<i32> {
263  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
264  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
265}
266// ...with half-word lane subscripting.
267def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
268  let Name = "VecListOneDHWordIndexed";
269  let ParserMethod = "parseVectorList";
270  let RenderMethod = "addVecListIndexedOperands";
271}
272def VecListOneDHWordIndexed : Operand<i32> {
273  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
274  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
275}
276// ...with word lane subscripting.
277def VecListOneDWordIndexAsmOperand : AsmOperandClass {
278  let Name = "VecListOneDWordIndexed";
279  let ParserMethod = "parseVectorList";
280  let RenderMethod = "addVecListIndexedOperands";
281}
282def VecListOneDWordIndexed : Operand<i32> {
283  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
284  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
285}
286
287// Register list of two D registers with byte lane subscripting.
288def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
289  let Name = "VecListTwoDByteIndexed";
290  let ParserMethod = "parseVectorList";
291  let RenderMethod = "addVecListIndexedOperands";
292}
293def VecListTwoDByteIndexed : Operand<i32> {
294  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
295  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
296}
297// ...with half-word lane subscripting.
298def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
299  let Name = "VecListTwoDHWordIndexed";
300  let ParserMethod = "parseVectorList";
301  let RenderMethod = "addVecListIndexedOperands";
302}
303def VecListTwoDHWordIndexed : Operand<i32> {
304  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
305  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
306}
307// ...with word lane subscripting.
308def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
309  let Name = "VecListTwoDWordIndexed";
310  let ParserMethod = "parseVectorList";
311  let RenderMethod = "addVecListIndexedOperands";
312}
313def VecListTwoDWordIndexed : Operand<i32> {
314  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
315  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
316}
317// Register list of two Q registers with half-word lane subscripting.
318def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
319  let Name = "VecListTwoQHWordIndexed";
320  let ParserMethod = "parseVectorList";
321  let RenderMethod = "addVecListIndexedOperands";
322}
323def VecListTwoQHWordIndexed : Operand<i32> {
324  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
325  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
326}
327// ...with word lane subscripting.
328def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
329  let Name = "VecListTwoQWordIndexed";
330  let ParserMethod = "parseVectorList";
331  let RenderMethod = "addVecListIndexedOperands";
332}
333def VecListTwoQWordIndexed : Operand<i32> {
334  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
335  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
336}
337
338
339// Register list of three D registers with byte lane subscripting.
340def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
341  let Name = "VecListThreeDByteIndexed";
342  let ParserMethod = "parseVectorList";
343  let RenderMethod = "addVecListIndexedOperands";
344}
345def VecListThreeDByteIndexed : Operand<i32> {
346  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
347  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
348}
349// ...with half-word lane subscripting.
350def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
351  let Name = "VecListThreeDHWordIndexed";
352  let ParserMethod = "parseVectorList";
353  let RenderMethod = "addVecListIndexedOperands";
354}
355def VecListThreeDHWordIndexed : Operand<i32> {
356  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
357  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
358}
359// ...with word lane subscripting.
360def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
361  let Name = "VecListThreeDWordIndexed";
362  let ParserMethod = "parseVectorList";
363  let RenderMethod = "addVecListIndexedOperands";
364}
365def VecListThreeDWordIndexed : Operand<i32> {
366  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
367  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
368}
369// Register list of three Q registers with half-word lane subscripting.
370def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
371  let Name = "VecListThreeQHWordIndexed";
372  let ParserMethod = "parseVectorList";
373  let RenderMethod = "addVecListIndexedOperands";
374}
375def VecListThreeQHWordIndexed : Operand<i32> {
376  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
377  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
378}
379// ...with word lane subscripting.
380def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
381  let Name = "VecListThreeQWordIndexed";
382  let ParserMethod = "parseVectorList";
383  let RenderMethod = "addVecListIndexedOperands";
384}
385def VecListThreeQWordIndexed : Operand<i32> {
386  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
387  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
388}
389
390// Register list of four D registers with byte lane subscripting.
391def VecListFourDByteIndexAsmOperand : AsmOperandClass {
392  let Name = "VecListFourDByteIndexed";
393  let ParserMethod = "parseVectorList";
394  let RenderMethod = "addVecListIndexedOperands";
395}
396def VecListFourDByteIndexed : Operand<i32> {
397  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
398  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
399}
400// ...with half-word lane subscripting.
401def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
402  let Name = "VecListFourDHWordIndexed";
403  let ParserMethod = "parseVectorList";
404  let RenderMethod = "addVecListIndexedOperands";
405}
406def VecListFourDHWordIndexed : Operand<i32> {
407  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
408  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
409}
410// ...with word lane subscripting.
411def VecListFourDWordIndexAsmOperand : AsmOperandClass {
412  let Name = "VecListFourDWordIndexed";
413  let ParserMethod = "parseVectorList";
414  let RenderMethod = "addVecListIndexedOperands";
415}
416def VecListFourDWordIndexed : Operand<i32> {
417  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
418  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
419}
420// Register list of four Q registers with half-word lane subscripting.
421def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
422  let Name = "VecListFourQHWordIndexed";
423  let ParserMethod = "parseVectorList";
424  let RenderMethod = "addVecListIndexedOperands";
425}
426def VecListFourQHWordIndexed : Operand<i32> {
427  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
428  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
429}
430// ...with word lane subscripting.
431def VecListFourQWordIndexAsmOperand : AsmOperandClass {
432  let Name = "VecListFourQWordIndexed";
433  let ParserMethod = "parseVectorList";
434  let RenderMethod = "addVecListIndexedOperands";
435}
436def VecListFourQWordIndexed : Operand<i32> {
437  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
438  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
439}
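// The lane-indexed forms carry both the register list and the lane, as in
// "vld2.32 {d0[1], d1[1]}, [r0]" (register numbers are illustrative).  As the
// MIOperandInfo above shows, only the first D register and the lane number
// are recorded explicitly.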
440
441def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
442  return cast<LoadSDNode>(N)->getAlignment() >= 8;
443}]>;
444def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
445                                 (store node:$val, node:$ptr), [{
446  return cast<StoreSDNode>(N)->getAlignment() >= 8;
447}]>;
448def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
449  return cast<LoadSDNode>(N)->getAlignment() == 4;
450}]>;
451def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
452                                 (store node:$val, node:$ptr), [{
453  return cast<StoreSDNode>(N)->getAlignment() == 4;
454}]>;
455def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
456  return cast<LoadSDNode>(N)->getAlignment() == 2;
457}]>;
458def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
459                                 (store node:$val, node:$ptr), [{
460  return cast<StoreSDNode>(N)->getAlignment() == 2;
461}]>;
462def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
463  return cast<LoadSDNode>(N)->getAlignment() == 1;
464}]>;
465def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
466                             (store node:$val, node:$ptr), [{
467  return cast<StoreSDNode>(N)->getAlignment() == 1;
468}]>;
469def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
470  return cast<LoadSDNode>(N)->getAlignment() < 4;
471}]>;
472def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
473                                    (store node:$val, node:$ptr), [{
474  return cast<StoreSDNode>(N)->getAlignment() < 4;
475}]>;
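// These fragments key selection off the access alignment: e.g. a load with
// 8- or 16-byte alignment satisfies dword_alignedload, one with exactly
// 4-byte alignment satisfies word_alignedload (used by the VLDMQIA/VSTMQIA
// patterns below), and anything under 4 bytes falls into the
// non_word_aligned forms.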
476
477//===----------------------------------------------------------------------===//
478// NEON-specific DAG Nodes.
479//===----------------------------------------------------------------------===//
480
481def SDTARMVTST    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
482def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVTST>;
483
484// Types for vector shift by immediates.  The "SHX" version is for long and
485// narrow operations where the source and destination vectors have different
486// types.  The "SHINS" version is for shift and insert operations.
487def SDTARMVSHXIMM    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
488                                            SDTCisVT<2, i32>]>;
489def SDTARMVSHINSIMM  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
490                                            SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
491
492def NEONvshrnImm     : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;
493
494def NEONvrshrsImm    : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
495def NEONvrshruImm    : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
496def NEONvrshrnImm    : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;
497
498def NEONvqshlsImm    : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
499def NEONvqshluImm    : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
500def NEONvqshlsuImm   : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
501def NEONvqshrnsImm   : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
502def NEONvqshrnuImm   : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
503def NEONvqshrnsuImm  : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;
504
505def NEONvqrshrnsImm  : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
506def NEONvqrshrnuImm  : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
507def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
508
509def NEONvsliImm      : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
510def NEONvsriImm      : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
511
512def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
513                                           SDTCisVT<2, i32>]>;
514def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
515def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
516
517def NEONvbsl      : SDNode<"ARMISD::VBSL",
518                           SDTypeProfile<1, 3, [SDTCisVec<0>,
519                                                SDTCisSameAs<0, 1>,
520                                                SDTCisSameAs<0, 2>,
521                                                SDTCisSameAs<0, 3>]>>;
522
523def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
524                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
525def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
526
527def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
528                                         SDTCisSameAs<0, 2>,
529                                         SDTCisSameAs<0, 3>]>;
530def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
531def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
532def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
533
534def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
535                                         SDTCisSameAs<1, 2>]>;
536def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
537def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
538
539def SDTARMVTBL1   : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
540                                         SDTCisVT<2, v8i8>]>;
541def SDTARMVTBL2   : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
542                                         SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
543def NEONvtbl1     : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
544def NEONvtbl2     : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
545
546
547def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{
548  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
549  unsigned EltBits = 0;
550  uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
551  return (EltBits == 32 && EltVal == 0);
552}]>;
553
554def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{
555  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
556  unsigned EltBits = 0;
557  uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
558  return (EltBits == 8 && EltVal == 0xff);
559}]>;
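// Both PatLeafs inspect the encoded "modified immediate" operand of an
// ARMvmovImm node: decodeVMOVModImm returns the replicated element value and
// reports its width through EltBits.  All-zeros is matched as a 32-bit splat
// of 0 and all-ones as an 8-bit splat of 0xff; in either case every bit of
// the vector is identical, so the match holds however the value is retyped
// (e.g. "vmov.i8 d0, #0xff" produces all-ones for any element type).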
560
561//===----------------------------------------------------------------------===//
562// NEON load / store instructions
563//===----------------------------------------------------------------------===//
564
565// Use VLDM to load a Q register as a D register pair.
566// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
567def VLDMQIA
568  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
569                    IIC_fpLoad_m, "",
570                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;
571
572// Use VSTM to store a Q register as a D register pair.
573// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
574def VSTMQIA
575  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
576                    IIC_fpStore_m, "",
577                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
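// After register allocation these are expected to become a vldmia/vstmia of
// the two D subregisters of the pair, roughly "vldmia r0, {d0, d1}" for a
// load of q0 (register numbers are illustrative).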
578
579// Classes for VLD* pseudo-instructions with multi-register operands.
580// These are expanded to real instructions after register allocation.
581class VLDQPseudo<InstrItinClass itin>
582  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
583class VLDQWBPseudo<InstrItinClass itin>
584  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
585                (ins addrmode6:$addr, am6offset:$offset), itin,
586                "$addr.addr = $wb">;
587class VLDQWBfixedPseudo<InstrItinClass itin>
588  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
589                (ins addrmode6:$addr), itin,
590                "$addr.addr = $wb">;
591class VLDQWBregisterPseudo<InstrItinClass itin>
592  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
593                (ins addrmode6:$addr, rGPR:$offset), itin,
594                "$addr.addr = $wb">;
595
596class VLDQQPseudo<InstrItinClass itin>
597  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
598class VLDQQWBPseudo<InstrItinClass itin>
599  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
600                (ins addrmode6:$addr, am6offset:$offset), itin,
601                "$addr.addr = $wb">;
602class VLDQQWBfixedPseudo<InstrItinClass itin>
603  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
604                (ins addrmode6:$addr), itin,
605                "$addr.addr = $wb">;
606class VLDQQWBregisterPseudo<InstrItinClass itin>
607  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
608                (ins addrmode6:$addr, rGPR:$offset), itin,
609                "$addr.addr = $wb">;
610
611
612class VLDQQQQPseudo<InstrItinClass itin>
613  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
614                "$src = $dst">;
615class VLDQQQQWBPseudo<InstrItinClass itin>
616  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
617                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
618                "$addr.addr = $wb, $src = $dst">;
619
620let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
621
622//   VLD1     : Vector Load (multiple single elements)
623class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
624  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
625          (ins AddrMode:$Rn), IIC_VLD1,
626          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
627  let Rm = 0b1111;
628  let Inst{4} = Rn{4};
629  let DecoderMethod = "DecodeVLDST1Instruction";
630}
631class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
632  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
633          (ins AddrMode:$Rn), IIC_VLD1x2,
634          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
635  let Rm = 0b1111;
636  let Inst{5-4} = Rn{5-4};
637  let DecoderMethod = "DecodeVLDST1Instruction";
638}
639
640def  VLD1d8   : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
641def  VLD1d16  : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
642def  VLD1d32  : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
643def  VLD1d64  : VLD1D<{1,1,0,?}, "64", addrmode6align64>;
644
645def  VLD1q8   : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
646def  VLD1q16  : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
647def  VLD1q32  : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
648def  VLD1q64  : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
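// In assembly these are e.g. "vld1.16 {d0}, [r0]" for the one-register forms
// and "vld1.16 {d0, d1}, [r0:128]" for the two-register forms (operands are
// illustrative); the AddrMode parameter controls which ":64"/":128" alignment
// suffixes the address operand accepts.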
649
650// ...with address register writeback:
651multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
652  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
653                     (ins AddrMode:$Rn), IIC_VLD1u,
654                     "vld1", Dt, "$Vd, $Rn!",
655                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
656    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
657    let Inst{4} = Rn{4};
658    let DecoderMethod = "DecodeVLDST1Instruction";
659  }
660  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
661                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
662                        "vld1", Dt, "$Vd, $Rn, $Rm",
663                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
664    let Inst{4} = Rn{4};
665    let DecoderMethod = "DecodeVLDST1Instruction";
666  }
667}
668multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
669  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
670                    (ins AddrMode:$Rn), IIC_VLD1x2u,
671                     "vld1", Dt, "$Vd, $Rn!",
672                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
673    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
674    let Inst{5-4} = Rn{5-4};
675    let DecoderMethod = "DecodeVLDST1Instruction";
676  }
677  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
678                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
679                        "vld1", Dt, "$Vd, $Rn, $Rm",
680                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
681    let Inst{5-4} = Rn{5-4};
682    let DecoderMethod = "DecodeVLDST1Instruction";
683  }
684}
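// For the writeback forms, Rm selects the update mode: Rm = 0b1111 means no
// writeback (the plain forms above), Rm = 0b1101 is the _fixed form that
// post-increments $Rn by the number of bytes transferred, written
// "vld1.8 {d0}, [r0]!", and any other Rm is the _register form, written
// "vld1.8 {d0}, [r0], r2" (operands are illustrative).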
685
686defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
687defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
688defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
689defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
690defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
691defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
692defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
693defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
694
695// ...with 3 registers
696class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
697  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
698          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
699          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
700  let Rm = 0b1111;
701  let Inst{4} = Rn{4};
702  let DecoderMethod = "DecodeVLDST1Instruction";
703}
704multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
705  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
706                    (ins AddrMode:$Rn), IIC_VLD1x2u,
707                     "vld1", Dt, "$Vd, $Rn!",
708                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
709    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
710    let Inst{4} = Rn{4};
711    let DecoderMethod = "DecodeVLDST1Instruction";
712  }
713  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
714                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
715                        "vld1", Dt, "$Vd, $Rn, $Rm",
716                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
717    let Inst{4} = Rn{4};
718    let DecoderMethod = "DecodeVLDST1Instruction";
719  }
720}
721
722def VLD1d8T      : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
723def VLD1d16T     : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
724def VLD1d32T     : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
725def VLD1d64T     : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;
726
727defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
728defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
729defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
730defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
731
732def VLD1d8TPseudo  : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
733def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
734def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
735def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
736def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
737def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
738
739def VLD1q8HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
740def VLD1q8LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
741def VLD1q16HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
742def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
743def VLD1q32HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
744def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
745def VLD1q64HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
746def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
747
748// ...with 4 registers
749class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
750  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
751          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
752          "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
753  let Rm = 0b1111;
754  let Inst{5-4} = Rn{5-4};
755  let DecoderMethod = "DecodeVLDST1Instruction";
756}
757multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
758  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
759                    (ins AddrMode:$Rn), IIC_VLD1x2u,
760                     "vld1", Dt, "$Vd, $Rn!",
761                     "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
762    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
763    let Inst{5-4} = Rn{5-4};
764    let DecoderMethod = "DecodeVLDST1Instruction";
765  }
766  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
767                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
768                        "vld1", Dt, "$Vd, $Rn, $Rm",
769                        "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
770    let Inst{5-4} = Rn{5-4};
771    let DecoderMethod = "DecodeVLDST1Instruction";
772  }
773}
774
775def VLD1d8Q      : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
776def VLD1d16Q     : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
777def VLD1d32Q     : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
778def VLD1d64Q     : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
779
780defm VLD1d8Qwb   : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
781defm VLD1d16Qwb  : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
782defm VLD1d32Qwb  : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
783defm VLD1d64Qwb  : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
784
785def VLD1d8QPseudo  : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
786def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
787def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
788def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
789def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
790def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
791
792def VLD1q8LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
793def VLD1q8HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
794def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
795def VLD1q16HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
796def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
797def VLD1q32HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
798def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
799def VLD1q64HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
800
801//   VLD2     : Vector Load (multiple 2-element structures)
802class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
803           InstrItinClass itin, Operand AddrMode>
804  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
805          (ins AddrMode:$Rn), itin,
806          "vld2", Dt, "$Vd, $Rn", "", []> {
807  let Rm = 0b1111;
808  let Inst{5-4} = Rn{5-4};
809  let DecoderMethod = "DecodeVLDST2Instruction";
810}
811
812def  VLD2d8   : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
813                     addrmode6align64or128>, Sched<[WriteVLD2]>;
814def  VLD2d16  : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
815                     addrmode6align64or128>, Sched<[WriteVLD2]>;
816def  VLD2d32  : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
817                     addrmode6align64or128>, Sched<[WriteVLD2]>;
818
819def  VLD2q8   : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
820                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
821def  VLD2q16  : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
822                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
823def  VLD2q32  : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
824                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
825
826def  VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
827def  VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
828def  VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
829
830// ...with address register writeback:
831multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
832                  RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
833  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
834                     (ins AddrMode:$Rn), itin,
835                     "vld2", Dt, "$Vd, $Rn!",
836                     "$Rn.addr = $wb", []> {
837    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
838    let Inst{5-4} = Rn{5-4};
839    let DecoderMethod = "DecodeVLDST2Instruction";
840  }
841  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
842                        (ins AddrMode:$Rn, rGPR:$Rm), itin,
843                        "vld2", Dt, "$Vd, $Rn, $Rm",
844                        "$Rn.addr = $wb", []> {
845    let Inst{5-4} = Rn{5-4};
846    let DecoderMethod = "DecodeVLDST2Instruction";
847  }
848}
849
850defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
851                        addrmode6align64or128>, Sched<[WriteVLD2]>;
852defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
853                        addrmode6align64or128>, Sched<[WriteVLD2]>;
854defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
855                        addrmode6align64or128>, Sched<[WriteVLD2]>;
856
857defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
858                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
859defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
860                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
861defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
862                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
863
864def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
865def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
866def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
867def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
868def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
869def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
870
871// ...with double-spaced registers
872def  VLD2b8    : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
873                      addrmode6align64or128>, Sched<[WriteVLD2]>;
874def  VLD2b16   : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
875                      addrmode6align64or128>, Sched<[WriteVLD2]>;
876def  VLD2b32   : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
877                      addrmode6align64or128>, Sched<[WriteVLD2]>;
878defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
879                        addrmode6align64or128>, Sched<[WriteVLD2]>;
880defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
881                        addrmode6align64or128>, Sched<[WriteVLD2]>;
882defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
883                        addrmode6align64or128>, Sched<[WriteVLD2]>;
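// E.g. "vld2.16 {d0, d1}, [r0]" loads eight halfwords and de-interleaves
// them, even-numbered elements into d0 and odd-numbered elements into d1;
// the double-spaced VLD2b forms use lists such as {d0, d2} (register numbers
// are illustrative).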
884
885//   VLD3     : Vector Load (multiple 3-element structures)
886class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
887  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
888          (ins addrmode6:$Rn), IIC_VLD3,
889          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
890  let Rm = 0b1111;
891  let Inst{4} = Rn{4};
892  let DecoderMethod = "DecodeVLDST3Instruction";
893}
894
895def  VLD3d8   : VLD3D<0b0100, {0,0,0,?}, "8">;
896def  VLD3d16  : VLD3D<0b0100, {0,1,0,?}, "16">;
897def  VLD3d32  : VLD3D<0b0100, {1,0,0,?}, "32">;
898
899def  VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
900def  VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
901def  VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
902
903// ...with address register writeback:
904class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
905  : NLdSt<0, 0b10, op11_8, op7_4,
906          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
907          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
908          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
909          "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
910  let Inst{4} = Rn{4};
911  let DecoderMethod = "DecodeVLDST3Instruction";
912}
913
914def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
915def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
916def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
917
918def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
919def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
920def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
921
922// ...with double-spaced registers:
923def VLD3q8      : VLD3D<0b0101, {0,0,0,?}, "8">;
924def VLD3q16     : VLD3D<0b0101, {0,1,0,?}, "16">;
925def VLD3q32     : VLD3D<0b0101, {1,0,0,?}, "32">;
926def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
927def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
928def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
929
930def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
931def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
932def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
933
934// ...alternate versions to be allocated odd register numbers:
935def VLD3q8oddPseudo   : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
936def VLD3q16oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
937def VLD3q32oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
938
939def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
940def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
941def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
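// E.g. "vld3.8 {d0, d1, d2}, [r0]" loads 24 bytes and de-interleaves every
// third byte into d0, d1 and d2; the 0b0101 "q" forms use double-spaced lists
// such as {d0, d2, d4} (register numbers are illustrative).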
942
943//   VLD4     : Vector Load (multiple 4-element structures)
944class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
945  : NLdSt<0, 0b10, op11_8, op7_4,
946          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
947          (ins addrmode6:$Rn), IIC_VLD4,
948          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
949    Sched<[WriteVLD4]> {
950  let Rm = 0b1111;
951  let Inst{5-4} = Rn{5-4};
952  let DecoderMethod = "DecodeVLDST4Instruction";
953}
954
955def  VLD4d8   : VLD4D<0b0000, {0,0,?,?}, "8">;
956def  VLD4d16  : VLD4D<0b0000, {0,1,?,?}, "16">;
957def  VLD4d32  : VLD4D<0b0000, {1,0,?,?}, "32">;
958
959def  VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
960def  VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
961def  VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
962
963// ...with address register writeback:
964class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
965  : NLdSt<0, 0b10, op11_8, op7_4,
966          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
967          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
968          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
969          "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
970  let Inst{5-4} = Rn{5-4};
971  let DecoderMethod = "DecodeVLDST4Instruction";
972}
973
974def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
975def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
976def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
977
978def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
979def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
980def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
981
982// ...with double-spaced registers:
983def VLD4q8      : VLD4D<0b0001, {0,0,?,?}, "8">;
984def VLD4q16     : VLD4D<0b0001, {0,1,?,?}, "16">;
985def VLD4q32     : VLD4D<0b0001, {1,0,?,?}, "32">;
986def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
987def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
988def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
989
990def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
991def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
992def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
993
994// ...alternate versions to be allocated odd register numbers:
995def VLD4q8oddPseudo   : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
996def VLD4q16oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
997def VLD4q32oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
998
999def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1000def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1001def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1002
1003} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1004
1005// Classes for VLD*LN pseudo-instructions with multi-register operands.
1006// These are expanded to real instructions after register allocation.
1007class VLDQLNPseudo<InstrItinClass itin>
1008  : PseudoNLdSt<(outs QPR:$dst),
1009                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
1010                itin, "$src = $dst">;
1011class VLDQLNWBPseudo<InstrItinClass itin>
1012  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
1013                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
1014                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1015class VLDQQLNPseudo<InstrItinClass itin>
1016  : PseudoNLdSt<(outs QQPR:$dst),
1017                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
1018                itin, "$src = $dst">;
1019class VLDQQLNWBPseudo<InstrItinClass itin>
1020  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
1021                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
1022                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1023class VLDQQQQLNPseudo<InstrItinClass itin>
1024  : PseudoNLdSt<(outs QQQQPR:$dst),
1025                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
1026                itin, "$src = $dst">;
1027class VLDQQQQLNWBPseudo<InstrItinClass itin>
1028  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
1029                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
1030                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1031
1032//   VLD1LN   : Vector Load (single element to one lane)
1033class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1034             PatFrag LoadOp>
1035  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
1036          (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
1037          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1038          "$src = $Vd",
1039          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1040                                         (i32 (LoadOp addrmode6:$Rn)),
1041                                         imm:$lane))]> {
1042  let Rm = 0b1111;
1043  let DecoderMethod = "DecodeVLD1LN";
1044}
1045class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1046             PatFrag LoadOp>
1047  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
1048          (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
1049          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1050          "$src = $Vd",
1051          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1052                                         (i32 (LoadOp addrmode6oneL32:$Rn)),
1053                                         imm:$lane))]>, Sched<[WriteVLD1]> {
1054  let Rm = 0b1111;
1055  let DecoderMethod = "DecodeVLD1LN";
1056}
1057class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
1058                                                    Sched<[WriteVLD1]> {
1059  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
1060                                               (i32 (LoadOp addrmode6:$addr)),
1061                                               imm:$lane))];
1062}
1063
1064def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
1065  let Inst{7-5} = lane{2-0};
1066}
1067def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
1068  let Inst{7-6} = lane{1-0};
1069  let Inst{5-4} = Rn{5-4};
1070}
1071def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
1072  let Inst{7} = lane{0};
1073  let Inst{5-4} = Rn{5-4};
1074}
1075
1076def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
1077def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
1078def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
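// The patterns above let a scalar load feeding a vector_insert be selected as
// a single lane load, e.g. inserting a loaded i32 into lane 1 of a v2i32
// becomes "vld1.32 {d0[1]}, [r0]" (register numbers are illustrative).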
1079
1080let Predicates = [HasNEON] in {
1081def : Pat<(vector_insert (v4f16 DPR:$src),
1082                         (f16 (load addrmode6:$addr)), imm:$lane),
1083          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
1084def : Pat<(vector_insert (v8f16 QPR:$src),
1085                         (f16 (load addrmode6:$addr)), imm:$lane),
1086          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1087def : Pat<(vector_insert (v2f32 DPR:$src),
1088                         (f32 (load addrmode6:$addr)), imm:$lane),
1089          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
1090def : Pat<(vector_insert (v4f32 QPR:$src),
1091                         (f32 (load addrmode6:$addr)), imm:$lane),
1092          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1093
1094// A 64-bit subvector insert to the first 128-bit vector position
1095// is a subregister copy that needs no instruction.
1096def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
1097          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1098def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
1099          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1100def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
1101          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1102def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
1103          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1104def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
1105          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1106def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
1107          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1108}
1109
1110
1111let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1112
1113// ...with address register writeback:
1114class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1115  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
1116          (ins addrmode6:$Rn, am6offset:$Rm,
1117           DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
1118          "\\{$Vd[$lane]\\}, $Rn$Rm",
1119          "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1120  let DecoderMethod = "DecodeVLD1LN";
1121}
1122
1123def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
1124  let Inst{7-5} = lane{2-0};
1125}
1126def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
1127  let Inst{7-6} = lane{1-0};
1128  let Inst{4}   = Rn{4};
1129}
1130def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
1131  let Inst{7} = lane{0};
1132  let Inst{5} = Rn{4};
1133  let Inst{4} = Rn{4};
1134}
1135
1136def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1137def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1138def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1139
1140//   VLD2LN   : Vector Load (single 2-element structure to one lane)
1141class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1142  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
1143          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
1144          IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
1145          "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
1146  let Rm = 0b1111;
1147  let Inst{4}   = Rn{4};
1148  let DecoderMethod = "DecodeVLD2LN";
1149}
1150
1151def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
1152  let Inst{7-5} = lane{2-0};
1153}
1154def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
1155  let Inst{7-6} = lane{1-0};
1156}
1157def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
1158  let Inst{7} = lane{0};
1159}
1160
1161def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1162def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1163def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1164
1165// ...with double-spaced registers:
1166def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
1167  let Inst{7-6} = lane{1-0};
1168}
1169def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
1170  let Inst{7} = lane{0};
1171}
1172
1173def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1174def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1175
1176// ...with address register writeback:
1177class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1178  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
1179          (ins addrmode6:$Rn, am6offset:$Rm,
1180           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
1181          "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
1182          "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
1183  let Inst{4}   = Rn{4};
1184  let DecoderMethod = "DecodeVLD2LN";
1185}
1186
1187def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
1188  let Inst{7-5} = lane{2-0};
1189}
1190def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
1191  let Inst{7-6} = lane{1-0};
1192}
1193def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
1194  let Inst{7} = lane{0};
1195}
1196
1197def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1198def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1199def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1200
1201def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
1202  let Inst{7-6} = lane{1-0};
1203}
1204def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
1205  let Inst{7} = lane{0};
1206}
1207
1208def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1209def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1210
1211//   VLD3LN   : Vector Load (single 3-element structure to one lane)
1212class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1213  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1214          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
1215          nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
1216          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
1217          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
1218  let Rm = 0b1111;
1219  let DecoderMethod = "DecodeVLD3LN";
1220}
1221
1222def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
1223  let Inst{7-5} = lane{2-0};
1224}
1225def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
1226  let Inst{7-6} = lane{1-0};
1227}
1228def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
1229  let Inst{7}   = lane{0};
1230}
1231
1232def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1233def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1234def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1235
1236// ...with double-spaced registers:
1237def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
1238  let Inst{7-6} = lane{1-0};
1239}
1240def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
1241  let Inst{7}   = lane{0};
1242}
1243
1244def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1245def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1246
1247// ...with address register writeback:
1248class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1249  : NLdStLn<1, 0b10, op11_8, op7_4,
1250          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1251          (ins addrmode6:$Rn, am6offset:$Rm,
1252           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
1253          IIC_VLD3lnu, "vld3", Dt,
1254          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
1255          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
1256          []>, Sched<[WriteVLD2]> {
1257  let DecoderMethod = "DecodeVLD3LN";
1258}
1259
1260def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
1261  let Inst{7-5} = lane{2-0};
1262}
1263def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
1264  let Inst{7-6} = lane{1-0};
1265}
1266def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
1267  let Inst{7} = lane{0};
1268}
1269
1270def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1271def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1272def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1273
1274def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
1275  let Inst{7-6} = lane{1-0};
1276}
1277def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
1278  let Inst{7} = lane{0};
1279}
1280
1281def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1282def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1283
1284//   VLD4LN   : Vector Load (single 4-element structure to one lane)
1285class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1286  : NLdStLn<1, 0b10, op11_8, op7_4,
1287          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1288          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
1289          nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
1290          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
1291          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
1292    Sched<[WriteVLD2]> {
1293  let Rm = 0b1111;
1294  let Inst{4} = Rn{4};
1295  let DecoderMethod = "DecodeVLD4LN";
1296}
1297
1298def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
1299  let Inst{7-5} = lane{2-0};
1300}
1301def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
1302  let Inst{7-6} = lane{1-0};
1303}
1304def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
1305  let Inst{7} = lane{0};
1306  let Inst{5} = Rn{5};
1307}
1308
1309def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1310def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1311def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1312
1313// ...with double-spaced registers:
1314def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
1315  let Inst{7-6} = lane{1-0};
1316}
1317def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
1318  let Inst{7} = lane{0};
1319  let Inst{5} = Rn{5};
1320}
1321
1322def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1323def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1324
1325// ...with address register writeback:
1326class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1327  : NLdStLn<1, 0b10, op11_8, op7_4,
1328          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1329          (ins addrmode6:$Rn, am6offset:$Rm,
1330           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
1331          IIC_VLD4lnu, "vld4", Dt,
1332"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
1333"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
1334          []> {
1335  let Inst{4}   = Rn{4};
1336  let DecoderMethod = "DecodeVLD4LN";
1337}
1338
1339def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
1340  let Inst{7-5} = lane{2-0};
1341}
1342def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
1343  let Inst{7-6} = lane{1-0};
1344}
1345def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
1346  let Inst{7} = lane{0};
1347  let Inst{5} = Rn{5};
1348}
1349
1350def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1351def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1352def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1353
1354def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
1355  let Inst{7-6} = lane{1-0};
1356}
1357def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
1358  let Inst{7} = lane{0};
1359  let Inst{5} = Rn{5};
1360}
1361
1362def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1363def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1364
1365} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1366
1367//   VLD1DUP  : Vector Load (single element to all lanes)
1368class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1369              Operand AddrMode>
1370  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
1371          (ins AddrMode:$Rn),
1372          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
1373          [(set VecListOneDAllLanes:$Vd,
1374                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
1375   Sched<[WriteVLD2]> {
1376  let Rm = 0b1111;
1377  let Inst{4} = Rn{4};
1378  let DecoderMethod = "DecodeVLD1DupInstruction";
1379}
1380def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
1381                         addrmode6dupalignNone>;
1382def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
1383                         addrmode6dupalign16>;
1384def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
1385                         addrmode6dupalign32>;
1386
1387let Predicates = [HasNEON] in {
1388def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
1389          (VLD1DUPd32 addrmode6:$addr)>;
1390}
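
// Example (illustrative): the ARMvdup-of-load pattern above is what a
// load-and-splat such as the vld1_dup_f32 intrinsic lowers to (helper name
// is made up):
//
//   #include <arm_neon.h>
//   float32x2_t splat_load(const float *p) {
//     return vld1_dup_f32(p);        // vld1.32 {d0[]}, [r0]
//   }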
1391
1392class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1393               Operand AddrMode>
1394  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
1395          (ins AddrMode:$Rn), IIC_VLD1dup,
1396          "vld1", Dt, "$Vd, $Rn", "",
1397          [(set VecListDPairAllLanes:$Vd,
1398                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
1399  let Rm = 0b1111;
1400  let Inst{4} = Rn{4};
1401  let DecoderMethod = "DecodeVLD1DupInstruction";
1402}
1403
1404def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
1405                          addrmode6dupalignNone>;
1406def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
1407                          addrmode6dupalign16>;
1408def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
1409                          addrmode6dupalign32>;
1410
1411let Predicates = [HasNEON] in {
1412def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
1413          (VLD1DUPq32 addrmode6:$addr)>;
1414}
1415
1416let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1417// ...with address register writeback:
1418multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1419  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1420                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1421                     (ins AddrMode:$Rn), IIC_VLD1dupu,
1422                     "vld1", Dt, "$Vd, $Rn!",
1423                     "$Rn.addr = $wb", []> {
1424    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1425    let Inst{4} = Rn{4};
1426    let DecoderMethod = "DecodeVLD1DupInstruction";
1427  }
1428  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1429                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1430                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1431                        "vld1", Dt, "$Vd, $Rn, $Rm",
1432                        "$Rn.addr = $wb", []> {
1433    let Inst{4} = Rn{4};
1434    let DecoderMethod = "DecodeVLD1DupInstruction";
1435  }
1436}
1437multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1438  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1439                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1440                     (ins AddrMode:$Rn), IIC_VLD1dupu,
1441                     "vld1", Dt, "$Vd, $Rn!",
1442                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1443    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1444    let Inst{4} = Rn{4};
1445    let DecoderMethod = "DecodeVLD1DupInstruction";
1446  }
1447  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1448                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1449                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1450                        "vld1", Dt, "$Vd, $Rn, $Rm",
1451                        "$Rn.addr = $wb", []> {
1452    let Inst{4} = Rn{4};
1453    let DecoderMethod = "DecodeVLD1DupInstruction";
1454  }
1455}
1456
1457defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
1458defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
1459defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;
1460
1461defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
1462defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
1463defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
1464
1465//   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
1466class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
1467  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
1468          (ins AddrMode:$Rn), IIC_VLD2dup,
1469          "vld2", Dt, "$Vd, $Rn", "", []> {
1470  let Rm = 0b1111;
1471  let Inst{4} = Rn{4};
1472  let DecoderMethod = "DecodeVLD2DupInstruction";
1473}
1474
1475def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes,
1476                         addrmode6dupalign16>;
1477def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
1478                         addrmode6dupalign32>;
1479def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
1480                         addrmode6dupalign64>;
1481
1482// HACK: VLD2DUPd8x2 must be changed at the same time as VLD2b8, or
1483// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
1484// ...with double-spaced registers
1485def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes,
1486                           addrmode6dupalign16>;
1487def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1488                           addrmode6dupalign32>;
1489def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1490                           addrmode6dupalign64>;
1491
1492def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1493def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1494def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1495def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1496def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1497def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1498
1499// ...with address register writeback:
1500multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
1501                     Operand AddrMode> {
1502  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
1503                     (outs VdTy:$Vd, GPR:$wb),
1504                     (ins AddrMode:$Rn), IIC_VLD2dupu,
1505                     "vld2", Dt, "$Vd, $Rn!",
1506                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1507    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1508    let Inst{4} = Rn{4};
1509    let DecoderMethod = "DecodeVLD2DupInstruction";
1510  }
1511  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
1512                        (outs VdTy:$Vd, GPR:$wb),
1513                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
1514                        "vld2", Dt, "$Vd, $Rn, $Rm",
1515                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1516    let Inst{4} = Rn{4};
1517    let DecoderMethod = "DecodeVLD2DupInstruction";
1518  }
1519}
1520
1521defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes,
1522                                addrmode6dupalign16>;
1523defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
1524                                addrmode6dupalign32>;
1525defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
1526                                addrmode6dupalign64>;
1527
1528defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes,
1529                                addrmode6dupalign16>;
1530defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1531                                addrmode6dupalign32>;
1532defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1533                                addrmode6dupalign64>;
1534
1535//   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
1536class VLD3DUP<bits<4> op7_4, string Dt>
1537  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1538          (ins addrmode6dup:$Rn), IIC_VLD3dup,
1539          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
1540    Sched<[WriteVLD2]> {
1541  let Rm = 0b1111;
1542  let Inst{4} = 0;
1543  let DecoderMethod = "DecodeVLD3DupInstruction";
1544}
1545
1546def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
1547def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
1548def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
1549
1550def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1551def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1552def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1553
1554// ...with double-spaced registers (not used for codegen):
1555def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
1556def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
1557def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
1558
1559def VLD3DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1560def VLD3DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1561def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1562def VLD3DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1563def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1564def VLD3DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1565
1566// ...with address register writeback:
1567class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
1568  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1569          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
1570          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
1571          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1572  let Inst{4} = 0;
1573  let DecoderMethod = "DecodeVLD3DupInstruction";
1574}
1575
1576def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8",  addrmode6dupalign64>;
1577def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
1578def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;
1579
1580def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8",  addrmode6dupalign64>;
1581def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
1582def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
1583
1584def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1585def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1586def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1587
1588//   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
1589class VLD4DUP<bits<4> op7_4, string Dt>
1590  : NLdSt<1, 0b10, 0b1111, op7_4,
1591          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1592          (ins addrmode6dup:$Rn), IIC_VLD4dup,
1593          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
1594  let Rm = 0b1111;
1595  let Inst{4} = Rn{4};
1596  let DecoderMethod = "DecodeVLD4DupInstruction";
1597}
1598
1599def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
1600def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
1601def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1602
1603def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1604def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1605def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
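
// Example (illustrative): VLD4DUPd8 corresponds to the vld4_dup_u8
// intrinsic, which loads four consecutive bytes and replicates each one
// across its own D register (function name is hypothetical):
//
//   #include <arm_neon.h>
//   uint8x8x4_t splat_quad(const uint8_t *p) {
//     return vld4_dup_u8(p);         // vld4.8 {d0[], d1[], d2[], d3[]}, [r0]
//   }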
1606
1607// ...with double-spaced registers (not used for codegen):
1608def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
1609def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
1610def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1611
1612def VLD4DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1613def VLD4DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1614def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1615def VLD4DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1616def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1617def VLD4DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1618
1619// ...with address register writeback:
1620class VLD4DUPWB<bits<4> op7_4, string Dt>
1621  : NLdSt<1, 0b10, 0b1111, op7_4,
1622          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1623          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
1624          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
1625          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1626  let Inst{4} = Rn{4};
1627  let DecoderMethod = "DecodeVLD4DupInstruction";
1628}
1629
1630def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
1631def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
1632def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1633
1634def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
1635def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
1636def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1637
1638def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1639def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1640def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1641
1642} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1643
1644let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
1645
1646// Classes for VST* pseudo-instructions with multi-register operands.
1647// These are expanded to real instructions after register allocation.
1648class VSTQPseudo<InstrItinClass itin>
1649  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
1650class VSTQWBPseudo<InstrItinClass itin>
1651  : PseudoNLdSt<(outs GPR:$wb),
1652                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
1653                "$addr.addr = $wb">;
1654class VSTQWBfixedPseudo<InstrItinClass itin>
1655  : PseudoNLdSt<(outs GPR:$wb),
1656                (ins addrmode6:$addr, QPR:$src), itin,
1657                "$addr.addr = $wb">;
1658class VSTQWBregisterPseudo<InstrItinClass itin>
1659  : PseudoNLdSt<(outs GPR:$wb),
1660                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
1661                "$addr.addr = $wb">;
1662class VSTQQPseudo<InstrItinClass itin>
1663  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
1664class VSTQQWBPseudo<InstrItinClass itin>
1665  : PseudoNLdSt<(outs GPR:$wb),
1666                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
1667                "$addr.addr = $wb">;
1668class VSTQQWBfixedPseudo<InstrItinClass itin>
1669  : PseudoNLdSt<(outs GPR:$wb),
1670                (ins addrmode6:$addr, QQPR:$src), itin,
1671                "$addr.addr = $wb">;
1672class VSTQQWBregisterPseudo<InstrItinClass itin>
1673  : PseudoNLdSt<(outs GPR:$wb),
1674                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
1675                "$addr.addr = $wb">;
1676
1677class VSTQQQQPseudo<InstrItinClass itin>
1678  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
1679class VSTQQQQWBPseudo<InstrItinClass itin>
1680  : PseudoNLdSt<(outs GPR:$wb),
1681                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
1682                "$addr.addr = $wb">;
1683
1684//   VST1     : Vector Store (multiple single elements)
1685class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
1686  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
1687          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
1688  let Rm = 0b1111;
1689  let Inst{4} = Rn{4};
1690  let DecoderMethod = "DecodeVLDST1Instruction";
1691}
1692class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
1693  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
1694          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
1695  let Rm = 0b1111;
1696  let Inst{5-4} = Rn{5-4};
1697  let DecoderMethod = "DecodeVLDST1Instruction";
1698}
1699
1700def  VST1d8   : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
1701def  VST1d16  : VST1D<{0,1,0,?}, "16", addrmode6align64>;
1702def  VST1d32  : VST1D<{1,0,0,?}, "32", addrmode6align64>;
1703def  VST1d64  : VST1D<{1,1,0,?}, "64", addrmode6align64>;
1704
1705def  VST1q8   : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
1706def  VST1q16  : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
1707def  VST1q32  : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
1708def  VST1q64  : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
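
// Example (illustrative): the VST1q* definitions cover full Q-register
// stores such as the vst1q_f32 intrinsic (helper name is made up):
//
//   #include <arm_neon.h>
//   void store_vec(float *p, float32x4_t v) {
//     vst1q_f32(p, v);               // vst1.32 {d0, d1}, [r0]
//   }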
1709
1710// ...with address register writeback:
1711multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1712  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
1713                     (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
1714                     "vst1", Dt, "$Vd, $Rn!",
1715                     "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1716    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1717    let Inst{4} = Rn{4};
1718    let DecoderMethod = "DecodeVLDST1Instruction";
1719  }
1720  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
1721                        (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
1722                        IIC_VLD1u,
1723                        "vst1", Dt, "$Vd, $Rn, $Rm",
1724                        "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1725    let Inst{4} = Rn{4};
1726    let DecoderMethod = "DecodeVLDST1Instruction";
1727  }
1728}
1729multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1730  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1731                    (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
1732                     "vst1", Dt, "$Vd, $Rn!",
1733                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1734    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1735    let Inst{5-4} = Rn{5-4};
1736    let DecoderMethod = "DecodeVLDST1Instruction";
1737  }
1738  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1739                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
1740                        IIC_VLD1x2u,
1741                        "vst1", Dt, "$Vd, $Rn, $Rm",
1742                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1743    let Inst{5-4} = Rn{5-4};
1744    let DecoderMethod = "DecodeVLDST1Instruction";
1745  }
1746}
1747
1748defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
1749defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
1750defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
1751defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;
1752
1753defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
1754defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
1755defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
1756defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
1757
1758// ...with 3 registers
1759class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
1760  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
1761          (ins AddrMode:$Rn, VecListThreeD:$Vd),
1762          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
1763  let Rm = 0b1111;
1764  let Inst{4} = Rn{4};
1765  let DecoderMethod = "DecodeVLDST1Instruction";
1766}
1767multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1768  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1769                    (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
1770                     "vst1", Dt, "$Vd, $Rn!",
1771                     "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1772    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1773    let Inst{5-4} = Rn{5-4};
1774    let DecoderMethod = "DecodeVLDST1Instruction";
1775  }
1776  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1777                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
1778                        IIC_VLD1x3u,
1779                        "vst1", Dt, "$Vd, $Rn, $Rm",
1780                        "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1781    let Inst{5-4} = Rn{5-4};
1782    let DecoderMethod = "DecodeVLDST1Instruction";
1783  }
1784}
1785
1786def VST1d8T     : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
1787def VST1d16T    : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
1788def VST1d32T    : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
1789def VST1d64T    : VST1D3<{1,1,0,?}, "64", addrmode6align64>;
1790
1791defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
1792defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
1793defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
1794defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
1795
1796def VST1d8TPseudo             : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1797def VST1d16TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1798def VST1d32TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1799def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1800def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1801def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1802
1803def VST1q8HighTPseudo     : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1804def VST1q8LowTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1805def VST1q16HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1806def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1807def VST1q32HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1808def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1809def VST1q64HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1810def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1811
1812// ...with 4 registers
1813class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
1814  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
1815          (ins AddrMode:$Rn, VecListFourD:$Vd),
1816          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
1817          []>, Sched<[WriteVST4]> {
1818  let Rm = 0b1111;
1819  let Inst{5-4} = Rn{5-4};
1820  let DecoderMethod = "DecodeVLDST1Instruction";
1821}
1822multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1823  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1824                    (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
1825                     "vst1", Dt, "$Vd, $Rn!",
1826                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1827    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1828    let Inst{5-4} = Rn{5-4};
1829    let DecoderMethod = "DecodeVLDST1Instruction";
1830  }
1831  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1832                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1833                        IIC_VLD1x4u,
1834                        "vst1", Dt, "$Vd, $Rn, $Rm",
1835                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1836    let Inst{5-4} = Rn{5-4};
1837    let DecoderMethod = "DecodeVLDST1Instruction";
1838  }
1839}
1840
1841def VST1d8Q     : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
1842def VST1d16Q    : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
1843def VST1d32Q    : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
1844def VST1d64Q    : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
1845
1846defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
1847defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1848defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1849defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
1850
1851def VST1d8QPseudo             : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1852def VST1d16QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1853def VST1d32QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1854def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1855def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1856def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1857
1858def VST1q8HighQPseudo     : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1859def VST1q8LowQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1860def VST1q16HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1861def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1862def VST1q32HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1863def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1864def VST1q64HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1865def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1866
1867//   VST2     : Vector Store (multiple 2-element structures)
1868class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
1869            InstrItinClass itin, Operand AddrMode>
1870  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
1871          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
1872  let Rm = 0b1111;
1873  let Inst{5-4} = Rn{5-4};
1874  let DecoderMethod = "DecodeVLDST2Instruction";
1875}
1876
1877def  VST2d8   : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
1878                     addrmode6align64or128>, Sched<[WriteVST2]>;
1879def  VST2d16  : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
1880                     addrmode6align64or128>, Sched<[WriteVST2]>;
1881def  VST2d32  : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
1882                     addrmode6align64or128>, Sched<[WriteVST2]>;
1883
1884def  VST2q8   : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
1885                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
1886def  VST2q16  : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
1887                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
1888def  VST2q32  : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
1889                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
1890
1891def  VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1892def  VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1893def  VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
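
// Example (illustrative): VST2d8 performs a 2-way interleaving store, the
// usual lowering of the vst2_u8 intrinsic (function name is hypothetical):
//
//   #include <arm_neon.h>
//   void store_interleaved2(uint8_t *p, uint8x8x2_t v) {
//     vst2_u8(p, v);                 // vst2.8 {d0, d1}, [r0]
//   }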
1894
1895// ...with address register writeback:
1896multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
1897                   RegisterOperand VdTy, Operand AddrMode> {
1898  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1899                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
1900                     "vst2", Dt, "$Vd, $Rn!",
1901                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1902    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1903    let Inst{5-4} = Rn{5-4};
1904    let DecoderMethod = "DecodeVLDST2Instruction";
1905  }
1906  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1907                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
1908                        "vst2", Dt, "$Vd, $Rn, $Rm",
1909                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1910    let Inst{5-4} = Rn{5-4};
1911    let DecoderMethod = "DecodeVLDST2Instruction";
1912  }
1913}
1914multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1915  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1916                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
1917                     "vst2", Dt, "$Vd, $Rn!",
1918                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1919    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1920    let Inst{5-4} = Rn{5-4};
1921    let DecoderMethod = "DecodeVLDST2Instruction";
1922  }
1923  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1924                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1925                        IIC_VLD1u,
1926                        "vst2", Dt, "$Vd, $Rn, $Rm",
1927                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1928    let Inst{5-4} = Rn{5-4};
1929    let DecoderMethod = "DecodeVLDST2Instruction";
1930  }
1931}
1932
1933defm VST2d8wb    : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
1934                           addrmode6align64or128>;
1935defm VST2d16wb   : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
1936                           addrmode6align64or128>;
1937defm VST2d32wb   : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
1938                           addrmode6align64or128>;
1939
1940defm VST2q8wb    : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
1941defm VST2q16wb   : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1942defm VST2q32wb   : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1943
1944def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1945def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1946def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1947def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1948def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1949def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1950
1951// ...with double-spaced registers
1952def VST2b8      : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
1953                      addrmode6align64or128>;
1954def VST2b16     : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
1955                      addrmode6align64or128>;
1956def VST2b32     : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
1957                      addrmode6align64or128>;
1958defm VST2b8wb   : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
1959                          addrmode6align64or128>;
1960defm VST2b16wb  : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
1961                          addrmode6align64or128>;
1962defm VST2b32wb  : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
1963                          addrmode6align64or128>;
1964
1965//   VST3     : Vector Store (multiple 3-element structures)
1966class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
1967  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1968          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
1969          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
1970  let Rm = 0b1111;
1971  let Inst{4} = Rn{4};
1972  let DecoderMethod = "DecodeVLDST3Instruction";
1973}
1974
1975def  VST3d8   : VST3D<0b0100, {0,0,0,?}, "8">;
1976def  VST3d16  : VST3D<0b0100, {0,1,0,?}, "16">;
1977def  VST3d32  : VST3D<0b0100, {1,0,0,?}, "32">;
1978
1979def  VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1980def  VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1981def  VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
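
// Example (illustrative): VST3d8 is the 3-way interleaving store used for
// the vst3_u8 intrinsic (helper name is made up):
//
//   #include <arm_neon.h>
//   void store_interleaved3(uint8_t *p, uint8x8x3_t v) {
//     vst3_u8(p, v);                 // vst3.8 {d0, d1, d2}, [r0]
//   }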
1982
1983// ...with address register writeback:
1984class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1985  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1986          (ins addrmode6:$Rn, am6offset:$Rm,
1987           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
1988          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
1989          "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1990  let Inst{4} = Rn{4};
1991  let DecoderMethod = "DecodeVLDST3Instruction";
1992}
1993
1994def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
1995def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
1996def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
1997
1998def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1999def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2000def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2001
2002// ...with double-spaced registers:
2003def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
2004def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
2005def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
2006def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
2007def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
2008def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
2009
2010def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2011def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2012def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2013
2014// ...alternate versions to be allocated odd register numbers:
2015def VST3q8oddPseudo   : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2016def VST3q16oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2017def VST3q32oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2018
2019def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2020def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2021def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2022
2023//   VST4     : Vector Store (multiple 4-element structures)
2024class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
2025  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
2026          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
2027          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
2028          "", []>, Sched<[WriteVST4]> {
2029  let Rm = 0b1111;
2030  let Inst{5-4} = Rn{5-4};
2031  let DecoderMethod = "DecodeVLDST4Instruction";
2032}
2033
2034def  VST4d8   : VST4D<0b0000, {0,0,?,?}, "8">;
2035def  VST4d16  : VST4D<0b0000, {0,1,?,?}, "16">;
2036def  VST4d32  : VST4D<0b0000, {1,0,?,?}, "32">;
2037
2038def  VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2039def  VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2040def  VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
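
// Example (illustrative): VST4d16 is the 4-way interleaving store behind the
// vst4_u16 intrinsic (function name is hypothetical):
//
//   #include <arm_neon.h>
//   void store_interleaved4(uint16_t *p, uint16x4x4_t v) {
//     vst4_u16(p, v);                // vst4.16 {d0, d1, d2, d3}, [r0]
//   }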
2041
2042// ...with address register writeback:
2043class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2044  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
2045          (ins addrmode6:$Rn, am6offset:$Rm,
2046           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
2047           "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
2048          "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
2049  let Inst{5-4} = Rn{5-4};
2050  let DecoderMethod = "DecodeVLDST4Instruction";
2051}
2052
2053def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
2054def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
2055def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
2056
2057def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2058def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2059def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2060
2061// ...with double-spaced registers:
2062def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
2063def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
2064def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
2065def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
2066def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
2067def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
2068
2069def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2070def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2071def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2072
2073// ...alternate versions to be allocated odd register numbers:
2074def VST4q8oddPseudo   : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2075def VST4q16oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2076def VST4q32oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2077
2078def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2079def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2080def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2081
2082} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2083
2084// Classes for VST*LN pseudo-instructions with multi-register operands.
2085// These are expanded to real instructions after register allocation.
2086class VSTQLNPseudo<InstrItinClass itin>
2087  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
2088                itin, "">;
2089class VSTQLNWBPseudo<InstrItinClass itin>
2090  : PseudoNLdSt<(outs GPR:$wb),
2091                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
2092                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2093class VSTQQLNPseudo<InstrItinClass itin>
2094  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
2095                itin, "">;
2096class VSTQQLNWBPseudo<InstrItinClass itin>
2097  : PseudoNLdSt<(outs GPR:$wb),
2098                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
2099                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2100class VSTQQQQLNPseudo<InstrItinClass itin>
2101  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
2102                itin, "">;
2103class VSTQQQQLNWBPseudo<InstrItinClass itin>
2104  : PseudoNLdSt<(outs GPR:$wb),
2105                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
2106                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2107
2108//   VST1LN   : Vector Store (single element from one lane)
2109class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2110             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
2111  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2112          (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
2113          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
2114          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
2115     Sched<[WriteVST1]> {
2116  let Rm = 0b1111;
2117  let DecoderMethod = "DecodeVST1LN";
2118}
2119class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2120  : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
2121  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2122                          addrmode6:$addr)];
2123}
2124
2125def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
2126                       ARMvgetlaneu, addrmode6> {
2127  let Inst{7-5} = lane{2-0};
2128}
2129def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
2130                       ARMvgetlaneu, addrmode6> {
2131  let Inst{7-6} = lane{1-0};
2132  let Inst{4}   = Rn{4};
2133}
2134
2135def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
2136                       addrmode6oneL32> {
2137  let Inst{7}   = lane{0};
2138  let Inst{5-4} = Rn{5-4};
2139}
2140
2141def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
2142def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
2143def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
2144
2145let Predicates = [HasNEON] in {
2146def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
2147          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
2148def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
2149          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2150
2151def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
2152          (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
2153def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
2154          (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2155}
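
// Example (illustrative): the extractelt/store patterns above are what a
// single-lane store such as vst1q_lane_f32 selects (helper name is made up;
// lane 2 of a Q register is lane 0 of its high D sub-register):
//
//   #include <arm_neon.h>
//   void store_lane(float *p, float32x4_t v) {
//     vst1q_lane_f32(p, v, 2);       // vst1.32 {d1[0]}, [r0]
//   }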
2156
2157// ...with address register writeback:
2158class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2159               PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
2160  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2161          (ins AddrMode:$Rn, am6offset:$Rm,
2162           DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
2163          "\\{$Vd[$lane]\\}, $Rn$Rm",
2164          "$Rn.addr = $wb",
2165          [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
2166                                  AddrMode:$Rn, am6offset:$Rm))]>,
2167    Sched<[WriteVST1]> {
2168  let DecoderMethod = "DecodeVST1LN";
2169}
2170class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2171  : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
2172  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2173                                        addrmode6:$addr, am6offset:$offset))];
2174}
2175
2176def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
2177                             ARMvgetlaneu, addrmode6> {
2178  let Inst{7-5} = lane{2-0};
2179}
2180def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
2181                             ARMvgetlaneu, addrmode6> {
2182  let Inst{7-6} = lane{1-0};
2183  let Inst{4}   = Rn{4};
2184}
2185def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
2186                             extractelt, addrmode6oneL32> {
2187  let Inst{7}   = lane{0};
2188  let Inst{5-4} = Rn{5-4};
2189}
2190
2191def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
2192def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16, ARMvgetlaneu>;
2193def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
2194
2195let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
2196
2197//   VST2LN   : Vector Store (single 2-element structure from one lane)
2198class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2199  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2200          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
2201          IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
2202          "", []>, Sched<[WriteVST1]> {
2203  let Rm = 0b1111;
2204  let Inst{4}   = Rn{4};
2205  let DecoderMethod = "DecodeVST2LN";
2206}
2207
2208def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
2209  let Inst{7-5} = lane{2-0};
2210}
2211def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
2212  let Inst{7-6} = lane{1-0};
2213}
2214def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
2215  let Inst{7}   = lane{0};
2216}
2217
2218def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2219def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2220def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2221
2222// ...with double-spaced registers:
2223def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
2224  let Inst{7-6} = lane{1-0};
2225  let Inst{4}   = Rn{4};
2226}
2227def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
2228  let Inst{7}   = lane{0};
2229  let Inst{4}   = Rn{4};
2230}
2231
2232def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2233def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2234
2235// ...with address register writeback:
2236class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2237  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2238          (ins addrmode6:$Rn, am6offset:$Rm,
2239           DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
2240          "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
2241          "$Rn.addr = $wb", []> {
2242  let Inst{4}   = Rn{4};
2243  let DecoderMethod = "DecodeVST2LN";
2244}
2245
2246def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
2247  let Inst{7-5} = lane{2-0};
2248}
2249def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
2250  let Inst{7-6} = lane{1-0};
2251}
2252def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
2253  let Inst{7}   = lane{0};
2254}
2255
2256def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2257def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2258def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2259
2260def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
2261  let Inst{7-6} = lane{1-0};
2262}
2263def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
2264  let Inst{7}   = lane{0};
2265}
2266
2267def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2268def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2269
2270//   VST3LN   : Vector Store (single 3-element structure from one lane)
2271class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2272  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2273          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
2274           nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
2275          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
2276    Sched<[WriteVST2]> {
2277  let Rm = 0b1111;
2278  let DecoderMethod = "DecodeVST3LN";
2279}
2280
2281def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
2282  let Inst{7-5} = lane{2-0};
2283}
2284def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
2285  let Inst{7-6} = lane{1-0};
2286}
2287def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
2288  let Inst{7}   = lane{0};
2289}
2290
2291def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2292def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2293def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2294
2295// ...with double-spaced registers:
2296def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
2297  let Inst{7-6} = lane{1-0};
2298}
2299def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
2300  let Inst{7}   = lane{0};
2301}
2302
2303def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2304def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2305
2306// ...with address register writeback:
2307class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2308  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2309          (ins addrmode6:$Rn, am6offset:$Rm,
2310           DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
2311          IIC_VST3lnu, "vst3", Dt,
2312          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
2313          "$Rn.addr = $wb", []> {
2314  let DecoderMethod = "DecodeVST3LN";
2315}
2316
2317def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
2318  let Inst{7-5} = lane{2-0};
2319}
2320def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
2321  let Inst{7-6} = lane{1-0};
2322}
2323def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
2324  let Inst{7}   = lane{0};
2325}
2326
2327def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2328def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2329def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2330
2331def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
2332  let Inst{7-6} = lane{1-0};
2333}
2334def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
2335  let Inst{7}   = lane{0};
2336}
2337
2338def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2339def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2340
2341//   VST4LN   : Vector Store (single 4-element structure from one lane)
2342class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2343  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2344          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
2345           nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
2346          "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
2347          "", []>, Sched<[WriteVST2]> {
2348  let Rm = 0b1111;
2349  let Inst{4} = Rn{4};
2350  let DecoderMethod = "DecodeVST4LN";
2351}
2352
2353def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
2354  let Inst{7-5} = lane{2-0};
2355}
2356def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
2357  let Inst{7-6} = lane{1-0};
2358}
2359def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
2360  let Inst{7}   = lane{0};
2361  let Inst{5} = Rn{5};
2362}
2363
2364def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2365def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2366def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2367
2368// ...with double-spaced registers:
2369def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
2370  let Inst{7-6} = lane{1-0};
2371}
2372def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
2373  let Inst{7}   = lane{0};
2374  let Inst{5} = Rn{5};
2375}
2376
2377def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2378def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2379
2380// ...with address register writeback:
2381class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2382  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2383          (ins addrmode6:$Rn, am6offset:$Rm,
2384           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
2385          IIC_VST4lnu, "vst4", Dt,
2386  "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
2387          "$Rn.addr = $wb", []> {
2388  let Inst{4} = Rn{4};
2389  let DecoderMethod = "DecodeVST4LN";
2390}
2391
2392def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
2393  let Inst{7-5} = lane{2-0};
2394}
2395def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
2396  let Inst{7-6} = lane{1-0};
2397}
2398def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
2399  let Inst{7}   = lane{0};
2400  let Inst{5} = Rn{5};
2401}
2402
2403def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2404def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2405def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2406
2407def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
2408  let Inst{7-6} = lane{1-0};
2409}
2410def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
2411  let Inst{7}   = lane{0};
2412  let Inst{5} = Rn{5};
2413}
2414
2415def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2416def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2417
2418} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2419
2420// Use vld1/vst1 for unaligned f64 load / store
2421let Predicates = [IsLE,HasNEON] in {
2422def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
2423          (VLD1d16 addrmode6:$addr)>;
2424def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
2425          (VST1d16 addrmode6:$addr, DPR:$value)>;
2426def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
2427          (VLD1d8 addrmode6:$addr)>;
2428def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
2429          (VST1d8 addrmode6:$addr, DPR:$value)>;
2430}
2431let Predicates = [IsBE,HasNEON] in {
2432def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
2433          (VLD1d64 addrmode6:$addr)>;
2434def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
2435          (VST1d64 addrmode6:$addr, DPR:$value)>;
2436}
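// The element size chosen here only matters for endianness: on little-endian
// targets every VLD1/VST1 element size yields the same register contents, so
// the patterns above simply use the widest element size the known alignment
// permits.  On big-endian targets the 64-bit element form is used instead so
// the f64 value keeps its natural byte order within the D register.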
2437
2438// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
2439// load / store if it's legal.
2440let Predicates = [HasNEON] in {
2441def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
2442          (VLD1q64 addrmode6:$addr)>;
2443def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2444          (VST1q64 addrmode6:$addr, QPR:$value)>;
2445}
2446let Predicates = [IsLE,HasNEON] in {
2447def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
2448          (VLD1q32 addrmode6:$addr)>;
2449def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2450          (VST1q32 addrmode6:$addr, QPR:$value)>;
2451def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
2452          (VLD1q16 addrmode6:$addr)>;
2453def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2454          (VST1q16 addrmode6:$addr, QPR:$value)>;
2455def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
2456          (VLD1q8 addrmode6:$addr)>;
2457def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2458          (VST1q8 addrmode6:$addr, QPR:$value)>;
2459}
2460
2461//===----------------------------------------------------------------------===//
2462// NEON pattern fragments
2463//===----------------------------------------------------------------------===//
2464
2465// Extract D sub-registers of Q registers.
2466def DSubReg_i8_reg  : SDNodeXForm<imm, [{
2467  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2468  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N),
2469                                   MVT::i32);
2470}]>;
2471def DSubReg_i16_reg : SDNodeXForm<imm, [{
2472  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2473  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N),
2474                                   MVT::i32);
2475}]>;
2476def DSubReg_i32_reg : SDNodeXForm<imm, [{
2477  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2478  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N),
2479                                   MVT::i32);
2480}]>;
2481def DSubReg_f64_reg : SDNodeXForm<imm, [{
2482  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2483  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N),
2484                                   MVT::i32);
2485}]>;
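// For example, with 8-bit elements Q-register lane 10 maps to dsub_1
// (10/8 == 1), with 16-bit elements lane 5 maps to dsub_1 (5/4 == 1), and
// with 32-bit elements lane 3 maps to dsub_1 (3/2 == 1); DSubReg_f64_reg is
// the identity mapping, since each f64 element occupies a whole D register.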
2486
2487// Extract S sub-registers of Q/D registers.
2488def SSubReg_f32_reg : SDNodeXForm<imm, [{
2489  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
2490  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N),
2491                                   MVT::i32);
2492}]>;
2493
2494// Extract S sub-registers of Q/D registers containing a given f16 lane.
2495def SSubReg_f16_reg : SDNodeXForm<imm, [{
2496  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
2497  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue()/2, SDLoc(N),
2498                                   MVT::i32);
2499}]>;
2500
2501// Translate lane numbers from Q registers to D subregs.
2502def SubReg_i8_lane  : SDNodeXForm<imm, [{
2503  return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32);
2504}]>;
2505def SubReg_i16_lane : SDNodeXForm<imm, [{
2506  return CurDAG->getTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32);
2507}]>;
2508def SubReg_i32_lane : SDNodeXForm<imm, [{
2509  return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32);
2510}]>;
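// A reg transform and a lane transform are normally used together: the same
// Q-register lane index selects the D sub-register and is then renumbered
// within it.  Illustrative sketch only (the node and instruction names are
// hypothetical, not definitions from this file):
//
//   def : Pat<(i32 (MyExtractLane (v8i16 QPR:$src), imm:$lane)),
//             (MyExtractLaneD (v4i16 (EXTRACT_SUBREG QPR:$src,
//                                     (DSubReg_i16_reg imm:$lane))),
//                             (SubReg_i16_lane imm:$lane))>;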
2511
2512//===----------------------------------------------------------------------===//
2513// Instruction Classes
2514//===----------------------------------------------------------------------===//
2515
2516// Basic 2-register operations: double- and quad-register.
2517class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2518           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2519           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2520  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2521        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
2522        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
2523class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2524           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2525           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2526  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2527        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
2528        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
2529
2530// Basic 2-register intrinsics, both double- and quad-register.
2531class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2532              bits<2> op17_16, bits<5> op11_7, bit op4,
2533              InstrItinClass itin, string OpcodeStr, string Dt,
2534              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2535  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2536        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2537        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2538class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2539              bits<2> op17_16, bits<5> op11_7, bit op4,
2540              InstrItinClass itin, string OpcodeStr, string Dt,
2541              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2542  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2543        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2544        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2545
2546// Same as above, but not predicated.
2547class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
2548              InstrItinClass itin, string OpcodeStr, string Dt,
2549              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2550  : N2Vnp<op19_18, op17_16, op10_8, op7, 0,  (outs DPR:$Vd), (ins DPR:$Vm),
2551          itin, OpcodeStr, Dt,
2552          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2553
2554class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
2555              InstrItinClass itin, string OpcodeStr, string Dt,
2556              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2557  : N2Vnp<op19_18, op17_16, op10_8, op7, 1,  (outs QPR:$Vd), (ins QPR:$Vm),
2558          itin, OpcodeStr, Dt,
2559          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2560
2561// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
2562class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2563              bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2564              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2565  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,  (outs QPR:$Vd), (ins QPR:$Vm),
2566          itin, OpcodeStr, Dt,
2567          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2568
2569// Same as N2VQIntXnp but with Vd as a src register.
2570class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2571              bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2572              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2573  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
2574          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
2575          itin, OpcodeStr, Dt,
2576          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
2577  let Constraints = "$src = $Vd";
2578}
2579
2580// Narrow 2-register operations.
2581class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2582           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2583           InstrItinClass itin, string OpcodeStr, string Dt,
2584           ValueType TyD, ValueType TyQ, SDNode OpNode>
2585  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2586        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2587        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
2588
2589// Narrow 2-register intrinsics.
2590class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2591              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2592              InstrItinClass itin, string OpcodeStr, string Dt,
2593              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
2594  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2595        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2596        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
2597
2598// Long 2-register operations (currently only used for VMOVL).
2599class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2600           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2601           InstrItinClass itin, string OpcodeStr, string Dt,
2602           ValueType TyQ, ValueType TyD, SDNode OpNode>
2603  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2604        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2605        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
2606
2607// Long 2-register intrinsics.
2608class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2609              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2610              InstrItinClass itin, string OpcodeStr, string Dt,
2611              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
2612  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2613        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2614        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
2615
2616// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
2617class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
2618  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
2619        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
2620        OpcodeStr, Dt, "$Vd, $Vm",
2621        "$src1 = $Vd, $src2 = $Vm", []>;
2622class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
2623                  InstrItinClass itin, string OpcodeStr, string Dt>
2624  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
2625        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
2626        "$src1 = $Vd, $src2 = $Vm", []>;
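// VTRN, VZIP and VUZP rewrite both of their operands, so these classes model
// the result as two outputs, each tied back to the corresponding input
// register through the "$src1 = $Vd, $src2 = $Vm" constraint.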
2627
2628// Basic 3-register operations: double- and quad-register.
2629class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2630           InstrItinClass itin, string OpcodeStr, string Dt,
2631           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2632  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2633        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2634        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2635        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2636  // All of these have a two-operand InstAlias.
2637  let TwoOperandAliasConstraint = "$Vn = $Vd";
2638  let isCommutable = Commutable;
2639}
2640// Same as N3VD but no data type.
2641class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2642           InstrItinClass itin, string OpcodeStr,
2643           ValueType ResTy, ValueType OpTy,
2644           SDNode OpNode, bit Commutable>
2645  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
2646         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2647         OpcodeStr, "$Vd, $Vn, $Vm", "",
2648         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
2649  // All of these have a two-operand InstAlias.
2650  let TwoOperandAliasConstraint = "$Vn = $Vd";
2651  let isCommutable = Commutable;
2652}
2653
2654class N3VDSL<bits<2> op21_20, bits<4> op11_8,
2655             InstrItinClass itin, string OpcodeStr, string Dt,
2656             ValueType Ty, SDNode ShOp>
2657  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2658        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2659        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2660        [(set (Ty DPR:$Vd),
2661              (Ty (ShOp (Ty DPR:$Vn),
2662                        (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
2663  // All of these have a two-operand InstAlias.
2664  let TwoOperandAliasConstraint = "$Vn = $Vd";
2665  let isCommutable = 0;
2666}
2667class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
2668               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
2669  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2670        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2671        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
2672        [(set (Ty DPR:$Vd),
2673              (Ty (ShOp (Ty DPR:$Vn),
2674                        (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2675  // All of these have a two-operand InstAlias.
2676  let TwoOperandAliasConstraint = "$Vn = $Vd";
2677  let isCommutable = 0;
2678}
2679
2680class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2681           InstrItinClass itin, string OpcodeStr, string Dt,
2682           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2683  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2684        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2685        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2686        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2687  // All of these have a two-operand InstAlias.
2688  let TwoOperandAliasConstraint = "$Vn = $Vd";
2689  let isCommutable = Commutable;
2690}
2691class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2692           InstrItinClass itin, string OpcodeStr,
2693           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2694  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
2695         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2696         OpcodeStr, "$Vd, $Vn, $Vm", "",
2697         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
2698  // All of these have a two-operand InstAlias.
2699  let TwoOperandAliasConstraint = "$Vn = $Vd";
2700  let isCommutable = Commutable;
2701}
2702class N3VQSL<bits<2> op21_20, bits<4> op11_8,
2703             InstrItinClass itin, string OpcodeStr, string Dt,
2704             ValueType ResTy, ValueType OpTy, SDNode ShOp>
2705  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2706        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2707        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2708        [(set (ResTy QPR:$Vd),
2709              (ResTy (ShOp (ResTy QPR:$Vn),
2710                           (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2711                                                imm:$lane)))))]> {
2712  // All of these have a two-operand InstAlias.
2713  let TwoOperandAliasConstraint = "$Vn = $Vd";
2714  let isCommutable = 0;
2715}
2716class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
2717               ValueType ResTy, ValueType OpTy, SDNode ShOp>
2718  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2719        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2720        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
2721        [(set (ResTy QPR:$Vd),
2722              (ResTy (ShOp (ResTy QPR:$Vn),
2723                           (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
2724                                                imm:$lane)))))]> {
2725  // All of these have a two-operand InstAlias.
2726  let TwoOperandAliasConstraint = "$Vn = $Vd";
2727  let isCommutable = 0;
2728}
2729
2730// Basic 3-register intrinsics, both double- and quad-register.
2731class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2732              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2733              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2734  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2735        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
2736        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2737        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2738  // All of these have a two-operand InstAlias.
2739  let TwoOperandAliasConstraint = "$Vn = $Vd";
2740  let isCommutable = Commutable;
2741}
2742
2743class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2744                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2745                string Dt, ValueType ResTy, ValueType OpTy,
2746                SDPatternOperator IntOp, bit Commutable>
2747  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2748          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
2749          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
2750
2751class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2752                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2753  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2754        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2755        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2756        [(set (Ty DPR:$Vd),
2757              (Ty (IntOp (Ty DPR:$Vn),
2758                         (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
2759                                           imm:$lane)))))]> {
2760  let isCommutable = 0;
2761}
2762
2763class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2764                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2765  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2766        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2767        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2768        [(set (Ty DPR:$Vd),
2769              (Ty (IntOp (Ty DPR:$Vn),
2770                         (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2771  let isCommutable = 0;
2772}
2773class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2774              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2775              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2776  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2777        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
2778        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2779        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
2780  let TwoOperandAliasConstraint = "$Vm = $Vd";
2781  let isCommutable = 0;
2782}
2783
2784class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2785              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2786              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2787  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2788        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
2789        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2790        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2791  // All of these have a two-operand InstAlias.
2792  let TwoOperandAliasConstraint = "$Vn = $Vd";
2793  let isCommutable = Commutable;
2794}
2795
2796class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2797                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2798                string Dt, ValueType ResTy, ValueType OpTy,
2799                SDPatternOperator IntOp, bit Commutable>
2800  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2801          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
2802          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2803
2804// Same as N3VQIntnp but with Vd as a src register.
2805class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2806                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2807                string Dt, ValueType ResTy, ValueType OpTy,
2808                SDPatternOperator IntOp, bit Commutable>
2809  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2810          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
2811          f, itin, OpcodeStr, Dt,
2812          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
2813                                       (OpTy QPR:$Vm))))]> {
2814  let Constraints = "$src = $Vd";
2815}
2816
2817class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2818                string OpcodeStr, string Dt,
2819                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2820  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2821        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2822        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2823        [(set (ResTy QPR:$Vd),
2824              (ResTy (IntOp (ResTy QPR:$Vn),
2825                            (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2826                                                 imm:$lane)))))]> {
2827  let isCommutable = 0;
2828}
2829class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2830                  string OpcodeStr, string Dt,
2831                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2832  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2833        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2834        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2835        [(set (ResTy QPR:$Vd),
2836              (ResTy (IntOp (ResTy QPR:$Vn),
2837                            (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
2838                                                 imm:$lane)))))]> {
2839  let isCommutable = 0;
2840}
2841class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2842              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2843              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2844  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2845        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
2846        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2847        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
2848  let TwoOperandAliasConstraint = "$Vm = $Vd";
2849  let isCommutable = 0;
2850}
2851
2852// Multiply-Add/Sub operations: double- and quad-register.
2853class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2854                InstrItinClass itin, string OpcodeStr, string Dt,
2855                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
2856  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2857        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2858        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2859        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2860                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
2861
2862class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2863                  string OpcodeStr, string Dt,
2864                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2865  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2866        (outs DPR:$Vd),
2867        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2868        NVMulSLFrm, itin,
2869        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2870        [(set (Ty DPR:$Vd),
2871              (Ty (ShOp (Ty DPR:$src1),
2872                        (Ty (MulOp DPR:$Vn,
2873                                   (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
2874                                                     imm:$lane)))))))]>;
2875class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2876                    string OpcodeStr, string Dt,
2877                    ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2878  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2879        (outs DPR:$Vd),
2880        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2881        NVMulSLFrm, itin,
2882        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2883        [(set (Ty DPR:$Vd),
2884              (Ty (ShOp (Ty DPR:$src1),
2885                        (Ty (MulOp DPR:$Vn,
2886                                   (Ty (ARMvduplane (Ty DPR_8:$Vm),
2887                                                     imm:$lane)))))))]>;
2888
2889class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2890                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
2891                SDPatternOperator MulOp, SDPatternOperator OpNode>
2892  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2893        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2894        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2895        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2896                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
2897class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2898                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
2899                  SDPatternOperator MulOp, SDPatternOperator ShOp>
2900  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2901        (outs QPR:$Vd),
2902        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2903        NVMulSLFrm, itin,
2904        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2905        [(set (ResTy QPR:$Vd),
2906              (ResTy (ShOp (ResTy QPR:$src1),
2907                           (ResTy (MulOp QPR:$Vn,
2908                                   (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2909                                                        imm:$lane)))))))]>;
2910class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2911                    string OpcodeStr, string Dt,
2912                    ValueType ResTy, ValueType OpTy,
2913                    SDPatternOperator MulOp, SDPatternOperator ShOp>
2914  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2915        (outs QPR:$Vd),
2916        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2917        NVMulSLFrm, itin,
2918        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2919        [(set (ResTy QPR:$Vd),
2920              (ResTy (ShOp (ResTy QPR:$src1),
2921                           (ResTy (MulOp QPR:$Vn,
2922                                   (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
2923                                                        imm:$lane)))))))]>;
2924
2925// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
2926class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2927                InstrItinClass itin, string OpcodeStr, string Dt,
2928                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
2929  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2930        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2931        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2932        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2933                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
2934class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2935                InstrItinClass itin, string OpcodeStr, string Dt,
2936                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
2937  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2938        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2939        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2940        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2941                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
2942
2943// Neon 3-argument intrinsics, both double- and quad-register.
2944// The destination register is also used as the first source operand register.
2945class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2946               InstrItinClass itin, string OpcodeStr, string Dt,
2947               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2948  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2949        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2950        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2951        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
2952                                      (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
2953class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2954               InstrItinClass itin, string OpcodeStr, string Dt,
2955               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2956  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2957        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2958        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2959        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
2960                                      (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2961
2962// Long Multiply-Add/Sub operations.
2963class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2964                InstrItinClass itin, string OpcodeStr, string Dt,
2965                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2966  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2967        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2968        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2969        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
2970                                (TyQ (MulOp (TyD DPR:$Vn),
2971                                            (TyD DPR:$Vm)))))]>;
2972class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
2973                  InstrItinClass itin, string OpcodeStr, string Dt,
2974                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2975  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2976        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2977        NVMulSLFrm, itin,
2978        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2979        [(set QPR:$Vd,
2980          (OpNode (TyQ QPR:$src1),
2981                  (TyQ (MulOp (TyD DPR:$Vn),
2982                              (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
2983                                                 imm:$lane))))))]>;
2984class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2985                    InstrItinClass itin, string OpcodeStr, string Dt,
2986                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2987  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2988        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2989        NVMulSLFrm, itin,
2990        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2991        [(set QPR:$Vd,
2992          (OpNode (TyQ QPR:$src1),
2993                  (TyQ (MulOp (TyD DPR:$Vn),
2994                              (TyD (ARMvduplane (TyD DPR_8:$Vm),
2995                                                 imm:$lane))))))]>;
2996
2997// Long Intrinsic-Op vector operations with explicit extend (VABAL).
2998class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2999                   InstrItinClass itin, string OpcodeStr, string Dt,
3000                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
3001                   SDNode OpNode>
3002  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3003        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3004        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
3005        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
3006                                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
3007                                                        (TyD DPR:$Vm)))))))]>;
3008
3009// Neon Long 3-argument intrinsic.  The destination register is
3010// a quad-register and is also used as the first source operand register.
3011class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3012               InstrItinClass itin, string OpcodeStr, string Dt,
3013               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
3014  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3015        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3016        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
3017        [(set QPR:$Vd,
3018          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
3019class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
3020                 string OpcodeStr, string Dt,
3021                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3022  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3023        (outs QPR:$Vd),
3024        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3025        NVMulSLFrm, itin,
3026        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
3027        [(set (ResTy QPR:$Vd),
3028              (ResTy (IntOp (ResTy QPR:$src1),
3029                            (OpTy DPR:$Vn),
3030                            (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
3031                                                imm:$lane)))))]>;
3032class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3033                   InstrItinClass itin, string OpcodeStr, string Dt,
3034                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3035  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3036        (outs QPR:$Vd),
3037        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3038        NVMulSLFrm, itin,
3039        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
3040        [(set (ResTy QPR:$Vd),
3041              (ResTy (IntOp (ResTy QPR:$src1),
3042                            (OpTy DPR:$Vn),
3043                            (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
3044                                                imm:$lane)))))]>;
3045
3046// Narrowing 3-register intrinsics.
3047class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3048              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
3049              SDPatternOperator IntOp, bit Commutable>
3050  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3051        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
3052        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3053        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
3054  let isCommutable = Commutable;
3055}
3056
3057// Long 3-register operations.
3058class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3059           InstrItinClass itin, string OpcodeStr, string Dt,
3060           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
3061  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3062        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3063        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3064        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
3065  let isCommutable = Commutable;
3066}
3067
3068class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
3069             InstrItinClass itin, string OpcodeStr, string Dt,
3070             ValueType TyQ, ValueType TyD, SDNode OpNode>
3071  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3072        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3073        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3074        [(set QPR:$Vd,
3075          (TyQ (OpNode (TyD DPR:$Vn),
3076                       (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
3077class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3078               InstrItinClass itin, string OpcodeStr, string Dt,
3079               ValueType TyQ, ValueType TyD, SDNode OpNode>
3080  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3081        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3082        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3083        [(set QPR:$Vd,
3084          (TyQ (OpNode (TyD DPR:$Vn),
3085                       (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
3086
3087// Long 3-register operations with explicitly extended operands.
3088class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3089              InstrItinClass itin, string OpcodeStr, string Dt,
3090              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
3091              bit Commutable>
3092  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3093        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3094        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3095        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
3096                                (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
3097  let isCommutable = Commutable;
3098}
3099
3100// Long 3-register intrinsics with explicit extend (VABDL).
3101class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3102                 InstrItinClass itin, string OpcodeStr, string Dt,
3103                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
3104                 bit Commutable>
3105  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3106        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3107        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3108        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
3109                                                (TyD DPR:$Vm))))))]> {
3110  let isCommutable = Commutable;
3111}
3112
3113// Long 3-register intrinsics.
3114class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3115              InstrItinClass itin, string OpcodeStr, string Dt,
3116              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
3117  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3118        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3119        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3120        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
3121  let isCommutable = Commutable;
3122}
3123
3124// Same as above, but not predicated.
3125class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
3126                bit op4, InstrItinClass itin, string OpcodeStr,
3127                string Dt, ValueType ResTy, ValueType OpTy,
3128                SDPatternOperator IntOp, bit Commutable>
3129  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
3130          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
3131          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
3132
3133class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
3134                string OpcodeStr, string Dt,
3135                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3136  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3137        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3138        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3139        [(set (ResTy QPR:$Vd),
3140              (ResTy (IntOp (OpTy DPR:$Vn),
3141                            (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
3142                                                imm:$lane)))))]>;
3143class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3144                  InstrItinClass itin, string OpcodeStr, string Dt,
3145                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3146  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3147        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3148        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3149        [(set (ResTy QPR:$Vd),
3150              (ResTy (IntOp (OpTy DPR:$Vn),
3151                            (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
3152                                                imm:$lane)))))]>;
3153
3154// Wide 3-register operations.
3155class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3156           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
3157           SDNode OpNode, SDNode ExtOp, bit Commutable>
3158  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3159        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
3160        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3161        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
3162                                (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
3163  // All of these have a two-operand InstAlias.
3164  let TwoOperandAliasConstraint = "$Vn = $Vd";
3165  let isCommutable = Commutable;
3166}
3167
3168// Pairwise long 2-register intrinsics, both double- and quad-register.
3169class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3170                bits<2> op17_16, bits<5> op11_7, bit op4,
3171                string OpcodeStr, string Dt,
3172                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3173  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
3174        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
3175        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
3176class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3177                bits<2> op17_16, bits<5> op11_7, bit op4,
3178                string OpcodeStr, string Dt,
3179                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3180  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
3181        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
3182        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
3183
3184// Pairwise long 2-register accumulate intrinsics,
3185// both double- and quad-register.
3186// The destination register is also used as the first source operand register.
3187class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3188                 bits<2> op17_16, bits<5> op11_7, bit op4,
3189                 string OpcodeStr, string Dt,
3190                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3191  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
3192        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
3193        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
3194        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
3195class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3196                 bits<2> op17_16, bits<5> op11_7, bit op4,
3197                 string OpcodeStr, string Dt,
3198                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3199  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
3200        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
3201        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
3202        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
3203
3204// Shift by immediate,
3205// both double- and quad-register.
3206let TwoOperandAliasConstraint = "$Vm = $Vd" in {
3207class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3208             Format f, InstrItinClass itin, Operand ImmTy,
3209             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
3210  : N2VImm<op24, op23, op11_8, op7, 0, op4,
3211           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
3212           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3213           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
3214class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3215             Format f, InstrItinClass itin, Operand ImmTy,
3216             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
3217  : N2VImm<op24, op23, op11_8, op7, 1, op4,
3218           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
3219           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3220           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
3221}
3222
3223// Long shift by immediate.
3224class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
3225             string OpcodeStr, string Dt,
3226             ValueType ResTy, ValueType OpTy, Operand ImmTy,
3227             SDPatternOperator OpNode>
3228  : N2VImm<op24, op23, op11_8, op7, op6, op4,
3229           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
3230           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3231           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
3232
3233// Narrow shift by immediate.
3234class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
3235             InstrItinClass itin, string OpcodeStr, string Dt,
3236             ValueType ResTy, ValueType OpTy, Operand ImmTy,
3237             SDPatternOperator OpNode>
3238  : N2VImm<op24, op23, op11_8, op7, op6, op4,
3239           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
3240           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3241           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
3242                                          (i32 ImmTy:$SIMM))))]>;
3243
3244// Shift right by immediate and accumulate,
3245// both double- and quad-register.
3246let TwoOperandAliasConstraint = "$Vm = $Vd" in {
3247class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3248                Operand ImmTy, string OpcodeStr, string Dt,
3249                ValueType Ty, SDNode ShOp>
3250  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
3251           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
3252           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3253           [(set DPR:$Vd, (Ty (add DPR:$src1,
3254                                (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
3255class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3256                Operand ImmTy, string OpcodeStr, string Dt,
3257                ValueType Ty, SDNode ShOp>
3258  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
3259           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
3260           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3261           [(set QPR:$Vd, (Ty (add QPR:$src1,
3262                                (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
3263}
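// Because the patterns above match an explicit (add ..., (shift ...)) DAG, a
// separate vector shift followed by an add can be selected as a single
// shift-and-accumulate instruction such as VSRA, with the accumulator tied to
// the destination through "$src1 = $Vd".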
3264
3265// Shift by immediate and insert,
3266// both double- and quad-register.
3267let TwoOperandAliasConstraint = "$Vm = $Vd" in {
3268class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3269                Operand ImmTy, Format f, string OpcodeStr, string Dt,
3270                ValueType Ty,SDNode ShOp>
3271  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
3272           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
3273           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3274           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
3275class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3276                Operand ImmTy, Format f, string OpcodeStr, string Dt,
3277                ValueType Ty,SDNode ShOp>
3278  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
3279           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
3280           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3281           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
3282}
3283
3284// Convert, with fractional bits immediate,
3285// both double- and quad-register.
3286class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3287              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
3288              SDPatternOperator IntOp>
3289  : N2VImm<op24, op23, op11_8, op7, 0, op4,
3290           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3291           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3292           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
3293class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3294              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
3295              SDPatternOperator IntOp>
3296  : N2VImm<op24, op23, op11_8, op7, 1, op4,
3297           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3298           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3299           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
3300
3301//===----------------------------------------------------------------------===//
3302// Multiclasses
3303//===----------------------------------------------------------------------===//
3304
3305// Abbreviations used in multiclass suffixes:
3306//   Q = quarter int (8 bit) elements
3307//   H = half int (16 bit) elements
3308//   S = single int (32 bit) elements
3309//   D = double int (64 bit) elements
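//
// For example (illustrative only; the opcode bits and itineraries below are
// placeholders), a hypothetical instantiation of the N3V_QHS multiclass
// defined later in this file:
//
//   defm VFOO : N3V_QHS<0, 0, 0b0000, 0, NoItinerary, NoItinerary,
//                       NoItinerary, NoItinerary, "vfoo", "i", add, 1>;
//
// would expand to VFOOv8i8, VFOOv4i16 and VFOOv2i32 (64-bit D forms) plus
// VFOOv16i8, VFOOv8i16 and VFOOv4i32 (128-bit Q forms), one per element size.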
3310
3311// Neon 2-register vector operations and intrinsics.
3312
3313// Neon 2-register comparisons.
3314//   source operand element sizes of 8, 16 and 32 bits:
3315multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3316                       bits<5> op11_7, bit op4, string opc, string Dt,
3317                       string asm, PatFrag fc> {
3318  // 64-bit vector types.
3319  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
3320                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3321                  opc, !strconcat(Dt, "8"), asm, "",
3322                  [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
3323  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3324                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3325                  opc, !strconcat(Dt, "16"), asm, "",
3326                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
3327  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3328                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3329                  opc, !strconcat(Dt, "32"), asm, "",
3330                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
3331  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3332                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3333                  opc, "f32", asm, "",
3334                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
3335    let Inst{10} = 1; // overwrite F = 1
3336  }
3337  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3338                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3339                  opc, "f16", asm, "",
3340                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
3341              Requires<[HasNEON,HasFullFP16]> {
3342    let Inst{10} = 1; // overwrite F = 1
3343  }
3344
3345  // 128-bit vector types.
3346  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
3347                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3348                  opc, !strconcat(Dt, "8"), asm, "",
3349                  [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
3350  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3351                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3352                  opc, !strconcat(Dt, "16"), asm, "",
3353                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
3354  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3355                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3356                  opc, !strconcat(Dt, "32"), asm, "",
3357                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
3358  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3359                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3360                  opc, "f32", asm, "",
3361                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
3362    let Inst{10} = 1; // overwrite F = 1
3363  }
3364  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3365                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3366                  opc, "f16", asm, "",
3367                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
3368              Requires<[HasNEON,HasFullFP16]> {
3369    let Inst{10} = 1; // overwrite F = 1
3370  }
3371}
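// Each instantiation of N2V_QHS_cmp therefore produces ten compare-with-zero
// records: integer and f32 forms for both the 64-bit and 128-bit register
// files, plus f16 forms that additionally require HasFullFP16.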
3372
3373// Neon 3-register comparisons.
3374class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3375               InstrItinClass itin, string OpcodeStr, string Dt,
3376               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
3377  : N3V<op24, op23, op21_20, op11_8, 1, op4,
3378        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
3379        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3380        [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
3381  // All of these have a two-operand InstAlias.
3382  let TwoOperandAliasConstraint = "$Vn = $Vd";
3383  let isCommutable = Commutable;
3384}
3385
3386class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3387               InstrItinClass itin, string OpcodeStr, string Dt,
3388               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
3389  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3390        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3391        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3392        [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
3393  // All of these have a two-operand InstAlias.
3394  let TwoOperandAliasConstraint = "$Vn = $Vd";
3395  let isCommutable = Commutable;
3396}
3397
3398multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
3399                       InstrItinClass itinD16, InstrItinClass itinD32,
3400                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3401                       string OpcodeStr, string Dt,
3402                       PatFrag fc, bit Commutable = 0> {
3403  // 64-bit vector types.
3404  def v8i8  : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
3405                       OpcodeStr, !strconcat(Dt, "8"),
3406                       v8i8, v8i8, fc, Commutable>;
3407  def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
3408                       OpcodeStr, !strconcat(Dt, "16"),
3409                       v4i16, v4i16, fc, Commutable>;
3410  def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
3411                       OpcodeStr, !strconcat(Dt, "32"),
3412                       v2i32, v2i32, fc, Commutable>;
3413
3414  // 128-bit vector types.
3415  def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
3416                       OpcodeStr, !strconcat(Dt, "8"),
3417                       v16i8, v16i8, fc, Commutable>;
3418  def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
3419                       OpcodeStr, !strconcat(Dt, "16"),
3420                       v8i16, v8i16, fc, Commutable>;
3421  def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
3422                       OpcodeStr, !strconcat(Dt, "32"),
3423                       v4i32, v4i32, fc, Commutable>;
3424}
3425
3426
3427// Neon 2-register vector intrinsics,
3428//   element sizes of 8, 16 and 32 bits:
3429multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3430                      bits<5> op11_7, bit op4,
3431                      InstrItinClass itinD, InstrItinClass itinQ,
3432                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3433  // 64-bit vector types.
3434  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3435                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3436  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3437                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
3438  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3439                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
3440
3441  // 128-bit vector types.
3442  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3443                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
3444  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3445                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
3446  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3447                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
3448}
3449
3450
3451// Neon Narrowing 2-register vector operations,
3452//   source operand element sizes of 16, 32 and 64 bits:
3453multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3454                    bits<5> op11_7, bit op6, bit op4,
3455                    InstrItinClass itin, string OpcodeStr, string Dt,
3456                    SDNode OpNode> {
3457  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3458                   itin, OpcodeStr, !strconcat(Dt, "16"),
3459                   v8i8, v8i16, OpNode>;
3460  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3461                   itin, OpcodeStr, !strconcat(Dt, "32"),
3462                   v4i16, v4i32, OpNode>;
3463  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3464                   itin, OpcodeStr, !strconcat(Dt, "64"),
3465                   v2i32, v2i64, OpNode>;
3466}
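
// Note: each def above narrows its source, producing a result with half the
// element width of its operand (v8i16 -> v8i8, v4i32 -> v4i16, v2i64 -> v2i32);
// the size suffix appended to Dt ("16"/"32"/"64") names the source element size.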
3467
3468// Neon Narrowing 2-register vector intrinsics,
3469//   source operand element sizes of 16, 32 and 64 bits:
3470multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3471                       bits<5> op11_7, bit op6, bit op4,
3472                       InstrItinClass itin, string OpcodeStr, string Dt,
3473                       SDPatternOperator IntOp> {
3474  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3475                      itin, OpcodeStr, !strconcat(Dt, "16"),
3476                      v8i8, v8i16, IntOp>;
3477  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3478                      itin, OpcodeStr, !strconcat(Dt, "32"),
3479                      v4i16, v4i32, IntOp>;
3480  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3481                      itin, OpcodeStr, !strconcat(Dt, "64"),
3482                      v2i32, v2i64, IntOp>;
3483}
3484
3485
3486// Neon Lengthening 2-register vector operations (currently specific to VMOVL),
3487//   source operand element sizes of 8, 16 and 32 bits:
3488multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
3489                    string OpcodeStr, string Dt, SDNode OpNode> {
3490  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3491                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
3492  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3493                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3494  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3495                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
3496}
3497
3498
3499// Neon 3-register vector operations.
3500
3501// First with only element sizes of 8, 16 and 32 bits:
3502multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3503                   InstrItinClass itinD16, InstrItinClass itinD32,
3504                   InstrItinClass itinQ16, InstrItinClass itinQ32,
3505                   string OpcodeStr, string Dt,
3506                   SDNode OpNode, bit Commutable = 0> {
3507  // 64-bit vector types.
3508  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
3509                   OpcodeStr, !strconcat(Dt, "8"),
3510                   v8i8, v8i8, OpNode, Commutable>;
3511  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
3512                   OpcodeStr, !strconcat(Dt, "16"),
3513                   v4i16, v4i16, OpNode, Commutable>;
3514  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
3515                   OpcodeStr, !strconcat(Dt, "32"),
3516                   v2i32, v2i32, OpNode, Commutable>;
3517
3518  // 128-bit vector types.
3519  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
3520                   OpcodeStr, !strconcat(Dt, "8"),
3521                   v16i8, v16i8, OpNode, Commutable>;
3522  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
3523                   OpcodeStr, !strconcat(Dt, "16"),
3524                   v8i16, v8i16, OpNode, Commutable>;
3525  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
3526                   OpcodeStr, !strconcat(Dt, "32"),
3527                   v4i32, v4i32, OpNode, Commutable>;
3528}
3529
3530multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
3531  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
3532  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
3533  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
3534  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
3535                     v4i32, v2i32, ShOp>;
3536}
3537
3538// ....then also with element size of 64 bits:
3539multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3540                    InstrItinClass itinD, InstrItinClass itinQ,
3541                    string OpcodeStr, string Dt,
3542                    SDNode OpNode, bit Commutable = 0>
3543  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
3544            OpcodeStr, Dt, OpNode, Commutable> {
3545  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
3546                   OpcodeStr, !strconcat(Dt, "64"),
3547                   v1i64, v1i64, OpNode, Commutable>;
3548  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
3549                   OpcodeStr, !strconcat(Dt, "64"),
3550                   v2i64, v2i64, OpNode, Commutable>;
3551}
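
// Note: N3V_QHSD inherits the 8/16/32-bit forms from N3V_QHS and adds the
// v1i64/v2i64 forms, so a single defm covers all four element sizes (see, for
// example, VADD in the instruction definitions below, which expands to
// VADDv8i8 through VADDv2i64).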
3552
3553
3554// Neon 3-register vector intrinsics.
3555
3556// First with only element sizes of 16 and 32 bits:
3557multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3558                     InstrItinClass itinD16, InstrItinClass itinD32,
3559                     InstrItinClass itinQ16, InstrItinClass itinQ32,
3560                     string OpcodeStr, string Dt,
3561                     SDPatternOperator IntOp, bit Commutable = 0> {
3562  // 64-bit vector types.
3563  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
3564                      OpcodeStr, !strconcat(Dt, "16"),
3565                      v4i16, v4i16, IntOp, Commutable>;
3566  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
3567                      OpcodeStr, !strconcat(Dt, "32"),
3568                      v2i32, v2i32, IntOp, Commutable>;
3569
3570  // 128-bit vector types.
3571  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
3572                      OpcodeStr, !strconcat(Dt, "16"),
3573                      v8i16, v8i16, IntOp, Commutable>;
3574  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
3575                      OpcodeStr, !strconcat(Dt, "32"),
3576                      v4i32, v4i32, IntOp, Commutable>;
3577}
3578multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3579                     InstrItinClass itinD16, InstrItinClass itinD32,
3580                     InstrItinClass itinQ16, InstrItinClass itinQ32,
3581                     string OpcodeStr, string Dt,
3582                     SDPatternOperator IntOp> {
3583  // 64-bit vector types.
3584  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
3585                      OpcodeStr, !strconcat(Dt, "16"),
3586                      v4i16, v4i16, IntOp>;
3587  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
3588                      OpcodeStr, !strconcat(Dt, "32"),
3589                      v2i32, v2i32, IntOp>;
3590
3591  // 128-bit vector types.
3592  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
3593                      OpcodeStr, !strconcat(Dt, "16"),
3594                      v8i16, v8i16, IntOp>;
3595  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
3596                      OpcodeStr, !strconcat(Dt, "32"),
3597                      v4i32, v4i32, IntOp>;
3598}
3599
3600multiclass N3VIntSL_HS<bits<4> op11_8,
3601                       InstrItinClass itinD16, InstrItinClass itinD32,
3602                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3603                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3604  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
3605                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
3606  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
3607                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
3608  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
3609                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
3610  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
3611                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
3612}
3613
3614// ....then also with element size of 8 bits:
3615multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3616                      InstrItinClass itinD16, InstrItinClass itinD32,
3617                      InstrItinClass itinQ16, InstrItinClass itinQ32,
3618                      string OpcodeStr, string Dt,
3619                      SDPatternOperator IntOp, bit Commutable = 0>
3620  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3621              OpcodeStr, Dt, IntOp, Commutable> {
3622  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
3623                      OpcodeStr, !strconcat(Dt, "8"),
3624                      v8i8, v8i8, IntOp, Commutable>;
3625  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
3626                      OpcodeStr, !strconcat(Dt, "8"),
3627                      v16i8, v16i8, IntOp, Commutable>;
3628}
3629multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3630                      InstrItinClass itinD16, InstrItinClass itinD32,
3631                      InstrItinClass itinQ16, InstrItinClass itinQ32,
3632                      string OpcodeStr, string Dt,
3633                      SDPatternOperator IntOp>
3634  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3635              OpcodeStr, Dt, IntOp> {
3636  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
3637                      OpcodeStr, !strconcat(Dt, "8"),
3638                      v8i8, v8i8, IntOp>;
3639  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
3640                      OpcodeStr, !strconcat(Dt, "8"),
3641                      v16i8, v16i8, IntOp>;
3642}
3643
3644
3645// ....then also with element size of 64 bits:
3646multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3647                       InstrItinClass itinD16, InstrItinClass itinD32,
3648                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3649                       string OpcodeStr, string Dt,
3650                       SDPatternOperator IntOp, bit Commutable = 0>
3651  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3652               OpcodeStr, Dt, IntOp, Commutable> {
3653  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
3654                      OpcodeStr, !strconcat(Dt, "64"),
3655                      v1i64, v1i64, IntOp, Commutable>;
3656  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
3657                      OpcodeStr, !strconcat(Dt, "64"),
3658                      v2i64, v2i64, IntOp, Commutable>;
3659}
3660multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3661                       InstrItinClass itinD16, InstrItinClass itinD32,
3662                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3663                       string OpcodeStr, string Dt,
3664                       SDPatternOperator IntOp>
3665  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3666               OpcodeStr, Dt, IntOp> {
3667  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
3668                      OpcodeStr, !strconcat(Dt, "64"),
3669                      v1i64, v1i64, IntOp>;
3670  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
3671                      OpcodeStr, !strconcat(Dt, "64"),
3672                      v2i64, v2i64, IntOp>;
3673}
3674
3675// Neon Narrowing 3-register vector intrinsics,
3676//   source operand element sizes of 16, 32 and 64 bits:
3677multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3678                       string OpcodeStr, string Dt,
3679                       SDPatternOperator IntOp, bit Commutable = 0> {
3680  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
3681                      OpcodeStr, !strconcat(Dt, "16"),
3682                      v8i8, v8i16, IntOp, Commutable>;
3683  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
3684                      OpcodeStr, !strconcat(Dt, "32"),
3685                      v4i16, v4i32, IntOp, Commutable>;
3686  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
3687                      OpcodeStr, !strconcat(Dt, "64"),
3688                      v2i32, v2i64, IntOp, Commutable>;
3689}
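
// Note: these narrowing 3-register forms produce a D-register result from two
// Q-register sources (D = Q op Q); VADDHN and VRADDHN in the instruction
// definitions below are built from this multiclass.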
3690
3691
3692// Neon Long 3-register vector operations.
3693
3694multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3695                    InstrItinClass itin16, InstrItinClass itin32,
3696                    string OpcodeStr, string Dt,
3697                    SDNode OpNode, bit Commutable = 0> {
3698  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
3699                   OpcodeStr, !strconcat(Dt, "8"),
3700                   v8i16, v8i8, OpNode, Commutable>;
3701  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
3702                   OpcodeStr, !strconcat(Dt, "16"),
3703                   v4i32, v4i16, OpNode, Commutable>;
3704  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
3705                   OpcodeStr, !strconcat(Dt, "32"),
3706                   v2i64, v2i32, OpNode, Commutable>;
3707}
3708
3709multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
3710                     InstrItinClass itin, string OpcodeStr, string Dt,
3711                     SDNode OpNode> {
3712  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
3713                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3714  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
3715                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
3716}
3717
3718multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3719                       InstrItinClass itin16, InstrItinClass itin32,
3720                       string OpcodeStr, string Dt,
3721                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
3722  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
3723                      OpcodeStr, !strconcat(Dt, "8"),
3724                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
3725  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
3726                      OpcodeStr, !strconcat(Dt, "16"),
3727                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
3728  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
3729                      OpcodeStr, !strconcat(Dt, "32"),
3730                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
3731}
3732
3733// Neon Long 3-register vector intrinsics.
3734
3735// First with only element sizes of 16 and 32 bits:
3736multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3737                      InstrItinClass itin16, InstrItinClass itin32,
3738                      string OpcodeStr, string Dt,
3739                      SDPatternOperator IntOp, bit Commutable = 0> {
3740  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
3741                      OpcodeStr, !strconcat(Dt, "16"),
3742                      v4i32, v4i16, IntOp, Commutable>;
3743  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
3744                      OpcodeStr, !strconcat(Dt, "32"),
3745                      v2i64, v2i32, IntOp, Commutable>;
3746}
3747
3748multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
3749                        InstrItinClass itin, string OpcodeStr, string Dt,
3750                        SDPatternOperator IntOp> {
3751  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
3752                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3753  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
3754                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3755}
3756
3757// ....then also with element size of 8 bits:
3758multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3759                       InstrItinClass itin16, InstrItinClass itin32,
3760                       string OpcodeStr, string Dt,
3761                       SDPatternOperator IntOp, bit Commutable = 0>
3762  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
3763               IntOp, Commutable> {
3764  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
3765                      OpcodeStr, !strconcat(Dt, "8"),
3766                      v8i16, v8i8, IntOp, Commutable>;
3767}
3768
3769// ....with explicit extend (VABDL).
3770multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3771                       InstrItinClass itin, string OpcodeStr, string Dt,
3772                       SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
3773  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
3774                         OpcodeStr, !strconcat(Dt, "8"),
3775                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
3776  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
3777                         OpcodeStr, !strconcat(Dt, "16"),
3778                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
3779  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
3780                         OpcodeStr, !strconcat(Dt, "32"),
3781                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
3782}
3783
3784
3785// Neon Wide 3-register vector operations,
3786//   source operand element sizes of 8, 16 and 32 bits:
3787multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3788                    string OpcodeStr, string Dt,
3789                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
3790  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
3791                   OpcodeStr, !strconcat(Dt, "8"),
3792                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
3793  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
3794                   OpcodeStr, !strconcat(Dt, "16"),
3795                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
3796  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
3797                   OpcodeStr, !strconcat(Dt, "32"),
3798                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
3799}
3800
3801
3802// Neon Multiply-Op vector operations,
3803//   element sizes of 8, 16 and 32 bits:
3804multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3805                        InstrItinClass itinD16, InstrItinClass itinD32,
3806                        InstrItinClass itinQ16, InstrItinClass itinQ32,
3807                        string OpcodeStr, string Dt, SDNode OpNode> {
3808  // 64-bit vector types.
3809  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
3810                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
3811  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
3812                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
3813  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
3814                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
3815
3816  // 128-bit vector types.
3817  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
3818                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
3819  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
3820                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
3821  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
3822                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
3823}
3824
3825multiclass N3VMulOpSL_HS<bits<4> op11_8,
3826                         InstrItinClass itinD16, InstrItinClass itinD32,
3827                         InstrItinClass itinQ16, InstrItinClass itinQ32,
3828                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
3829  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
3830                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
3831  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
3832                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
3833  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
3834                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
3835                            mul, ShOp>;
3836  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
3837                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
3838                          mul, ShOp>;
3839}
3840
3841// Neon Intrinsic-Op vector operations,
3842//   element sizes of 8, 16 and 32 bits:
3843multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3844                        InstrItinClass itinD, InstrItinClass itinQ,
3845                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
3846                        SDNode OpNode> {
3847  // 64-bit vector types.
3848  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
3849                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
3850  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
3851                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
3852  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
3853                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
3854
3855  // 128-bit vector types.
3856  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
3857                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
3858  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
3859                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
3860  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
3861                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
3862}
3863
3864// Neon 3-argument intrinsics,
3865//   element sizes of 16 and 32 bits:
3866multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3867                       InstrItinClass itinD16, InstrItinClass itinD32,
3868                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3869                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3870  // 64-bit vector types.
3871  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
3872                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
3873  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
3874                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
3875
3876  // 128-bit vector types.
3877  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
3878                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
3879  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
3880                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
3881}
3882
3883//   element sizes of 8, 16 and 32 bits:
3884multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3885                       InstrItinClass itinD16, InstrItinClass itinD32,
3886                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3887                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
3888  : N3VInt3_HS<op24, op23, op11_8, op4, itinD16, itinD32,
3889               itinQ16, itinQ32, OpcodeStr, Dt, IntOp> {
3890  // 64-bit vector types.
3891  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
3892                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3893  // 128-bit vector types.
3894  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
3895                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
3896}
3897
3898// Neon Long Multiply-Op vector operations,
3899//   element sizes of 8, 16 and 32 bits:
3900multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3901                         InstrItinClass itin16, InstrItinClass itin32,
3902                         string OpcodeStr, string Dt, SDNode MulOp,
3903                         SDNode OpNode> {
3904  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
3905                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
3906  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
3907                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
3908  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
3909                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3910}
3911
3912multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
3913                          string Dt, SDNode MulOp, SDNode OpNode> {
3914  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
3915                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
3916  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
3917                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3918}
3919
3920
3921// Neon Long 3-argument intrinsics.
3922
3923// First with only element sizes of 16 and 32 bits:
3924multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3925                       InstrItinClass itin16, InstrItinClass itin32,
3926                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3927  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
3928                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3929  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
3930                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3931}
3932
3933multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
3934                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3935  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
3936                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
3937  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
3938                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3939}
3940
3941// ....then also with element size of 8 bits:
3942multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3943                        InstrItinClass itin16, InstrItinClass itin32,
3944                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
3945  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
3946  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
3947                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
3948}
3949
3950// ....with explicit extend (VABAL).
3951multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3952                            InstrItinClass itin, string OpcodeStr, string Dt,
3953                            SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
3954  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
3955                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
3956                           IntOp, ExtOp, OpNode>;
3957  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
3958                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
3959                           IntOp, ExtOp, OpNode>;
3960  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
3961                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
3962                           IntOp, ExtOp, OpNode>;
3963}
3964
3965
3966// Neon Pairwise long 2-register intrinsics,
3967//   element sizes of 8, 16 and 32 bits:
3968multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3969                        bits<5> op11_7, bit op4,
3970                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3971  // 64-bit vector types.
3972  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3973                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3974  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3975                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3976  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3977                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3978
3979  // 128-bit vector types.
3980  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3981                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3982  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3983                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3984  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3985                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3986}
3987
3988
3989// Neon Pairwise long 2-register accumulate intrinsics,
3990//   element sizes of 8, 16 and 32 bits:
3991multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3992                         bits<5> op11_7, bit op4,
3993                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3994  // 64-bit vector types.
3995  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3996                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3997  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3998                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3999  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
4000                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
4001
4002  // 128-bit vector types.
4003  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
4004                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
4005  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
4006                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
4007  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
4008                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
4009}
4010
4011
4012// Neon 2-register vector shift by immediate,
4013//   with f of either N2RegVShLFrm or N2RegVShRFrm
4014//   element sizes of 8, 16, 32 and 64 bits:
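//   The per-size "let Inst{...}" overrides in the defs below pin the leading
//   bits of the imm6 field, which is how the element size is encoded for
//   shifts by immediate: 001xxx selects 8-bit, 01xxxx selects 16-bit and
//   1xxxxx selects 32-bit elements.  The v1i64/v2i64 defs instead set the
//   extra opcode bit (the L bit, passed as 1 rather than 0), leaving all six
//   imm6 bits for the shift amount (imm6 = xxxxxx).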
4015multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4016                       InstrItinClass itin, string OpcodeStr, string Dt,
4017                       SDNode OpNode> {
4018  // 64-bit vector types.
4019  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
4020                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
4021    let Inst{21-19} = 0b001; // imm6 = 001xxx
4022  }
4023  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
4024                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
4025    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4026  }
4027  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
4028                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
4029    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4030  }
4031  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
4032                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
4033                             // imm6 = xxxxxx
4034
4035  // 128-bit vector types.
4036  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
4037                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
4038    let Inst{21-19} = 0b001; // imm6 = 001xxx
4039  }
4040  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
4041                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
4042    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4043  }
4044  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
4045                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
4046    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4047  }
4048  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
4049                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
4050                             // imm6 = xxxxxx
4051}
4052multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4053                       InstrItinClass itin, string OpcodeStr, string Dt,
4054                       string baseOpc, SDNode OpNode> {
4055  // 64-bit vector types.
4056  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
4057                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
4058    let Inst{21-19} = 0b001; // imm6 = 001xxx
4059  }
4060  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
4061                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
4062    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4063  }
4064  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
4065                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
4066    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4067  }
4068  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
4069                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
4070                             // imm6 = xxxxxx
4071
4072  // 128-bit vector types.
4073  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
4074                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
4075    let Inst{21-19} = 0b001; // imm6 = 001xxx
4076  }
4077  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
4078                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
4079    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4080  }
4081  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
4082                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
4083    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4084  }
4085  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
4086                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
4087                             // imm6 = xxxxxx
4088}
4089
4090// Neon Shift-Accumulate vector operations,
4091//   element sizes of 8, 16, 32 and 64 bits:
4092multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4093                         string OpcodeStr, string Dt, SDNode ShOp> {
4094  // 64-bit vector types.
4095  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
4096                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
4097    let Inst{21-19} = 0b001; // imm6 = 001xxx
4098  }
4099  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
4100                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
4101    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4102  }
4103  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4104                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
4105    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4106  }
4107  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4108                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
4109                             // imm6 = xxxxxx
4110
4111  // 128-bit vector types.
4112  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
4113                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
4114    let Inst{21-19} = 0b001; // imm6 = 001xxx
4115  }
4116  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
4117                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
4118    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4119  }
4120  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4121                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
4122    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4123  }
4124  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4125                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
4126                             // imm6 = xxxxxx
4127}
4128
4129// Neon Shift-Insert vector operations,
4130//   with f of either N2RegVShLFrm or N2RegVShRFrm
4131//   element sizes of 8, 16, 32 and 64 bits:
4132multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4133                          string OpcodeStr> {
4134  // 64-bit vector types.
4135  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4136                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
4137    let Inst{21-19} = 0b001; // imm6 = 001xxx
4138  }
4139  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4140                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
4141    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4142  }
4143  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4144                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
4145    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4146  }
4147  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
4148                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
4149                             // imm6 = xxxxxx
4150
4151  // 128-bit vector types.
4152  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4153                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
4154    let Inst{21-19} = 0b001; // imm6 = 001xxx
4155  }
4156  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4157                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
4158    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4159  }
4160  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4161                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
4162    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4163  }
4164  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
4165                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
4166                             // imm6 = xxxxxx
4167}
4168multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4169                          string OpcodeStr> {
4170  // 64-bit vector types.
4171  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4172                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
4173    let Inst{21-19} = 0b001; // imm6 = 001xxx
4174  }
4175  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4176                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
4177    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4178  }
4179  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4180                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
4181    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4182  }
4183  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4184                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
4185                             // imm6 = xxxxxx
4186
4187  // 128-bit vector types.
4188  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4189                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
4190    let Inst{21-19} = 0b001; // imm6 = 001xxx
4191  }
4192  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4193                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
4194    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4195  }
4196  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4197                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
4198    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4199  }
4200  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4201                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
4202                             // imm6 = xxxxxx
4203}
4204
4205// Neon Shift Long operations,
4206//   element sizes of 8, 16, 32 bits:
4207multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4208                      bit op4, string OpcodeStr, string Dt,
4209                      SDPatternOperator OpNode> {
4210  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4211              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
4212    let Inst{21-19} = 0b001; // imm6 = 001xxx
4213  }
4214  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4215               OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
4216    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4217  }
4218  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4219               OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
4220    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4221  }
4222}
4223
4224// Neon Shift Narrow operations,
4225//   element sizes of 16, 32, 64 bits:
4226multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4227                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
4228                      SDPatternOperator OpNode> {
4229  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4230                    OpcodeStr, !strconcat(Dt, "16"),
4231                    v8i8, v8i16, shr_imm8, OpNode> {
4232    let Inst{21-19} = 0b001; // imm6 = 001xxx
4233  }
4234  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4235                     OpcodeStr, !strconcat(Dt, "32"),
4236                     v4i16, v4i32, shr_imm16, OpNode> {
4237    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4238  }
4239  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4240                     OpcodeStr, !strconcat(Dt, "64"),
4241                     v2i32, v2i64, shr_imm32, OpNode> {
4242    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4243  }
4244}
4245
4246//===----------------------------------------------------------------------===//
4247// Instruction Definitions.
4248//===----------------------------------------------------------------------===//
4249
4250// Vector Add Operations.
4251
4252//   VADD     : Vector Add (integer and floating-point)
4253defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
4254                         add, 1>;
4255def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
4256                     v2f32, v2f32, fadd, 1>;
4257def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
4258                     v4f32, v4f32, fadd, 1>;
4259def  VADDhd   : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
4260                     v4f16, v4f16, fadd, 1>,
4261                Requires<[HasNEON,HasFullFP16]>;
4262def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
4263                     v8f16, v8f16, fadd, 1>,
4264                Requires<[HasNEON,HasFullFP16]>;
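// The integer defm above (VADD) expands to VADDv8i8 through VADDv2i64, all
// matching the generic "add" node on the corresponding vector types; the f32
// and f16 element types get separate defs because they select on "fadd", with
// the f16 forms additionally requiring HasFullFP16.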
4265//   VADDL    : Vector Add Long (Q = D + D)
4266defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4267                            "vaddl", "s", add, sext, 1>;
4268defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4269                            "vaddl", "u", add, zext, 1>;
4270//   VADDW    : Vector Add Wide (Q = Q + D)
4271defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
4272defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
4273//   VHADD    : Vector Halving Add
4274defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
4275                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4276                           "vhadd", "s", int_arm_neon_vhadds, 1>;
4277defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
4278                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4279                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
4280//   VRHADD   : Vector Rounding Halving Add
4281defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
4282                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4283                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
4284defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
4285                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4286                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
4287//   VQADD    : Vector Saturating Add
4288defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
4289                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4290                            "vqadd", "s", saddsat, 1>;
4291defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
4292                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4293                            "vqadd", "u", uaddsat, 1>;
4294//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
4295defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
4296//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
4297defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
4298                            int_arm_neon_vraddhn, 1>;
4299
4300let Predicates = [HasNEON] in {
4301def : Pat<(v8i8  (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
4302          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
4303def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
4304          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
4305def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
4306          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
4307}
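
// VADDHN is defined with null_frag above, so it is selected through the
// explicit patterns in this block: an add of two Q registers followed by an
// unsigned shift right by half the source element width and a truncate folds
// into a single VADDHN.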
4308
4309// Vector Multiply Operations.
4310
4311//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
4312defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
4313                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
4314def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
4315                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
4316def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
4317                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
4318def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
4319                     v2f32, v2f32, fmul, 1>;
4320def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
4321                     v4f32, v4f32, fmul, 1>;
4322def  VMULhd   : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
4323                     v4f16, v4f16, fmul, 1>,
4324                Requires<[HasNEON,HasFullFP16]>;
4325def  VMULhq   : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
4326                     v8f16, v8f16, fmul, 1>,
4327                Requires<[HasNEON,HasFullFP16]>;
4328defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
4329def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
4330def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
4331                       v2f32, fmul>;
4332def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
4333                Requires<[HasNEON,HasFullFP16]>;
4334def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
4335                       v4f16, fmul>,
4336                Requires<[HasNEON,HasFullFP16]>;
4337
4338let Predicates = [HasNEON] in {
4339def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
4340                      (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
4341          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
4342                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
4343                                      (DSubReg_i16_reg imm:$lane))),
4344                              (SubReg_i16_lane imm:$lane)))>;
4345def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
4346                      (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
4347          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
4348                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
4349                                      (DSubReg_i32_reg imm:$lane))),
4350                              (SubReg_i32_lane imm:$lane)))>;
4351def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
4352                       (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
4353          (v4f32 (VMULslfq (v4f32 QPR:$src1),
4354                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
4355                                   (DSubReg_i32_reg imm:$lane))),
4356                           (SubReg_i32_lane imm:$lane)))>;
4357def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
4358                       (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
4359          (v8f16 (VMULslhq (v8f16 QPR:$src1),
4360                           (v4f16 (EXTRACT_SUBREG QPR:$src2,
4361                                   (DSubReg_i16_reg imm:$lane))),
4362                           (SubReg_i16_lane imm:$lane)))>;
4363
4364def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
4365          (VMULslfd DPR:$Rn,
4366            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4367            (i32 0))>;
4368def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
4369          (VMULslhd DPR:$Rn,
4370            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
4371            (i32 0))>;
4372def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
4373          (VMULslfq QPR:$Rn,
4374            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4375            (i32 0))>;
4376def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
4377          (VMULslhq QPR:$Rn,
4378            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
4379            (i32 0))>;
4380}
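
// The by-lane patterns above handle the case where the lane operand lives in a
// Q register: the Q register is narrowed to the D subregister that actually
// holds the lane (DSubReg_i16_reg / DSubReg_i32_reg) and the lane index is
// re-expressed relative to that subregister (SubReg_i16_lane /
// SubReg_i32_lane), since the by-element instructions take their scalar from a
// D register.  The ARMvdup patterns similarly insert the scalar FP register
// into a vector and then multiply by lane 0.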
4381
4382//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
4383defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
4384                          IIC_VMULi16Q, IIC_VMULi32Q,
4385                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
4386defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
4387                            IIC_VMULi16Q, IIC_VMULi32Q,
4388                            "vqdmulh", "s",  int_arm_neon_vqdmulh>;
4389
4390let Predicates = [HasNEON] in {
4391def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
4392                                       (v8i16 (ARMvduplane (v8i16 QPR:$src2),
4393                                                            imm:$lane)))),
4394          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
4395                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4396                                         (DSubReg_i16_reg imm:$lane))),
4397                                 (SubReg_i16_lane imm:$lane)))>;
4398def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
4399                                       (v4i32 (ARMvduplane (v4i32 QPR:$src2),
4400                                                            imm:$lane)))),
4401          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
4402                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4403                                         (DSubReg_i32_reg imm:$lane))),
4404                                 (SubReg_i32_lane imm:$lane)))>;
4405}
4406
4407//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
4408defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
4409                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
4410                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
4411defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
4412                              IIC_VMULi16Q, IIC_VMULi32Q,
4413                              "vqrdmulh", "s",  int_arm_neon_vqrdmulh>;
4414
4415let Predicates = [HasNEON] in {
4416def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
4417                                        (v8i16 (ARMvduplane (v8i16 QPR:$src2),
4418                                                             imm:$lane)))),
4419          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
4420                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
4421                                          (DSubReg_i16_reg imm:$lane))),
4422                                  (SubReg_i16_lane imm:$lane)))>;
4423def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
4424                                        (v4i32 (ARMvduplane (v4i32 QPR:$src2),
4425                                                             imm:$lane)))),
4426          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
4427                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
4428                                          (DSubReg_i32_reg imm:$lane))),
4429                                  (SubReg_i32_lane imm:$lane)))>;
4430}
4431
4432//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
4433let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
4434    DecoderNamespace = "NEONData" in {
4435  defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4436                           "vmull", "s", NEONvmulls, 1>;
4437  defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4438                           "vmull", "u", NEONvmullu, 1>;
4439  def  VMULLp8   :  N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
4440                            v8i16, v8i8, int_arm_neon_vmullp, 1>;
4441  def  VMULLp64  : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
4442                          "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
4443                    Requires<[HasV8, HasCrypto]>;
4444}
4445defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
4446defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
4447
4448//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
4449defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
4450                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
4451defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
4452                             "vqdmull", "s", int_arm_neon_vqdmull>;
4453
4454// Vector Multiply-Accumulate and Multiply-Subtract Operations.
4455
4456//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
4457defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4458                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
4459def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
4460                          v2f32, fmul_su, fadd_mlx>,
4461                Requires<[HasNEON, UseFPVMLx]>;
4462def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
4463                          v4f32, fmul_su, fadd_mlx>,
4464                Requires<[HasNEON, UseFPVMLx]>;
4465def  VMLAhd   : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
4466                          v4f16, fmul_su, fadd_mlx>,
4467                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4468def  VMLAhq   : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
4469                          v8f16, fmul_su, fadd_mlx>,
4470                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4471defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
4472                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
4473def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
4474                            v2f32, fmul_su, fadd_mlx>,
4475                Requires<[HasNEON, UseFPVMLx]>;
4476def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
4477                            v4f32, v2f32, fmul_su, fadd_mlx>,
4478                Requires<[HasNEON, UseFPVMLx]>;
4479def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
4480                            v4f16, fmul, fadd>,
4481                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4482def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
4483                            v8f16, v4f16, fmul, fadd>,
4484                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4485
4486let Predicates = [HasNEON] in {
4487def : Pat<(v8i16 (add (v8i16 QPR:$src1),
4488                  (mul (v8i16 QPR:$src2),
4489                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
4490          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4491                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
4492                                      (DSubReg_i16_reg imm:$lane))),
4493                              (SubReg_i16_lane imm:$lane)))>;
4494
4495def : Pat<(v4i32 (add (v4i32 QPR:$src1),
4496                  (mul (v4i32 QPR:$src2),
4497                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
4498          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4499                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
4500                                      (DSubReg_i32_reg imm:$lane))),
4501                              (SubReg_i32_lane imm:$lane)))>;
4502}
4503
4504def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
4505                  (fmul_su (v4f32 QPR:$src2),
4506                        (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
4507          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
4508                           (v4f32 QPR:$src2),
4509                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
4510                                   (DSubReg_i32_reg imm:$lane))),
4511                           (SubReg_i32_lane imm:$lane)))>,
4512          Requires<[HasNEON, UseFPVMLx]>;
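// Illustrative only: the patterns above fold a duplicated lane into the
// by-scalar form, e.g. the DAG add(Q1, mul(Q2, dup(Q3[lane]))) is selected as
//   vmla.f32 q0, q1, d6[1]   @ q0[i] += q1[i] * d6[1]  (registers arbitrary)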
4513
4514//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
4515defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4516                              "vmlal", "s", NEONvmulls, add>;
4517defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4518                              "vmlal", "u", NEONvmullu, add>;
4519
4520defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
4521defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
4522
4523let Predicates = [HasNEON, HasV8_1a] in {
4524  // v8.1a Neon Rounding Doubling Multiply-Op vector operations.
4525  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
4526  //            Returning High Half (Vd = sat(Vd + vqrdmulh(Vn, Vm)))
4527  defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
4528                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4529                             null_frag>;
4530  def : Pat<(v4i16 (saddsat
4531                     (v4i16 DPR:$src1),
4532                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4533                                                   (v4i16 DPR:$Vm))))),
4534            (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4535  def : Pat<(v2i32 (saddsat
4536                     (v2i32 DPR:$src1),
4537                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4538                                                   (v2i32 DPR:$Vm))))),
4539            (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4540  def : Pat<(v8i16 (saddsat
4541                     (v8i16 QPR:$src1),
4542                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4543                                                   (v8i16 QPR:$Vm))))),
4544            (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4545  def : Pat<(v4i32 (saddsat
4546                     (v4i32 QPR:$src1),
4547                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4548                                                   (v4i32 QPR:$Vm))))),
4549            (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
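  // Illustrative only: the patterns above merge a vqrdmulh followed by a
  // saturating add into a single accumulating instruction, e.g.
  //   vqrdmlah.s16 d0, d1, d2   @ d0[i] = sat(d0[i] + vqrdmulh(d1[i], d2[i]))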
4550
4551  defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
4552                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4553                                  null_frag>;
4554  def : Pat<(v4i16 (saddsat
4555                     (v4i16 DPR:$src1),
4556                     (v4i16 (int_arm_neon_vqrdmulh
4557                              (v4i16 DPR:$Vn),
4558                              (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4559                                                   imm:$lane)))))),
4560            (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
4561                                    imm:$lane))>;
4562  def : Pat<(v2i32 (saddsat
4563                     (v2i32 DPR:$src1),
4564                     (v2i32 (int_arm_neon_vqrdmulh
4565                              (v2i32 DPR:$Vn),
4566                              (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4567                                                   imm:$lane)))))),
4568            (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4569                                    imm:$lane))>;
4570  def : Pat<(v8i16 (saddsat
4571                     (v8i16 QPR:$src1),
4572                     (v8i16 (int_arm_neon_vqrdmulh
4573                              (v8i16 QPR:$src2),
4574                              (v8i16 (ARMvduplane (v8i16 QPR:$src3),
4575                                                   imm:$lane)))))),
4576            (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
4577                                    (v8i16 QPR:$src2),
4578                                    (v4i16 (EXTRACT_SUBREG
4579                                             QPR:$src3,
4580                                             (DSubReg_i16_reg imm:$lane))),
4581                                    (SubReg_i16_lane imm:$lane)))>;
4582  def : Pat<(v4i32 (saddsat
4583                     (v4i32 QPR:$src1),
4584                     (v4i32 (int_arm_neon_vqrdmulh
4585                              (v4i32 QPR:$src2),
4586                              (v4i32 (ARMvduplane (v4i32 QPR:$src3),
4587                                                   imm:$lane)))))),
4588            (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
4589                                    (v4i32 QPR:$src2),
4590                                    (v2i32 (EXTRACT_SUBREG
4591                                             QPR:$src3,
4592                                             (DSubReg_i32_reg imm:$lane))),
4593                                    (SubReg_i32_lane imm:$lane)))>;
4594
4595  //   VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
4596  //              Returning High Half (Vd = sat(Vd - vqrdmulh(Vn, Vm)))
4597  defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
4598                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4599                             null_frag>;
4600  def : Pat<(v4i16 (ssubsat
4601                     (v4i16 DPR:$src1),
4602                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4603                                                   (v4i16 DPR:$Vm))))),
4604            (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4605  def : Pat<(v2i32 (ssubsat
4606                     (v2i32 DPR:$src1),
4607                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4608                                                   (v2i32 DPR:$Vm))))),
4609            (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4610  def : Pat<(v8i16 (ssubsat
4611                     (v8i16 QPR:$src1),
4612                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4613                                                   (v8i16 QPR:$Vm))))),
4614            (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4615  def : Pat<(v4i32 (ssubsat
4616                     (v4i32 QPR:$src1),
4617                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4618                                                   (v4i32 QPR:$Vm))))),
4619            (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4620
4621  defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
4622                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4623                                  null_frag>;
4624  def : Pat<(v4i16 (ssubsat
4625                     (v4i16 DPR:$src1),
4626                     (v4i16 (int_arm_neon_vqrdmulh
4627                              (v4i16 DPR:$Vn),
4628                              (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4629                                                   imm:$lane)))))),
4630            (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
4631  def : Pat<(v2i32 (ssubsat
4632                     (v2i32 DPR:$src1),
4633                     (v2i32 (int_arm_neon_vqrdmulh
4634                              (v2i32 DPR:$Vn),
4635                              (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4636                                                   imm:$lane)))))),
4637            (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4638                                    imm:$lane))>;
4639  def : Pat<(v8i16 (ssubsat
4640                     (v8i16 QPR:$src1),
4641                     (v8i16 (int_arm_neon_vqrdmulh
4642                              (v8i16 QPR:$src2),
4643                              (v8i16 (ARMvduplane (v8i16 QPR:$src3),
4644                                                   imm:$lane)))))),
4645            (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
4646                                    (v8i16 QPR:$src2),
4647                                    (v4i16 (EXTRACT_SUBREG
4648                                             QPR:$src3,
4649                                             (DSubReg_i16_reg imm:$lane))),
4650                                    (SubReg_i16_lane imm:$lane)))>;
4651  def : Pat<(v4i32 (ssubsat
4652                     (v4i32 QPR:$src1),
4653                     (v4i32 (int_arm_neon_vqrdmulh
4654                              (v4i32 QPR:$src2),
4655                              (v4i32 (ARMvduplane (v4i32 QPR:$src3),
4656                                                    imm:$lane)))))),
4657            (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
4658                                    (v4i32 QPR:$src2),
4659                                    (v2i32 (EXTRACT_SUBREG
4660                                             QPR:$src3,
4661                                             (DSubReg_i32_reg imm:$lane))),
4662                                    (SubReg_i32_lane imm:$lane)))>;
4663}
4664//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
4665defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4666                            "vqdmlal", "s", null_frag>;
4667defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
4668
4669let Predicates = [HasNEON] in {
4670def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
4671                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4672                                                  (v4i16 DPR:$Vm))))),
4673          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4674def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
4675                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4676                                                  (v2i32 DPR:$Vm))))),
4677          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4678def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
4679                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4680                                (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4681                                                     imm:$lane)))))),
4682          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4683def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
4684                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4685                                (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4686                                                     imm:$lane)))))),
4687          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4688}
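// Illustrative only: the patterns above select a saturating add of a vqdmull
// result as the accumulating form, e.g.
//   vqdmlal.s16 q0, d1, d2   @ q0[i] = sat(q0[i] + sat(2 * d1[i] * d2[i]))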
4689
4690//   VMLS     : Vector Multiply Subtract (integer and floating-point)
4691defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4692                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4693def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
4694                          v2f32, fmul_su, fsub_mlx>,
4695                Requires<[HasNEON, UseFPVMLx]>;
4696def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
4697                          v4f32, fmul_su, fsub_mlx>,
4698                Requires<[HasNEON, UseFPVMLx]>;
4699def  VMLShd   : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
4700                          v4f16, fmul, fsub>,
4701                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4702def  VMLShq   : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
4703                          v8f16, fmul, fsub>,
4704                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4705defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
4706                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4707def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
4708                            v2f32, fmul_su, fsub_mlx>,
4709                Requires<[HasNEON, UseFPVMLx]>;
4710def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
4711                            v4f32, v2f32, fmul_su, fsub_mlx>,
4712                Requires<[HasNEON, UseFPVMLx]>;
4713def  VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
4714                            v4f16, fmul, fsub>,
4715                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4716def  VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
4717                            v8f16, v4f16, fmul, fsub>,
4718                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4719
4720let Predicates = [HasNEON] in {
4721def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
4722                  (mul (v8i16 QPR:$src2),
4723                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
4724          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4725                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
4726                                      (DSubReg_i16_reg imm:$lane))),
4727                              (SubReg_i16_lane imm:$lane)))>;
4728
4729def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
4730                  (mul (v4i32 QPR:$src2),
4731                     (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
4732          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4733                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
4734                                      (DSubReg_i32_reg imm:$lane))),
4735                              (SubReg_i32_lane imm:$lane)))>;
4736}
4737
4738def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
4739                  (fmul_su (v4f32 QPR:$src2),
4740                        (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
4741          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
4742                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
4743                                   (DSubReg_i32_reg imm:$lane))),
4744                           (SubReg_i32_lane imm:$lane)))>,
4745          Requires<[HasNEON, UseFPVMLx]>;
4746
4747//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
4748defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4749                              "vmlsl", "s", NEONvmulls, sub>;
4750defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4751                              "vmlsl", "u", NEONvmullu, sub>;
4752
4753defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
4754defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
4755
4756//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
4757defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
4758                            "vqdmlsl", "s", null_frag>;
4759defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
4760
4761let Predicates = [HasNEON] in {
4762def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
4763                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4764                                                  (v4i16 DPR:$Vm))))),
4765          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4766def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
4767                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4768                                                  (v2i32 DPR:$Vm))))),
4769          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4770def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
4771                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4772                                (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4773                                                     imm:$lane)))))),
4774          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4775def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
4776                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4777                                (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4778                                                     imm:$lane)))))),
4779          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4780}
4781
4782// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
4783def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
4784                          v2f32, fmul_su, fadd_mlx>,
4785                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4786
4787def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
4788                          v4f32, fmul_su, fadd_mlx>,
4789                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4790def  VFMAhd   : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
4791                          v4f16, fmul, fadd>,
4792                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4793
4794def  VFMAhq   : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
4795                          v8f16, fmul, fadd>,
4796                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4797
4798//   Fused Vector Multiply Subtract (floating-point)
4799def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
4800                          v2f32, fmul_su, fsub_mlx>,
4801                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4802def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
4803                          v4f32, fmul_su, fsub_mlx>,
4804                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4805def  VFMShd   : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
4806                          v4f16, fmul, fsub>,
4807                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4808def  VFMShq   : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
4809                          v8f16, fmul, fsub>,
4810                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4811
4812// Match @llvm.fma.* intrinsics
4813def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4814          (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4815          Requires<[HasNEON,HasFullFP16]>;
4816def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4817          (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4818          Requires<[HasNEON,HasFullFP16]>;
4819def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4820          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4821          Requires<[HasNEON,HasVFP4]>;
4822def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4823          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4824          Requires<[HasNEON,HasVFP4]>;
4825def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
4826          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4827      Requires<[HasNEON,HasVFP4]>;
4828def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
4829          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4830      Requires<[HasNEON,HasVFP4]>;
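// Illustrative only: the @llvm.fma.* patterns above select the fused forms,
// e.g. vfma.f32 q0, q1, q2 computes q0[i] + q1[i] * q2[i] with a single
// rounding step; an fneg on the first multiplicand selects vfms.f32 instead.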
4831
4832// ARMv8.2a dot product instructions.
4833// We put them in the VFPV8 decoder namespace because the ARM and Thumb
4834// encodings are the same and thus no further bit twiddling is necessary
4835// in the disassembler.
4836class VDOT<bit op6, bit op4, RegisterClass RegTy, string Asm, string AsmTy,
4837           ValueType AccumTy, ValueType InputTy,
4838           SDPatternOperator OpNode> :
4839      N3Vnp<0b11000, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
4840            (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
4841            Asm, AsmTy,
4842            [(set (AccumTy RegTy:$dst),
4843                  (OpNode (AccumTy RegTy:$Vd),
4844                          (InputTy RegTy:$Vn),
4845                          (InputTy RegTy:$Vm)))]> {
4846  let Predicates = [HasDotProd];
4847  let DecoderNamespace = "VFPV8";
4848  let Constraints = "$dst = $Vd";
4849}
4850
4851def VUDOTD : VDOT<0, 1, DPR, "vudot", "u8", v2i32, v8i8,  int_arm_neon_udot>;
4852def VSDOTD : VDOT<0, 0, DPR, "vsdot", "s8", v2i32, v8i8,  int_arm_neon_sdot>;
4853def VUDOTQ : VDOT<1, 1, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
4854def VSDOTQ : VDOT<1, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;
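// Illustrative only: vudot.u8 d0, d1, d2 accumulates four byte products into
// each 32-bit lane, i.e. d0.s[i] += d1.b[4i+0]*d2.b[4i+0] + ... +
// d1.b[4i+3]*d2.b[4i+3] (unsigned); vsdot.s8 is the signed form.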
4855
4856// Indexed dot product instructions:
4857multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
4858           ValueType AccumType, ValueType InputType, SDPatternOperator OpNode,
4859           dag RHS> {
4860  def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
4861                 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
4862                 N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
4863    bit lane;
4864    let Inst{5} = lane;
4865    let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
4866    let Constraints = "$dst = $Vd";
4867    let Predicates = [HasDotProd];
4868    let DecoderNamespace = "VFPV8";
4869  }
4870
4871  def : Pat<
4872    (AccumType (OpNode (AccumType Ty:$Vd),
4873                       (InputType Ty:$Vn),
4874                       (InputType (bitconvert (AccumType
4875                                  (ARMvduplane (AccumType Ty:$Vm),
4876                                                 VectorIndex32:$lane)))))),
4877    (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
4878}
4879
4880defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
4881                    int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
4882defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
4883                    int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;
4884defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
4885                    int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4886defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
4887                    int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
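// Illustrative only: the indexed forms reuse one 32-bit group of the second
// source for every lane, e.g. vudot.u8 q0, q1, d2[1] multiplies each group of
// four bytes in q1 by bytes 4..7 of d2.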
4888
4889
4890// ARMv8.3 complex operations
4891class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
4892                            InstrItinClass itin, dag oops, dag iops,
4893                            string opc, string dt, list<dag> pattern>
4894  : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
4895           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
4896  bits<2> rot;
4897  let Inst{24-23} = rot;
4898}
4899
4900class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
4901                           InstrItinClass itin, dag oops, dag iops, string opc,
4902                            string dt, list<dag> pattern>
4903  : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
4904           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
4905  bits<1> rot;
4906  let Inst{24} = rot;
4907}
4908
4909class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
4910                                  dag oops, dag iops, string opc, string dt,
4911                                  list<dag> pattern>
4912  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4913               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4914  bits<2> rot;
4915  bit lane;
4916
4917  let Inst{21-20} = rot;
4918  let Inst{5} = lane;
4919}
4920
4921class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
4922                            dag oops, dag iops, string opc, string dt,
4923                            list<dag> pattern>
4924  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4925               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4926  bits<2> rot;
4927  bit lane;
4928
4929  let Inst{21-20} = rot;
4930  let Inst{5} = Vm{4};
4931  // This is needed because the lane operand does not have any bits in the
4932  // encoding (it only has one possible value), so we need to manually set it
4933  // to its default value.
4934  let DecoderMethod = "DecodeNEONComplexLane64Instruction";
4935}
4936
4937multiclass N3VCP8ComplexTied<bit op21, bit op4,
4938                       string OpcodeStr, SDPatternOperator Op> {
4939  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4940  def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
4941              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4942              OpcodeStr, "f16", []>;
4943  def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
4944              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4945              OpcodeStr, "f16", []>;
4946  }
4947  let Predicates = [HasNEON,HasV8_3a] in {
4948  def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
4949              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4950              OpcodeStr, "f32", []>;
4951  def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
4952              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4953              OpcodeStr, "f32", []>;
4954  }
4955}
4956
4957multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
4958                       string OpcodeStr, SDPatternOperator Op> {
4959  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4960  def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
4961              (outs DPR:$Vd),
4962              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4963              OpcodeStr, "f16", []>;
4964  def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
4965              (outs QPR:$Vd),
4966              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4967              OpcodeStr, "f16", []>;
4968  }
4969  let Predicates = [HasNEON,HasV8_3a] in {
4970  def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
4971              (outs DPR:$Vd),
4972              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4973              OpcodeStr, "f32", []>;
4974  def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
4975              (outs QPR:$Vd),
4976              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4977              OpcodeStr, "f32", []>;
4978  }
4979}
4980
4981// These instructions index by pairs of lanes, so the VectorIndexes are twice
4982// as wide as the data types.
4983multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
4984                                 SDPatternOperator Op> {
4985  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4986  def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
4987                      (outs DPR:$Vd),
4988                      (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4989                      VectorIndex32:$lane, complexrotateop:$rot),
4990                      OpcodeStr, "f16", []>;
4991  def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
4992                      (outs QPR:$Vd),
4993                      (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
4994                      VectorIndex32:$lane, complexrotateop:$rot),
4995                      OpcodeStr, "f16", []>;
4996  }
4997  let Predicates = [HasNEON,HasV8_3a] in {
4998  def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
4999                      (outs DPR:$Vd),
5000                      (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
5001                      complexrotateop:$rot),
5002                      OpcodeStr, "f32", []>;
5003  def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
5004                      (outs QPR:$Vd),
5005                      (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
5006                      complexrotateop:$rot),
5007                      OpcodeStr, "f32", []>;
5008  }
5009}
5010
5011defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
5012defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
5013defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
5014
5015let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
5016  def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
5017            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
5018  def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
5019            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
5020  def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
5021            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
5022  def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
5023            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
5024}
5025let Predicates = [HasNEON,HasV8_3a] in {
5026  def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
5027            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>;
5028  def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
5029            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>;
5030  def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
5031            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>;
5032  def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
5033            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>;
5034}
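// Illustrative only: the patterns above map the rot90/rot270 intrinsics onto
// the 1-bit rotation operand (0 corresponds to #90, 1 to #270), selecting e.g.
//   vcadd.f32 d0, d1, d2, #90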
5035
5036// Vector Subtract Operations.
5037
5038//   VSUB     : Vector Subtract (integer and floating-point)
5039defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
5040                         "vsub", "i", sub, 0>;
5041def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
5042                     v2f32, v2f32, fsub, 0>;
5043def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
5044                     v4f32, v4f32, fsub, 0>;
5045def  VSUBhd   : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
5046                     v4f16, v4f16, fsub, 0>,
5047                Requires<[HasNEON,HasFullFP16]>;
5048def  VSUBhq   : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
5049                     v8f16, v8f16, fsub, 0>,
5050                Requires<[HasNEON,HasFullFP16]>;
5051//   VSUBL    : Vector Subtract Long (Q = D - D)
5052defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
5053                            "vsubl", "s", sub, sext, 0>;
5054defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
5055                            "vsubl", "u", sub, zext, 0>;
5056//   VSUBW    : Vector Subtract Wide (Q = Q - D)
5057defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
5058defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
5059//   VHSUB    : Vector Halving Subtract
5060defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
5061                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5062                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
5063defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
5064                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5065                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
5066//   VQSUB    : Vector Saturating Subtract
5067defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
5068                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5069                            "vqsub", "s", ssubsat, 0>;
5070defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
5071                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5072                            "vqsub", "u", usubsat, 0>;
5073//   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
5074defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
5075//   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
5076defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
5077                            int_arm_neon_vrsubhn, 0>;
5078
5079let Predicates = [HasNEON] in {
5080def : Pat<(v8i8  (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
5081          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
5082def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
5083          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
5084def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
5085          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
5086}
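// Illustrative only: the patterns above recognise a subtract whose result is
// shifted down by half the element width and truncated, e.g.
//   vsubhn.i16 d0, q1, q2   @ d0[i] = (q1[i] - q2[i]) >> 8, narrowed to 8 bits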
5087
5088// Vector Comparisons.
5089
5090//   VCEQ     : Vector Compare Equal
5091defm VCEQ     : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5092                            IIC_VSUBi4Q, "vceq", "i", ARMCCeq, 1>;
5093def  VCEQfd   : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
5094                         ARMCCeq, 1>;
5095def  VCEQfq   : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
5096                         ARMCCeq, 1>;
5097def  VCEQhd   : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
5098                         ARMCCeq, 1>,
5099                Requires<[HasNEON, HasFullFP16]>;
5100def  VCEQhq   : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
5101                         ARMCCeq, 1>,
5102                Requires<[HasNEON, HasFullFP16]>;
5103
5104let TwoOperandAliasConstraint = "$Vm = $Vd" in
5105defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
5106                            "$Vd, $Vm, #0", ARMCCeq>;
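// Illustrative only: the comparisons produce per-element masks of all ones or
// all zeros, e.g. vceq.i32 d0, d1, d2 sets d0[i] to 0xffffffff when
// d1[i] == d2[i] and to 0 otherwise; the "#0" forms compare against zero.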
5107
5108//   VCGE     : Vector Compare Greater Than or Equal
5109defm VCGEs    : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5110                            IIC_VSUBi4Q, "vcge", "s", ARMCCge, 0>;
5111defm VCGEu    : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5112                            IIC_VSUBi4Q, "vcge", "u", ARMCChs, 0>;
5113def  VCGEfd   : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
5114                         ARMCCge, 0>;
5115def  VCGEfq   : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
5116                         ARMCCge, 0>;
5117def  VCGEhd   : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
5118                         ARMCCge, 0>,
5119                Requires<[HasNEON, HasFullFP16]>;
5120def  VCGEhq   : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
5121                         ARMCCge, 0>,
5122                Requires<[HasNEON, HasFullFP16]>;
5123
5124let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5125defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
5126                            "$Vd, $Vm, #0", ARMCCge>;
5127defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
5128                            "$Vd, $Vm, #0", ARMCCle>;
5129}
5130
5131//   VCGT     : Vector Compare Greater Than
5132defm VCGTs    : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5133                            IIC_VSUBi4Q, "vcgt", "s", ARMCCgt, 0>;
5134defm VCGTu    : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5135                            IIC_VSUBi4Q, "vcgt", "u", ARMCChi, 0>;
5136def  VCGTfd   : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
5137                         ARMCCgt, 0>;
5138def  VCGTfq   : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
5139                         ARMCCgt, 0>;
5140def  VCGThd   : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
5141                         ARMCCgt, 0>,
5142                Requires<[HasNEON, HasFullFP16]>;
5143def  VCGThq   : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
5144                         ARMCCgt, 0>,
5145                Requires<[HasNEON, HasFullFP16]>;
5146
5147let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5148defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
5149                            "$Vd, $Vm, #0", ARMCCgt>;
5150defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
5151                            "$Vd, $Vm, #0", ARMCClt>;
5152}
5153
5154//   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
5155def  VACGEfd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5156                        "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
5157def  VACGEfq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5158                        "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
5159def  VACGEhd   : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5160                        "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
5161                 Requires<[HasNEON, HasFullFP16]>;
5162def  VACGEhq   : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5163                        "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
5164                 Requires<[HasNEON, HasFullFP16]>;
5165//   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
5166def  VACGTfd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5167                        "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
5168def  VACGTfq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5169                        "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
5170def  VACGThd   : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5171                        "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
5172                 Requires<[HasNEON, HasFullFP16]>;
5173def  VACGThq   : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5174                        "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
5175                 Requires<[HasNEON, HasFullFP16]>;
5176//   VTST     : Vector Test Bits
5177defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
5178                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
5179
5180def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5181                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5182def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5183                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5184def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5185                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5186def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5187                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5188let Predicates = [HasNEON, HasFullFP16] in {
5189def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5190                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5191def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5192                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5193def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5194                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5195def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5196                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5197}
5198
5199// +fp16fml Floating Point Multiplication Variants
5200let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {
5201
5202class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
5203                RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
5204  : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
5205           asm, "f16", "$Vd, $Vn, $Vm", "", []>;
5206
5207class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
5208                RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
5209  : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
5210           asm, "f16", "$Vd, $Vn, $Vm", "", []>;
5211
5212// Vd, Vs, Vs[0-15], Idx[0-1]
5213class VFMD<string opc, string type, bits<2> S>
5214  : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
5215               (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
5216               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
5217  bit idx;
5218  let Inst{3} = idx;
5219  let Inst{19-16} = Vn{4-1};
5220  let Inst{7}     = Vn{0};
5221  let Inst{5}     = Vm{0};
5222  let Inst{2-0}   = Vm{3-1};
5223}
5224
5225// Vq, Vd, Vd[0-7], Idx[0-3]
5226class VFMQ<string opc, string type, bits<2> S>
5227  : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
5228               (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
5229               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
5230  bits<2> idx;
5231  let Inst{5} = idx{1};
5232  let Inst{3} = idx{0};
5233}
5234
5235let hasNoSchedulingInfo = 1 in {
5236//                                                op1   op2   op3
5237def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
5238def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
5239def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
5240def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
5241def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
5242def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
5243def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
5244def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
5245}
5246} // HasNEON, HasFP16FML
5247
5248
5249def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5250                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5251def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5252                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5253def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5254                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5255def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5256                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5257let Predicates = [HasNEON, HasFullFP16] in {
5258def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5259                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5260def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5261                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5262def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5263                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5264def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5265                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5266}
5267
5268// Vector Bitwise Operations.
5269
5270def vnotd : PatFrag<(ops node:$in),
5271                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
5272def vnotq : PatFrag<(ops node:$in),
5273                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
5274
5275
5276//   VAND     : Vector Bitwise AND
5277def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
5278                      v2i32, v2i32, and, 1>;
5279def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
5280                      v4i32, v4i32, and, 1>;
5281
5282//   VEOR     : Vector Bitwise Exclusive OR
5283def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
5284                      v2i32, v2i32, xor, 1>;
5285def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
5286                      v4i32, v4i32, xor, 1>;
5287
5288//   VORR     : Vector Bitwise OR
5289def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
5290                      v2i32, v2i32, or, 1>;
5291def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
5292                      v4i32, v4i32, or, 1>;
5293
5294def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
5295                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5296                          IIC_VMOVImm,
5297                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5298                          [(set DPR:$Vd,
5299                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
5300  let Inst{9} = SIMM{9};
5301}
5302
5303def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
5304                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5305                          IIC_VMOVImm,
5306                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5307                          [(set DPR:$Vd,
5308                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
5309  let Inst{10-9} = SIMM{10-9};
5310}
5311
5312def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
5313                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5314                          IIC_VMOVImm,
5315                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5316                          [(set QPR:$Vd,
5317                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
5318  let Inst{9} = SIMM{9};
5319}
5320
5321def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
5322                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5323                          IIC_VMOVImm,
5324                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5325                          [(set QPR:$Vd,
5326                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
5327  let Inst{10-9} = SIMM{10-9};
5328}
5329
5330
5331//   VBIC     : Vector Bitwise Bit Clear (AND NOT)
5332let TwoOperandAliasConstraint = "$Vn = $Vd" in {
5333def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5334                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5335                     "vbic", "$Vd, $Vn, $Vm", "",
5336                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
5337                                                 (vnotd DPR:$Vm))))]>;
5338def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5339                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5340                     "vbic", "$Vd, $Vn, $Vm", "",
5341                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
5342                                                 (vnotq QPR:$Vm))))]>;
5343}
5344
5345def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
5346                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5347                          IIC_VMOVImm,
5348                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5349                          [(set DPR:$Vd,
5350                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
5351  let Inst{9} = SIMM{9};
5352}
5353
5354def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
5355                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5356                          IIC_VMOVImm,
5357                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5358                          [(set DPR:$Vd,
5359                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
5360  let Inst{10-9} = SIMM{10-9};
5361}
5362
5363def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
5364                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5365                          IIC_VMOVImm,
5366                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5367                          [(set QPR:$Vd,
5368                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
5369  let Inst{9} = SIMM{9};
5370}
5371
5372def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
5373                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5374                          IIC_VMOVImm,
5375                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5376                          [(set QPR:$Vd,
5377                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
5378  let Inst{10-9} = SIMM{10-9};
5379}
5380
5381//   VORN     : Vector Bitwise OR NOT
5382def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
5383                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5384                     "vorn", "$Vd, $Vn, $Vm", "",
5385                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
5386                                                (vnotd DPR:$Vm))))]>;
5387def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
5388                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5389                     "vorn", "$Vd, $Vn, $Vm", "",
5390                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
5391                                                (vnotq QPR:$Vm))))]>;
5392
5393//   VMVN     : Vector Bitwise NOT (Immediate)
5394
5395let isReMaterializable = 1 in {
5396
5397def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
5398                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5399                         "vmvn", "i16", "$Vd, $SIMM", "",
5400                         [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
5401  let Inst{9} = SIMM{9};
5402}
5403
5404def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
5405                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5406                         "vmvn", "i16", "$Vd, $SIMM", "",
5407                         [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
5408  let Inst{9} = SIMM{9};
5409}
5410
5411def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
5412                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5413                         "vmvn", "i32", "$Vd, $SIMM", "",
5414                         [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
5415  let Inst{11-8} = SIMM{11-8};
5416}
5417
5418def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
5419                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5420                         "vmvn", "i32", "$Vd, $SIMM", "",
5421                         [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
5422  let Inst{11-8} = SIMM{11-8};
5423}
5424}
5425
5426//   VMVN     : Vector Bitwise NOT
5427def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
5428                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
5429                     "vmvn", "$Vd, $Vm", "",
5430                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
5431def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
5432                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
5433                     "vmvn", "$Vd, $Vm", "",
5434                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
5435let Predicates = [HasNEON] in {
5436def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
5437def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
5438}
5439
5440//   VBSL     : Vector Bitwise Select
5441def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5442                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5443                     N3RegFrm, IIC_VCNTiD,
5444                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5445                     [(set DPR:$Vd,
5446                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
5447let Predicates = [HasNEON] in {
5448def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
5449                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
5450          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5451def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
5452                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
5453          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5454def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
5455                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
5456          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5457def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
5458                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
5459          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5460def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
5461                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
5462          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5463
5464def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
5465                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
5466          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5467
5468def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
5469                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
5470          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5471}
5472
5473def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5474                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5475                     N3RegFrm, IIC_VCNTiQ,
5476                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5477                     [(set QPR:$Vd,
5478                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
5479
5480let Predicates = [HasNEON] in {
5481def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
5482                                   (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
5483          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5484def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
5485                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
5486          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5487def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
5488                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
5489          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5490def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
5491                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
5492          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5493def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
5494                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
5495          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5496
5497def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
5498                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
5499          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5500def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
5501                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
5502          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5503}
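// Illustrative only: the or/and patterns above express the bitwise-select
// semantics of vbsl, where the tied first operand acts as the mask:
//   Vd = (Vn & Vd_old) | (Vm & ~Vd_old)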
5504
5505//   VBIF     : Vector Bitwise Insert if False
5506//              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
5507// FIXME: This instruction's encoding MAY NOT BE correct.
5508def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
5509                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5510                     N3RegFrm, IIC_VBINiD,
5511                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5512                     []>;
5513def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
5514                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5515                     N3RegFrm, IIC_VBINiQ,
5516                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5517                     []>;
5518
5519//   VBIT     : Vector Bitwise Insert if True
5520//              like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
5521// FIXME: This instruction's encoding MAY NOT BE correct.
5522def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
5523                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5524                     N3RegFrm, IIC_VBINiD,
5525                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5526                     []>;
5527def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
5528                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5529                     N3RegFrm, IIC_VBINiQ,
5530                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5531                     []>;
5532
5533// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
5534// for equivalent operations with different register constraints; it just
5535// inserts copies.
5536
5537// Vector Absolute Differences.
5538
5539//   VABD     : Vector Absolute Difference
5540defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
5541                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5542                           "vabd", "s", int_arm_neon_vabds, 1>;
5543defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
5544                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5545                           "vabd", "u", int_arm_neon_vabdu, 1>;
5546def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
5547                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
5548def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
5549                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
5550def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
5551                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
5552                Requires<[HasNEON, HasFullFP16]>;
5553def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
5554                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
5555                Requires<[HasNEON, HasFullFP16]>;
5556
5557//   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
5558defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
5559                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
5560defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
5561                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
5562
5563let Predicates = [HasNEON] in {
5564def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
5565          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
5566def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
5567          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
5568}
5569
5570// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
5571// shift/xor pattern for ABS.
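// That expansion computes, for d = zext($opA) - zext($opB) and s = d >>s 63
// (the sign mask), |d| = (d + s) ^ s, which is what the pattern below
// recognizes.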
5572
5573def abd_shr :
5574    PatFrag<(ops node:$in1, node:$in2, node:$shift),
5575            (ARMvshrsImm (sub (zext node:$in1),
5576                            (zext node:$in2)), (i32 $shift))>;
5577
5578let Predicates = [HasNEON] in {
5579def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
5580               (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
5581                                                   (zext (v2i32 DPR:$opB))),
5582                                         (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
5583          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
5584}
5585
5586//   VABA     : Vector Absolute Difference and Accumulate
5587defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5588                             "vaba", "s", int_arm_neon_vabds, add>;
5589defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5590                             "vaba", "u", int_arm_neon_vabdu, add>;
5591
5592//   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
5593defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
5594                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
5595defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
5596                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;
5597
5598// Vector Maximum and Minimum.
5599
5600//   VMAX     : Vector Maximum
5601defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
5602                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5603                           "vmax", "s", smax, 1>;
5604defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
5605                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5606                           "vmax", "u", umax, 1>;
5607def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
5608                        "vmax", "f32",
5609                        v2f32, v2f32, fmaximum, 1>;
5610def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5611                        "vmax", "f32",
5612                        v4f32, v4f32, fmaximum, 1>;
5613def  VMAXhd   : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
5614                        "vmax", "f16",
5615                        v4f16, v4f16, fmaximum, 1>,
5616                Requires<[HasNEON, HasFullFP16]>;
5617def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5618                        "vmax", "f16",
5619                        v8f16, v8f16, fmaximum, 1>,
5620                Requires<[HasNEON, HasFullFP16]>;
5621
5622// VMAXNM
5623let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
5624  def NEON_VMAXNMNDf  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
5625                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
5626                                  v2f32, v2f32, fmaxnum, 1>,
5627                                  Requires<[HasV8, HasNEON]>;
5628  def NEON_VMAXNMNQf  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
5629                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
5630                                  v4f32, v4f32, fmaxnum, 1>,
5631                                  Requires<[HasV8, HasNEON]>;
5632  def NEON_VMAXNMNDh  : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
5633                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
5634                                  v4f16, v4f16, fmaxnum, 1>,
5635                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5636  def NEON_VMAXNMNQh  : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
5637                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
5638                                  v8f16, v8f16, fmaxnum, 1>,
5639                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5640}
5641
5642//   VMIN     : Vector Minimum
5643defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
5644                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5645                           "vmin", "s", smin, 1>;
5646defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
5647                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5648                           "vmin", "u", umin, 1>;
5649def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
5650                        "vmin", "f32",
5651                        v2f32, v2f32, fminimum, 1>;
5652def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5653                        "vmin", "f32",
5654                        v4f32, v4f32, fminimum, 1>;
5655def  VMINhd   : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
5656                        "vmin", "f16",
5657                        v4f16, v4f16, fminimum, 1>,
5658                Requires<[HasNEON, HasFullFP16]>;
5659def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5660                        "vmin", "f16",
5661                        v8f16, v8f16, fminimum, 1>,
5662                Requires<[HasNEON, HasFullFP16]>;
5663
5664// VMINNM
5665let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
5666  def NEON_VMINNMNDf  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
5667                                  N3RegFrm, NoItinerary, "vminnm", "f32",
5668                                  v2f32, v2f32, fminnum, 1>,
5669                                  Requires<[HasV8, HasNEON]>;
5670  def NEON_VMINNMNQf  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
5671                                  N3RegFrm, NoItinerary, "vminnm", "f32",
5672                                  v4f32, v4f32, fminnum, 1>,
5673                                  Requires<[HasV8, HasNEON]>;
5674  def NEON_VMINNMNDh  : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
5675                                  N3RegFrm, NoItinerary, "vminnm", "f16",
5676                                  v4f16, v4f16, fminnum, 1>,
5677                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5678  def NEON_VMINNMNQh  : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
5679                                  N3RegFrm, NoItinerary, "vminnm", "f16",
5680                                  v8f16, v8f16, fminnum, 1>,
5681                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5682}
5683
5684// Vector Pairwise Operations.
5685
5686//   VPADD    : Vector Pairwise Add
5687def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5688                        "vpadd", "i8",
5689                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
5690def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5691                        "vpadd", "i16",
5692                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
5693def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5694                        "vpadd", "i32",
5695                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
5696def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
5697                        IIC_VPBIND, "vpadd", "f32",
5698                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
5699def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
5700                        IIC_VPBIND, "vpadd", "f16",
5701                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
5702                Requires<[HasNEON, HasFullFP16]>;
5703
5704//   VPADDL   : Vector Pairwise Add Long
5705defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
5706                             int_arm_neon_vpaddls>;
5707defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
5708                             int_arm_neon_vpaddlu>;
5709
5710//   VPADAL   : Vector Pairwise Add and Accumulate Long
5711defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
5712                              int_arm_neon_vpadals>;
5713defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
5714                              int_arm_neon_vpadalu>;
5715
5716//   VPMAX    : Vector Pairwise Maximum
5717def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5718                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
5719def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5720                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
5721def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5722                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
5723def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5724                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
5725def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5726                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
5727def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5728                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
5729def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
5730                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
5731def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
5732                        "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
5733                Requires<[HasNEON, HasFullFP16]>;
5734
5735//   VPMIN    : Vector Pairwise Minimum
5736def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5737                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
5738def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5739                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
5740def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5741                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
5742def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5743                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
5744def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5745                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
5746def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5747                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
5748def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5749                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
5750def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5751                        "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
5752                Requires<[HasNEON, HasFullFP16]>;
5753
5754// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
5755
5756//   VRECPE   : Vector Reciprocal Estimate
5757def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
5758                        IIC_VUNAD, "vrecpe", "u32",
5759                        v2i32, v2i32, int_arm_neon_vrecpe>;
5760def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
5761                        IIC_VUNAQ, "vrecpe", "u32",
5762                        v4i32, v4i32, int_arm_neon_vrecpe>;
5763def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
5764                        IIC_VUNAD, "vrecpe", "f32",
5765                        v2f32, v2f32, int_arm_neon_vrecpe>;
5766def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
5767                        IIC_VUNAQ, "vrecpe", "f32",
5768                        v4f32, v4f32, int_arm_neon_vrecpe>;
5769def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
5770                        IIC_VUNAD, "vrecpe", "f16",
5771                        v4f16, v4f16, int_arm_neon_vrecpe>,
5772                Requires<[HasNEON, HasFullFP16]>;
5773def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
5774                        IIC_VUNAQ, "vrecpe", "f16",
5775                        v8f16, v8f16, int_arm_neon_vrecpe>,
5776                Requires<[HasNEON, HasFullFP16]>;
5777
5778//   VRECPS   : Vector Reciprocal Step
5779def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
5780                        IIC_VRECSD, "vrecps", "f32",
5781                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
5782def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
5783                        IIC_VRECSQ, "vrecps", "f32",
5784                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
5785def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
5786                        IIC_VRECSD, "vrecps", "f16",
5787                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
5788                Requires<[HasNEON, HasFullFP16]>;
5789def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
5790                        IIC_VRECSQ, "vrecps", "f16",
5791                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
5792                Requires<[HasNEON, HasFullFP16]>;
5793
5794//   VRSQRTE  : Vector Reciprocal Square Root Estimate
5795def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
5796                         IIC_VUNAD, "vrsqrte", "u32",
5797                         v2i32, v2i32, int_arm_neon_vrsqrte>;
5798def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
5799                         IIC_VUNAQ, "vrsqrte", "u32",
5800                         v4i32, v4i32, int_arm_neon_vrsqrte>;
5801def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
5802                         IIC_VUNAD, "vrsqrte", "f32",
5803                         v2f32, v2f32, int_arm_neon_vrsqrte>;
5804def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
5805                         IIC_VUNAQ, "vrsqrte", "f32",
5806                         v4f32, v4f32, int_arm_neon_vrsqrte>;
5807def  VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
5808                         IIC_VUNAD, "vrsqrte", "f16",
5809                         v4f16, v4f16, int_arm_neon_vrsqrte>,
5810                Requires<[HasNEON, HasFullFP16]>;
5811def  VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
5812                         IIC_VUNAQ, "vrsqrte", "f16",
5813                         v8f16, v8f16, int_arm_neon_vrsqrte>,
5814                Requires<[HasNEON, HasFullFP16]>;
5815
5816//   VRSQRTS  : Vector Reciprocal Square Root Step
5817def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
5818                        IIC_VRECSD, "vrsqrts", "f32",
5819                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
5820def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
5821                        IIC_VRECSQ, "vrsqrts", "f32",
5822                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
5823def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
5824                        IIC_VRECSD, "vrsqrts", "f16",
5825                        v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
5826                Requires<[HasNEON, HasFullFP16]>;
5827def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
5828                        IIC_VRECSQ, "vrsqrts", "f16",
5829                        v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
5830                Requires<[HasNEON, HasFullFP16]>;
5831
5832// Vector Shifts.
5833
5834//   VSHL     : Vector Shift
5835defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
5836                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
5837                            "vshl", "s", int_arm_neon_vshifts>;
5838defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
5839                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
5840                            "vshl", "u", int_arm_neon_vshiftu>;
5841
5842let Predicates = [HasNEON] in {
5843def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
5844          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
5845def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
5846          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
5847def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
5848          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
5849def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
5850          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
5851def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
5852          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
5853def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
5854          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
5855def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
5856          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
5857def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
5858          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;
5859
5860def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
5861          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
5862def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
5863          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
5864def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
5865          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
5866def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
5867          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
5868def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
5869          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
5870def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
5871          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
5872def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
5873          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
5874def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
5875          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
5876
5877}
5878
5879//   VSHL     : Vector Shift Left (Immediate)
5880defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
5881
5882//   VSHR     : Vector Shift Right (Immediate)
5883defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
5884                            ARMvshrsImm>;
5885defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
5886                            ARMvshruImm>;
5887
5888//   VSHLL    : Vector Shift Left Long
5889defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
5890  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
5891defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
5892  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;
5893
5894//   VSHLL    : Vector Shift Left Long (with maximum shift count)
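//              Here the shift amount equals the source element width (8, 16 or
//              32), which has its own encoding; see the VSHLLi8/i16/i32
//              definitions and DecodeVSHLMaxInstruction below.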
5895class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
5896                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
5897                ValueType OpTy, Operand ImmTy>
5898  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
5899           ResTy, OpTy, ImmTy, null_frag> {
5900  let Inst{21-16} = op21_16;
5901  let DecoderMethod = "DecodeVSHLMaxInstruction";
5902}
5903def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
5904                          v8i16, v8i8, imm8>;
5905def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
5906                          v4i32, v4i16, imm16>;
5907def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
5908                          v2i64, v2i32, imm32>;
5909
5910let Predicates = [HasNEON] in {
5911def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
5912          (VSHLLi8 DPR:$Rn, 8)>;
5913def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
5914          (VSHLLi16 DPR:$Rn, 16)>;
5915def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
5916          (VSHLLi32 DPR:$Rn, 32)>;
5917def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
5918          (VSHLLi8 DPR:$Rn, 8)>;
5919def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
5920          (VSHLLi16 DPR:$Rn, 16)>;
5921def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
5922          (VSHLLi32 DPR:$Rn, 32)>;
5923def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
5924          (VSHLLi8 DPR:$Rn, 8)>;
5925def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
5926          (VSHLLi16 DPR:$Rn, 16)>;
5927def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
5928          (VSHLLi32 DPR:$Rn, 32)>;
5929}
5930
5931//   VSHRN    : Vector Shift Right and Narrow
5932defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
5933                           PatFrag<(ops node:$Rn, node:$amt),
5934                                   (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;
5935
5936let Predicates = [HasNEON] in {
5937def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
5938          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
5939def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
5940          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
5941def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
5942          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
5943}
5944
5945//   VRSHL    : Vector Rounding Shift
5946defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
5947                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5948                            "vrshl", "s", int_arm_neon_vrshifts>;
5949defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
5950                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5951                            "vrshl", "u", int_arm_neon_vrshiftu>;
5952//   VRSHR    : Vector Rounding Shift Right
5953defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
5954                            NEONvrshrsImm>;
5955defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
5956                            NEONvrshruImm>;
5957
5958//   VRSHRN   : Vector Rounding Shift Right and Narrow
5959defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
5960                           NEONvrshrnImm>;
5961
5962//   VQSHL    : Vector Saturating Shift
5963defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
5964                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5965                            "vqshl", "s", int_arm_neon_vqshifts>;
5966defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
5967                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5968                            "vqshl", "u", int_arm_neon_vqshiftu>;
5969//   VQSHL    : Vector Saturating Shift Left (Immediate)
5970defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
5971defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;
5972
5973//   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
5974defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;
5975
5976//   VQSHRN   : Vector Saturating Shift Right and Narrow
5977defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
5978                           NEONvqshrnsImm>;
5979defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
5980                           NEONvqshrnuImm>;
5981
5982//   VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
5983defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
5984                           NEONvqshrnsuImm>;
5985
5986//   VQRSHL   : Vector Saturating Rounding Shift
5987defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
5988                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5989                            "vqrshl", "s", int_arm_neon_vqrshifts>;
5990defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
5991                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5992                            "vqrshl", "u", int_arm_neon_vqrshiftu>;
5993
5994//   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
5995defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
5996                           NEONvqrshrnsImm>;
5997defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
5998                           NEONvqrshrnuImm>;
5999
6000//   VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
6001defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
6002                           NEONvqrshrnsuImm>;
6003
6004//   VSRA     : Vector Shift Right and Accumulate
6005defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
6006defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
6007//   VRSRA    : Vector Rounding Shift Right and Accumulate
6008defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
6009defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
6010
6011//   VSLI     : Vector Shift Left and Insert
6012defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
6013
6014//   VSRI     : Vector Shift Right and Insert
6015defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
6016
6017// Vector Absolute and Saturating Absolute.
6018
6019//   VABS     : Vector Absolute Value
6020defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
6021                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
6022def  VABSfd   : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
6023                     "vabs", "f32",
6024                     v2f32, v2f32, fabs>;
6025def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
6026                     "vabs", "f32",
6027                      v4f32, v4f32, fabs>;
6028def  VABShd   : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
6029                     "vabs", "f16",
6030                     v4f16, v4f16, fabs>,
6031                Requires<[HasNEON, HasFullFP16]>;
6032def  VABShq   : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
6033                     "vabs", "f16",
6034                      v8f16, v8f16, fabs>,
6035                Requires<[HasNEON, HasFullFP16]>;
6036
6037//   VQABS    : Vector Saturating Absolute Value
6038defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
6039                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
6040                           int_arm_neon_vqabs>;
6041
6042// Vector Negate.
6043
6044def vnegd  : PatFrag<(ops node:$in),
6045                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
6046def vnegq  : PatFrag<(ops node:$in),
6047                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
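// i.e. integer vector negation is modelled as a subtraction from the
// all-zeros vector (0 - x).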
6048
6049class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
6050  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
6051        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
6052        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
6053class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
6054  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
6055        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
6056        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
6057
6058//   VNEG     : Vector Negate (integer)
6059def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
6060def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
6061def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
6062def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
6063def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
6064def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
6065
6066//   VNEG     : Vector Negate (floating-point)
6067def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
6068                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
6069                    "vneg", "f32", "$Vd, $Vm", "",
6070                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
6071def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
6072                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
6073                    "vneg", "f32", "$Vd, $Vm", "",
6074                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
6075def  VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
6076                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
6077                    "vneg", "f16", "$Vd, $Vm", "",
6078                    [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
6079                Requires<[HasNEON, HasFullFP16]>;
6080def  VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
6081                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
6082                    "vneg", "f16", "$Vd, $Vm", "",
6083                    [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
6084                Requires<[HasNEON, HasFullFP16]>;
6085
6086let Predicates = [HasNEON] in {
6087def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
6088def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
6089def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
6090def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
6091def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
6092def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
6093}
6094
6095//   VQNEG    : Vector Saturating Negate
6096defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
6097                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
6098                           int_arm_neon_vqneg>;
6099
6100// Vector Bit Counting Operations.
6101
6102//   VCLS     : Vector Count Leading Sign Bits
6103defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
6104                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
6105                           int_arm_neon_vcls>;
6106//   VCLZ     : Vector Count Leading Zeros
6107defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
6108                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
6109                           ctlz>;
6110//   VCNT     : Vector Count One Bits
6111def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
6112                        IIC_VCNTiD, "vcnt", "8",
6113                        v8i8, v8i8, ctpop>;
6114def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
6115                        IIC_VCNTiQ, "vcnt", "8",
6116                        v16i8, v16i8, ctpop>;
6117
6118// Vector Swap
6119def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
6120                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
6121                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
6122                     []>;
6123def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
6124                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
6125                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
6126                     []>;
6127
6128// Vector Move Operations.
6129
6130//   VMOV     : Vector Move (Register)
6131def : NEONInstAlias<"vmov${p} $Vd, $Vm",
6132                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
6133def : NEONInstAlias<"vmov${p} $Vd, $Vm",
6134                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
6135
6136//   VMOV     : Vector Move (Immediate)
6137
6138// Although VMOVs are not strictly speaking cheap, they are as expensive
6139// as their copy counterpart (VORR), so we should prefer rematerialization
6140// over splitting when it applies.
6141let isReMaterializable = 1, isAsCheapAsAMove=1 in {
6142def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
6143                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
6144                         "vmov", "i8", "$Vd, $SIMM", "",
6145                         [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
6146def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
6147                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
6148                         "vmov", "i8", "$Vd, $SIMM", "",
6149                         [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;
6150
6151def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
6152                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
6153                         "vmov", "i16", "$Vd, $SIMM", "",
6154                         [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
6155  let Inst{9} = SIMM{9};
6156}
6157
6158def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
6159                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
6160                         "vmov", "i16", "$Vd, $SIMM", "",
6161                         [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
6162 let Inst{9} = SIMM{9};
6163}
6164
6165def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
6166                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
6167                         "vmov", "i32", "$Vd, $SIMM", "",
6168                         [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
6169  let Inst{11-8} = SIMM{11-8};
6170}
6171
6172def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
6173                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
6174                         "vmov", "i32", "$Vd, $SIMM", "",
6175                         [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
6176  let Inst{11-8} = SIMM{11-8};
6177}
6178
6179def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
6180                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
6181                         "vmov", "i64", "$Vd, $SIMM", "",
6182                         [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
6183def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
6184                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
6185                         "vmov", "i64", "$Vd, $SIMM", "",
6186                         [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;
6187
6188def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
6189                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
6190                         "vmov", "f32", "$Vd, $SIMM", "",
6191                         [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
6192def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
6193                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
6194                         "vmov", "f32", "$Vd, $SIMM", "",
6195                         [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
6196} // isReMaterializable, isAsCheapAsAMove
6197
6198// Add support for the byte replication feature, so that we are GAS compatible.
6199multiclass NEONImmReplicateI8InstAlias<ValueType To> {
6200  // E.g. instructions below:
6201  // "vmov.i32 d0, #0xffffffff"
6202  // "vmov.i32 d0, #0xabababab"
6203  // "vmov.i16 d0, #0xabab"
6204  // cannot be encoded as written, but we can still handle such cases.
6205  // For the last two instructions, for example, we emit:
6206  // "vmov.i8 d0, #0xab"
6207  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6208                      (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
6209  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6210                      (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
6211  // Also add the same support for VMVN instructions, so that the instruction:
6212  // "vmvn.i32 d0, #0xabababab"
6213  // actually means:
6214  // "vmov.i8 d0, #0x54"
6215  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6216                      (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
6217  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6218                      (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
6219}
6220
6221defm : NEONImmReplicateI8InstAlias<i16>;
6222defm : NEONImmReplicateI8InstAlias<i32>;
6223defm : NEONImmReplicateI8InstAlias<i64>;
6224
6225// Similar to above for types other than i8, e.g.:
6226// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
6227// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
6228// In this case we do not canonicalize VMVN to VMOV.
6229multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
6230                                     NeonI NV8, NeonI NV16, ValueType To> {
6231  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6232                      (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6233  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6234                      (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6235  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6236                      (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6237  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6238                      (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6239}
6240
6241defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
6242                                      VMVNv4i16, VMVNv8i16, i32>;
6243defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
6244                                      VMVNv4i16, VMVNv8i16, i64>;
6245defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
6246                                      VMVNv2i32, VMVNv4i32, i64>;
6247// TODO: add "VMOV <-> VMVN" conversion for cases like
6248// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
6249// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"
6250
6251// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
6252// require zero cycles to execute, so they should be used wherever possible for
6253// setting a register to zero.
6254
6255// Even without these pseudo-insts we would probably end up with the correct
6256// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
6257// since they are sometimes rather expensive.
6258
6259let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
6260  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
6261                               [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
6262                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
6263               Requires<[HasZCZ]>;
6264  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
6265                               [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
6266                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
6267               Requires<[HasZCZ]>;
6268}
6269
6270//   VMOV     : Vector Get Lane (move scalar to ARM core register)
6271
6272def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
6273                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6274                          IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
6275                          [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
6276                                           imm:$lane))]> {
6277  let Inst{21}  = lane{2};
6278  let Inst{6-5} = lane{1-0};
6279}
6280def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
6281                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6282                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
6283                          [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
6284                                           imm:$lane))]> {
6285  let Inst{21} = lane{1};
6286  let Inst{6}  = lane{0};
6287}
6288def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
6289                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6290                          IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
6291                          [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
6292                                           imm:$lane))]> {
6293  let Inst{21}  = lane{2};
6294  let Inst{6-5} = lane{1-0};
6295}
6296def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
6297                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6298                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
6299                          [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
6300                                           imm:$lane))]> {
6301  let Inst{21} = lane{1};
6302  let Inst{6}  = lane{0};
6303}
6304def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
6305                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
6306                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
6307                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
6308                                           imm:$lane))]>,
6309                Requires<[HasFPRegs, HasFastVGETLNi32]> {
6310  let Inst{21} = lane{0};
6311}
6312let Predicates = [HasNEON] in {
6313// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
6314def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
6315          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6316                           (DSubReg_i8_reg imm:$lane))),
6317                     (SubReg_i8_lane imm:$lane))>;
6318def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
6319          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6320                             (DSubReg_i16_reg imm:$lane))),
6321                     (SubReg_i16_lane imm:$lane))>;
6322def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
6323          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6324                           (DSubReg_i8_reg imm:$lane))),
6325                     (SubReg_i8_lane imm:$lane))>;
6326def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
6327          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6328                             (DSubReg_i16_reg imm:$lane))),
6329                     (SubReg_i16_lane imm:$lane))>;
6330}
6331def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6332          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
6333                             (DSubReg_i32_reg imm:$lane))),
6334                     (SubReg_i32_lane imm:$lane))>,
6335      Requires<[HasNEON, HasFastVGETLNi32]>;
6336def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
6337          (COPY_TO_REGCLASS
6338            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6339      Requires<[HasNEON, HasSlowVGETLNi32]>;
6340def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6341          (COPY_TO_REGCLASS
6342            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6343      Requires<[HasNEON, HasSlowVGETLNi32]>;
6344let Predicates = [HasNEON] in {
6345def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
6346          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
6347                          (SSubReg_f32_reg imm:$src2))>;
6348def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
6349          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
6350                          (SSubReg_f32_reg imm:$src2))>;
6351//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
6352//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6353def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
6354          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6355}
6356
6357def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
6358def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;
6359
6360let Predicates = [HasNEON] in {
6361def : Pat<(extractelt (v4f16 DPR:$src), imm_even:$lane),
6362            (EXTRACT_SUBREG
6363                (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
6364                (SSubReg_f16_reg imm_even:$lane))>;
6365
6366def : Pat<(extractelt (v4f16 DPR:$src), imm_odd:$lane),
6367            (COPY_TO_REGCLASS
6368              (VMOVH (EXTRACT_SUBREG
6369                  (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
6370                  (SSubReg_f16_reg imm_odd:$lane))),
6371              HPR)>;
6372
6373def : Pat<(extractelt (v8f16 QPR:$src), imm_even:$lane),
6374            (EXTRACT_SUBREG
6375                (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
6376                (SSubReg_f16_reg imm_even:$lane))>;
6377
6378def : Pat<(extractelt (v8f16 QPR:$src), imm_odd:$lane),
6379            (COPY_TO_REGCLASS
6380              (VMOVH (EXTRACT_SUBREG
6381                  (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
6382                  (SSubReg_f16_reg imm_odd:$lane))),
6383              HPR)>;
6384}
6385
6386//   VMOV     : Vector Set Lane (move ARM core register to scalar)
6387
6388let Constraints = "$src1 = $V" in {
6389def VSETLNi8  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
6390                          (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
6391                          IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
6392                          [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
6393                                           GPR:$R, imm:$lane))]> {
6394  let Inst{21}  = lane{2};
6395  let Inst{6-5} = lane{1-0};
6396}
6397def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
6398                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
6399                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
6400                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
6401                                           GPR:$R, imm:$lane))]> {
6402  let Inst{21} = lane{1};
6403  let Inst{6}  = lane{0};
6404}
6405def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
6406                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
6407                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
6408                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
6409                                           GPR:$R, imm:$lane))]>,
6410                Requires<[HasVFP2]> {
6411  let Inst{21} = lane{0};
6412  // This instruction is equivalent to
6413  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
6414  let isInsertSubreg = 1;
6415}
6416}
6417
6418let Predicates = [HasNEON] in {
6419def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
6420          (v16i8 (INSERT_SUBREG QPR:$src1,
6421                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
6422                                   (DSubReg_i8_reg imm:$lane))),
6423                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
6424                  (DSubReg_i8_reg imm:$lane)))>;
6425def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
6426          (v8i16 (INSERT_SUBREG QPR:$src1,
6427                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
6428                                     (DSubReg_i16_reg imm:$lane))),
6429                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
6430                  (DSubReg_i16_reg imm:$lane)))>;
6431def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
6432          (v4i32 (INSERT_SUBREG QPR:$src1,
6433                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
6434                                     (DSubReg_i32_reg imm:$lane))),
6435                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
6436                  (DSubReg_i32_reg imm:$lane)))>;
6437
6438def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
6439          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
6440                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
6441def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
6442          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
6443                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
6444
6445def : Pat<(insertelt (v4f16 DPR:$src1), HPR:$src2, imm:$lane),
6446          (v4f16 (VSETLNi16 DPR:$src1, (VMOVRH $src2), imm:$lane))>;
6447def : Pat<(insertelt (v8f16 QPR:$src1), HPR:$src2, imm:$lane),
6448          (v8f16 (INSERT_SUBREG QPR:$src1,
6449                   (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
6450                                      (DSubReg_i16_reg imm:$lane))),
6451                             (VMOVRH $src2), (SubReg_i16_lane imm:$lane))),
6452                   (DSubReg_i16_reg imm:$lane)))>;
6453
6454//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6455//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
6456def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6457          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
6458
6459def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
6460          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6461def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
6462          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
6463def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
6464          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6465
6466def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
6467          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6468def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
6469          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6470def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
6471          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6472
6473def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
6474          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6475                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6476                         dsub_0)>;
6477def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
6478          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6479                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6480                         dsub_0)>;
6481def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
6482          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6483                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6484                         dsub_0)>;
6485}
6486
6487//   VDUP     : Vector Duplicate (from ARM core register to all elements)
6488
6489class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6490  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
6491          IIC_VMOVIS, "vdup", Dt, "$V, $R",
6492          [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
6493class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6494  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
6495          IIC_VMOVIS, "vdup", Dt, "$V, $R",
6496          [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
6497
6498def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
6499def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
6500def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
6501                Requires<[HasNEON, HasFastVDUP32]>;
6502def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
6503def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
6504def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;
6505
6506// ARMvdup patterns for uarchs with fast VDUP.32.
6507def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
6508      Requires<[HasNEON,HasFastVDUP32]>;
6509def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
6510      Requires<[HasNEON]>;
6511
6512// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
6513def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
6514      Requires<[HasNEON,HasSlowVDUP32]>;
6515def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
6516      Requires<[HasNEON,HasSlowVDUP32]>;
6517
6518//   VDUP     : Vector Duplicate Lane (from scalar to all elements)
6519
6520class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
6521              ValueType Ty, Operand IdxTy>
6522  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6523              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
6524              [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;
6525
6526class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
6527              ValueType ResTy, ValueType OpTy, Operand IdxTy>
6528  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6529              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
6530              [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
6531                                      VectorIndex32:$lane)))]>;
6532
6533// Inst{19-16} is partially specified depending on the element size.
6534
6535def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
6536  bits<3> lane;
6537  let Inst{19-17} = lane{2-0};
6538}
6539def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
6540  bits<2> lane;
6541  let Inst{19-18} = lane{1-0};
6542}
6543def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
6544  bits<1> lane;
6545  let Inst{19} = lane{0};
6546}
6547def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
6548  bits<3> lane;
6549  let Inst{19-17} = lane{2-0};
6550}
6551def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
6552  bits<2> lane;
6553  let Inst{19-18} = lane{1-0};
6554}
6555def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
6556  bits<1> lane;
6557  let Inst{19} = lane{0};
6558}
6559
6560let Predicates = [HasNEON] in {
6561def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
6562          (VDUPLN32d DPR:$Vm, imm:$lane)>;
6563
6564def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
6565          (VDUPLN32d DPR:$Vm, imm:$lane)>;
6566
6567def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
6568          (VDUPLN32q DPR:$Vm, imm:$lane)>;
6569
6570def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
6571          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
6572                                  (DSubReg_i8_reg imm:$lane))),
6573                           (SubReg_i8_lane imm:$lane)))>;
6574def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
6575          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
6576                                    (DSubReg_i16_reg imm:$lane))),
6577                            (SubReg_i16_lane imm:$lane)))>;
6578def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
6579          (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
6580                                    (DSubReg_i16_reg imm:$lane))),
6581                            (SubReg_i16_lane imm:$lane)))>;
6582def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
6583          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
6584                                    (DSubReg_i32_reg imm:$lane))),
6585                            (SubReg_i32_lane imm:$lane)))>;
6586def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
6587          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
6588                                   (DSubReg_i32_reg imm:$lane))),
6589                           (SubReg_i32_lane imm:$lane)))>;
6590
6591def : Pat<(v4f16 (ARMvdup HPR:$src)),
6592          (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
6593                             HPR:$src, ssub_0), (i32 0)))>;
6594def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
6595          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6596                             SPR:$src, ssub_0), (i32 0)))>;
6597def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
6598          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6599                             SPR:$src, ssub_0), (i32 0)))>;
6600def : Pat<(v8f16 (ARMvdup HPR:$src)),
6601          (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
6602                             HPR:$src, ssub_0), (i32 0)))>;
6603}
6604
6605//   VMOVN    : Vector Narrowing Move
6606defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
6607                         "vmovn", "i", trunc>;
6608//   VQMOVN   : Vector Saturating Narrowing Move
6609defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
6610                            "vqmovn", "s", int_arm_neon_vqmovns>;
6611defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
6612                            "vqmovn", "u", int_arm_neon_vqmovnu>;
6613defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
6614                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
6615//   VMOVL    : Vector Lengthening Move
6616defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
6617defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
6618
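// An any-extend makes no guarantee about the new high bits, so the unsigned
// form of the lengthening move is used to implement it.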
6619let Predicates = [HasNEON] in {
6620def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
6621def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
6622def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
6623}
6624
6625// Vector Conversions.
6626
6627//   VCVT     : Vector Convert Between Floating-Point and Integers
6628def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6629                     v2i32, v2f32, fp_to_sint>;
6630def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6631                     v2i32, v2f32, fp_to_uint>;
6632def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6633                     v2f32, v2i32, sint_to_fp>;
6634def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6635                     v2f32, v2i32, uint_to_fp>;
6636
6637def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6638                     v4i32, v4f32, fp_to_sint>;
6639def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6640                     v4i32, v4f32, fp_to_uint>;
6641def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6642                     v4f32, v4i32, sint_to_fp>;
6643def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6644                     v4f32, v4i32, uint_to_fp>;
6645
6646def  VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6647                     v4i16, v4f16, fp_to_sint>,
6648                Requires<[HasNEON, HasFullFP16]>;
6649def  VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6650                     v4i16, v4f16, fp_to_uint>,
6651                Requires<[HasNEON, HasFullFP16]>;
6652def  VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6653                     v4f16, v4i16, sint_to_fp>,
6654                Requires<[HasNEON, HasFullFP16]>;
6655def  VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6656                     v4f16, v4i16, uint_to_fp>,
6657                Requires<[HasNEON, HasFullFP16]>;
6658
6659def  VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6660                     v8i16, v8f16, fp_to_sint>,
6661                Requires<[HasNEON, HasFullFP16]>;
6662def  VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6663                     v8i16, v8f16, fp_to_uint>,
6664                Requires<[HasNEON, HasFullFP16]>;
6665def  VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6666                     v8f16, v8i16, sint_to_fp>,
6667                Requires<[HasNEON, HasFullFP16]>;
6668def  VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6669                     v8f16, v8i16, uint_to_fp>,
6670                Requires<[HasNEON, HasFullFP16]>;
6671
6672// VCVT{A, N, P, M}
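// These are the directed-rounding conversions: "a" rounds to nearest with ties
// away from zero, "n" to nearest with ties to even, "p" toward +infinity and
// "m" toward -infinity.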
6673multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
6674                    SDPatternOperator IntU> {
6675  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
6676    def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6677                       "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
6678    def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6679                       "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
6680    def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6681                       "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
6682    def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6683                       "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
6684    def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6685                       "s16.f16", v4i16, v4f16, IntS>,
6686              Requires<[HasV8, HasNEON, HasFullFP16]>;
6687    def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6688                       "s16.f16", v8i16, v8f16, IntS>,
6689              Requires<[HasV8, HasNEON, HasFullFP16]>;
6690    def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6691                       "u16.f16", v4i16, v4f16, IntU>,
6692              Requires<[HasV8, HasNEON, HasFullFP16]>;
6693    def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6694                       "u16.f16", v8i16, v8f16, IntU>,
6695              Requires<[HasV8, HasNEON, HasFullFP16]>;
6696  }
6697}
6698
6699defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
6700defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
6701defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
6702defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
6703
6704//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
6705let DecoderMethod = "DecodeVCVTD" in {
6706def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6707                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
6708def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6709                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
6710def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6711                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
6712def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6713                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
6714let Predicates = [HasNEON, HasFullFP16] in {
6715def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6716                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
6717def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6718                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
6719def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6720                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
6721def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6722                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
6723} // Predicates = [HasNEON, HasFullFP16]
6724}
6725
6726let DecoderMethod = "DecodeVCVTQ" in {
6727def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6728                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
6729def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6730                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
6731def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6732                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
6733def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6734                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
6735let Predicates = [HasNEON, HasFullFP16] in {
6736def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6737                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
6738def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6739                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
6740def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6741                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
6742def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6743                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
6744} // Predicates = [HasNEON, HasFullFP16]
6745}
6746
6747def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
6748                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6749def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
6750                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6751def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
6752                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6753def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
6754                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6755
6756def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
6757                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6758def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
6759                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6760def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
6761                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6762def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
6763                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6764
6765def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
6766                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6767def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
6768                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6769def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
6770                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6771def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
6772                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6773
6774def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
6775                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6776def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
6777                    (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6778def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
6779                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6780def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
6781                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6782
6783
6784//   VCVT     : Vector Convert Between Half-Precision and Single-Precision.
6785def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
6786                        IIC_VUNAQ, "vcvt", "f16.f32",
6787                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
6788                Requires<[HasNEON, HasFP16]>;
6789def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
6790                        IIC_VUNAQ, "vcvt", "f32.f16",
6791                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
6792                Requires<[HasNEON, HasFP16]>;
6793
6794// Vector Reverse.
6795
6796//   VREV64   : Vector Reverse elements within 64-bit doublewords
6797
6798class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6799  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
6800        (ins DPR:$Vm), IIC_VMOVD,
6801        OpcodeStr, Dt, "$Vd, $Vm", "",
6802        [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
6803class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6804  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
6805        (ins QPR:$Vm), IIC_VMOVQ,
6806        OpcodeStr, Dt, "$Vd, $Vm", "",
6807        [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;
6808
6809def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
6810def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
6811def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
6812let Predicates = [HasNEON] in {
6813def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
6814}
6815
6816def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
6817def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
6818def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
6819
6820let Predicates = [HasNEON] in {
6821  def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))),
6822            (VREV64q32 QPR:$Vm)>;
6823  def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))),
6824            (VREV64q16 QPR:$Vm)>;
6825  def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))),
6826            (VREV64d16 DPR:$Vm)>;
6827}
6828
6829//   VREV32   : Vector Reverse elements within 32-bit words
6830
6831class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6832  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
6833        (ins DPR:$Vm), IIC_VMOVD,
6834        OpcodeStr, Dt, "$Vd, $Vm", "",
6835        [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
6836class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6837  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
6838        (ins QPR:$Vm), IIC_VMOVQ,
6839        OpcodeStr, Dt, "$Vd, $Vm", "",
6840        [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;
6841
6842def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
6843def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
6844
6845def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
6846def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
6847
6848let Predicates = [HasNEON] in {
6849  def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))),
6850            (VREV32q16 QPR:$Vm)>;
6851  def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))),
6852            (VREV32d16 DPR:$Vm)>;
6853}
6854
6855//   VREV16   : Vector Reverse elements within 16-bit halfwords
6856
6857class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6858  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
6859        (ins DPR:$Vm), IIC_VMOVD,
6860        OpcodeStr, Dt, "$Vd, $Vm", "",
6861        [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
6862class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6863  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
6864        (ins QPR:$Vm), IIC_VMOVQ,
6865        OpcodeStr, Dt, "$Vd, $Vm", "",
6866        [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;
6867
6868def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
6869def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;
6870
6871// Other Vector Shuffles.
6872
6873//  Aligned extractions: really just dropping registers
6874
6875class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
6876      : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
6877             (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
6878        Requires<[HasNEON]>;
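// As a concrete illustration (a straight substitution into the class above),
// the v8i8/v16i8 instantiation below amounts to:
//   Pat<(v8i8 (vector_extract_subvec (v16i8 QPR:$src), (i32 imm:$start))),
//       (EXTRACT_SUBREG (v16i8 QPR:$src), (DSubReg_i8_reg imm:$start))>;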
6879
6880def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
6881
6882def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
6883
6884def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
6885
6886def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
6887
6888def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
6889
6890def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16
6891
6892//   VEXT     : Vector Extract
6893
6894
6895// All of these have a two-operand InstAlias.
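// For example, "vext.8 d0, d1, #1" is accepted as shorthand for
// "vext.8 d0, d0, d1, #1" via the $Vn = $Vd constraint below.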
6896let TwoOperandAliasConstraint = "$Vn = $Vd" in {
6897class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6898  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
6899        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
6900        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6901        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
6902                                     (Ty DPR:$Vm), imm:$index)))]> {
6903  bits<3> index;
6904  let Inst{11} = 0b0;
6905  let Inst{10-8} = index{2-0};
6906}
6907
6908class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6909  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
6910        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
6911        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6912        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
6913                                     (Ty QPR:$Vm), imm:$index)))]> {
6914  bits<4> index;
6915  let Inst{11-8} = index{3-0};
6916}
6917}
6918
6919def VEXTd8  : VEXTd<"vext", "8",  v8i8, imm0_7> {
6920  let Inst{10-8} = index{2-0};
6921}
6922def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
6923  let Inst{10-9} = index{1-0};
6924  let Inst{8}    = 0b0;
6925}
6926let Predicates = [HasNEON] in {
6927def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
6928          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
6929}
6930
6931def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
6932  let Inst{10}     = index{0};
6933  let Inst{9-8}    = 0b00;
6934}
6935let Predicates = [HasNEON] in {
6936def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
6937          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
6938}
6939
6940def VEXTq8  : VEXTq<"vext", "8",  v16i8, imm0_15> {
6941  let Inst{11-8} = index{3-0};
6942}
6943def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
6944  let Inst{11-9} = index{2-0};
6945  let Inst{8}    = 0b0;
6946}
6947let Predicates = [HasNEON] in {
6948def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
6949          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
6950}
6951
6952def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
6953  let Inst{11-10} = index{1-0};
6954  let Inst{9-8}    = 0b00;
6955}
6956def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
6957  let Inst{11} = index{0};
6958  let Inst{10-8}    = 0b000;
6959}
6960let Predicates = [HasNEON] in {
6961def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
6962          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
6963}
6964
6965//   VTRN     : Vector Transpose
6966
6967def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
6968def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
6969def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
6970
6971def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
6972def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
6973def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
6974
6975//   VUZP     : Vector Unzip (Deinterleave)
6976
6977def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
6978def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
6979// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
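// (With only two 32-bit elements per D register, unzip and transpose perform
// the same permutation, so the alias below is exact.)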
6980def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
6981                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
6982
6983def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
6984def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
6985def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
6986
6987//   VZIP     : Vector Zip (Interleave)
6988
6989def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
6990def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
6991// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
6992def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
6993                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
6994
6995def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
6996def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
6997def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
6998
6999// Vector Table Lookup and Table Extension.
7000
7001//   VTBL     : Vector Table Lookup
7002let DecoderMethod = "DecodeTBLInstruction" in {
7003def  VTBL1
7004  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
7005        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
7006        "vtbl", "8", "$Vd, $Vn, $Vm", "",
7007        [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
7008
7009let hasExtraSrcRegAllocReq = 1 in {
7010def  VTBL2
7011  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
7012        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
7013        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
7014def  VTBL3
7015  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
7016        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
7017        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
7018def  VTBL4
7019  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
7020        (ins VecListFourD:$Vn, DPR:$Vm),
7021        NVTBLFrm, IIC_VTB4,
7022        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
7023} // hasExtraSrcRegAllocReq = 1
7024
7025def  VTBL3Pseudo
7026  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
7027def  VTBL4Pseudo
7028  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
7029
7030//   VTBX     : Vector Table Extension
7031def  VTBX1
7032  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
7033        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
7034        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
7035        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
7036                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
7037let hasExtraSrcRegAllocReq = 1 in {
7038def  VTBX2
7039  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
7040        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
7041        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
7042def  VTBX3
7043  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
7044        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
7045        NVTBLFrm, IIC_VTBX3,
7046        "vtbx", "8", "$Vd, $Vn, $Vm",
7047        "$orig = $Vd", []>;
7048def  VTBX4
7049  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
7050        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
7051        "vtbx", "8", "$Vd, $Vn, $Vm",
7052        "$orig = $Vd", []>;
7053} // hasExtraSrcRegAllocReq = 1
7054
7055def  VTBX3Pseudo
7056  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
7057                IIC_VTBX3, "$orig = $dst", []>;
7058def  VTBX4Pseudo
7059  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
7060                IIC_VTBX4, "$orig = $dst", []>;
7061} // DecoderMethod = "DecodeTBLInstruction"
7062
7063let Predicates = [HasNEON] in {
7064def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
7065          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
7066                                            v8i8:$Vn1, dsub_1),
7067                       v8i8:$Vm))>;
7068def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
7069                                    v8i8:$Vm)),
7070          (v8i8 (VTBX2 v8i8:$orig,
7071                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
7072                                            v8i8:$Vn1, dsub_1),
7073                       v8i8:$Vm))>;
7074
7075def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
7076                                    v8i8:$Vn2, v8i8:$Vm)),
7077          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7078                                                 v8i8:$Vn1, dsub_1,
7079                                                 v8i8:$Vn2, dsub_2,
7080                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
7081                             v8i8:$Vm))>;
7082def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
7083                                    v8i8:$Vn2, v8i8:$Vm)),
7084          (v8i8 (VTBX3Pseudo v8i8:$orig,
7085                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7086                                                 v8i8:$Vn1, dsub_1,
7087                                                 v8i8:$Vn2, dsub_2,
7088                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
7089                             v8i8:$Vm))>;
7090
7091def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
7092                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
7093          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7094                                                 v8i8:$Vn1, dsub_1,
7095                                                 v8i8:$Vn2, dsub_2,
7096                                                 v8i8:$Vn3, dsub_3),
7097                             v8i8:$Vm))>;
7098def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
7099                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
7100          (v8i8 (VTBX4Pseudo v8i8:$orig,
7101                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7102                                                 v8i8:$Vn1, dsub_1,
7103                                                 v8i8:$Vn2, dsub_2,
7104                                                 v8i8:$Vn3, dsub_3),
7105                             v8i8:$Vm))>;
7106}
7107
7108// VRINT      : Vector Rounding
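// Directed rounding to integral in floating-point: "n" rounds to nearest with
// ties to even, "x" likewise but signalling inexact, "a" to nearest with ties
// away from zero, "z" toward zero, "m" toward -infinity and "p" toward
// +infinity.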
7109multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
7110  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
7111    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
7112                      !strconcat("vrint", op), "f32",
7113                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
7114      let Inst{9-7} = op9_7;
7115    }
7116    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
7117                      !strconcat("vrint", op), "f32",
7118                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
7119      let Inst{9-7} = op9_7;
7120    }
7121    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
7122                      !strconcat("vrint", op), "f16",
7123                      v4f16, v4f16, Int>,
7124             Requires<[HasV8, HasNEON, HasFullFP16]> {
7125      let Inst{9-7} = op9_7;
7126    }
7127    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
7128                      !strconcat("vrint", op), "f16",
7129                      v8f16, v8f16, Int>,
7130             Requires<[HasV8, HasNEON, HasFullFP16]> {
7131      let Inst{9-7} = op9_7;
7132    }
7133  }
7134
7135  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
7136                  (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
7137  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
7138                  (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
7139  let Predicates = [HasNEON, HasFullFP16] in {
7140  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
7141                  (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
7142  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
7143                  (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
7144  }
7145}
7146
7147defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
7148defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
7149defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
7150defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
7151defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
7152defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
7153
7154// Cryptography instructions
7155let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
7156    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
7157  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
7158    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
7159                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
7160      Requires<[HasV8, HasCrypto]>;
7161  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
7162    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
7163                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
7164      Requires<[HasV8, HasCrypto]>;
7165  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
7166              SDPatternOperator Int>
7167    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
7168                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
7169      Requires<[HasV8, HasCrypto]>;
7170  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
7171              SDPatternOperator Int>
7172    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
7173                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
7174      Requires<[HasV8, HasCrypto]>;
7175  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
7176    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
7177                !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
7178      Requires<[HasV8, HasCrypto]>;
7179}
7180
7181def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
7182def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
7183def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
7184def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
7185
7186def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
7187def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
7188def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
7189def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
7190def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
7191def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
7192def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
7193def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
7194def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
7195def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
7196
7197let Predicates = [HasNEON] in {
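// int_arm_neon_sha1h takes and returns a plain i32, so the pattern below moves
// the value into lane 0 of a Q register via an SPR copy, runs SHA1H on the
// vector, and copies lane 0 back out to a GPR.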
7198def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
7199          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
7200              (SHA1H (SUBREG_TO_REG (i64 0),
7201                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
7202                                    ssub_0)),
7203              ssub_0)), GPR)>;
7204
7205def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7206          (SHA1C v4i32:$hash_abcd,
7207                 (SUBREG_TO_REG (i64 0),
7208                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7209                                ssub_0),
7210                 v4i32:$wk)>;
7211
7212def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7213          (SHA1M v4i32:$hash_abcd,
7214                 (SUBREG_TO_REG (i64 0),
7215                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7216                                ssub_0),
7217                 v4i32:$wk)>;
7218
7219def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7220          (SHA1P v4i32:$hash_abcd,
7221                 (SUBREG_TO_REG (i64 0),
7222                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7223                                ssub_0),
7224                 v4i32:$wk)>;
7225}
7226
7227//===----------------------------------------------------------------------===//
7228// NEON instructions for single-precision FP math
7229//===----------------------------------------------------------------------===//
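// These patterns let scalar f32 (and f16) arithmetic be implemented with NEON
// D-register instructions when NEON is preferred for scalar FP: the scalar is
// inserted into lane 0 of an undefined vector, the vector op is executed, and
// lane 0 is extracted back out.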
7230
7231class N2VSPat<SDNode OpNode, NeonI Inst>
7232  : NEONFPPat<(f32 (OpNode SPR:$a)),
7233              (EXTRACT_SUBREG
7234               (v2f32 (COPY_TO_REGCLASS (Inst
7235                (INSERT_SUBREG
7236                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7237                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
7238
7239class N3VSPat<SDNode OpNode, NeonI Inst>
7240  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
7241              (EXTRACT_SUBREG
7242               (v2f32 (COPY_TO_REGCLASS (Inst
7243                (INSERT_SUBREG
7244                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7245                 SPR:$a, ssub_0),
7246                (INSERT_SUBREG
7247                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7248                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
7249
7250class N3VSPatFP16<SDNode OpNode, NeonI Inst>
7251  : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
7252              (EXTRACT_SUBREG
7253               (v4f16 (COPY_TO_REGCLASS (Inst
7254                (INSERT_SUBREG
7255                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
7256                 HPR:$a, ssub_0),
7257                (INSERT_SUBREG
7258                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
7259                 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
7260
7261class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
7262  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
7263              (EXTRACT_SUBREG
7264               (v2f32 (COPY_TO_REGCLASS (Inst
7265                (INSERT_SUBREG
7266                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7267                 SPR:$acc, ssub_0),
7268                (INSERT_SUBREG
7269                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7270                 SPR:$a, ssub_0),
7271                (INSERT_SUBREG
7272                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7273                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
7274
7275class NVCVTIFPat<SDNode OpNode, NeonI Inst>
7276  : NEONFPPat<(f32 (OpNode GPR:$a)),
7277              (f32 (EXTRACT_SUBREG
7278                     (v2f32 (Inst
7279                       (INSERT_SUBREG
7280                         (v2f32 (IMPLICIT_DEF)),
7281                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
7282                     ssub_0))>;
7283class NVCVTFIPat<SDNode OpNode, NeonI Inst>
7284  : NEONFPPat<(i32 (OpNode SPR:$a)),
7285              (i32 (EXTRACT_SUBREG
7286                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
7287                                                 SPR:$a, ssub_0))),
7288                     ssub_0))>;
7289
7290def : N3VSPat<fadd, VADDfd>;
7291def : N3VSPat<fsub, VSUBfd>;
7292def : N3VSPat<fmul, VMULfd>;
7293def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
7294      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
7295def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
7296      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
7297def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
7298      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7299def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
7300      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7301def : N2VSPat<fabs, VABSfd>;
7302def : N2VSPat<fneg, VNEGfd>;
7303def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
7304def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
7305def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
7306def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
7307def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
7308def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
7309def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
7310def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
7311
7312// NEON doesn't have any f64 conversions, so provide patterns to make
7313// sure the VFP conversions match when extracting from a vector.
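// For example, (f64 (sint_to_fp (extractelt (v2i32 DPR:$src), lane))) becomes
// a VSITOD of the S register that overlaps the selected lane, avoiding a
// round trip through a GPR.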
7314def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7315             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7316def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7317             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7318def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7319             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7320def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7321             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7322
7323
7324// Prefer VMOVDRR for i32 -> f32 bitcasts, as it can write all DPR registers.
7325def : Pat<(f32 (bitconvert GPR:$a)),
7326          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7327        Requires<[HasNEON, DontUseVMOVSR]>;
7328def : Pat<(arm_vmovsr GPR:$a),
7329          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7330        Requires<[HasNEON, DontUseVMOVSR]>;
7331
7332//===----------------------------------------------------------------------===//
7333// Non-Instruction Patterns and Endianness Reversal Patterns
7334//===----------------------------------------------------------------------===//
7335
7336// bit_convert
7337// 64 bit conversions
7338let Predicates = [HasNEON] in {
7339def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
7340def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
7341
7342def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
7343def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
7344
7345def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16  DPR:$src)>;
7346def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16  DPR:$src)>;
7347
7348// 128 bit conversions
7349def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
7350def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
7351
7352def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
7353def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
7354
7355def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16  QPR:$src)>;
7356def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16  QPR:$src)>;
7357}
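// Bitconverts between types with the same element size (above) need no lane
// reordering in either endianness. For the remaining combinations,
// little-endian bitconverts are no-ops, while big-endian ones must reorder
// lanes with the matching VREV, because the in-register lane order depends on
// the element size.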
7358
7359let Predicates = [IsLE,HasNEON] in {
7360  // 64 bit conversions
7361  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
7362  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
7363  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (f64   DPR:$src)>;
7364  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
7365  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
7366
7367  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
7368  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
7369  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
7370  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
7371  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
7372
7373  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
7374  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
7375  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
7376  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
7377  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;
7378
7379  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
7380  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
7381  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
7382  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
7383  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
7384
7385  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (v4f16 DPR:$src)>;
7386  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
7387  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
7388  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
7389  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (v4f16 DPR:$src)>;
7390
7391  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
7392  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
7393  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
7394  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
7395  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
7396
7397  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
7398  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
7399  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
7400  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
7401  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (v8i8  DPR:$src)>;
7402  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
7403
7404  // 128 bit conversions
7405  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
7406  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
7407  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
7408  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
7409  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
7410
7411  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
7412  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
7413  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
7414  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
7415  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
7416
7417  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
7418  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
7419  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
7420  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
7421  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
7422
7423  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
7424  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
7425  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
7426  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
7427  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
7428
7429  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
7430  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
7431  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
7432  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
7433  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
7434
7435  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
7436  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
7437  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
7438  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
7439  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
7440
7441  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
7442  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
7443  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
7444  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
7445  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
7446  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
7447}
7448
7449let Predicates = [IsBE,HasNEON] in {
7450  // 64 bit conversions
7451  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7452  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7453  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
7454  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7455  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
7456
7457  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7458  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7459  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
7460  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7461  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
7462
7463  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
7464  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7465  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
7466  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7467  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
7468
7469  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
7470  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7471  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
7472  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7473  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
7474
7475  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
7476  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7477  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7478  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7479  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
7480
7481  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
7482  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7483  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7484  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7485  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
7486
7487  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (VREV64d8  DPR:$src)>;
7488  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (VREV64d8  DPR:$src)>;
7489  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (VREV32d8  DPR:$src)>;
7490  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (VREV32d8  DPR:$src)>;
7491  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (VREV16d8  DPR:$src)>;
7492  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (VREV16d8  DPR:$src)>;
7493
7494  // 128 bit conversions
7495  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7496  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7497  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
7498  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7499  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
7500
7501  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7502  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7503  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
7504  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7505  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
7506
7507  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7508  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7509  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
7510  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7511  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
7512
7513  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7514  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7515  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
7516  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7517  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
7518
7519  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7520  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7521  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7522  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7523  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
7524
7525  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7526  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7527  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7528  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7529  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
7530
7531  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8  QPR:$src)>;
7532  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8  QPR:$src)>;
7533  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8  QPR:$src)>;
7534  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8  QPR:$src)>;
7535  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8  QPR:$src)>;
7536  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8  QPR:$src)>;
7537}
7538
7539// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on big-endian targets
7540let Predicates = [IsBE,HasNEON] in {
7541def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
7542          (VREV64q8 (VLD1q8 addrmode6:$addr))>;
7543def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7544          (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
7545def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
7546          (VREV64q16 (VLD1q16 addrmode6:$addr))>;
7547def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7548          (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
7549}
7550
7551// Fold extracting an element out of a v2i32 into a vfp register.
7552def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
7553          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
7554      Requires<[HasNEON]>;
7555
7556// Vector lengthening move with load, matching extending loads.
7557
7558// extload, zextload and sextload for a standard lengthening load. Example:
7559// Lengthen_Single<"8", "i16", "8"> =
7560//     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
7561//         (VMOVLuv8i16
7562//           (VLD1d8 addrmode6:$addr))>;
7563multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
7564  let AddedComplexity = 10 in {
7565  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7566                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
7567                  (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
7568                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
7569             Requires<[HasNEON]>;
7570
7571  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7572                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
7573                (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
7574                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
7575           Requires<[HasNEON]>;
7576
7577  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7578                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
7579                (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
7580                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
7581           Requires<[HasNEON]>;
7582  }
7583}
7584
7585// extload, zextload and sextload for a lengthening load which only uses
7586// half the lanes available. Example:
7587// Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
7588//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
7589//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
7590//                                      (f64 (IMPLICIT_DEF)), (i32 0))),
7591//                         dsub_0)>;
7592multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
7593                               string InsnLanes, string InsnTy> {
7594  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7595                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7596       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7597         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7598         dsub_0)>,
7599             Requires<[HasNEON]>;
7600  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7601                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7602       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7603         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7604         dsub_0)>,
7605             Requires<[HasNEON]>;
7606  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7607                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7608       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
7609         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7610         dsub_0)>,
7611             Requires<[HasNEON]>;
7612}
7613
7614// The following class definition is basically a copy of the
7615// Lengthen_HalfSingle definition above, but with an additional parameter,
7616// "RevLanes", to select the correct VREV32dXX instruction. This converts the
7617// data loaded by VLD1LN into the proper vector format in big-endian mode.
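// An illustrative expansion, mirroring the Lengthen_HalfSingle example above
// but with the extra VREV32d selected by "RevLanes" (the argument values here
// are only an example):
// Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VREV32d8
//           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
//                         dsub_0)>;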
7618multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7619                               string InsnLanes, string InsnTy, string RevLanes> {
7620  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7621                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7622       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7623         (!cast<Instruction>("VREV32d" # RevLanes)
7624           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7625         dsub_0)>,
7626             Requires<[HasNEON]>;
7627  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7628                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7629       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7630         (!cast<Instruction>("VREV32d" # RevLanes)
7631           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7632         dsub_0)>,
7633             Requires<[HasNEON]>;
7634  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7635                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7636       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
7637         (!cast<Instruction>("VREV32d" # RevLanes)
7638           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7639         dsub_0)>,
7640             Requires<[HasNEON]>;
7641}
7642
7643// extload, zextload and sextload for a lengthening load followed by another
7644// lengthening load, to quadruple the initial length.
7645//
7646// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
7647//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
7648//         (VMOVLuv4i32
7649//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
7650//                                                   (f64 (IMPLICIT_DEF)),
7651//                                                   (i32 0))),
7652//                           dsub_0))>;
7654multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
7655                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7656                           string Insn2Ty> {
7657  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7658                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7659         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7660           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7661             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7662             dsub_0))>,
7663             Requires<[HasNEON]>;
7664  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7665                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7666         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7667           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7668             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7669             dsub_0))>,
7670             Requires<[HasNEON]>;
7671  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7672                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7673         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7674           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7675             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7676             dsub_0))>,
7677             Requires<[HasNEON]>;
7678}
7679
7680// The following class definition is basically a copy of the
7681// Lengthen_Double definition above, but with an additional parameter,
7682// "RevLanes", to select the correct VREV32dXX instruction. This converts the
7683// data loaded by VLD1LN into the proper vector format in big-endian mode.
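// An illustrative expansion, mirroring the Lengthen_Double example above but
// with the extra VREV32d selected by "RevLanes" (the argument values here are
// only an example):
// Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VREV32d8
//             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
//             dsub_0))>;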
7684multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7685                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7686                           string Insn2Ty, string RevLanes> {
7687  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7688                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7689         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7690           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7691            (!cast<Instruction>("VREV32d" # RevLanes)
7692             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7693             dsub_0))>,
7694             Requires<[HasNEON]>;
7695  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7696                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7697         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7698           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7699            (!cast<Instruction>("VREV32d" # RevLanes)
7700             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7701             dsub_0))>,
7702             Requires<[HasNEON]>;
7703  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7704                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7705         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7706           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7707            (!cast<Instruction>("VREV32d" # RevLanes)
7708             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7709             dsub_0))>,
7710             Requires<[HasNEON]>;
7711}
7712
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but ending up with only
// half of the available lanes used (a 64-bit result instead of a 128-bit one).
7716//
7717// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
7718// Pat<(v2i32 (extloadvi8 addrmode6:$addr))
7719//     (EXTRACT_SUBREG (VMOVLuv4i32
7720//       (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
7721//                                               (f64 (IMPLICIT_DEF)), (i32 0))),
7722//                       dsub_0)),
7723//       dsub_0)>;
7724multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
7725                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7726                           string Insn2Ty> {
7727  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7728                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7729         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7730           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7731             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7732             dsub_0)),
7733          dsub_0)>,
7734             Requires<[HasNEON]>;
7735  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7736                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7737         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7738           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7739             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7740             dsub_0)),
7741          dsub_0)>,
7742              Requires<[HasNEON]>;
7743  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7744                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7745         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7746           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7747             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7748             dsub_0)),
7749          dsub_0)>,
7750             Requires<[HasNEON]>;
7751}
7752
// The following multiclass is essentially a copy of the Lengthen_HalfDouble
// definition above, but with an additional VREV16d8 instruction to convert
// the data loaded by VLD1LN into the proper vector format in big-endian mode.
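//
// For illustration only: a sketch mirroring the Lengthen_HalfDouble example
// above, with the big-endian fix-up inserted (parameters taken from the IsBE
// block further down):
//
// Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32"> =
// Pat<(v2i32 (extloadvi8 addrmode6:$addr))
//     (EXTRACT_SUBREG (VMOVLuv4i32
//       (EXTRACT_SUBREG (VMOVLuv8i16 (VREV16d8
//                                      (VLD1LNd16 addrmode6:$addr,
//                                                 (f64 (IMPLICIT_DEF)),
//                                                 (i32 0)))),
//                       dsub_0)),
//       dsub_0)>;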
7757multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7758                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7759                           string Insn2Ty> {
7760  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7761                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7762         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7763           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7764            (!cast<Instruction>("VREV16d8")
7765             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7766             dsub_0)),
7767          dsub_0)>,
7768             Requires<[HasNEON]>;
7769  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7770                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7771         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7772           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7773            (!cast<Instruction>("VREV16d8")
7774             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7775             dsub_0)),
7776          dsub_0)>,
7777             Requires<[HasNEON]>;
7778  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7779                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7780         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7781           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7782            (!cast<Instruction>("VREV16d8")
7783             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7784             dsub_0)),
7785          dsub_0)>,
7786             Requires<[HasNEON]>;
7787}
7788
7789defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
7790defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
7791defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
7792
7793let Predicates = [HasNEON,IsLE] in {
7794  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
7795  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
7796
7797  // Double lengthening - v4i8 -> v4i16 -> v4i32
7798  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
7799  // v2i8 -> v2i16 -> v2i32
7800  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
7801  // v2i16 -> v2i32 -> v2i64
7802  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
7803}
7804
7805let Predicates = [HasNEON,IsBE] in {
7806  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
7807  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
7808
7809  // Double lengthening - v4i8 -> v4i16 -> v4i32
7810  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
7811  // v2i8 -> v2i16 -> v2i32
7812  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
7813  // v2i16 -> v2i32 -> v2i64
7814  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
7815}
7816
7817// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
7818let Predicates = [HasNEON,IsLE] in {
7819  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7820        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7821           (VLD1LNd16 addrmode6:$addr,
7822                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7823  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7824        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7825           (VLD1LNd16 addrmode6:$addr,
7826                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7827  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7828        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7829           (VLD1LNd16 addrmode6:$addr,
7830                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7831}
// The following patterns are essentially a copy of the patterns above, but
// with an additional VREV16d8 instruction to convert the data loaded by
// VLD1LN into the proper vector format in big-endian mode.
7835let Predicates = [HasNEON,IsBE] in {
7836  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7837        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7838           (!cast<Instruction>("VREV16d8")
7839             (VLD1LNd16 addrmode6:$addr,
7840                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7841  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7842        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7843           (!cast<Instruction>("VREV16d8")
7844             (VLD1LNd16 addrmode6:$addr,
7845                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7846  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7847        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7848           (!cast<Instruction>("VREV16d8")
7849             (VLD1LNd16 addrmode6:$addr,
7850                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7851}
7852
7853let Predicates = [HasNEON] in {
7854def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
7855          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7856def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
7857          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7858def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
7859          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7860def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
7861          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7862def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
7863          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7864def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
7865          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7866}
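// Note: the concatenation itself is modelled purely as register pairing. For
// example, (v4i32 (concat_vectors Dn, Dm)) becomes a REG_SEQUENCE placing Dn
// in dsub_0 and Dm in dsub_1 of the resulting Q register, so no instruction
// is emitted for it (the register allocator may still insert copies if the
// operands do not already form a consecutive D-register pair).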
7867
7868//===----------------------------------------------------------------------===//
7869// Assembler aliases
7870//
7871
7872def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
7873                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
7874def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
7875                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
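// For illustration: "fmdhr d0, r1" is accepted as the legacy VFP spelling of
// "vmov.32 d0[1], r1", and "fmdlr d0, r1" of "vmov.32 d0[0], r1"
// (register numbers are arbitrary).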
7876
7877// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
7878defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
7879                         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7880defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
7881                         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7882defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
7883                         (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7884defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
7885                         (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7886defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
7887                         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7888defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
7889                         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7890defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
7891                         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7892defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
7893                         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7894// ... two-operand aliases
7895defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
7896                         (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7897defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
7898                         (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
7899defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
7900                         (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7901defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
7902                         (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
7903defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
7904                         (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7905defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
7906                         (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
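// For illustration, with the two-operand aliases above "vand d0, d1" is
// accepted as shorthand for "vand d0, d0, d1" (likewise for veor/vorr and
// the Q-register forms).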
7907// ... immediates
7908def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
7909                    (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
7910def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
7911                    (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
7912def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
7913                    (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
7914def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
7915                    (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
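// For illustration, the aliases above let an immediate VAND be written
// directly; it is encoded as a VBIC of the bitwise-complemented value, e.g.
// "vand.i16 d0, #0xfffe" is accepted and encoded as "vbic.i16 d0, #0x0001"
// (illustrative values).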
7916
7917
7918// VLD1 single-lane pseudo-instructions. These need special handling for
7919// the lane index that an InstAlias can't handle, so we use these instead.
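// For illustration, the pseudos below cover assembler forms such as
// "vld1.8 {d0[1]}, [r0]", "vld1.8 {d0[1]}, [r0]!" and
// "vld1.8 {d0[1]}, [r0], r2" (register numbers are arbitrary).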
7920def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
7921                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7922                      pred:$p)>;
7923def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
7924                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7925                      pred:$p)>;
7926def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
7927                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7928                      pred:$p)>;
7929
7930def VLD1LNdWB_fixed_Asm_8 :
7931        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
7932                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7933                      pred:$p)>;
7934def VLD1LNdWB_fixed_Asm_16 :
7935        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
7936                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7937                      pred:$p)>;
7938def VLD1LNdWB_fixed_Asm_32 :
7939        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
7940                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7941                      pred:$p)>;
7942def VLD1LNdWB_register_Asm_8 :
7943        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
7944                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7945                       rGPR:$Rm, pred:$p)>;
7946def VLD1LNdWB_register_Asm_16 :
7947        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
7948                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7949                       rGPR:$Rm, pred:$p)>;
7950def VLD1LNdWB_register_Asm_32 :
7951        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
7952                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7953                       rGPR:$Rm, pred:$p)>;
7954
7955
7956// VST1 single-lane pseudo-instructions. These need special handling for
7957// the lane index that an InstAlias can't handle, so we use these instead.
7958def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
7959                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7960                      pred:$p)>;
7961def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
7962                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7963                      pred:$p)>;
7964def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
7965                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7966                      pred:$p)>;
7967
7968def VST1LNdWB_fixed_Asm_8 :
7969        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
7970                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7971                      pred:$p)>;
7972def VST1LNdWB_fixed_Asm_16 :
7973        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
7974                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7975                      pred:$p)>;
7976def VST1LNdWB_fixed_Asm_32 :
7977        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
7978                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7979                      pred:$p)>;
7980def VST1LNdWB_register_Asm_8 :
7981        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
7982                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7983                       rGPR:$Rm, pred:$p)>;
7984def VST1LNdWB_register_Asm_16 :
7985        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
7986                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7987                       rGPR:$Rm, pred:$p)>;
7988def VST1LNdWB_register_Asm_32 :
7989        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
7990                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7991                       rGPR:$Rm, pred:$p)>;
7992
7993// VLD2 single-lane pseudo-instructions. These need special handling for
7994// the lane index that an InstAlias can't handle, so we use these instead.
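// For illustration, these cover forms such as "vld2.16 {d0[1], d1[1]}, [r0:32]"
// and, for the even-spaced Q-register lists, "vld2.16 {d0[1], d2[1]}, [r0:32]".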
7995def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
7996                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7997                  pred:$p)>;
7998def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
7999                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8000                      pred:$p)>;
8001def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
8002                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
8003def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
8004                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8005                      pred:$p)>;
8006def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
8007                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8008                      pred:$p)>;
8009
8010def VLD2LNdWB_fixed_Asm_8 :
8011        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
8012                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
8013                      pred:$p)>;
8014def VLD2LNdWB_fixed_Asm_16 :
8015        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
8016                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8017                      pred:$p)>;
8018def VLD2LNdWB_fixed_Asm_32 :
8019        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
8020                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
8021                      pred:$p)>;
8022def VLD2LNqWB_fixed_Asm_16 :
8023        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
8024                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8025                      pred:$p)>;
8026def VLD2LNqWB_fixed_Asm_32 :
8027        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
8028                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8029                      pred:$p)>;
8030def VLD2LNdWB_register_Asm_8 :
8031        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
8032                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
8033                       rGPR:$Rm, pred:$p)>;
8034def VLD2LNdWB_register_Asm_16 :
8035        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
8036                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8037                       rGPR:$Rm, pred:$p)>;
8038def VLD2LNdWB_register_Asm_32 :
8039        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
8040                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
8041                       rGPR:$Rm, pred:$p)>;
8042def VLD2LNqWB_register_Asm_16 :
8043        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
8044                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8045                       rGPR:$Rm, pred:$p)>;
8046def VLD2LNqWB_register_Asm_32 :
8047        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
8048                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8049                       rGPR:$Rm, pred:$p)>;
8050
8051
8052// VST2 single-lane pseudo-instructions. These need special handling for
8053// the lane index that an InstAlias can't handle, so we use these instead.
8054def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
8055                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
8056                      pred:$p)>;
8057def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
8058                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8059                      pred:$p)>;
8060def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
8061                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
8062                      pred:$p)>;
8063def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
8064                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8065                      pred:$p)>;
8066def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
8067                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8068                      pred:$p)>;
8069
8070def VST2LNdWB_fixed_Asm_8 :
8071        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
8072                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
8073                      pred:$p)>;
8074def VST2LNdWB_fixed_Asm_16 :
8075        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
8076                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8077                      pred:$p)>;
8078def VST2LNdWB_fixed_Asm_32 :
8079        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
8080                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
8081                      pred:$p)>;
8082def VST2LNqWB_fixed_Asm_16 :
8083        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
8084                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8085                      pred:$p)>;
8086def VST2LNqWB_fixed_Asm_32 :
8087        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
8088                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8089                      pred:$p)>;
8090def VST2LNdWB_register_Asm_8 :
8091        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
8092                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
8093                       rGPR:$Rm, pred:$p)>;
8094def VST2LNdWB_register_Asm_16 :
8095        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
8096                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8097                       rGPR:$Rm, pred:$p)>;
8098def VST2LNdWB_register_Asm_32 :
8099        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
8100                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
8101                       rGPR:$Rm, pred:$p)>;
8102def VST2LNqWB_register_Asm_16 :
8103        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
8104                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8105                       rGPR:$Rm, pred:$p)>;
8106def VST2LNqWB_register_Asm_32 :
8107        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
8108                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8109                       rGPR:$Rm, pred:$p)>;
8110
8111// VLD3 all-lanes pseudo-instructions. These need special handling for
8112// the lane index that an InstAlias can't handle, so we use these instead.
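// For illustration, these cover forms such as "vld3.8 {d0[], d1[], d2[]}, [r0]"
// and, for the even-spaced Q-register lists, "vld3.8 {d0[], d2[], d4[]}, [r0]".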
8113def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8114               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8115                    pred:$p)>;
8116def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8117               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8118                    pred:$p)>;
8119def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8120               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8121                    pred:$p)>;
8122def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8123               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8124                    pred:$p)>;
8125def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8126               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8127                    pred:$p)>;
8128def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8129               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8130                    pred:$p)>;
8131
8132def VLD3DUPdWB_fixed_Asm_8 :
8133        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8134               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8135                    pred:$p)>;
8136def VLD3DUPdWB_fixed_Asm_16 :
8137        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8138               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8139                    pred:$p)>;
8140def VLD3DUPdWB_fixed_Asm_32 :
8141        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8142               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8143                    pred:$p)>;
8144def VLD3DUPqWB_fixed_Asm_8 :
8145        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8146               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8147                    pred:$p)>;
8148def VLD3DUPqWB_fixed_Asm_16 :
8149        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8150               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8151                    pred:$p)>;
8152def VLD3DUPqWB_fixed_Asm_32 :
8153        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8154               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8155                    pred:$p)>;
8156def VLD3DUPdWB_register_Asm_8 :
8157        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8158                  (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8159                       rGPR:$Rm, pred:$p)>;
8160def VLD3DUPdWB_register_Asm_16 :
8161        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8162                  (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8163                       rGPR:$Rm, pred:$p)>;
8164def VLD3DUPdWB_register_Asm_32 :
8165        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8166                  (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8167                       rGPR:$Rm, pred:$p)>;
8168def VLD3DUPqWB_register_Asm_8 :
8169        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8170                  (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8171                       rGPR:$Rm, pred:$p)>;
8172def VLD3DUPqWB_register_Asm_16 :
8173        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8174                  (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8175                       rGPR:$Rm, pred:$p)>;
8176def VLD3DUPqWB_register_Asm_32 :
8177        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8178                  (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8179                       rGPR:$Rm, pred:$p)>;
8180
8181
8182// VLD3 single-lane pseudo-instructions. These need special handling for
8183// the lane index that an InstAlias can't handle, so we use these instead.
8184def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8185               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8186                    pred:$p)>;
8187def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8188               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8189                    pred:$p)>;
8190def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8191               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8192                    pred:$p)>;
8193def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8194               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8195                    pred:$p)>;
8196def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8197               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8198                    pred:$p)>;
8199
8200def VLD3LNdWB_fixed_Asm_8 :
8201        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8202               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8203                    pred:$p)>;
8204def VLD3LNdWB_fixed_Asm_16 :
8205        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8206               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8207                    pred:$p)>;
8208def VLD3LNdWB_fixed_Asm_32 :
8209        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8210               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8211                    pred:$p)>;
8212def VLD3LNqWB_fixed_Asm_16 :
8213        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8214               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8215                    pred:$p)>;
8216def VLD3LNqWB_fixed_Asm_32 :
8217        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8218               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8219                    pred:$p)>;
8220def VLD3LNdWB_register_Asm_8 :
8221        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8222                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8223                       rGPR:$Rm, pred:$p)>;
8224def VLD3LNdWB_register_Asm_16 :
8225        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8226                  (ins VecListThreeDHWordIndexed:$list,
8227                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8228def VLD3LNdWB_register_Asm_32 :
8229        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8230                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8231                       rGPR:$Rm, pred:$p)>;
8232def VLD3LNqWB_register_Asm_16 :
8233        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8234                  (ins VecListThreeQHWordIndexed:$list,
8235                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8236def VLD3LNqWB_register_Asm_32 :
8237        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8238                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8239                       rGPR:$Rm, pred:$p)>;
8240
8241// VLD3 multiple structure pseudo-instructions. These need special handling for
8242// the vector operands that the normal instructions don't yet model.
8243// FIXME: Remove these when the register classes and instructions are updated.
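// For illustration, these cover forms such as "vld3.8 {d0, d1, d2}, [r0:64]"
// and, for the even-spaced lists, "vld3.8 {d0, d2, d4}, [r0:64]".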
8244def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8245               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8246def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8247               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8248def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8249               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8250def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8251               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8252def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8253               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8254def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8255               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8256
8257def VLD3dWB_fixed_Asm_8 :
8258        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8259               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8260def VLD3dWB_fixed_Asm_16 :
8261        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8262               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8263def VLD3dWB_fixed_Asm_32 :
8264        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8265               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8266def VLD3qWB_fixed_Asm_8 :
8267        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8268               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8269def VLD3qWB_fixed_Asm_16 :
8270        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8271               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8272def VLD3qWB_fixed_Asm_32 :
8273        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8274               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8275def VLD3dWB_register_Asm_8 :
8276        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8277                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8278                       rGPR:$Rm, pred:$p)>;
8279def VLD3dWB_register_Asm_16 :
8280        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8281                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8282                       rGPR:$Rm, pred:$p)>;
8283def VLD3dWB_register_Asm_32 :
8284        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8285                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8286                       rGPR:$Rm, pred:$p)>;
8287def VLD3qWB_register_Asm_8 :
8288        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8289                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8290                       rGPR:$Rm, pred:$p)>;
8291def VLD3qWB_register_Asm_16 :
8292        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8293                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8294                       rGPR:$Rm, pred:$p)>;
8295def VLD3qWB_register_Asm_32 :
8296        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8297                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8298                       rGPR:$Rm, pred:$p)>;
8299
8300// VST3 single-lane pseudo-instructions. These need special handling for
8301// the lane index that an InstAlias can't handle, so we use these instead.
8302def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8303               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8304                    pred:$p)>;
8305def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8306               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8307                    pred:$p)>;
8308def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8309               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8310                    pred:$p)>;
8311def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8312               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8313                    pred:$p)>;
8314def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8315               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8316                    pred:$p)>;
8317
8318def VST3LNdWB_fixed_Asm_8 :
8319        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8320               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8321                    pred:$p)>;
8322def VST3LNdWB_fixed_Asm_16 :
8323        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8324               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8325                    pred:$p)>;
8326def VST3LNdWB_fixed_Asm_32 :
8327        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8328               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8329                    pred:$p)>;
8330def VST3LNqWB_fixed_Asm_16 :
8331        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8332               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8333                    pred:$p)>;
8334def VST3LNqWB_fixed_Asm_32 :
8335        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8336               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8337                    pred:$p)>;
8338def VST3LNdWB_register_Asm_8 :
8339        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8340                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8341                       rGPR:$Rm, pred:$p)>;
8342def VST3LNdWB_register_Asm_16 :
8343        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8344                  (ins VecListThreeDHWordIndexed:$list,
8345                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8346def VST3LNdWB_register_Asm_32 :
8347        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8348                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8349                       rGPR:$Rm, pred:$p)>;
8350def VST3LNqWB_register_Asm_16 :
8351        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8352                  (ins VecListThreeQHWordIndexed:$list,
8353                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8354def VST3LNqWB_register_Asm_32 :
8355        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8356                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8357                       rGPR:$Rm, pred:$p)>;
8358
8359
8360// VST3 multiple structure pseudo-instructions. These need special handling for
8361// the vector operands that the normal instructions don't yet model.
8362// FIXME: Remove these when the register classes and instructions are updated.
8363def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8364               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8365def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8366               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8367def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8368               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8369def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8370               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8371def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8372               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8373def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8374               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8375
8376def VST3dWB_fixed_Asm_8 :
8377        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8378               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8379def VST3dWB_fixed_Asm_16 :
8380        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8381               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8382def VST3dWB_fixed_Asm_32 :
8383        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8384               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8385def VST3qWB_fixed_Asm_8 :
8386        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8387               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8388def VST3qWB_fixed_Asm_16 :
8389        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8390               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8391def VST3qWB_fixed_Asm_32 :
8392        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8393               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8394def VST3dWB_register_Asm_8 :
8395        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8396                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8397                       rGPR:$Rm, pred:$p)>;
8398def VST3dWB_register_Asm_16 :
8399        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8400                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8401                       rGPR:$Rm, pred:$p)>;
8402def VST3dWB_register_Asm_32 :
8403        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8404                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8405                       rGPR:$Rm, pred:$p)>;
8406def VST3qWB_register_Asm_8 :
8407        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8408                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8409                       rGPR:$Rm, pred:$p)>;
8410def VST3qWB_register_Asm_16 :
8411        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8412                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8413                       rGPR:$Rm, pred:$p)>;
8414def VST3qWB_register_Asm_32 :
8415        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8416                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8417                       rGPR:$Rm, pred:$p)>;
8418
8419// VLD4 all-lanes pseudo-instructions. These need special handling for
8420// the lane index that an InstAlias can't handle, so we use these instead.
8421def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
8422               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
8423                    pred:$p)>;
8424def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
8425               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
8426                    pred:$p)>;
8427def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
8428               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
8429                    pred:$p)>;
8430def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
8431               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
8432                    pred:$p)>;
8433def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
8434               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
8435                    pred:$p)>;
8436def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
8437               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
8438                    pred:$p)>;
8439
8440def VLD4DUPdWB_fixed_Asm_8 :
8441        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
8442               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
8443                    pred:$p)>;
8444def VLD4DUPdWB_fixed_Asm_16 :
8445        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
8446               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
8447                    pred:$p)>;
8448def VLD4DUPdWB_fixed_Asm_32 :
8449        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
8450               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
8451                    pred:$p)>;
8452def VLD4DUPqWB_fixed_Asm_8 :
8453        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
8454               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
8455                    pred:$p)>;
8456def VLD4DUPqWB_fixed_Asm_16 :
8457        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
8458               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
8459                    pred:$p)>;
8460def VLD4DUPqWB_fixed_Asm_32 :
8461        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
8462               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
8463                    pred:$p)>;
8464def VLD4DUPdWB_register_Asm_8 :
8465        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
8466                  (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
8467                       rGPR:$Rm, pred:$p)>;
8468def VLD4DUPdWB_register_Asm_16 :
8469        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
8470                  (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
8471                       rGPR:$Rm, pred:$p)>;
8472def VLD4DUPdWB_register_Asm_32 :
8473        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
8474                  (ins VecListFourDAllLanes:$list,
8475                       addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
8476def VLD4DUPqWB_register_Asm_8 :
8477        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
8478                  (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
8479                       rGPR:$Rm, pred:$p)>;
8480def VLD4DUPqWB_register_Asm_16 :
8481        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
8482                  (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
8483                       rGPR:$Rm, pred:$p)>;
8484def VLD4DUPqWB_register_Asm_32 :
8485        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
8486                  (ins VecListFourQAllLanes:$list,
8487                       addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
8488
8489
8490// VLD4 single-lane pseudo-instructions. These need special handling for
8491// the lane index that an InstAlias can't handle, so we use these instead.
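// For illustration, these cover forms such as
// "vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r0:128]" and, for the even-spaced
// Q-register lists, "vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r0:128]".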
8492def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
8493               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
8494                    pred:$p)>;
8495def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
8496               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
8497                    pred:$p)>;
8498def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
8499               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
8500                    pred:$p)>;
8501def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
8502               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
8503                    pred:$p)>;
8504def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
8505               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
8506                    pred:$p)>;
8507
8508def VLD4LNdWB_fixed_Asm_8 :
8509        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
8510               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
8511                    pred:$p)>;
8512def VLD4LNdWB_fixed_Asm_16 :
8513        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
8514               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
8515                    pred:$p)>;
8516def VLD4LNdWB_fixed_Asm_32 :
8517        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
8518               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
8519                    pred:$p)>;
8520def VLD4LNqWB_fixed_Asm_16 :
8521        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
8522               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
8523                    pred:$p)>;
8524def VLD4LNqWB_fixed_Asm_32 :
8525        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
8526               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
8527                    pred:$p)>;
8528def VLD4LNdWB_register_Asm_8 :
8529        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
8530                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
8531                       rGPR:$Rm, pred:$p)>;
8532def VLD4LNdWB_register_Asm_16 :
8533        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
8534                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
8535                       rGPR:$Rm, pred:$p)>;
8536def VLD4LNdWB_register_Asm_32 :
8537        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
8538                  (ins VecListFourDWordIndexed:$list,
8539                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
8540def VLD4LNqWB_register_Asm_16 :
8541        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
8542                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
8543                       rGPR:$Rm, pred:$p)>;
8544def VLD4LNqWB_register_Asm_32 :
8545        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
8546                  (ins VecListFourQWordIndexed:$list,
8547                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
8548
8549
8550
8551// VLD4 multiple structure pseudo-instructions. These need special handling for
8552// the vector operands that the normal instructions don't yet model.
8553// FIXME: Remove these when the register classes and instructions are updated.
8554def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
8555               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8556                pred:$p)>;
8557def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
8558               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8559                pred:$p)>;
8560def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
8561               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8562                pred:$p)>;
8563def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
8564               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8565                pred:$p)>;
8566def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
8567               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8568                pred:$p)>;
8569def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
8570               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8571                pred:$p)>;
8572
8573def VLD4dWB_fixed_Asm_8 :
8574        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
8575               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8576                pred:$p)>;
8577def VLD4dWB_fixed_Asm_16 :
8578        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
8579               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8580                pred:$p)>;
8581def VLD4dWB_fixed_Asm_32 :
8582        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
8583               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8584                pred:$p)>;
8585def VLD4qWB_fixed_Asm_8 :
8586        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
8587               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8588                pred:$p)>;
8589def VLD4qWB_fixed_Asm_16 :
8590        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
8591               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8592                pred:$p)>;
8593def VLD4qWB_fixed_Asm_32 :
8594        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
8595               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8596                pred:$p)>;
8597def VLD4dWB_register_Asm_8 :
8598        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
8599                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8600                       rGPR:$Rm, pred:$p)>;
8601def VLD4dWB_register_Asm_16 :
8602        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
8603                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8604                       rGPR:$Rm, pred:$p)>;
8605def VLD4dWB_register_Asm_32 :
8606        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
8607                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8608                       rGPR:$Rm, pred:$p)>;
8609def VLD4qWB_register_Asm_8 :
8610        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
8611                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8612                       rGPR:$Rm, pred:$p)>;
8613def VLD4qWB_register_Asm_16 :
8614        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
8615                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8616                       rGPR:$Rm, pred:$p)>;
8617def VLD4qWB_register_Asm_32 :
8618        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
8619                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8620                       rGPR:$Rm, pred:$p)>;
8621
8622// VST4 single-lane pseudo-instructions. These need special handling for
8623// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
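// For illustration (operands chosen arbitrarily), these accept the
// multiple-structure forms, with or without writeback, such as
//   vst4.8  {d0, d1, d2, d3}, [r0]
//   vst4.16 {d0, d2, d4, d6}, [r0:256]!
// covering both the consecutive (VecListFourD) and double-spaced
// (VecListFourQ) register lists.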
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;

// VMOV/VMVN take an optional datatype suffix
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
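// For example (registers chosen arbitrarily), with the aliases above a
// suffixed register copy such as
//   vmov.i32 d0, d1
// is accepted and encoded as "vorr d0, d1, d1"; a plain register move is
// datatype-agnostic, so any suffix is tolerated.  The vmvn aliases work the
// same way for the register form of VMVN.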
// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
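// For example (registers chosen arbitrarily), "vcle.s8 d0, d1, d2" is
// accepted and encoded as "vcge.s8 d0, d2, d1", i.e. the same comparison with
// the source operands swapped.  The VCLT/VCGT aliases further below follow
// the same pattern.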
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
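// The VSWP and VBIF/VBIT/VBSL aliases above exist because these are purely
// bitwise operations, so the element size carries no meaning; e.g. (registers
// chosen arbitrarily) "vbsl.8 d0, d1, d2" assembles to the same instruction
// as "vbsl d0, d1, d2".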

// "vmov Vd, #imm" where only the bitwise complement of the immediate is
// encodable as a modified immediate can be handled via "vmvn" (and vice
// versa).
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
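// For example (register chosen arbitrarily), 0xfffffffe is not a valid
// vmov.i32 modified immediate, but its bitwise complement 0x1 is, so
//   vmov.i32 d0, #0xfffffffe
// can be accepted through the aliases above and emitted as
//   vmvn.i32 d0, #0x1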

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should be restricted to just the Q register variants, but the
// register classes are enough to match correctly regardless, so we keep it
// simple and just use MnemonicAlias.
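// For example (registers chosen arbitrarily), with the aliases below a
// gas-style "vaddq.i32 q0, q1, q2" is accepted as plain
// "vadd.i32 q0, q1, q2".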
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Aliases for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but whose bit pattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                     (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                     (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
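// For example (register chosen arbitrarily), "vmov.f32 d0, #0.0" has no valid
// 8-bit floating point immediate encoding, but the bit pattern 0x00000000 is
// a valid .i32 modified immediate, so it can be accepted via the aliases
// above.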
8918