xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td (revision 6966ac055c3b7a39266fb982493330df7a097997)
1//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the ARM NEON instruction set.
10//
11//===----------------------------------------------------------------------===//
12
13
14//===----------------------------------------------------------------------===//
15// NEON-specific Operands.
16//===----------------------------------------------------------------------===//
// Generic NEON modified-immediate operand (i32).  The only customization is
// the printer hook used when the operand is emitted.
def nModImm : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; }
20
// Splat-style immediate operands.  Each AsmOperandClass supplies the match
// class name, and the i32 operand that follows it refers back to that class
// through ParserMatchClass.

// i8 splat.
def nImmSplatI8AsmOperand : AsmOperandClass {
  let Name = "NEONi8splat";
}
def nImmSplatI8 : Operand<i32> {
  let ParserMatchClass = nImmSplatI8AsmOperand;
  let PrintMethod = "printNEONModImmOperand";
}

// i16 splat.
def nImmSplatI16AsmOperand : AsmOperandClass {
  let Name = "NEONi16splat";
}
def nImmSplatI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatI16AsmOperand;
  let PrintMethod = "printNEONModImmOperand";
}

// i32 splat.
def nImmSplatI32AsmOperand : AsmOperandClass {
  let Name = "NEONi32splat";
}
def nImmSplatI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatI32AsmOperand;
  let PrintMethod = "printNEONModImmOperand";
}

// "Not" variants of the i16 / i32 splats.  Unlike the operands above, these
// two do not set a PrintMethod.
def nImmSplatNotI16AsmOperand : AsmOperandClass {
  let Name = "NEONi16splatNot";
}
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass {
  let Name = "NEONi32splatNot";
}
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}

// i32 VMOV immediate.
def nImmVMOVI32AsmOperand : AsmOperandClass {
  let Name = "NEONi32vmov";
}
def nImmVMOVI32 : Operand<i32> {
  let ParserMatchClass = nImmVMOVI32AsmOperand;
  let PrintMethod = "printNEONModImmOperand";
}
49
// Match class for a "vmov"-style replicated immediate.  The class name, the
// predicate (a template instantiated with both sizes) and the render method
// are all assembled from the bit sizes of the From and To value types.
class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
    : AsmOperandClass {
  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
}

// Same construction as above, but using the "inv" predicate / render names.
class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
    : AsmOperandClass {
  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
}

// i32 operand bound to the matching nImmVMOVIAsmOperandReplicate<From, To>
// match class and printed as a NEON modified immediate.
class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
  let PrintMethod = "printNEONModImmOperand";
}

// i32 operand bound to the matching nImmVINVIAsmOperandReplicate<From, To>
// match class and printed as a NEON modified immediate.
class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
  let PrintMethod = "printNEONModImmOperand";
}
73
// "Neg" variant of the i32 VMOV immediate operand.
def nImmVMOVI32NegAsmOperand : AsmOperandClass {
  let Name = "NEONi32vmovNeg";
}
def nImmVMOVI32Neg : Operand<i32> {
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
  let PrintMethod = "printNEONModImmOperand";
}

// f32 VMOV immediate: reuses the FPImmOperand match class (defined outside
// this section) and is printed with the FP-immediate printer.
def nImmVMOVF32 : Operand<i32> {
  let ParserMatchClass = FPImmOperand;
  let PrintMethod = "printFPImmOperand";
}

// i64 splat.
def nImmSplatI64AsmOperand : AsmOperandClass {
  let Name = "NEONi64splat";
}
def nImmSplatI64 : Operand<i32> {
  let ParserMatchClass = nImmSplatI64AsmOperand;
  let PrintMethod = "printNEONModImmOperand";
}
88
// Assembler match classes for the vector lane-index operands.
def VectorIndex8Operand : AsmOperandClass {
  let Name = "VectorIndex8";
}
def VectorIndex16Operand : AsmOperandClass {
  let Name = "VectorIndex16";
}
def VectorIndex32Operand : AsmOperandClass {
  let Name = "VectorIndex32";
}
def VectorIndex64Operand : AsmOperandClass {
  let Name = "VectorIndex64";
}

// Lane-index operands.  Each is simultaneously an i32 operand and an ImmLeaf
// whose predicate bounds the immediate (compared as unsigned): below 8, 4, 2
// and 1 for the 8-, 16-, 32- and 64-suffixed variants respectively.
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let MIOperandInfo = (ops i32imm);
  let PrintMethod = "printVectorIndex";
  let ParserMatchClass = VectorIndex8Operand;
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let MIOperandInfo = (ops i32imm);
  let PrintMethod = "printVectorIndex";
  let ParserMatchClass = VectorIndex16Operand;
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let MIOperandInfo = (ops i32imm);
  let PrintMethod = "printVectorIndex";
  let ParserMatchClass = VectorIndex32Operand;
}
def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 1;
}]> {
  let MIOperandInfo = (ops i32imm);
  let PrintMethod = "printVectorIndex";
  let ParserMatchClass = VectorIndex64Operand;
}
121
// Plain (non-indexed) vector register lists.  Every match class in this
// group is parsed by parseVectorList and rendered by addVecListOperands; the
// register operand that follows each one selects the register class and the
// printer.

// One D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}

// Two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}

// Three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}

// Four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}

// Two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}

// Three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
// Operand over DPR, printed by the four-register "spaced" list printer.
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}
185
// Vector register lists with "all lanes" subscripting.  As with the plain
// lists, every match class here is parsed by parseVectorList and rendered by
// addVecListOperands.

// One D register, all lanes.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneDAllLanes
    : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}

// Two D registers, all lanes.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListDPairAllLanes
    : RegisterOperand<DPair, "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}

// Two D registers spaced by 2 (two sequential Q registers), all lanes.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListDPairSpacedAllLanes
    : RegisterOperand<DPairSpc, "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}

// Three D registers, all lanes.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeDAllLanes
    : RegisterOperand<DPR, "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}

// Three D registers spaced by 2 (three sequential Q regs), all lanes.
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeQAllLanes
    : RegisterOperand<DPR, "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}

// Four D registers, all lanes.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourDAllLanes
    : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}

// Four D registers spaced by 2 (four sequential Q regs), all lanes.
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let RenderMethod = "addVecListOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourQAllLanes
    : RegisterOperand<DPR, "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}
254
255
// Lane-indexed lists of one D register.  Each match class is parsed by
// parseVectorList and rendered by addVecListIndexedOperands; each operand
// expands to two machine operands, a DPR register ($Vd) and an i32 lane
// index ($idx).

// Byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneDByteIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
}

// Half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
}

// Word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListOneDWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
}
286
// Lane-indexed lists of two registers.  Same shape as the one-register
// versions: parsed by parseVectorList, rendered by addVecListIndexedOperands,
// and carrying a DPR register ($Vd) plus an i32 lane index ($idx).

// Two D registers, byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
}

// Two D registers, half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
}

// Two D registers, word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
}

// Two Q registers, half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
}

// Two Q registers, word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
}
337
338
// Lane-indexed lists of three registers.  Parsed by parseVectorList, rendered
// by addVecListIndexedOperands, and carrying a DPR register ($Vd) plus an i32
// lane index ($idx).

// Three D registers, byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
}

// Three D registers, half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
}

// Three D registers, word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
}

// Three Q registers, half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
}

// Three Q registers, word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
}
389
// Lane-indexed lists of four registers.  Parsed by parseVectorList, rendered
// by addVecListIndexedOperands, and carrying a DPR register ($Vd) plus an i32
// lane index ($idx).

// Four D registers, byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourDByteIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
}

// Four D registers, half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
}

// Four D registers, word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourDWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
}

// Four Q registers, half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
}

// Four Q registers, word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let RenderMethod = "addVecListIndexedOperands";
  let ParserMethod = "parseVectorList";
}
def VecListFourQWordIndexed : Operand<i32> {
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
}
440
// Load / store pattern fragments that additionally test the alignment
// reported by the memory node:
//   dword_*     : alignment of at least 8
//   word_*      : alignment of exactly 4
//   hword_*     : alignment of exactly 2
//   byte_*      : alignment of exactly 1
//   non_word_*  : any alignment below 4
def dword_alignedload
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;

def word_alignedload
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;

def hword_alignedload
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;

def byte_alignedload
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;

def non_word_alignedload
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;
476
477//===----------------------------------------------------------------------===//
478// NEON-specific DAG Nodes.
479//===----------------------------------------------------------------------===//
480
// Type profiles for the vector compare nodes.
//   SDTARMVCMP  : one result, two operands; the result is an integer type and
//                 the two operands share a type.
//   SDTARMVCMPZ : one result, one operand; no type constraints.
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;

// Compare nodes.  The two-operand forms use SDTARMVCMP and the "z"-suffixed
// single-operand forms use SDTARMVCMPZ.
def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
495
// Type profiles for vector shifts by an immediate.
//   SDTARMVSHXIMM   : used by the long / narrow operations, where source and
//                     destination vector types differ.  Result and first
//                     operand are integer types; the second operand is i32.
//   SDTARMVSHINSIMM : used by the shift-and-insert operations.  Result and
//                     the first two operands share one integer type; the
//                     third operand is i32.
// SDTARMVSHIMM, referenced below, is declared outside this section.
def SDTARMVSHXIMM
    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, i32>]>;
def SDTARMVSHINSIMM
    : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;

def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
def NEONvrshruImm : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
def NEONvrshrnImm : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;

def NEONvqshlsImm : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
def NEONvqshluImm : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
def NEONvqshlsuImm : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
def NEONvqshrnsImm : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqshrnuImm : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvqrshrnsImm : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqrshrnuImm : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
523
// OR / BIC with immediate: vector result, first operand of the same type,
// second operand i32.
def SDTARMVORRIMM
    : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                           SDTCisVT<2, i32>]>;
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

// VBSL: vector result and three operands, all of the same type.
def NEONvbsl
    : SDNode<"ARMISD::VBSL",
             SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                  SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>>;

// VEXT: vector result, two operands of the same type, plus an i32 operand.
def SDTARMVEXT
    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Two-result shuffles (VZIP / VUZP / VTRN): two results and two operands, all
// of the same vector type.
def SDTARMVSHUF2
    : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// VMULL (signed / unsigned): integer result, two integer operands that share
// a type.
def SDTARMVMULL
    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>]>;
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

// VTBL1 / VTBL2: the result and every operand are v8i8.
def SDTARMVTBL1
    : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                           SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2
    : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                           SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
557
558
// Matches an ARMvmovImm node whose i32 target-immediate operand, once passed
// through ARM_AM::decodeNEONModImm, yields 32-bit elements with value 0.
def NEONimmAllZerosV
    : PatLeaf<(ARMvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Matches an ARMvmovImm node whose i32 target-immediate operand, once passed
// through ARM_AM::decodeNEONModImm, yields 8-bit elements with value 0xff.
def NEONimmAllOnesV
    : PatLeaf<(ARMvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;
572
573//===----------------------------------------------------------------------===//
574// NEON load / store instructions
575//===----------------------------------------------------------------------===//
576
// Q-register load performed with VLDM, treating the Q register as a pair of
// D registers.  Pseudo instruction; expanded to VLDMD after register
// allocation.  Selected for v2f64 loads that satisfy word_alignedload.
def VLDMQIA
    : PseudoVFPLdStM<(outs DPair:$dst),
                     (ins GPR:$Rn),
                     IIC_fpLoad_m,
                     "",
                     [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Q-register store performed with VSTM, treating the Q register as a pair of
// D registers.  Pseudo instruction; expanded to VSTMD after register
// allocation.  Selected for v2f64 stores that satisfy word_alignedstore.
def VSTMQIA
    : PseudoVFPLdStM<(outs),
                     (ins DPair:$src, GPR:$Rn),
                     IIC_fpStore_m,
                     "",
                     [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
590
// Pseudo-instruction classes for VLD* with multi-register operands.  They are
// expanded to real instructions after register allocation.  The writeback
// ("WB") flavours add a GPR:$wb result tied to the address through the
// "$addr.addr = $wb" constraint; they differ only in how the post-increment
// is supplied (am6offset, none, or an rGPR register).

// Destination: one Q register.
class VLDQPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst),
                  (ins addrmode6:$addr),
                  itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset),
                  itin, "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr),
                  itin, "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, rGPR:$offset),
                  itin, "$addr.addr = $wb">;

// Destination: a QQPR register.
class VLDQQPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst),
                  (ins addrmode6:$addr),
                  itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset),
                  itin, "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr),
                  itin, "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, rGPR:$offset),
                  itin, "$addr.addr = $wb">;


// Destination: a QQQQPR register.  These forms also take the register as an
// input ($src) that is tied to the result ($dst).
class VLDQQQQPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQQQPR:$dst),
                  (ins addrmode6:$addr, QQQQPR:$src),
                  itin, "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
    : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                  (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src),
                  itin, "$addr.addr = $wb, $src = $dst">;
631
632let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
633
//   VLD1     : Vector Load (multiple single elements)

// One-D-register form.  Rm is fixed to 0b1111 and Inst{4} is taken from bit 4
// of the $Rn address operand.
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b10, 0b0111, op7_4,
            (outs VecListOneD:$Vd), (ins AddrMode:$Rn),
            IIC_VLD1, "vld1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVLD1]> {
  let DecoderMethod = "DecodeVLDST1Instruction";
  let Inst{4} = Rn{4};
  let Rm = 0b1111;
}

// D-register-pair form.  Rm is fixed to 0b1111 and Inst{5-4} is taken from
// bits 5-4 of the $Rn address operand.
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
    : NLdSt<0, 0b10, 0b1010, op7_4,
            (outs VecListDPair:$Vd), (ins AddrMode:$Rn),
            IIC_VLD1x2, "vld1", Dt, "$Vd, $Rn", "", []>,
      Sched<[WriteVLD2]> {
  let DecoderMethod = "DecodeVLDST1Instruction";
  let Inst{5-4} = Rn{5-4};
  let Rm = 0b1111;
}
651
// VLD1 into one D register, for each element size.  The upper two bits of
// op7_4 vary with the element size; the remaining bits are fixed or left
// unset ('?') as shown.
def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>;
def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

// VLD1 into a D-register pair, for each element size.
def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
661
// ...with address register writeback.  Each multiclass yields two records:
//   _fixed    : "$Rn!" syntax, with Rm fixed to 0b1101
//   _register : post-increment taken from an rGPR ($Rm)
// Both add a GPR:$wb result tied to the address via "$Rn.addr = $wb".
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
      : NLdSt<0, 0b10, 0b0111, op7_4,
              (outs VecListOneD:$Vd, GPR:$wb), (ins AddrMode:$Rn),
              IIC_VLD1u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
        Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{4} = Rn{4};
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  }
  def _register
      : NLdSt<0, 0b10, 0b0111, op7_4,
              (outs VecListOneD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm),
              IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
        Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{4} = Rn{4};
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
      : NLdSt<0, 0b10, 0b1010, op7_4,
              (outs VecListDPair:$Vd, GPR:$wb), (ins AddrMode:$Rn),
              IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
        Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{5-4} = Rn{5-4};
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  }
  def _register
      : NLdSt<0, 0b10, 0b1010, op7_4,
              (outs VecListDPair:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm),
              IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
        Sched<[WriteVLD2]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{5-4} = Rn{5-4};
  }
}
697
// Writeback VLD1 instantiations; each defm produces a _fixed and a _register
// record.  One-D-register forms first, then the D-register-pair forms.
defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
706
707// ...with 3 registers
708class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
709  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
710          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
711          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
712  let Rm = 0b1111;
713  let Inst{4} = Rn{4};
714  let DecoderMethod = "DecodeVLDST1Instruction";
715}
// Writeback forms of the three-register VLD1 (VecListThreeD destination).
// Produces <name>_fixed (asm "$Vd, $Rn!", Rm = 0b1101) and <name>_register
// (asm "$Vd, $Rn, $Rm"); both tie $wb to $Rn.addr.
// NOTE(review): both records use the IIC_VLD1x2u itinerary although three
// registers are loaded and the non-writeback class uses IIC_VLD1x3; a
// three-register writeback itinerary may have been intended - confirm.
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b10, 0b0110, op7_4,
            (outs VecListThreeD:$Vd, GPR:$wb),
            (ins AddrMode:$Rn),
            IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
      Sched<[WriteVLD3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register
    : NLdSt<0, 0b10, 0b0110, op7_4,
            (outs VecListThreeD:$Vd, GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm),
            IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
      Sched<[WriteVLD3]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
733
// Three-register VLD1 instantiations, one per element size.
def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

// ...and their writeback (_fixed / _register) counterparts.
defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
743
// Pseudo records for the three-register VLD1 forms; all use the IIC_VLD1x3
// itinerary and the WriteVLD3 scheduling class.
def VLD1d8TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_fixed
  : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register
  : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

// High (QQQQ, no writeback) / Low (QQQQ, writeback) pseudo pairs.
def VLD1q8HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
759
// ...with 4 registers
// Non-writeback VLD1 into a VecListFourD destination.  Rm is fixed at
// 0b1111 and Inst{5-4} is taken from bits 5-4 of the $Rn operand.
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0010, op7_4,
          (outs VecListFourD:$Vd),
          (ins AddrMode:$Rn),
          IIC_VLD1x4, "vld1", Dt, "$Vd, $Rn", "", []>,
    Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// Writeback forms of the four-register VLD1 (VecListFourD destination).
// Produces <name>_fixed (asm "$Vd, $Rn!", Rm = 0b1101) and <name>_register
// (asm "$Vd, $Rn, $Rm"); both tie $wb to $Rn.addr.
// NOTE(review): both records use the IIC_VLD1x2u itinerary although four
// registers are loaded and the non-writeback class uses IIC_VLD1x4; a
// four-register writeback itinerary may have been intended - confirm.
multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b10, 0b0010, op7_4,
            (outs VecListFourD:$Vd, GPR:$wb),
            (ins AddrMode:$Rn),
            IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
      Sched<[WriteVLD4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register
    : NLdSt<0, 0b10, 0b0010, op7_4,
            (outs VecListFourD:$Vd, GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm),
            IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
      Sched<[WriteVLD4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
786
// Four-register VLD1 instantiations, one per element size.
def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

// ...and their writeback (_fixed / _register) counterparts.
defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
796
// Pseudo records for the four-register VLD1 forms; all use the IIC_VLD1x4
// itinerary and the WriteVLD4 scheduling class.
def VLD1d8QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_fixed
  : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_register
  : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

// Low (QQQQ, writeback) / High (QQQQ, no writeback) pseudo pairs.
def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
812
//   VLD2     : Vector Load (multiple 2-element structures)
// Non-writeback form.  The destination register-list operand, itinerary and
// addressing-mode operand are supplied by the instantiation; this class
// attaches no Sched<> itself.  Rm is fixed at 0b1111 and Inst{5-4} is taken
// from bits 5-4 of the $Rn operand.
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs VdTy:$Vd),
          (ins AddrMode:$Rn),
          itin, "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}
823
// VLD2 into a D-register pair (op11_8 = 0b1000).
def VLD2d8
  : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
         addrmode6align64or128>,
    Sched<[WriteVLD2]>;
def VLD2d16
  : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
         addrmode6align64or128>,
    Sched<[WriteVLD2]>;
def VLD2d32
  : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
         addrmode6align64or128>,
    Sched<[WriteVLD2]>;

// VLD2 into four D registers (op11_8 = 0b0011).
def VLD2q8
  : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
         addrmode6align64or128or256>,
    Sched<[WriteVLD4]>;
def VLD2q16
  : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
         addrmode6align64or128or256>,
    Sched<[WriteVLD4]>;
def VLD2q32
  : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
         addrmode6align64or128or256>,
    Sched<[WriteVLD4]>;

// Pseudo records for the four-register forms.
def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
841
// ...with address register writeback:
// Produces <name>_fixed (asm "$Vd, $Rn!", Rm = 0b1101) and <name>_register
// (asm "$Vd, $Rn, $Rm").  Both add a GPR $wb output tied to $Rn.addr.  No
// Sched<> is attached here; instantiations provide it.
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs VdTy:$Vd, GPR:$wb),
            (ins AddrMode:$Rn),
            itin, "vld2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register
    : NLdSt<0, 0b10, op11_8, op7_4,
            (outs VdTy:$Vd, GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm),
            itin, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
861
// Writeback VLD2 into a D-register pair.
defm VLD2d8wb
  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
           addrmode6align64or128>,
    Sched<[WriteVLD2]>;
defm VLD2d16wb
  : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
           addrmode6align64or128>,
    Sched<[WriteVLD2]>;
defm VLD2d32wb
  : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
           addrmode6align64or128>,
    Sched<[WriteVLD2]>;

// Writeback VLD2 into four D registers.
defm VLD2q8wb
  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
           addrmode6align64or128or256>,
    Sched<[WriteVLD4]>;
defm VLD2q16wb
  : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
           addrmode6align64or128or256>,
    Sched<[WriteVLD4]>;
defm VLD2q32wb
  : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
           addrmode6align64or128or256>,
    Sched<[WriteVLD4]>;

// Pseudo records for the four-register writeback forms.
def VLD2q8PseudoWB_fixed
  : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q16PseudoWB_fixed
  : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q32PseudoWB_fixed
  : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q8PseudoWB_register
  : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q16PseudoWB_register
  : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q32PseudoWB_register
  : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
882
// ...with double-spaced registers
// Same shape as the D-pair forms, but with VecListDPairSpaced as the
// destination list and op11_8 = 0b1001.
def VLD2b8
  : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
         addrmode6align64or128>,
    Sched<[WriteVLD2]>;
def VLD2b16
  : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
         addrmode6align64or128>,
    Sched<[WriteVLD2]>;
def VLD2b32
  : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
         addrmode6align64or128>,
    Sched<[WriteVLD2]>;
defm VLD2b8wb
  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
           addrmode6align64or128>,
    Sched<[WriteVLD2]>;
defm VLD2b16wb
  : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
           addrmode6align64or128>,
    Sched<[WriteVLD2]>;
defm VLD2b32wb
  : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
           addrmode6align64or128>,
    Sched<[WriteVLD2]>;
896
//   VLD3     : Vector Load (multiple 3-element structures)
// Non-writeback form with three separate DPR outputs.  Rm is fixed at
// 0b1111 and Inst{4} is taken from bit 4 of the $Rn operand.
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn),
          IIC_VLD3, "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>,
    Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}
906
// VLD3 instantiations with op11_8 = 0b0100, one per element size.
def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

// Matching pseudo records.
def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
914
// ...with address register writeback:
// Adds a GPR $wb output tied to $Rn.addr and an am6offset:$Rm input that is
// printed directly after $Rn in the asm string.
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm),
          IIC_VLD3u, "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>,
    Sched<[WriteVLD3]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}
925
// Writeback VLD3 instantiations with op11_8 = 0b0100.
def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

// Matching writeback pseudo records.
def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
933
// ...with double-spaced registers:
// Same classes as the forms above, instantiated with op11_8 = 0b0101.
def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

// Writeback pseudo records over QQQQ registers.
def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
945
// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;

// Writeback variants of the odd-register pseudos.
def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
954
//   VLD4     : Vector Load (multiple 4-element structures)
// Non-writeback form with four separate DPR outputs.  Rm is fixed at
// 0b1111 and Inst{5-4} is taken from bits 5-4 of the $Rn operand.
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn),
          IIC_VLD4, "vld4", Dt,
          "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
    Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}
966
// VLD4 instantiations with op11_8 = 0b0000, one per element size.
def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

// Matching pseudo records.
def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
974
// ...with address register writeback:
// Adds a GPR $wb output tied to $Rn.addr and an am6offset:$Rm input that is
// printed directly after $Rn in the asm string.
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm),
          IIC_VLD4u, "vld4", Dt,
          "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>,
    Sched<[WriteVLD4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}
985
// Writeback VLD4 instantiations with op11_8 = 0b0000.
def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

// Matching writeback pseudo records.
def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
993
// ...with double-spaced registers:
// Same classes as the forms above, instantiated with op11_8 = 0b0001.
def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

// Writeback pseudo records over QQQQ registers.
def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1005
// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

// Writeback variants of the odd-register pseudos.
def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1014
1015} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1016
// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
//
// Each class takes the itinerary as its only parameter and ties $src to
// $dst.  The *WB* variants additionally produce a GPR $wb tied to
// $addr.addr and accept an am6offset:$offset input.

// Single Q register.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<
      (outs QPR:$dst),
      (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
      itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<
      (outs QPR:$dst, GPR:$wb),
      (ins addrmode6:$addr, am6offset:$offset, QPR:$src, nohash_imm:$lane),
      itin, "$addr.addr = $wb, $src = $dst">;

// QQ register operand.
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<
      (outs QQPR:$dst),
      (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
      itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<
      (outs QQPR:$dst, GPR:$wb),
      (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, nohash_imm:$lane),
      itin, "$addr.addr = $wb, $src = $dst">;

// QQQQ register operand.
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<
      (outs QQQQPR:$dst),
      (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
      itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<
      (outs QQQQPR:$dst, GPR:$wb),
      (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, nohash_imm:$lane),
      itin, "$addr.addr = $wb, $src = $dst">;
1043
//   VLD1LN   : Vector Load (single element to one lane)
// The selection pattern matches a vector_insert of an i32 value loaded via
// LoadOp from addrmode6:$Rn into lane $lane of the Ty-typed $src; the
// result register $Vd is tied to $src.  Rm is fixed at 0b1111.
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd),
            (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd,
                  (vector_insert (Ty DPR:$src),
                                 (i32 (LoadOp addrmode6:$Rn)),
                                 imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// Variant of the single-lane VLD1 class that uses the addrmode6oneL32
// address operand (both as input and in the selection pattern) and carries
// Sched<[WriteVLD1]>.  $Vd is tied to $src and Rm is fixed at 0b1111.
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd),
            (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd,
                  (vector_insert (Ty DPR:$src),
                                 (i32 (LoadOp addrmode6oneL32:$Rn)),
                                 imm:$lane))]>,
    Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// Q-register single-lane VLD1 pseudo: a VLDQLNPseudo<IIC_VLD1ln> whose
// Pattern matches a vector_insert of an i32 LoadOp result from
// addrmode6:$addr into lane $lane of the Ty-typed QPR $src.
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp>
  : VLDQLNPseudo<IIC_VLD1ln>, Sched<[WriteVLD1]> {
  let Pattern = [(set QPR:$dst,
                      (vector_insert (Ty QPR:$src),
                                     (i32 (LoadOp addrmode6:$addr)),
                                     imm:$lane))];
}
1075
// Single-lane VLD1 into a D register.  The lane index occupies the top bits
// of Inst{7-4}; the 16- and 32-bit forms also copy Rn{5-4} into Inst{5-4}.
def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

// Q-register pseudo counterparts.
def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
1091
let Predicates = [HasNEON] in {
// Floating-point lane inserts of a loaded scalar select the same
// single-lane VLD1 records as the integer forms.
def : Pat<(vector_insert (v4f16 DPR:$src),
                         (f16 (load addrmode6:$addr)),
                         imm:$lane),
          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v8f16 QPR:$src),
                         (f16 (load addrmode6:$addr)),
                         imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)),
                         imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)),
                         imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
}
1121
1122
1123let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1124
// ...with address register writeback:
// Single-lane VLD1 with a GPR $wb output tied to $Rn.addr and an
// am6offset:$Rm input; $Vd remains tied to $src.  No selection pattern.
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src, nohash_imm:$lane),
            IIC_VLD1lnu, "vld1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$src = $Vd, $Rn.addr = $wb", []>,
    Sched<[WriteVLD1]> {
  let DecoderMethod = "DecodeVLD1LN";
}
1134
// Writeback single-lane VLD1 into a D register.
def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  // Both Inst{5} and Inst{4} are driven by the same operand bit, Rn{4}.
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

// Q-register writeback pseudo counterparts.
def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1151
//   VLD2LN   : Vector Load (single 2-element structure to one lane)
// Two DPR outputs, each tied to the corresponding $srcN input.  Rm is fixed
// at 0b1111 and Inst{4} is taken from bit 4 of the $Rn operand.
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2ln, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2", []>,
    Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}
1162
// Single-lane VLD2 instantiations; the lane index fills the top bits of
// Inst{7-4}.
def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1187
// ...with address register writeback:
// Adds a GPR $wb output tied to $Rn.addr and an am6offset:$Rm input.  This
// class attaches no Sched<>.
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2lnu, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}
1198
// Writeback single-lane VLD2 instantiations.
def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

// Variants whose op7_4 differs from the d-forms above only in a single set
// bit (bit 1 for 16-bit, bit 2 for 32-bit).
def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1222
//   VLD3LN   : Vector Load (single 3-element structure to one lane)
// Three DPR outputs, each tied to the corresponding $srcN input.  Rm is
// fixed at 0b1111.
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
                 nohash_imm:$lane),
            IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD3LN";
}
1233
// Single-lane VLD3 instantiations; the lane index fills the top bits of
// Inst{7-4}.
def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1258
// ...with address register writeback:
// Adds a GPR $wb output tied to $Rn.addr and an am6offset:$Rm input.
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []>,
    Sched<[WriteVLD2]> {
  let DecoderMethod = "DecodeVLD3LN";
}
1271
// Writeback single-lane VLD3 instantiations.
def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

// Variants whose op7_4 differs from the d-forms above only in a single set
// bit (bit 1 for 16-bit, bit 2 for 32-bit).
def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1295
//   VLD4LN   : Vector Load (single 4-element structure to one lane)
// Four DPR outputs, each tied to the corresponding $srcN input.  Rm is fixed
// at 0b1111 and Inst{4} is taken from bit 4 of the $Rn operand.
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn,
                 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
                 nohash_imm:$lane),
            IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}
1309
// Single-lane VLD4 instantiations; the lane index fills the top bits of
// Inst{7-4}.  The 32-bit forms additionally copy Rn{5} into Inst{5}.
def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1336
// ...with address register writeback:
// Adds a GPR $wb output tied to $Rn.addr and an am6offset:$Rm input.  This
// class attaches no Sched<>.
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
                 nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}
1350
// Writeback single-lane VLD4 instantiations.  The 32-bit forms additionally
// copy Rn{5} into Inst{5}.
def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

// Variants whose op7_4 differs from the d-forms above only in a single set
// bit (bit 1 for 16-bit, bit 2 for 32-bit).
def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1376
1377} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1378
//   VLD1DUP  : Vector Load (single element to all lanes)
// The selection pattern matches an ARMvdup of an i32 value loaded via LoadOp
// from AddrMode:$Rn, producing a Ty-typed VecListOneDAllLanes result.  Rm is
// fixed at 0b1111 and Inst{4} is taken from bit 4 of the $Rn operand.
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
              Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4,
          (outs VecListOneDAllLanes:$Vd),
          (ins AddrMode:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
// VLD1DUP instances on a single D register.
// The 8-bit form uses addrmode6dupalignNone, so op7_4 bit 4 is written as an
// explicit 0, matching how VLD1DUPq8 and VLD1DUPd8wb spell it; the 16- and
// 32-bit forms leave that bit as '?'.  The VLD1DUP class overrides Inst{4}
// with Rn{4} in every case, so the emitted encoding is the same either way.
def VLD1DUPd8  : VLD1DUP<{0,0,0,0}, "8", v8i8, extloadi8,
                         addrmode6dupalignNone>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
                         addrmode6dupalign16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                         addrmode6dupalign32>;
1398
// Floating-point flavour: a v2f32 ARMvdup of a loaded f32 is selected to
// VLD1DUPd32 when HasNEON holds.
let Predicates = [HasNEON] in
def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;
1403
// VLD1QDUP - Same shape as VLD1DUP, but the result operand is a
// VecListDPairAllLanes.  No Sched<> mixin is attached in this class.
//   op7_4    - forwarded to NLdSt; bit 4 is replaced by Rn{4} below.
//   Dt       - data-type string passed on for the assembly syntax.
//   Ty       - vector value type produced by the pattern.
//   LoadOp   - load fragment feeding the ARMvdup.
//   AddrMode - operand class used for $Rn.
class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
               Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4,
          (outs VecListDPairAllLanes:$Vd),
          (ins AddrMode:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let DecoderMethod = "DecodeVLD1DupInstruction";
  let Inst{4} = Rn{4};
  let Rm = 0b1111; // No offset operand appears in the assembly string.
}
1415
// VLD1DUP instances on a D-register pair; op7_4 bit 5 is 1 in all of them.
def VLD1DUPq8
  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8, addrmode6dupalignNone>;
def VLD1DUPq16
  : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16, addrmode6dupalign16>;
def VLD1DUPq32
  : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, addrmode6dupalign32>;
1422
// Floating-point flavour: a v4f32 ARMvdup of a loaded f32 is selected to
// VLD1DUPq32 when HasNEON holds.
let Predicates = [HasNEON] in
def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;
1427
1428let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
// VLD1DUPWB - One-D-register VLD1DUP with base-register update.  Each
// instantiation produces two records:
//   _fixed    - "$Vd, $Rn!" syntax, Rm hard-wired to 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR:$Rm operand.
// Both tie $Rn.addr to $wb and carry no selection pattern.
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListOneDAllLanes:$Vd, GPR:$wb),
            (ins AddrMode:$Rn),
            IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> {
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let Inst{4} = Rn{4};
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  }
  def _register
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListOneDAllLanes:$Vd, GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm),
            IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> {
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let Inst{4} = Rn{4};
  }
}
// VLD1QDUPWB - D-register-pair VLD1DUP with base-register update.  Each
// instantiation produces two records:
//   _fixed    - "$Vd, $Rn!" syntax, Rm hard-wired to 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR:$Rm operand.
// Both tie $Rn.addr to $wb and carry no selection pattern.
// NOTE(review): only _fixed carries Sched<[WriteVLD1]>; _register has no
// Sched<> mixin -- confirm that this asymmetry is intentional.
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListDPairAllLanes:$Vd, GPR:$wb),
            (ins AddrMode:$Rn),
            IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
      Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let Inst{4} = Rn{4};
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  }
  def _register
    : NLdSt<1, 0b10, 0b1100, op7_4,
            (outs VecListDPairAllLanes:$Vd, GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm),
            IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> {
    let DecoderMethod = "DecodeVLD1DupInstruction";
    let Inst{4} = Rn{4};
  }
}
1468
// Writeback VLD1DUP on one D register (each defm yields _fixed and _register).
defm VLD1DUPd8wb
  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPd16wb
  : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
defm VLD1DUPd32wb
  : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

// Writeback VLD1DUP on a D-register pair.
defm VLD1DUPq8wb
  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPq16wb
  : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
defm VLD1DUPq32wb
  : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
1476
//   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
// Form without writeback and without a selection pattern.
//   op7_4    - forwarded to NLdSt; bit 4 is replaced by Rn{4} below.
//   Dt       - data-type string passed on for the assembly syntax.
//   VdTy     - register operand class used for the $Vd result.
//   AddrMode - operand class used for $Rn.
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1101, op7_4,
          (outs VdTy:$Vd),
          (ins AddrMode:$Rn),
          IIC_VLD2dup, "vld2", Dt, "$Vd, $Rn", "", []> {
  let DecoderMethod = "DecodeVLD2DupInstruction";
  let Inst{4} = Rn{4};
  let Rm = 0b1111; // No offset operand appears in the assembly string.
}
1486
// VLD2DUP into a D-register pair (VecListDPairAllLanes).
def VLD2DUPd8
  : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes, addrmode6dupalign16>;
def VLD2DUPd16
  : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes, addrmode6dupalign32>;
def VLD2DUPd32
  : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes, addrmode6dupalign64>;

// HACK: VLD2DUPd8x2 must be kept in sync with VLD2b8; otherwise
// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
def VLD2DUPd8x2
  : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes, addrmode6dupalign16>;
def VLD2DUPd16x2
  : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, addrmode6dupalign32>;
def VLD2DUPd32x2
  : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, addrmode6dupalign64>;
1503
// Even/Odd VLD2DUP pseudo-instructions, one pair per element size, all built
// on VLDQQPseudo with the IIC_VLD2dup itinerary.
def VLD2DUPq8EvenPseudo
  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudo
  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16EvenPseudo
  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudo
  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32EvenPseudo
  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudo
  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1510
// ...with address register writeback:
// VLD2DUPWB - VLD2DUP with base-register update.  Each instantiation produces
// two records, both scheduled as WriteVLD1 and both tying $Rn.addr to $wb:
//   _fixed    - "$Vd, $Rn!" syntax, Rm hard-wired to 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR:$Rm operand.
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed
    : NLdSt<1, 0b10, 0b1101, op7_4,
            (outs VdTy:$Vd, GPR:$wb),
            (ins AddrMode:$Rn),
            IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
      Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLD2DupInstruction";
    let Inst{4} = Rn{4};
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  }
  def _register
    : NLdSt<1, 0b10, 0b1101, op7_4,
            (outs VdTy:$Vd, GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm),
            IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
      Sched<[WriteVLD1]> {
    let DecoderMethod = "DecodeVLD2DupInstruction";
    let Inst{4} = Rn{4};
  }
}
1532
// Writeback VLD2DUP into a D-register pair.
defm VLD2DUPd8wb
  : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes, addrmode6dupalign16>;
defm VLD2DUPd16wb
  : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes, addrmode6dupalign32>;
defm VLD2DUPd32wb
  : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes, addrmode6dupalign64>;

// Writeback VLD2DUP into a spaced D-register pair.
defm VLD2DUPd8x2wb
  : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
              addrmode6dupalign16>;
defm VLD2DUPd16x2wb
  : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
              addrmode6dupalign32>;
defm VLD2DUPd32x2wb
  : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
              addrmode6dupalign64>;
1546
//   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
// Form without writeback and without a selection pattern; the three results
// are separate DPR operands.  Unlike the other DUP classes in this file,
// Inst{4} is forced to 0 instead of being copied from Rn{4}.
//   op7_4 - forwarded to NLdSt (bit 4 is overridden below).
//   Dt    - data-type string passed on for the assembly syntax.
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn),
          IIC_VLD3dup, "vld3", Dt,
          "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
    Sched<[WriteVLD2]> {
  let DecoderMethod = "DecodeVLD3DupInstruction";
  let Inst{4} = 0;
  let Rm = 0b1111; // No offset operand appears in the assembly string.
}
1557
// VLD3DUP instances with op7_4 bit 5 = 0.
def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

// Pseudo counterparts built on VLDQQPseudo.
def VLD3DUPd8Pseudo
  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo
  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo
  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

// Even/Odd pseudo-instructions built on VLDQQQQPseudo.
def VLD3DUPq8EvenPseudo
  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq8OddPseudo
  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16EvenPseudo
  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16OddPseudo
  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32EvenPseudo
  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32OddPseudo
  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1577
// ...with address register writeback:
// VLD3DUPWB - VLD3DUP with base-register update.  The offset is an
// am6offset:$Rm operand printed directly after $Rn, and $Rn.addr is tied to
// $wb.  Inst{4} is forced to 0, as in VLD3DUP.
//   op7_4    - forwarded to NLdSt (bit 4 is overridden below).
//   Dt       - data-type string passed on for the assembly syntax.
//   AddrMode - operand class used for $Rn.
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1110, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins AddrMode:$Rn, am6offset:$Rm),
          IIC_VLD3dupu, "vld3", Dt,
          "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>,
    Sched<[WriteVLD2]> {
  let DecoderMethod = "DecodeVLD3DupInstruction";
  let Inst{4} = 0;
}
1587
// Writeback VLD3DUP, "d" variants (op7_4 bit 5 = 0).
def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

// Writeback VLD3DUP, "q" variants (op7_4 bit 5 = 1).
def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

// Pseudo counterparts of the "d" variants, built on VLDQQWBPseudo.
def VLD3DUPd8Pseudo_UPD
  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo_UPD
  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo_UPD
  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1599
//   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
// Form without writeback and without a selection pattern; the four results
// are separate DPR operands.  No Sched<> mixin is attached in this class.
//   op7_4 - forwarded to NLdSt; bit 4 is replaced by Rn{4} below.
//   Dt    - data-type string passed on for the assembly syntax.
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn),
          IIC_VLD4dup, "vld4", Dt,
          "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVLD4DupInstruction";
  let Inst{4} = Rn{4};
  let Rm = 0b1111; // No offset operand appears in the assembly string.
}
1610
// VLD4DUP instances with op7_4 bit 5 = 0.  The 32-bit form leaves bit 6 as
// '?' and fills it in from Rn{5}.
def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> {
  let Inst{6} = Rn{5};
}

// Pseudo counterparts built on VLDQQPseudo.
def VLD4DUPd8Pseudo
  : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPd16Pseudo
  : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPd32Pseudo
  : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> {
  let Inst{6} = Rn{5};
}

// Even/Odd pseudo-instructions built on VLDQQQQPseudo.
def VLD4DUPq8EvenPseudo
  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq8OddPseudo
  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq16EvenPseudo
  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq16OddPseudo
  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq32EvenPseudo
  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq32OddPseudo
  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1630
// ...with address register writeback:
// VLD4DUPWB - VLD4DUP with base-register update.  The offset is an
// am6offset:$Rm operand printed directly after $Rn, and $Rn.addr is tied to
// $wb.
//   op7_4 - forwarded to NLdSt; bit 4 is replaced by Rn{4} below.
//   Dt    - data-type string passed on for the assembly syntax.
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD4dupu, "vld4", Dt,
          "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>,
    Sched<[WriteVLD2]> {
  let DecoderMethod = "DecodeVLD4DupInstruction";
  let Inst{4} = Rn{4};
}
1641
// Writeback VLD4DUP, "d" variants (op7_4 bit 5 = 0).  The 32-bit form fills
// bit 6 in from Rn{5}.
def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> {
  let Inst{6} = Rn{5};
}

// Writeback VLD4DUP, "q" variants (op7_4 bit 5 = 1).
def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> {
  let Inst{6} = Rn{5};
}

// Pseudo counterparts of the "d" variants, built on VLDQQWBPseudo.
def VLD4DUPd8Pseudo_UPD
  : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPd16Pseudo_UPD
  : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPd32Pseudo_UPD
  : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1653
1654} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1655
1656let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
1657
// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
//
// QPR-source variants.  The WB classes add a GPR:$wb result tied to
// $addr.addr; they differ only in the offset operand:
//   VSTQWBPseudo         - am6offset:$offset
//   VSTQWBfixedPseudo    - no offset operand
//   VSTQWBregisterPseudo - rGPR:$offset
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QPR:$src),
                itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
                itin, "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src),
                itin, "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src),
                itin, "$addr.addr = $wb">;
// QQPR-source store pseudo classes.  The WB classes add a GPR:$wb result tied
// to $addr.addr; they differ only in the offset operand:
//   VSTQQWBPseudo         - am6offset:$offset
//   VSTQQWBfixedPseudo    - no offset operand
//   VSTQQWBregisterPseudo - rGPR:$offset
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQPR:$src),
                itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src),
                itin, "$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QQPR:$src),
                itin, "$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src),
                itin, "$addr.addr = $wb">;
1688
// QQQQPR-source store pseudo classes: a plain form and a writeback form whose
// GPR:$wb result is tied to $addr.addr and whose offset is am6offset:$offset.
class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQQQPR:$src),
                itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src),
                itin, "$addr.addr = $wb">;
1695
//   VST1     : Vector Store (multiple single elements)
// One-D-register form (VecListOneD) without writeback or selection pattern.
//   op7_4    - forwarded to NLdSt; bit 4 is replaced by Rn{4} below.
//   Dt       - data-type string passed on for the assembly syntax.
//   AddrMode - operand class used for $Rn.
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0111, op7_4,
          (outs),
          (ins AddrMode:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>,
    Sched<[WriteVST1]> {
  let DecoderMethod = "DecodeVLDST1Instruction";
  let Inst{4} = Rn{4};
  let Rm = 0b1111; // No offset operand appears in the assembly string.
}
// VST1Q - VST1 of a D-register pair (VecListDPair) without writeback or
// selection pattern.
//   op7_4    - forwarded to NLdSt; bits 5-4 are replaced by Rn{5-4} below.
//   Dt       - data-type string passed on for the assembly syntax.
//   AddrMode - operand class used for $Rn.
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b1010, op7_4,
          (outs),
          (ins AddrMode:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>,
    Sched<[WriteVST2]> {
  let DecoderMethod = "DecodeVLDST1Instruction";
  let Inst{5-4} = Rn{5-4};
  let Rm = 0b1111; // No offset operand appears in the assembly string.
}
1711
// VST1 of one D register, one def per element size.
def VST1d8  : VST1D<{0,0,0,?}, "8", addrmode6align64>;
def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;

// VST1 of a D-register pair, one def per element size.
def VST1q8  : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>;
def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
1721
// ...with address register writeback:
// VST1DWB - One-D-register VST1 with base-register update.  Each
// instantiation produces two records, both tying $Rn.addr to $wb:
//   _fixed    - "$Vd, $Rn!" syntax, Rm hard-wired to 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR:$Rm operand.
// NOTE(review): both records use the load itinerary IIC_VLD1u although they
// are stores (the non-writeback VST1D uses IIC_VST1) -- confirm intent.
multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b00, 0b0111, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, VecListOneD:$Vd),
            IIC_VLD1u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{4} = Rn{4};
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  }
  def _register
    : NLdSt<0, 0b00, 0b0111, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
            IIC_VLD1u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
      Sched<[WriteVST1]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{4} = Rn{4};
  }
}
// VST1QWB - D-register-pair VST1 with base-register update.  Each
// instantiation produces two records, both tying $Rn.addr to $wb:
//   _fixed    - "$Vd, $Rn!" syntax, Rm hard-wired to 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR:$Rm operand.
// NOTE(review): both records use the load itinerary IIC_VLD1x2u although they
// are stores (the non-writeback VST1Q uses IIC_VST1x2) -- confirm intent.
multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b00, 0b1010, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, VecListDPair:$Vd),
            IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{5-4} = Rn{5-4};
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  }
  def _register
    : NLdSt<0, 0b00, 0b1010, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
            IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{5-4} = Rn{5-4};
  }
}
1759
// Writeback VST1 of one D register (each defm yields _fixed and _register).
defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8", addrmode6align64>;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;

// Writeback VST1 of a D-register pair.
defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
1769
// ...with 3 registers
// VST1D3 - VST1 of a VecListThreeD without writeback or selection pattern.
//   op7_4    - forwarded to NLdSt; bit 4 is replaced by Rn{4} below.
//   Dt       - data-type string passed on for the assembly syntax.
//   AddrMode - operand class used for $Rn.
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0110, op7_4,
          (outs),
          (ins AddrMode:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>,
    Sched<[WriteVST3]> {
  let DecoderMethod = "DecodeVLDST1Instruction";
  let Inst{4} = Rn{4};
  let Rm = 0b1111; // No offset operand appears in the assembly string.
}
// VST1D3WB - Three-register VST1 with base-register update.  Each
// instantiation produces two records, both tying $Rn.addr to $wb:
//   _fixed    - "$Vd, $Rn!" syntax, Rm hard-wired to 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR:$Rm operand.
// Both copy Rn{5-4} into Inst{5-4}, whereas the non-writeback VST1D3 copies
// only Rn{4}.
// NOTE(review): both records use the load itinerary IIC_VLD1x3u although they
// are stores (VST1D3 uses IIC_VST1x3) -- confirm intent.
multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b00, 0b0110, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, VecListThreeD:$Vd),
            IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
      Sched<[WriteVST3]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{5-4} = Rn{5-4};
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  }
  def _register
    : NLdSt<0, 0b00, 0b0110, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
            IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
      Sched<[WriteVST3]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{5-4} = Rn{5-4};
  }
}
1797
// Three-register VST1, one def per element size.
def VST1d8T  : VST1D3<{0,0,0,?}, "8", addrmode6align64>;
def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

// Three-register VST1 with writeback.
defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

// QQ-register pseudos.  Writeback pseudos exist only for the 64-bit form, and
// the _register one is built on VSTQQWBPseudo (am6offset offset operand).
def VST1d8TPseudo
  : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d16TPseudo
  : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d32TPseudo
  : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d64TPseudo
  : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_register
  : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;

// QQQQ-register High/Low pseudos; the Low ones are writeback (_UPD) forms.
def VST1q8HighTPseudo
  : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q8LowTPseudo_UPD
  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16HighTPseudo
  : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16LowTPseudo_UPD
  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32HighTPseudo
  : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32LowTPseudo_UPD
  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64HighTPseudo
  : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64LowTPseudo_UPD
  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1823
// ...with 4 registers
// VST1D4 - VST1 of a VecListFourD without writeback or selection pattern.
//   op7_4    - forwarded to NLdSt; bits 5-4 are replaced by Rn{5-4} below.
//   Dt       - data-type string passed on for the assembly syntax.
//   AddrMode - operand class used for $Rn.
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0010, op7_4,
          (outs),
          (ins AddrMode:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", []>,
    Sched<[WriteVST4]> {
  let DecoderMethod = "DecodeVLDST1Instruction";
  let Inst{5-4} = Rn{5-4};
  let Rm = 0b1111; // No offset operand appears in the assembly string.
}
// VST1D4WB - Four-register VST1 with base-register update.  Each
// instantiation produces two records, both tying $Rn.addr to $wb:
//   _fixed    - "$Vd, $Rn!" syntax, Rm hard-wired to 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR:$Rm operand.
// NOTE(review): both records use the load itinerary IIC_VLD1x4u although they
// are stores (VST1D4 uses IIC_VST1x4) -- confirm intent.
multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b00, 0b0010, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, VecListFourD:$Vd),
            IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
      Sched<[WriteVST4]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{5-4} = Rn{5-4};
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  }
  def _register
    : NLdSt<0, 0b00, 0b0010, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
            IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
      Sched<[WriteVST4]> {
    let DecoderMethod = "DecodeVLDST1Instruction";
    let Inst{5-4} = Rn{5-4};
  }
}
1852
// Four-register VST1, one def per element size.
def VST1d8Q  : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

// Four-register VST1 with writeback.
defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

// QQ-register pseudos.  Writeback pseudos exist only for the 64-bit form, and
// the _register one is built on VSTQQWBPseudo (am6offset offset operand).
def VST1d8QPseudo
  : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d16QPseudo
  : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d32QPseudo
  : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d64QPseudo
  : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_register
  : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;

// QQQQ-register High/Low pseudos; the Low ones are writeback (_UPD) forms.
def VST1q8HighQPseudo
  : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q8LowQPseudo_UPD
  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16HighQPseudo
  : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16LowQPseudo_UPD
  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32HighQPseudo
  : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32LowQPseudo_UPD
  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64HighQPseudo
  : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64LowQPseudo_UPD
  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1878
//   VST2     : Vector Store (multiple 2-element structures)
// Form without writeback or selection pattern.  No Sched<> mixin is attached
// here; the defs that instantiate the class may add one.
//   op11_8   - forwarded to NLdSt as its op11_8 argument.
//   op7_4    - forwarded to NLdSt; bits 5-4 are replaced by Rn{5-4} below.
//   Dt       - data-type string passed on for the assembly syntax.
//   VdTy     - register operand class used for $Vd.
//   itin     - itinerary class forwarded to NLdSt.
//   AddrMode - operand class used for $Rn.
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
            InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs),
          (ins AddrMode:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let DecoderMethod = "DecodeVLDST2Instruction";
  let Inst{5-4} = Rn{5-4};
  let Rm = 0b1111; // No offset operand appears in the assembly string.
}
1888
// VST2 of a D-register pair (op11_8 = 0b1000).
def VST2d8
  : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
         addrmode6align64or128>,
    Sched<[WriteVST2]>;
def VST2d16
  : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
         addrmode6align64or128>,
    Sched<[WriteVST2]>;
def VST2d32
  : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
         addrmode6align64or128>,
    Sched<[WriteVST2]>;

// VST2 of four D registers (op11_8 = 0b0011).
def VST2q8
  : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
         addrmode6align64or128or256>,
    Sched<[WriteVST4]>;
def VST2q16
  : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
         addrmode6align64or128or256>,
    Sched<[WriteVST4]>;
def VST2q32
  : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
         addrmode6align64or128or256>,
    Sched<[WriteVST4]>;

// Pseudo counterparts of the four-register forms, built on VSTQQPseudo.
def VST2q8Pseudo
  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def VST2q16Pseudo
  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def VST2q32Pseudo
  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1906
// ...with address register writeback:
// VST2DWB - VST2 with base-register update, parameterised on op11_8 and on
// the $Vd operand class.  Each instantiation produces two records, both tying
// $Rn.addr to $wb:
//   _fixed    - "$Vd, $Rn!" syntax, Rm hard-wired to 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR:$Rm operand.
// NOTE(review): both records use the load itinerary IIC_VLD1u although they
// are vst2 stores -- confirm intent.
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, VdTy:$Vd),
            IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVLDST2Instruction";
    let Inst{5-4} = Rn{5-4};
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  }
  def _register
    : NLdSt<0, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd),
            IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
      Sched<[WriteVST2]> {
    let DecoderMethod = "DecodeVLDST2Instruction";
    let Inst{5-4} = Rn{5-4};
  }
}
// VST2QWB - Four-register VST2 (op11_8 fixed to 0b0011, VecListFourD) with
// base-register update.  Each instantiation produces two records, both tying
// $Rn.addr to $wb:
//   _fixed    - "$Vd, $Rn!" syntax, Rm hard-wired to 0b1101.
//   _register - "$Vd, $Rn, $Rm" syntax with an rGPR:$Rm operand.
// NOTE(review): both records use the load itinerary IIC_VLD1u although they
// are vst2 stores -- confirm intent.
multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed
    : NLdSt<0, 0b00, 0b0011, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, VecListFourD:$Vd),
            IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []>,
      Sched<[WriteVST4]> {
    let DecoderMethod = "DecodeVLDST2Instruction";
    let Inst{5-4} = Rn{5-4};
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
  }
  def _register
    : NLdSt<0, 0b00, 0b0011, op7_4,
            (outs GPR:$wb),
            (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
            IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []>,
      Sched<[WriteVST4]> {
    let DecoderMethod = "DecodeVLDST2Instruction";
    let Inst{5-4} = Rn{5-4};
  }
}
1944
// Writeback VST2 of a D-register pair (op11_8 = 0b1000).
defm VST2d8wb
  : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair, addrmode6align64or128>;
defm VST2d16wb
  : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair, addrmode6align64or128>;
defm VST2d32wb
  : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair, addrmode6align64or128>;

// Writeback VST2 of four D registers.
defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

// Writeback pseudos for the four-register forms: fixed-increment ones built
// on VSTQQWBfixedPseudo, register-offset ones on VSTQQWBregisterPseudo.
def VST2q8PseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q16PseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q32PseudoWB_fixed
  : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q8PseudoWB_register
  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q16PseudoWB_register
  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q32PseudoWB_register
  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;

// ...with double-spaced registers
// (op11_8 = 0b1001, VecListDPairSpaced; the plain defs carry no Sched<>.)
def VST2b8
  : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
         addrmode6align64or128>;
def VST2b16
  : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
         addrmode6align64or128>;
def VST2b32
  : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
         addrmode6align64or128>;
defm VST2b8wb
  : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced,
            addrmode6align64or128>;
defm VST2b16wb
  : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
            addrmode6align64or128>;
defm VST2b32wb
  : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
            addrmode6align64or128>;
1976
//   VST3     : Vector Store (multiple 3-element structures)
// Form without writeback or selection pattern; the three source registers are
// separate DPR operands.
//   op11_8 - forwarded to NLdSt as its op11_8 argument.
//   op7_4  - forwarded to NLdSt; bit 4 is replaced by Rn{4} below.
//   Dt     - data-type string passed on for the assembly syntax.
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
          IIC_VST3, "vst3", Dt,
          "\\{$Vd, $src2, $src3\\}, $Rn", "", []>,
    Sched<[WriteVST3]> {
  let DecoderMethod = "DecodeVLDST3Instruction";
  let Inst{4} = Rn{4};
  let Rm = 0b1111; // No offset operand appears in the assembly string.
}
1986
// VST3 with op11_8 = 0b0100, one def per element size.
def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

// Pseudo counterparts built on VSTQQPseudo.
def VST3d8Pseudo
  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3d16Pseudo
  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3d32Pseudo
  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1994
// ...with address register writeback:
// VST3DWB - VST3 with base-register update.  The offset is an am6offset:$Rm
// operand printed directly after $Rn, and $Rn.addr is tied to $wb.
//   op11_8 - forwarded to NLdSt as its op11_8 argument.
//   op7_4  - forwarded to NLdSt; bit 4 is replaced by Rn{4} below.
//   Dt     - data-type string passed on for the assembly syntax.
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
               DPR:$Vd, DPR:$src2, DPR:$src3),
          IIC_VST3u, "vst3", Dt,
          "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>,
    Sched<[WriteVST3]> {
  let DecoderMethod = "DecodeVLDST3Instruction";
  let Inst{4} = Rn{4};
}
2005
2006def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
2007def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
2008def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
2009
2010def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2011def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2012def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2013
// ...with double-spaced registers:
// Same classes as the VST3d* definitions, but with op11_8 = 0b0101.
def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;

def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

// Writeback pseudos taking a QQQQ register operand.
def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2034
//   VST4     : Vector Store (multiple 4-element structures)
// This form takes no $Rm operand: the Rm field is fixed to 0b1111, and bits
// 5-4 of the encoding are copied from bits 5-4 of the encoded $Rn operand.
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt,
          "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "", []>,
    Sched<[WriteVST4]> {
  let DecoderMethod = "DecodeVLDST4Instruction";
  let Inst{5-4} = Rn{5-4};
  let Rm = 0b1111;
}
2045
// VST4D instantiations for 8-, 16- and 32-bit elements (op11_8 = 0b0000).
def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

// Matching VSTQQPseudo definitions, one per element size.
def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2053
// ...with address register writeback:
// Adds an am6offset:$Rm operand and a GPR:$wb result that is tied to the
// address operand through the "$Rn.addr = $wb" constraint.
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
               DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4u, "vst4", Dt,
          "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>,
    Sched<[WriteVST4]> {
  let DecoderMethod = "DecodeVLDST4Instruction";
  let Inst{5-4} = Rn{5-4};
}
2064
// VST4DWB instantiations for 8-, 16- and 32-bit elements (op11_8 = 0b0000).
def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

// Matching VSTQQWBPseudo definitions, one per element size.
def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2072
// ...with double-spaced registers:
// Same classes as the VST4d* definitions, but with op11_8 = 0b0001.
def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;

def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

// Writeback pseudos taking a QQQQ register operand.
def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2093
2094} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2095
// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
//
// Each source register class (QPR, QQPR, QQQQPR) has two flavours:
//   * a plain form with operands (addr, src, lane) and no results;
//   * a "WB" form that adds an am6offset:$offset operand and a GPR:$wb
//     result tied to the address by the "$addr.addr = $wb" constraint.

// Single Q register source.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset,
                     QPR:$src, nohash_imm:$lane),
                itin, "$addr.addr = $wb">;

// QQ register source.
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset,
                     QQPR:$src, nohash_imm:$lane),
                itin, "$addr.addr = $wb">;

// QQQQ register source.
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset,
                     QQQQPR:$src, nohash_imm:$lane),
                itin, "$addr.addr = $wb">;
2119
//   VST1LN   : Vector Store (single element from one lane)
// The selection pattern stores, via StoreOp, the lane of $Vd (viewed as Ty)
// picked out by ExtractOp to the AddrMode:$Rn address.  No $Rm operand is
// present, so the Rm field is fixed to 0b1111.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs),
            (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                      AddrMode:$Rn)]>,
    Sched<[WriteVST1]> {
  let DecoderMethod = "DecodeVST1LN";
  let Rm = 0b1111;
}
// Q-register pseudo for VST1LN: a VSTQLNPseudo whose pattern stores the
// selected lane of QPR:$src (viewed as Ty) to addrmode6:$addr.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
  let Pattern =
    [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), addrmode6:$addr)];
}
2136
// 8-bit elements: three lane bits go into Inst{7-5}.
def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                      ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}

// 16-bit elements: two lane bits go into Inst{7-6}; Inst{4} comes from the
// encoded $Rn operand.
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       ARMvgetlaneu, addrmode6> {
  let Inst{4}   = Rn{4};
  let Inst{7-6} = lane{1-0};
}

// 32-bit elements: one lane bit goes into Inst{7}; Inst{5-4} come from the
// encoded $Rn operand (addrmode6oneL32).
def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{5-4} = Rn{5-4};
  let Inst{7}   = lane{0};
}
2152
// Q-register VST1LN pseudos for 8-, 16- and 32-bit elements.
def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8,  ARMvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store,         extractelt>;
2156
// Floating-point lane stores select the integer VST1LN instructions of the
// same element width.
let Predicates = [HasNEON] in {
// f32 lanes -> 32-bit VST1LN.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane),
                 addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane),
                 addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// f16 lanes -> 16-bit VST1LN.
def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane),
                 addrmode6:$addr),
          (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane),
                 addrmode6:$addr),
          (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
}
2168
// ...with address register writeback:
// The pattern's result is written to GPR:$wb, which is tied to the address
// operand through the "$Rn.addr = $wb" constraint.
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins AdrMode:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb",
            [(set GPR:$wb,
                  (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                           AdrMode:$Rn, am6offset:$Rm))]>,
    Sched<[WriteVST1]> {
  let DecoderMethod = "DecodeVST1LN";
}
// Q-register writeback pseudo for VST1LN: a VSTQLNWBPseudo whose pattern
// stores the selected lane of QPR:$src and sets GPR:$wb to the result.
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
  let Pattern =
    [(set GPR:$wb,
          (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                   addrmode6:$addr, am6offset:$offset))];
}
2187
// 8-bit elements: three lane bits go into Inst{7-5}.
def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                            ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}

// 16-bit elements: two lane bits go into Inst{7-6}; Inst{4} comes from the
// encoded $Rn operand.
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16,
                             post_truncsti16, ARMvgetlaneu, addrmode6> {
  let Inst{4}   = Rn{4};
  let Inst{7-6} = lane{1-0};
}

// 32-bit elements: one lane bit goes into Inst{7}; Inst{5-4} come from the
// encoded $Rn operand (addrmode6oneL32).
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{5-4} = Rn{5-4};
  let Inst{7}   = lane{0};
}
2202
// Q-register VST1LN writeback pseudos for 8-, 16- and 32-bit elements.
def VST1LNq8Pseudo_UPD
  : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
def VST1LNq16Pseudo_UPD
  : VST1QLNWBPseudo<v8i16, post_truncsti16, ARMvgetlaneu>;
def VST1LNq32Pseudo_UPD
  : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
2206
2207let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
2208
//   VST2LN   : Vector Store (single 2-element structure from one lane)
// This form takes no $Rm operand: the Rm field is fixed to 0b1111, and
// Inst{4} is copied from bit 4 of the encoded $Rn operand.
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", "", []>,
    Sched<[WriteVST1]> {
  let DecoderMethod = "DecodeVST2LN";
  let Inst{4}   = Rn{4};
  let Rm = 0b1111;
}
2219
// The lane index is placed in the top bits of Inst{7-4}; how many bits it
// takes depends on the element size.
def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

// Matching VSTQLNPseudo definitions, one per element size.
def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2233
// ...with double-spaced registers:
// Same op11_8 values as the 16/32-bit VST2LNd* definitions; the bit that is
// 0 there (Inst{5} for 16-bit, Inst{6} for 32-bit) is 1 here.
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{4}   = Rn{4};
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{4} = Rn{4};
  let Inst{7} = lane{0};
}

// Matching pseudos taking a QQ register operand.
def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2246
// ...with address register writeback:
// Adds an am6offset:$Rm operand and a GPR:$wb result that is tied to the
// address operand through the "$Rn.addr = $wb" constraint.
//
// Carries Sched<[WriteVST1]> so that the real writeback instruction has the
// same scheduling class as the non-writeback VST2LN class and the
// VST2LN*Pseudo_UPD pseudos, all of which already use WriteVST1.
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
          "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}
2257
// Writeback forms of the VST2LNd* definitions; lane bits are placed exactly
// as in the non-writeback forms.
def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

// Writeback forms of the VST2LNq* definitions.
def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2281
//   VST3LN   : Vector Store (single 3-element structure from one lane)
// This form takes no $Rm operand, so the Rm field is fixed to 0b1111.
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
                 nohash_imm:$lane),
            IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
    Sched<[WriteVST2]> {
  let DecoderMethod = "DecodeVST3LN";
  let Rm = 0b1111;
}
2292
// The lane index is placed in the top bits of Inst{7-4}; how many bits it
// takes depends on the element size.
def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

// Matching pseudos taking a QQ register operand.
def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2306
// ...with double-spaced registers:
// Same op11_8 values as the 16/32-bit VST3LNd* definitions; the bit that is
// 0 there (Inst{5} for 16-bit, Inst{6} for 32-bit) is 1 here.
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}
2314
// QQQQ-register pseudos for the double-spaced VST3LN forms.  They carry
// Sched<[WriteVST2]> like the VST3LNd*Pseudo and VST3LNq*Pseudo_UPD
// definitions, so every VST3LN pseudo has the same scheduling class.
def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2317
// ...with address register writeback:
// Adds an am6offset:$Rm operand and a GPR:$wb result that is tied to the
// address operand through the "$Rn.addr = $wb" constraint.
//
// Carries Sched<[WriteVST2]> so that the real writeback instruction has the
// same scheduling class as the non-writeback VST3LN class and the
// VST3LN*Pseudo_UPD pseudos, all of which already use WriteVST2.
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VST3lnu, "vst3", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
  let DecoderMethod = "DecodeVST3LN";
}
2328
// Writeback forms of the VST3LNd* definitions; lane bits are placed exactly
// as in the non-writeback forms.
def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;

// Writeback forms of the VST3LNq* definitions.
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo_UPD
  : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNq32Pseudo_UPD
  : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2352
//   VST4LN   : Vector Store (single 4-element structure from one lane)
// This form takes no $Rm operand: the Rm field is fixed to 0b1111, and
// Inst{4} is copied from bit 4 of the encoded $Rn operand.
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
                 nohash_imm:$lane),
            IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "", []>,
    Sched<[WriteVST2]> {
  let DecoderMethod = "DecodeVST4LN";
  let Inst{4} = Rn{4};
  let Rm = 0b1111;
}
2364
// The lane index is placed in the top bits of Inst{7-4}; how many bits it
// takes depends on the element size.  The 32-bit form also copies Inst{5}
// from the encoded $Rn operand.
def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{5} = Rn{5};
  let Inst{7} = lane{0};
}

// Matching pseudos taking a QQ register operand.
def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2379
// ...with double-spaced registers:
// Same op11_8 values as the 16/32-bit VST4LNd* definitions; the bit that is
// 0 there (Inst{5} for 16-bit, Inst{6} for 32-bit) is 1 here.
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{5} = Rn{5};
  let Inst{7} = lane{0};
}

// Matching pseudos taking a QQQQ register operand.
def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2391
// ...with address register writeback:
// Adds an am6offset:$Rm operand and a GPR:$wb result that is tied to the
// address operand through the "$Rn.addr = $wb" constraint.
//
// Carries Sched<[WriteVST2]> so that the real writeback instruction has the
// same scheduling class as the non-writeback VST4LN class and the
// VST4LN*Pseudo_UPD pseudos, all of which already use WriteVST2.
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VST4lnu, "vst4", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}
2403
// Writeback forms of the VST4LNd* definitions; lane bits are placed exactly
// as in the non-writeback forms.
def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{5} = Rn{5};
  let Inst{7} = lane{0};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;

// Writeback forms of the VST4LNq* definitions.
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{5} = Rn{5};
  let Inst{7} = lane{0};
}

def VST4LNq16Pseudo_UPD
  : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNq32Pseudo_UPD
  : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2429
2430} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2431
// Use vld1/vst1 for unaligned f64 load / store
// Little-endian: the element size of the chosen VLD1/VST1 follows the
// alignment of the access (16-bit for hword, 8-bit for byte).
let Predicates = [IsLE,HasNEON] in {
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
          (VLD1d16 addrmode6:$addr)>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d16 addrmode6:$addr, DPR:$value)>;

def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
          (VLD1d8 addrmode6:$addr)>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d8 addrmode6:$addr, DPR:$value)>;
}

// Big-endian: every non-word-aligned f64 access uses the 64-bit element
// form.
let Predicates = [IsBE,HasNEON] in {
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
          (VLD1d64 addrmode6:$addr)>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d64 addrmode6:$addr, DPR:$value)>;
}
2449
// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
// load / store if it's legal.
// dword-aligned accesses use the 64-bit element form regardless of
// endianness.
let Predicates = [HasNEON] in {
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
          (VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q64 addrmode6:$addr, QPR:$value)>;
}

// Little-endian only: the element size follows the alignment of the access
// (32-bit for word, 16-bit for hword, 8-bit for byte).
let Predicates = [IsLE,HasNEON] in {
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
          (VLD1q32 addrmode6:$addr)>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q32 addrmode6:$addr, QPR:$value)>;

def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VLD1q16 addrmode6:$addr)>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, QPR:$value)>;

def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VLD1q8 addrmode6:$addr)>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, QPR:$value)>;
}
2472
2473//===----------------------------------------------------------------------===//
2474// NEON pattern fragments
2475//===----------------------------------------------------------------------===//
2476
// Extract D sub-registers of Q registers.
// Each transform turns an immediate lane number into the target constant
// ARM::dsub_0 + lane / <lanes per D register>: 8 for i8, 4 for i16, 2 for
// i32, and 1 for f64.
def DSubReg_i8_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t SubIdx = ARM::dsub_0 + N->getZExtValue()/8;
  return CurDAG->getTargetConstant(SubIdx, SDLoc(N), MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t SubIdx = ARM::dsub_0 + N->getZExtValue()/4;
  return CurDAG->getTargetConstant(SubIdx, SDLoc(N), MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t SubIdx = ARM::dsub_0 + N->getZExtValue()/2;
  return CurDAG->getTargetConstant(SubIdx, SDLoc(N), MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t SubIdx = ARM::dsub_0 + N->getZExtValue();
  return CurDAG->getTargetConstant(SubIdx, SDLoc(N), MVT::i32);
}]>;
2498
// Extract S sub-registers of Q/D registers.
// The f32 lane number is used directly as the offset from ARM::ssub_0.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  uint64_t SubIdx = ARM::ssub_0 + N->getZExtValue();
  return CurDAG->getTargetConstant(SubIdx, SDLoc(N), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers containing a given f16 lane.
// The lane number is halved to get the offset from ARM::ssub_0.
def SSubReg_f16_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  uint64_t SubIdx = ARM::ssub_0 + N->getZExtValue()/2;
  return CurDAG->getTargetConstant(SubIdx, SDLoc(N), MVT::i32);
}]>;
2512
// Translate lane numbers from Q registers to D subregs.
// The lane number is masked down to its low bits: 3 bits for i8, 2 bits for
// i16, and 1 bit for i32.
def SubReg_i8_lane : SDNodeXForm<imm, [{
  uint64_t Lane = N->getZExtValue() & 7;
  return CurDAG->getTargetConstant(Lane, SDLoc(N), MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  uint64_t Lane = N->getZExtValue() & 3;
  return CurDAG->getTargetConstant(Lane, SDLoc(N), MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  uint64_t Lane = N->getZExtValue() & 1;
  return CurDAG->getTargetConstant(Lane, SDLoc(N), MVT::i32);
}]>;
2523
2524//===----------------------------------------------------------------------===//
2525// Instruction Classes
2526//===----------------------------------------------------------------------===//
2527
// Basic 2-register operations: double- and quad-register.
// The two classes differ in register class (DPR vs QPR), itinerary
// (IIC_VUNAD vs IIC_VUNAQ), and the constant passed to N2V after op11_7
// (0 vs 1).
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;

class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
2541
// Basic 2-register intrinsics, both double- and quad-register.
// The itinerary is supplied by the user of the class, and the operation is
// an SDPatternOperator.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2557
// Same as N2VDInt/N2VQInt, but not predicated (built on N2Vnp).
class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 0,
          (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 1,
          (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2572
// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
// Here op6 is a template parameter and is forwarded to N2Vnp after op7.
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2580
// Same as N2VQIntXnp but with Vd as a src register.
// The extra QPR:$src input is tied to $Vd and is the first operand of IntOp.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                  bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd),
          (ins QPR:$src, QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}
2591
// Narrow 2-register operations.
// Reads a Q register (TyQ) and writes a D register (TyD).
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd), (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
2600
// Narrow 2-register intrinsics.
// Reads a Q register (TyQ) and writes a D register (TyD); the operation is
// an SDPatternOperator.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd), (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
2609
// Long 2-register operations (currently only used for VMOVL).
// Reads a D register (TyD) and writes a Q register (TyQ).
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd), (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
2618
// Long 2-register intrinsics.
// Reads a D register (TyD) and writes a Q register (TyQ); the operation is
// an SDPatternOperator.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd), (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
2627
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are inputs and outputs: $src1 is tied to $Vd and $src2 to
// $Vm.  No selection pattern is attached.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// The quad-register variant takes its itinerary as a parameter.
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
2639
// Basic 3-register operations: double- and quad-register.
// D-register form: $Vd = OpNode($Vn, $Vm).  Commutable is forwarded to
// isCommutable.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
// Same as N3VD but no data type.
// Built on N3VX, which takes no Dt string.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd),
         (ins DPR:$Vn, DPR:$Vm),
         N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd,
               (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
2665
// D-register 3-operand operation whose second source is one lane of
// DPR_VFP2:$Vm, duplicated with ARMvduplane.  The lane operand is a
// VectorIndex32.  Never commutable.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                               imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
// 16-bit-lane counterpart of N3VDSL: the scalar source is DPR_8:$Vm, the
// lane operand is a VectorIndex16, and the itinerary is fixed to
// IIC_VMULi16D.  Never commutable.
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,
              "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                               imm:$lane)))))]> {
  let isCommutable = 0;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
2691
// Q-register 3-register operation: $Vd = OpNode($Vn, $Vm).  Commutable is
// forwarded to isCommutable.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
// Three-register QPR form derived from N3VX; takes no Dt string.
// Pattern: $Vd = ResTy(OpNode(OpTy $Vn, OpTy $Vm)).
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin,
            string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode,
            bit Commutable>
    : N3VX<op24, op23, op21_20, op11_8, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vn, QPR:$Vm),
           N3RegFrm, itin,
           OpcodeStr, "$Vd, $Vn, $Vm", "",
           [(set QPR:$Vd,
                 (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // Commutability is supplied by the instantiating def.
  let isCommutable = Commutable;
  // Instructions of this form also have a two-operand InstAlias ($Vn tied
  // to $Vd).
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
// QPR by-lane form derived from N3VLane32: the second source is lane $lane
// of the DPR_VFP2 register $Vm (typed OpTy), duplicated to ResTy with
// ARMvduplane.
// Pattern: $Vd = ShOp($Vn, ARMvduplane($Vm, $lane)).
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy,
             SDNode ShOp>
    : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (ShOp (ResTy QPR:$Vn),
                                   (ResTy (ARMvduplane
                                             (OpTy DPR_VFP2:$Vm),
                                             imm:$lane)))))]> {
  // The lane operand makes the two sources non-interchangeable.
  let isCommutable = 0;
  // Instructions of this form also have a two-operand InstAlias ($Vn tied
  // to $Vd).
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
// QPR by-lane form derived from N3VLane16: $Vm is restricted to DPR_8 and
// indexed with VectorIndex16. The itinerary is fixed to IIC_VMULi16Q.
// Pattern: $Vd = ShOp($Vn, ARMvduplane($Vm, $lane)).
class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy,
               SDNode ShOp>
    : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, IIC_VMULi16Q,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (ShOp (ResTy QPR:$Vn),
                                   (ResTy (ARMvduplane
                                             (OpTy DPR_8:$Vm),
                                             imm:$lane)))))]> {
  // The lane operand makes the two sources non-interchangeable.
  let isCommutable = 0;
  // Instructions of this form also have a two-operand InstAlias ($Vn tied
  // to $Vd).
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
2741
2742// Basic 3-register intrinsics, both double- and quad-register.
// Three-register DPR intrinsic form; the Format is chosen by the caller.
// Pattern: $Vd = ResTy(IntOp(OpTy $Vn, OpTy $Vm)).
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp,
              bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          f, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set DPR:$Vd,
                (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // Commutability is supplied by the instantiating def.
  let isCommutable = Commutable;
  // Instructions of this form also have a two-operand InstAlias ($Vn tied
  // to $Vd).
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
2754
// Three-register DPR intrinsic form derived from N3Vnp.
// Pattern: $Vd = ResTy(IntOp(OpTy $Vn, OpTy $Vm)).
//
// The Format argument 'f' is forwarded to N3Vnp, matching the quad-register
// sibling N3VQIntnp. It was previously accepted but ignored in favour of a
// hard-coded N3RegFrm, so callers that already pass N3RegFrm are unaffected.
//
// NOTE: 'Commutable' is accepted for signature parity with N3VDInt but is
// not applied to isCommutable by this class.
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
2762
// DPR by-lane intrinsic form derived from N3VLane32: the second source is
// lane $lane of the DPR_VFP2 register $Vm, duplicated with ARMvduplane.
// Pattern: $Vd = IntOp($Vn, ARMvduplane($Vm, $lane)), all of type Ty.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator IntOp>
    : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
                (outs DPR:$Vd),
                (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (Ty DPR:$Vd),
                      (Ty (IntOp (Ty DPR:$Vn),
                                 (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                  imm:$lane)))))]> {
  // The lane operand makes the two sources non-interchangeable.
  let isCommutable = 0;
}
2774
// DPR by-lane intrinsic form derived from N3VLane16: $Vm is restricted to
// DPR_8 and indexed with VectorIndex16.
// Pattern: $Vd = IntOp($Vn, ARMvduplane($Vm, $lane)), all of type Ty.
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty,
                  SDPatternOperator IntOp>
    : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
                (outs DPR:$Vd),
                (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (Ty DPR:$Vd),
                      (Ty (IntOp (Ty DPR:$Vn),
                                 (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                                  imm:$lane)))))]> {
  // The lane operand makes the two sources non-interchangeable.
  let isCommutable = 0;
}
// Three-register DPR intrinsic form whose sources are listed as $Vm, $Vn
// (the reverse of N3VDInt) in the ins list, the assembly string and the
// pattern.
// Pattern: $Vd = ResTy(IntOp(OpTy $Vm, OpTy $Vn)).
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$Vm, DPR:$Vn),
          f, itin,
          OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
          [(set DPR:$Vd,
                (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  // Never commutable, regardless of the intrinsic used.
  let isCommutable = 0;
  // Two-operand alias constraint: $Vm (the first source) is tied to $Vd.
  let TwoOperandAliasConstraint = "$Vm = $Vd";
}
2795
// Three-register QPR intrinsic form; the Format is chosen by the caller.
// Pattern: $Vd = ResTy(IntOp(OpTy $Vn, OpTy $Vm)).
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp,
              bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vn, QPR:$Vm),
          f, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // Commutability is supplied by the instantiating def.
  let isCommutable = Commutable;
  // Instructions of this form also have a two-operand InstAlias ($Vn tied
  // to $Vd).
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
2807
// Three-register QPR intrinsic form derived from N3Vnp; the Format 'f' is
// forwarded to the parent.
// Pattern: $Vd = ResTy(IntOp(OpTy $Vn, OpTy $Vm)).
// NOTE: 'Commutable' is accepted but not applied to isCommutable here.
class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8,
                bit op6, bit op4,
                Format f, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp,
                bit Commutable>
    : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
            (outs QPR:$Vd),
            (ins QPR:$Vn, QPR:$Vm),
            f, itin,
            OpcodeStr, Dt,
            [(set QPR:$Vd,
                  (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2815
2816// Same as N3VQIntnp but with Vd as a src register.
// Three-source QPR intrinsic form derived from N3Vnp. The extra input $src
// is constrained to the same register as the result $Vd.
// Pattern: $Vd = ResTy(IntOp(OpTy $src, OpTy $Vn, OpTy $Vm)).
// NOTE: 'Commutable' is accepted but not applied to isCommutable here.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8,
                 bit op6, bit op4,
                 Format f, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp,
                 bit Commutable>
    : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
            (outs QPR:$Vd),
            (ins QPR:$src, QPR:$Vn, QPR:$Vm),
            f, itin,
            OpcodeStr, Dt,
            [(set QPR:$Vd,
                  (ResTy (IntOp (OpTy QPR:$src),
                                (OpTy QPR:$Vn),
                                (OpTy QPR:$Vm))))]> {
  // Tie the accumulator input to the destination register.
  let Constraints = "$src = $Vd";
}
2828
// QPR by-lane intrinsic form derived from N3VLane32: the second source is
// lane $lane of the DPR_VFP2 register $Vm (typed OpTy), duplicated to ResTy
// with ARMvduplane.
// Pattern: $Vd = IntOp($Vn, ARMvduplane($Vm, $lane)).
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp>
    : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (ResTy QPR:$Vn),
                                    (ResTy (ARMvduplane
                                              (OpTy DPR_VFP2:$Vm),
                                              imm:$lane)))))]> {
  // The lane operand makes the two sources non-interchangeable.
  let isCommutable = 0;
}
// QPR by-lane intrinsic form derived from N3VLane16: $Vm is restricted to
// DPR_8 and indexed with VectorIndex16.
// Pattern: $Vd = IntOp($Vn, ARMvduplane($Vm, $lane)).
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator IntOp>
    : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (ResTy QPR:$Vn),
                                    (ResTy (ARMvduplane
                                              (OpTy DPR_8:$Vm),
                                              imm:$lane)))))]> {
  // The lane operand makes the two sources non-interchangeable.
  let isCommutable = 0;
}
// Three-register QPR intrinsic form whose sources are listed as $Vm, $Vn
// (the reverse of N3VQInt) in the ins list, the assembly string and the
// pattern.
// Pattern: $Vd = ResTy(IntOp(OpTy $Vm, OpTy $Vn)).
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp>
    : N3V<op24, op23, op21_20, op11_8, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vm, QPR:$Vn),
          f, itin,
          OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  // Never commutable, regardless of the intrinsic used.
  let isCommutable = 0;
  // Two-operand alias constraint: $Vm (the first source) is tied to $Vd.
  let TwoOperandAliasConstraint = "$Vm = $Vd";
}
2863
2864// Multiply-Add/Sub operations: double- and quad-register.
// DPR multiply-then-combine form. The accumulator input $src1 is tied to
// $Vd and does not appear in the assembly string.
// Pattern: $Vd = OpNode($src1, MulOp($Vn, $Vm)), all of type Ty.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set DPR:$Vd,
                (Ty (OpNode DPR:$src1,
                            (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
2873
// DPR multiply-then-combine by-lane form derived from N3VLane32. The second
// multiply input is lane $lane of the DPR_VFP2 register $Vm; $src1 is tied
// to $Vd.
// Pattern: $Vd = ShOp($src1, MulOp($Vn, ARMvduplane($Vm, $lane))).
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
    : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
                (outs DPR:$Vd),
                (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (Ty DPR:$Vd),
                      (Ty (ShOp (Ty DPR:$src1),
                                (Ty (MulOp DPR:$Vn,
                                           (Ty (ARMvduplane
                                                  (Ty DPR_VFP2:$Vm),
                                                  imm:$lane)))))))]>;
// DPR multiply-then-combine by-lane form derived from N3VLane16. $Vm is
// restricted to DPR_8 and indexed with VectorIndex16; $src1 is tied to $Vd.
// Pattern: $Vd = ShOp($src1, MulOp($Vn, ARMvduplane($Vm, $lane))).
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
    : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
                (outs DPR:$Vd),
                (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (Ty DPR:$Vd),
                      (Ty (ShOp (Ty DPR:$src1),
                                (Ty (MulOp DPR:$Vn,
                                           (Ty (ARMvduplane
                                                  (Ty DPR_8:$Vm),
                                                  imm:$lane)))))))]>;
2900
// QPR multiply-then-combine form. The accumulator input $src1 is tied to
// $Vd and does not appear in the assembly string.
// Pattern: $Vd = OpNode($src1, MulOp($Vn, $Vm)), all of type Ty.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
    : N3V<op24, op23, op21_20, op11_8, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (Ty (OpNode QPR:$src1,
                            (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
// QPR multiply-then-combine by-lane form derived from N3VLane32. The second
// multiply input is lane $lane of the DPR_VFP2 register $Vm (typed OpTy),
// duplicated to ResTy; $src1 is tied to $Vd.
// Pattern: $Vd = ShOp($src1, MulOp($Vn, ARMvduplane($Vm, $lane))).
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
    : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (ResTy QPR:$Vd),
                      (ResTy (ShOp (ResTy QPR:$src1),
                                   (ResTy (MulOp QPR:$Vn,
                                                 (ResTy (ARMvduplane
                                                           (OpTy DPR_VFP2:$Vm),
                                                           imm:$lane)))))))]>;
// QPR multiply-then-combine by-lane form derived from N3VLane16. $Vm is
// restricted to DPR_8 and indexed with VectorIndex16; $src1 is tied to $Vd.
// Pattern: $Vd = ShOp($src1, MulOp($Vn, ARMvduplane($Vm, $lane))).
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
    : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (ResTy QPR:$Vd),
                      (ResTy (ShOp (ResTy QPR:$src1),
                                   (ResTy (MulOp QPR:$Vn,
                                                 (ResTy (ARMvduplane
                                                           (OpTy DPR_8:$Vm),
                                                           imm:$lane)))))))]>;
2936
2937// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// DPR form that combines an accumulator with an intrinsic result; $src1 is
// tied to $Vd.
// Pattern: $Vd = OpNode($src1, IntOp($Vn, $Vm)), all of type Ty.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator IntOp, SDNode OpNode>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set DPR:$Vd,
                (Ty (OpNode DPR:$src1,
                            (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
// QPR form that combines an accumulator with an intrinsic result; $src1 is
// tied to $Vd.
// Pattern: $Vd = OpNode($src1, IntOp($Vn, $Vm)), all of type Ty.
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator IntOp, SDNode OpNode>
    : N3V<op24, op23, op21_20, op11_8, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (Ty (OpNode QPR:$src1,
                            (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
2954
2955// Neon 3-argument intrinsics, both double- and quad-register.
2956// The destination register is also used as the first source operand register.
// DPR three-argument intrinsic form; the first intrinsic argument $src1 is
// tied to $Vd.
// Pattern: $Vd = ResTy(IntOp(OpTy $src1, OpTy $Vn, OpTy $Vm)).
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy,
               SDPatternOperator IntOp>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set DPR:$Vd,
                (ResTy (IntOp (OpTy DPR:$src1),
                              (OpTy DPR:$Vn),
                              (OpTy DPR:$Vm))))]>;
// QPR three-argument intrinsic form; the first intrinsic argument $src1 is
// tied to $Vd.
// Pattern: $Vd = ResTy(IntOp(OpTy $src1, OpTy $Vn, OpTy $Vm)).
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy,
               SDPatternOperator IntOp>
    : N3V<op24, op23, op21_20, op11_8, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$src1),
                              (OpTy QPR:$Vn),
                              (OpTy QPR:$Vm))))]>;
2973
2974// Long Multiply-Add/Sub operations.
// Long multiply-then-combine form: DPR sources $Vn/$Vm (TyD) are multiplied
// to a TyQ value that is combined with the QPR accumulator $src1, which is
// tied to $Vd.
// Pattern: $Vd = OpNode(TyQ $src1, TyQ(MulOp(TyD $Vn, TyD $Vm))).
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD,
                SDNode MulOp, SDNode OpNode>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (OpNode (TyQ QPR:$src1),
                        (TyQ (MulOp (TyD DPR:$Vn), (TyD DPR:$Vm)))))]>;
// Long multiply-then-combine by-lane form derived from N3VLane32. The second
// multiply input is lane $lane of the DPR_VFP2 register $Vm; the QPR
// accumulator $src1 is tied to $Vd.
// Pattern: $Vd = OpNode(TyQ $src1,
//                       TyQ(MulOp(TyD $Vn, ARMvduplane($Vm, $lane)))).
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD,
                  SDNode MulOp, SDNode OpNode>
    : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set QPR:$Vd,
                      (OpNode (TyQ QPR:$src1),
                              (TyQ (MulOp (TyD DPR:$Vn),
                                          (TyD (ARMvduplane
                                                  (TyD DPR_VFP2:$Vm),
                                                  imm:$lane))))))]>;
// Long multiply-then-combine by-lane form derived from N3VLane16. $Vm is
// restricted to DPR_8 and indexed with VectorIndex16; the QPR accumulator
// $src1 is tied to $Vd.
// Pattern: $Vd = OpNode(TyQ $src1,
//                       TyQ(MulOp(TyD $Vn, ARMvduplane($Vm, $lane)))).
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD,
                    SDNode MulOp, SDNode OpNode>
    : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set QPR:$Vd,
                      (OpNode (TyQ QPR:$src1),
                              (TyQ (MulOp (TyD DPR:$Vn),
                                          (TyD (ARMvduplane
                                                  (TyD DPR_8:$Vm),
                                                  imm:$lane))))))]>;
3008
3009// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Long accumulate form with an explicit extend: the TyD intrinsic result is
// extended to TyQ with ExtOp and combined with the QPR accumulator $src1,
// which is tied to $Vd.
// Pattern: $Vd = OpNode(TyQ $src1, TyQ(ExtOp(TyD(IntOp($Vn, $Vm))))).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                   bit op4,
                   InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD,
                   SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (OpNode (TyQ QPR:$src1),
                        (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                (TyD DPR:$Vm)))))))]>;
3020
3021// Neon Long 3-argument intrinsic.  The destination register is
3022// a quad-register and is also used as the first source operand register.
// Long three-argument intrinsic form: QPR accumulator $src1 (tied to $Vd)
// plus two DPR sources.
// Pattern: $Vd = TyQ(IntOp(TyQ $src1, TyD $Vn, TyD $Vm)).
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD,
               SDPatternOperator IntOp>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (TyQ (IntOp (TyQ QPR:$src1),
                            (TyD DPR:$Vn),
                            (TyD DPR:$Vm))))]>;
// Long three-argument by-lane intrinsic form derived from N3VLane32. The last
// intrinsic argument is lane $lane of the DPR_VFP2 register $Vm; the QPR
// accumulator $src1 is tied to $Vd.
// Pattern: $Vd = ResTy(IntOp(ResTy $src1, OpTy $Vn,
//                            ARMvduplane($Vm, $lane))).
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp>
    : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (ResTy QPR:$src1),
                                    (OpTy DPR:$Vn),
                                    (OpTy (ARMvduplane
                                             (OpTy DPR_VFP2:$Vm),
                                             imm:$lane)))))]>;
// Long three-argument by-lane intrinsic form derived from N3VLane16. $Vm is
// restricted to DPR_8 and indexed with VectorIndex16; the QPR accumulator
// $src1 is tied to $Vd.
// Pattern: $Vd = ResTy(IntOp(ResTy $src1, OpTy $Vn,
//                            ARMvduplane($Vm, $lane))).
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy,
                   SDPatternOperator IntOp>
    : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (ResTy QPR:$src1),
                                    (OpTy DPR:$Vn),
                                    (OpTy (ARMvduplane
                                             (OpTy DPR_8:$Vm),
                                             imm:$lane)))))]>;
3057
3058// Narrowing 3-register intrinsics.
// Narrowing intrinsic form: two QPR sources (TyQ) produce a DPR result
// (TyD). The itinerary is fixed to IIC_VBINi4D.
// Pattern: $Vd = TyD(IntOp(TyQ $Vn, TyQ $Vm)).
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp,
              bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs DPR:$Vd),
          (ins QPR:$Vn, QPR:$Vm),
          N3RegFrm, IIC_VBINi4D,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set DPR:$Vd,
                (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  // Commutability is supplied by the instantiating def.
  let isCommutable = Commutable;
}
3068
3069// Long 3-register operations.
// Long form: two DPR sources (TyD) produce a QPR result (TyQ) directly from
// OpNode.
// Pattern: $Vd = TyQ(OpNode(TyD $Vn, TyD $Vm)).
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin,
           string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD,
           SDNode OpNode,
           bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  // Commutability is supplied by the instantiating def.
  let isCommutable = Commutable;
}
3079
// Long by-lane form derived from N3VLane32: the second source is lane $lane
// of the DPR_VFP2 register $Vm, duplicated with ARMvduplane.
// Pattern: $Vd = TyQ(OpNode(TyD $Vn, ARMvduplane($Vm, $lane))).
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD,
             SDNode OpNode>
    : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set QPR:$Vd,
                      (TyQ (OpNode (TyD DPR:$Vn),
                                   (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;
// Long by-lane form derived from N3VLane16: $Vm is restricted to DPR_8 and
// indexed with VectorIndex16.
// Pattern: $Vd = TyQ(OpNode(TyD $Vn, ARMvduplane($Vm, $lane))).
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD,
               SDNode OpNode>
    : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set QPR:$Vd,
                      (TyQ (OpNode (TyD DPR:$Vn),
                                   (TyD (ARMvduplane (TyD DPR_8:$Vm),
                                                     imm:$lane)))))]>;
3098
3099// Long 3-register operations with explicitly extended operands.
// Long form with explicit extends: each DPR source is extended from TyD to
// TyQ with ExtOp before OpNode is applied.
// Pattern: $Vd = OpNode(TyQ(ExtOp(TyD $Vn)), TyQ(ExtOp(TyD $Vm))).
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              SDNode OpNode, SDNode ExtOp,
              bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                        (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // Commutability is supplied by the instantiating def.
  let isCommutable = Commutable;
}
3111
3112// Long 3-register intrinsics with explicit extend (VABDL).
// Long intrinsic form with an explicit extend: the intrinsic is evaluated at
// TyD on the DPR sources and its result is extended to TyQ with ExtOp.
// Pattern: $Vd = TyQ(ExtOp(TyD(IntOp(TyD $Vn, TyD $Vm)))).
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                 bit op4,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD,
                 SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                        (TyD DPR:$Vm))))))]> {
  // Commutability is supplied by the instantiating def.
  let isCommutable = Commutable;
}
3124
3125// Long 3-register intrinsics.
// Long intrinsic form: two DPR sources (TyD) produce a QPR result (TyQ)
// directly from the intrinsic.
// Pattern: $Vd = TyQ(IntOp(TyD $Vn, TyD $Vm)).
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              SDPatternOperator IntOp,
              bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins DPR:$Vn, DPR:$Vm),
          N3RegFrm, itin,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  // Commutability is supplied by the instantiating def.
  let isCommutable = Commutable;
}
3135
3136// Same as above, but not predicated.
// Long intrinsic form derived from N3Vnp: two DPR sources produce a QPR
// result. The format is fixed to N3RegFrm.
// Pattern: $Vd = ResTy(IntOp(OpTy $Vn, OpTy $Vm)).
// NOTE: 'Commutable' is accepted but not applied to isCommutable here.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8,
                bit op6, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp,
                bit Commutable>
    : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
            (outs QPR:$Vd),
            (ins DPR:$Vn, DPR:$Vm),
            N3RegFrm, itin,
            OpcodeStr, Dt,
            [(set QPR:$Vd,
                  (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
3144
// Long by-lane intrinsic form derived from N3VLane32: the second source is
// lane $lane of the DPR_VFP2 register $Vm, duplicated with ARMvduplane.
// Pattern: $Vd = ResTy(IntOp(OpTy $Vn, ARMvduplane($Vm, $lane))).
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp>
    : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (OpTy DPR:$Vn),
                                    (OpTy (ARMvduplane
                                             (OpTy DPR_VFP2:$Vm),
                                             imm:$lane)))))]>;
// Long by-lane intrinsic form derived from N3VLane16: $Vm is restricted to
// DPR_8 and indexed with VectorIndex16.
// Pattern: $Vd = ResTy(IntOp(OpTy $Vn, ARMvduplane($Vm, $lane))).
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator IntOp>
    : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
                (outs QPR:$Vd),
                (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
                NVMulSLFrm, itin,
                OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
                [(set (ResTy QPR:$Vd),
                      (ResTy (IntOp (OpTy DPR:$Vn),
                                    (OpTy (ARMvduplane
                                             (OpTy DPR_8:$Vm),
                                             imm:$lane)))))]>;
3165
3166// Wide 3-register operations.
// Wide form: a QPR source $Vn (TyQ) is combined with a DPR source $Vm that
// is first extended from TyD to TyQ with ExtOp. The itinerary is fixed to
// IIC_VSUBiD.
// Pattern: $Vd = OpNode(TyQ $Vn, TyQ(ExtOp(TyD $Vm))).
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp,
           bit Commutable>
    : N3V<op24, op23, op21_20, op11_8, 0, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vn, DPR:$Vm),
          N3RegFrm, IIC_VSUBiD,
          OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
          [(set QPR:$Vd,
                (OpNode (TyQ QPR:$Vn),
                        (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // Commutability is supplied by the instantiating def.
  let isCommutable = Commutable;
  // Instructions of this form also have a two-operand InstAlias ($Vn tied
  // to $Vd).
  let TwoOperandAliasConstraint = "$Vn = $Vd";
}
3179
3180// Pairwise long 2-register intrinsics, both double- and quad-register.
// Two-register DPR intrinsic form with a single source $Vm. The itinerary is
// fixed to IIC_VSHLiD.
// Pattern: $Vd = ResTy(IntOp(OpTy $Vm)).
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$Vm),
          IIC_VSHLiD,
          OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set DPR:$Vd,
                (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// Two-register QPR intrinsic form with a single source $Vm. The itinerary is
// fixed to IIC_VSHLiD (the same one the DPR variant uses).
// Pattern: $Vd = ResTy(IntOp(OpTy $Vm)).
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$Vm),
          IIC_VSHLiD,
          OpcodeStr, Dt, "$Vd, $Vm", "",
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$Vm))))]>;
3195
3196// Pairwise long 2-register accumulate intrinsics,
3197// both double- and quad-register.
3198// The destination register is also used as the first source operand register.
// Two-register DPR accumulating intrinsic form: the accumulator $src1 (typed
// ResTy) is tied to $Vd. The itinerary is fixed to IIC_VPALiD.
// Pattern: $Vd = ResTy(IntOp(ResTy $src1, OpTy $Vm)).
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd),
          (ins DPR:$src1, DPR:$Vm),
          IIC_VPALiD,
          OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
          [(set DPR:$Vd,
                (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Two-register QPR accumulating intrinsic form: the accumulator $src1 (typed
// ResTy) is tied to $Vd. The itinerary is fixed to IIC_VPALiQ.
// Pattern: $Vd = ResTy(IntOp(ResTy $src1, OpTy $Vm)).
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp>
    : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd),
          (ins QPR:$src1, QPR:$Vm),
          IIC_VPALiQ,
          OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
          [(set QPR:$Vd,
                (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
3215
3216// Shift by immediate,
3217// both double- and quad-register.
3218let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// DPR shift-by-immediate form derived from N2VImm; the immediate operand
// class and the Format are chosen by the caller.
// Pattern: $Vd = Ty(OpNode(Ty $Vm, i32 $SIMM)).
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin,
             Operand ImmTy,
             string OpcodeStr, string Dt,
             ValueType Ty,
             SDNode OpNode>
    : N2VImm<op24, op23, op11_8, op7, 0, op4,
             (outs DPR:$Vd),
             (ins DPR:$Vm, ImmTy:$SIMM),
             f, itin,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set DPR:$Vd,
                   (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
// QPR shift-by-immediate form derived from N2VImm; the immediate operand
// class and the Format are chosen by the caller.
// Pattern: $Vd = Ty(OpNode(Ty $Vm, i32 $SIMM)).
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin,
             Operand ImmTy,
             string OpcodeStr, string Dt,
             ValueType Ty,
             SDNode OpNode>
    : N2VImm<op24, op23, op11_8, op7, 1, op4,
             (outs QPR:$Vd),
             (ins QPR:$Vm, ImmTy:$SIMM),
             f, itin,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set QPR:$Vd,
                   (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
3233}
3234
3235// Long shift by immediate.
// Long shift-by-immediate form: a DPR source (OpTy) produces a QPR result
// (ResTy). The format is fixed to N2RegVShLFrm and the itinerary to
// IIC_VSHLiD. The pattern matches the immediate through ImmTy itself.
// Pattern: $Vd = ResTy(OpNode(OpTy $Vm, ImmTy $SIMM)).
class N2VLSh<bit op24, bit op23, bits<4> op11_8,
             bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy,
             Operand ImmTy,
             SDPatternOperator OpNode>
    : N2VImm<op24, op23, op11_8, op7, op6, op4,
             (outs QPR:$Vd),
             (ins DPR:$Vm, ImmTy:$SIMM),
             N2RegVShLFrm, IIC_VSHLiD,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set QPR:$Vd,
                   (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
3244
3245// Narrow shift by immediate.
// Narrowing shift-by-immediate form: a QPR source (OpTy) produces a DPR
// result (ResTy). The format is fixed to N2RegVShRFrm.
// Pattern: $Vd = ResTy(OpNode(OpTy $Vm, i32 ImmTy $SIMM)).
class N2VNSh<bit op24, bit op23, bits<4> op11_8,
             bit op7, bit op6, bit op4,
             InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy,
             Operand ImmTy,
             SDPatternOperator OpNode>
    : N2VImm<op24, op23, op11_8, op7, op6, op4,
             (outs DPR:$Vd),
             (ins QPR:$Vm, ImmTy:$SIMM),
             N2RegVShRFrm, itin,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
             [(set DPR:$Vd,
                   (ResTy (OpNode (OpTy QPR:$Vm), (i32 ImmTy:$SIMM))))]>;
3255
3256// Shift right by immediate and accumulate,
3257// both double- and quad-register.
3258let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// DPR shift-by-immediate-and-accumulate form: the shifted $Vm is added to
// the accumulator $src1, which is tied to $Vd. The format is fixed to
// N2RegVShRFrm and the itinerary to IIC_VPALiD.
// Pattern: $Vd = Ty(add($src1, Ty(ShOp($Vm, i32 $SIMM)))).
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy,
                string OpcodeStr, string Dt,
                ValueType Ty,
                SDNode ShOp>
    : N2VImm<op24, op23, op11_8, op7, 0, op4,
             (outs DPR:$Vd),
             (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
             N2RegVShRFrm, IIC_VPALiD,
             OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
             [(set DPR:$Vd,
                   (Ty (add DPR:$src1,
                            (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
// Quad-register form: $Vd = $src1 + ShOp($Vm, $SIMM).
//   $src1 is tied to $Vd, so the accumulator is read and written in place.
//   NOTE(review): this Q-register class uses the D itinerary IIC_VPALiD,
//   whereas other D/Q pairs in this file pick a Q-specific itinerary for the
//   Q form -- confirm whether that is intentional before changing it.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
3275}
3276
3277// Shift by immediate and insert,
3278// both double- and quad-register.
3279let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register form: $Vd = ShOp($src1, $Vm, $SIMM).
//   $src1 is tied to $Vd; the itinerary is fixed to IIC_VSHLiD while the
//   format is chosen by the instantiating def.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
// Quad-register form: $Vd = ShOp($src1, $Vm, $SIMM).
//   $src1 is tied to $Vd; the itinerary is fixed to IIC_VSHLiQ while the
//   format is chosen by the instantiating def.
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
3294}
3295
3296// Convert, with fractional bits immediate,
3297// both double- and quad-register.
// Double-register convert: $Vd = IntOp($Vm, $SIMM), from OpTy to ResTy.
//   The immediate uses the neon_vcvt_imm32 operand; format NVCVTFrm and
//   itinerary IIC_VUNAD are fixed.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Quad-register convert: $Vd = IntOp($Vm, $SIMM), from OpTy to ResTy.
//   The immediate uses the neon_vcvt_imm32 operand; format NVCVTFrm and
//   itinerary IIC_VUNAQ are fixed.
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
3312
3313//===----------------------------------------------------------------------===//
3314// Multiclasses
3315//===----------------------------------------------------------------------===//
3316
3317// Abbreviations used in multiclass suffixes:
3318//   Q = quarter int (8 bit) elements
3319//   H = half int (16 bit) elements
3320//   S = single int (32 bit) elements
3321//   D = double int (64 bit) elements
3322
3323// Neon 2-register vector operations and intrinsics.
3324
// Neon 2-register comparisons.
//   source operand element sizes of 8, 16 and 32 bits.
//   Integer forms append the element size to Dt; the floating-point forms
//   use a fixed "f32"/"f16" data type, yield an integer vector of the same
//   element width, and set Inst{10}. The f16 forms additionally require
//   HasFullFP16.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4,
                       string opc, string Dt, string asm,
                       SDNode OpNode> {
  // ---- 64-bit vector types ----
  def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                 (outs DPR:$Vd), (ins DPR:$Vm),
                 NoItinerary, opc, Dt # "8", asm, "",
                 [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;

  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm),
                  NoItinerary, opc, Dt # "16", asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;

  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm),
                  NoItinerary, opc, Dt # "32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;

  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm),
                  NoItinerary, opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm),
                  NoItinerary, opc, "f16", asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // ---- 128-bit vector types ----
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm),
                  NoItinerary, opc, Dt # "8", asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;

  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm),
                  NoItinerary, opc, Dt # "16", asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;

  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm),
                  NoItinerary, opc, Dt # "32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;

  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm),
                  NoItinerary, opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm),
                  NoItinerary, opc, "f16", asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}
3384
3385
// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits.
//   Result and operand types are identical for every def; itinD is used for
//   the N2VDInt defs and itinQ for the N2VQInt defs.
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinD,
                     OpcodeStr, Dt # "8", v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinD,
                      OpcodeStr, Dt # "16", v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinD,
                      OpcodeStr, Dt # "32", v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, Dt # "8", v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, Dt # "16", v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, Dt # "32", v4i32, v4i32, IntOp>;
}
3408
3409
// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits.
//   Each def is named after its (narrower) result type, while the data-type
//   suffix appended to Dt is the source element size.
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, itin,
                  OpcodeStr, Dt # "16", v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, itin,
                   OpcodeStr, Dt # "32", v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, itin,
                   OpcodeStr, Dt # "64", v2i32, v2i64, OpNode>;
}
3426
// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits.
//   Each def is named after its (narrower) result type, while the data-type
//   suffix appended to Dt is the source element size.
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, itin,
                     OpcodeStr, Dt # "16", v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, itin,
                      OpcodeStr, Dt # "32", v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, itin,
                      OpcodeStr, Dt # "64", v2i32, v2i64, IntOp>;
}
3443
3444
// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode> {
  // Each def is named after its result type; the suffix appended to Dt is
  // the element size of the source operand. All defs share IIC_VQUNAiD.

  // v8i8 source -> v8i16 result.
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, Dt # "8",
                   v8i16, v8i8, OpNode>;
  // v4i16 source -> v4i32 result.
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, Dt # "16",
                   v4i32, v4i16, OpNode>;
  // v2i32 source -> v2i64 result.
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, Dt # "32",
                   v2i64, v2i32, OpNode>;
}
3456
3457
3458// Neon 3-register vector operations.
3459
// First with only element sizes of 8, 16 and 32 bits:
//   The 8- and 16-bit defs share the *16 itineraries; the 32-bit defs use
//   the *32 itineraries. Commutable is forwarded unchanged to every def.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                  OpcodeStr, Dt # "8", v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, Dt # "16", v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, Dt # "32", v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, Dt # "8", v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, Dt # "16", v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, Dt # "32", v4i32, v4i32, OpNode, Commutable>;
}
3488
// Scalar-lane ("SL") variants for 16- and 32-bit elements only.
//   The data type is hard-coded to "i16"/"i32"; the 16-bit defs use the
//   *SL16 classes, and the Q-register defs also pass the D-register type.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  // D-register forms.
  def v4i16 : N3VDSL16<0b01, op11_8,
                       OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D,
                     OpcodeStr, "i32", v2i32, ShOp>;

  // Q-register forms.
  def v8i16 : N3VQSL16<0b01, op11_8,
                       OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q,
                     OpcodeStr, "i32", v4i32, v2i32, ShOp>;
}
3496
// ....then also with element size 64 bits:
//   Inherits the 8/16/32-bit defs from N3V_QHS, passing itinD for both D
//   itineraries and itinQ for both Q itineraries, and adds the 64-bit defs.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4,
            itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, Dt # "64", v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, Dt # "64", v2i64, v2i64, OpNode, Commutable>;
}
3511
3512
3513// Neon 3-register vector intrinsics.
3514
// First with only element sizes of 16 and 32 bits:
//   16-bit defs use the *16 itineraries and 32-bit defs the *32 ones; the
//   format f and Commutable are forwarded unchanged to every def.
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, Dt # "16", v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, Dt # "32", v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, Dt # "16", v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, Dt # "32", v4i32, v4i32, IntOp, Commutable>;
}
// Same shape as N3VInt_HS (16- and 32-bit elements), but built on the
// N3VDIntSh/N3VQIntSh classes and without a Commutable parameter.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, Dt # "16", v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, Dt # "32", v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, Dt # "16", v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, Dt # "32", v4i32, v4i32, IntOp>;
}
3558
// Scalar-lane ("SL") intrinsic variants for 16- and 32-bit elements.
//   The 16-bit defs use the *SL16 classes; the Q-register defs pass both
//   the Q-register type and the corresponding D-register type.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // D-register forms.
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, Dt # "16", v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, Dt # "32", v2i32, IntOp>;

  // Q-register forms.
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, Dt # "16", v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, Dt # "32", v4i32, v2i32, IntOp>;
}
3572
// ....then also with element size of 8 bits:
//   Inherits the 16/32-bit defs from N3VInt_HS and adds the 8-bit defs,
//   which reuse the *16 itineraries.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f,
              itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                     OpcodeStr, Dt # "8", v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, Dt # "8", v16i8, v16i8, IntOp, Commutable>;
}
// Extends N3VInt_HSSh with the 8-bit element defs, which reuse the *16
// itineraries.
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f,
                itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                       OpcodeStr, Dt # "8", v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, Dt # "8", v16i8, v16i8, IntOp>;
}
3602
3603
// ....then also with element size of 64 bits:
//   Inherits the 8/16/32-bit defs from N3VInt_QHS and adds the 64-bit defs,
//   which reuse the *32 itineraries.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f,
               itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, Dt # "64", v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, Dt # "64", v2i64, v2i64, IntOp, Commutable>;
}
// Extends N3VInt_QHSSh with the 64-bit element defs, which reuse the *32
// itineraries.
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f,
                 itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, Dt # "64", v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, Dt # "64", v2i64, v2i64, IntOp>;
}
3633
// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits.
//   Each def is named after its (narrower) result type, while the data-type
//   suffix appended to Dt is the source element size.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
                     OpcodeStr, Dt # "16", v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, Dt # "32", v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, Dt # "64", v2i32, v2i64, IntOp, Commutable>;
}
3649
3650
3651// Neon Long 3-register vector operations.
3652
// Long operations with 8-, 16- and 32-bit source elements.
//   Each def is named after its result type; the suffix appended to Dt is
//   the source element size. The 8- and 16-bit sources share itin16.
multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, Dt # "8", v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, Dt # "16", v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, Dt # "32", v2i64, v2i32, OpNode, Commutable>;
}
3667
// Long scalar-lane ("SL") operations for 16- and 32-bit source elements.
//   Unlike the non-lane long multiclasses, the defs here are named after
//   the source type (v4i16, v2i32) rather than the result type.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin,
                     string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin,
                       OpcodeStr, Dt # "16", v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin,
                     OpcodeStr, Dt # "32", v2i64, v2i32, OpNode>;
}
3676
// Long operations that additionally take an extend node (ExtOp), for 8-,
// 16- and 32-bit source elements.
//   Each def is named after its result type; the 8- and 16-bit sources
//   share itin16.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, Dt # "8",
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, Dt # "16",
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, Dt # "32",
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
3691
3692// Neon Long 3-register vector intrinsics.
3693
// First with only element sizes of 16 and 32 bits:
//   Each def is named after its result type; the suffix appended to Dt is
//   the source element size.
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, Dt # "16", v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, Dt # "32", v2i64, v2i32, IntOp, Commutable>;
}
3706
// Long scalar-lane ("SL") intrinsics for 16- and 32-bit source elements.
//   The defs here are named after the source type (v4i16, v2i32); a single
//   itinerary is shared by both.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, Dt # "16", v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, Dt # "32", v2i64, v2i32, IntOp>;
}
3715
// ....then also with element size of 8 bits:
//   Inherits the 16/32-bit defs from N3VLInt_HS and adds the def with 8-bit
//   source elements, which reuses itin16.
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4,
               itin16, itin32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, Dt # "8", v8i16, v8i8, IntOp, Commutable>;
}
3727
// ....with explicit extend (VABDL).
//   Covers 8-, 16- and 32-bit source elements with one shared itinerary;
//   both IntOp and ExtOp are forwarded to every def.
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin,
                          string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, Dt # "8",
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, Dt # "16",
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, Dt # "32",
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}
3742
3743
// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits.
//   Each def is named after its wide result type; no itinerary parameter is
//   taken here, and OpNode/ExtOp/Commutable are forwarded to every def.
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, Dt # "8",
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, Dt # "16",
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, Dt # "32",
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
3759
3760
// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits.
//   The multiply node is always `mul`; OpNode is the operation it is
//   combined with. The 8- and 16-bit defs share the *16 itineraries.
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, Dt # "8", v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, Dt # "16", v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, Dt # "32", v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, Dt # "8", v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, Dt # "16", v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, Dt # "32", v4i32, mul, OpNode>;
}
3783
// Scalar-lane ("SL") Multiply-Op variants for 16- and 32-bit elements.
//   The multiply node is always `mul`; the Q-register defs pass both the
//   Q-register type and the corresponding D-register type.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator ShOp> {
  // D-register forms.
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, Dt # "16", v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, Dt # "32", v2i32, mul, ShOp>;

  // Q-register forms.
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, Dt # "16",
                            v8i16, v4i16, mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, Dt # "32",
                          v4i32, v2i32, mul, ShOp>;
}
3799
// Neon Intrinsic-Op vector operations,
//   element sizes of 8, 16 and 32 bits.
//   All D-register defs share itinD and all Q-register defs share itinQ;
//   IntOp and OpNode are forwarded unchanged to every def.
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, Dt # "8", v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, Dt # "16", v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, Dt # "32", v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, Dt # "8", v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, Dt # "16", v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, Dt # "32", v4i32, IntOp, OpNode>;
}
3822
// Neon 3-argument intrinsics,
//   element sizes of 16 and 32 bits.
//   Result and operand types are identical for every def; 16-bit defs use
//   the *16 itineraries and 32-bit defs the *32 ones.
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, Dt # "16", v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, Dt # "32", v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, Dt # "16", v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, Dt # "32", v4i32, v4i32, IntOp>;
}
3841
//   element sizes of 8, 16 and 32 bits:
//   Inherits the 16/32-bit defs from N3VInt3_HS and adds the 8-bit defs,
//   which reuse the *16 itineraries.
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp>
  : N3VInt3_HS<op24, op23, op11_8, op4,
               itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp> {
  // 64-bit vector types.
  def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                      OpcodeStr, Dt # "8", v8i8, v8i8, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, Dt # "8", v16i8, v16i8, IntOp>;
}
3856
3857// Neon Long Multiply-Op vector operations,
3858//   element sizes of 8, 16 and 32 bits:
//
// Produces three records, each named after the wide type of its type pair:
//   v8i16 (paired with v8i8,  third argument 0b00, suffix Dt # "8"),
//   v4i32 (paired with v4i16, third argument 0b01, suffix Dt # "16"),
//   v2i64 (paired with v2i32, third argument 0b10, suffix Dt # "32").
// Only two itineraries are supplied: itin16 serves both the 8-bit and the
// 16-bit record, itin32 serves the 32-bit record.
// MulOp and OpNode are forwarded to N3VLMulOp unchanged (presumably the
// multiply node and the node that combines the product with the accumulator;
// N3VLMulOp is declared outside this chunk -- confirm there).
3859multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3860                         InstrItinClass itin16, InstrItinClass itin32,
3861                         string OpcodeStr, string Dt, SDNode MulOp,
3862                         SDNode OpNode> {
3863  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
3864                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
3865  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
3866                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
3867  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
3868                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3869}
3870
// Long Multiply-Op records built on N3VLMulOpSL16 / N3VLMulOpSL (presumably
// the by-scalar/lane instruction forms; those classes are declared outside
// this chunk -- confirm there), element sizes of 16 and 32 bits.
//
// Differences from N3VLMulOp_QHS that are visible here:
//   - records are named after the narrow type of the pair (v4i16, v2i32);
//   - the itineraries are fixed to IIC_VMACi16D / IIC_VMACi32D instead of
//     being parameters;
//   - there are no op23 / op4 parameters and no 8-bit record.
3871multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
3872                          string Dt, SDNode MulOp, SDNode OpNode> {
3873  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
3874                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
3875  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
3876                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3877}
3878
3879
3880// Neon Long 3-argument intrinsics.
3881
3882// First with only element sizes of 16 and 32 bits:
//
// Two records, each named after the wide type of its type pair:
//   v4i32 (paired with v4i16, third argument 0b01, itin16, suffix Dt # "16"),
//   v2i64 (paired with v2i32, third argument 0b10, itin32, suffix Dt # "32").
// IntOp and the remaining encoding parameters are forwarded to N3VLInt3
// unchanged.
3883multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3884                       InstrItinClass itin16, InstrItinClass itin32,
3885                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3886  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
3887                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3888  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
3889                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3890}
3891
// Long 3-argument intrinsic records built on N3VLInt3SL16 / N3VLInt3SL
// (presumably the by-scalar/lane forms; those classes are declared outside
// this chunk -- confirm there), element sizes of 16 and 32 bits.
// Records are named after the narrow type of the pair (v4i16, v2i32) and use
// the fixed itineraries IIC_VMACi16D / IIC_VMACi32D.
3892multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
3893                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3894  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
3895                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
3896  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
3897                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3898}
3899
3900// ....then also with element size of 8 bits:
//
// Inherits v4i32 and v2i64 from N3VLInt3_HS and adds v8i16 (paired with v8i8,
// third argument 0b00, suffix Dt # "8").  The 8-bit record reuses itin16;
// no separate 8-bit itinerary parameter exists.
3901multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3902                        InstrItinClass itin16, InstrItinClass itin32,
3903                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
3904  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
3905  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
3906                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
3907}
3908
3909// ....with explicit extend (VABAL).
//
// Three records named after the wide type of each pair (v8i16/v8i8,
// v4i32/v4i16, v2i64/v2i32) with third argument 0b00 / 0b01 / 0b10 and
// suffix Dt # "8" / "16" / "32".  A single itinerary `itin` is shared by all
// three sizes.  IntOp, ExtOp and OpNode are forwarded to N3VLIntExtOp
// unchanged; how that class combines them is defined outside this chunk.
3910multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3911                            InstrItinClass itin, string OpcodeStr, string Dt,
3912                            SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
3913  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
3914                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
3915                           IntOp, ExtOp, OpNode>;
3916  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
3917                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
3918                           IntOp, ExtOp, OpNode>;
3919  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
3920                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
3921                           IntOp, ExtOp, OpNode>;
3922}
3923
3924
3925// Neon Pairwise long 2-register intrinsics,
3926//   element sizes of 8, 16 and 32 bits:
//
// Six records, three on N2VDPLInt and three on N2VQPLInt.  Each record is
// named after the second type of its pair, whose element size also matches
// the Dt suffix ("8", "16", "32"); the first type of the pair has half as
// many elements of twice the width (e.g. v4i16 with v8i8, v2i64 with v4i32).
// Presumably the first type is the result and the second the operand --
// N2VDPLInt/N2VQPLInt are declared outside this chunk, confirm there.
// The third argument (0b00 / 0b01 / 0b10) tracks the narrow element size.
3927multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3928                        bits<5> op11_7, bit op4,
3929                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3930  // 64-bit vector types.
3931  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3932                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3933  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3934                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3935  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3936                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3937
3938  // 128-bit vector types.
3939  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3940                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3941  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3942                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3943  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3944                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3945}
3946
3947
3948// Neon Pairwise long 2-register accumulate intrinsics,
3949//   element sizes of 8, 16 and 32 bits:
//
// Same parameter list, record names, type pairs and size literals as
// N2VPLInt_QHS; the only visible difference is the base classes, N2VDPLInt2
// and N2VQPLInt2 (declared outside this chunk; presumably they add the
// accumulator operand -- confirm there).
3950multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3951                         bits<5> op11_7, bit op4,
3952                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3953  // 64-bit vector types.
3954  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3955                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3956  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3957                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3958  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3959                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3960
3961  // 128-bit vector types.
3962  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3963                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3964  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3965                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3966  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3967                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3968}
3969
3970
3971// Neon 2-register vector shift by immediate,
3972//   with f of either N2RegVShLFrm or N2RegVShRFrm
3973//   element sizes of 8, 16, 32 and 64 bits:
//
// N2VShL_QHSD is the N2RegVShLFrm flavour; every record takes a plain i32imm
// shift operand.  Eight records are produced (four on N2VDSh, four on
// N2VQSh).  The element size is pinned through the leading bits of imm6,
// whose top bit is Inst{21}:
//   8-bit  : Inst{21-19} = 0b001
//   16-bit : Inst{21-20} = 0b01
//   32-bit : Inst{21}    = 0b1
//   64-bit : no imm6 bits fixed; these records pass 1 instead of 0 as the
//            fourth argument (presumably the op7 bit -- N2VDSh/N2VQSh are
//            declared outside this chunk, confirm there).
3974multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3975                       InstrItinClass itin, string OpcodeStr, string Dt,
3976                       SDNode OpNode> {
3977  // 64-bit vector types.
3978  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3979                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
3980    let Inst{21-19} = 0b001; // imm6 = 001xxx
3981  }
3982  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3983                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
3984    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
3985  }
3986  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3987                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
3988    let Inst{21} = 0b1;      // imm6 = 1xxxxx
3989  }
3990  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
3991                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
3992                             // imm6 = xxxxxx
3993
3994  // 128-bit vector types.
3995  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3996                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
3997    let Inst{21-19} = 0b001; // imm6 = 001xxx
3998  }
3999  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
4000                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
4001    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4002  }
4003  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
4004                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
4005    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4006  }
4007  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
4008                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
4009                             // imm6 = xxxxxx
4010}
// Neon 2-register vector shift right by immediate (N2RegVShRFrm flavour),
//   element sizes of 8, 16, 32 and 64 bits.
//
// Mirrors N2VShL_QHSD record for record, with two visible differences:
//   - the format is N2RegVShRFrm;
//   - the immediate operand is size-specific (shr_imm8, shr_imm16, shr_imm32,
//     shr_imm64) rather than i32imm.
// The imm6 leading-bit constraints and the 0/1 fourth argument follow the
// same per-size scheme as the left-shift multiclass.
// NOTE(review): the `baseOpc` parameter is not referenced anywhere in this
// body; it is kept because callers pass it positionally.
4011multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4012                       InstrItinClass itin, string OpcodeStr, string Dt,
4013                       string baseOpc, SDNode OpNode> {
4014  // 64-bit vector types.
4015  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
4016                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
4017    let Inst{21-19} = 0b001; // imm6 = 001xxx
4018  }
4019  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
4020                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
4021    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4022  }
4023  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
4024                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
4025    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4026  }
4027  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
4028                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
4029                             // imm6 = xxxxxx
4030
4031  // 128-bit vector types.
4032  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
4033                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
4034    let Inst{21-19} = 0b001; // imm6 = 001xxx
4035  }
4036  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
4037                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
4038    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4039  }
4040  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
4041                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
4042    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4043  }
4044  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
4045                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
4046                             // imm6 = xxxxxx
4047}
4048
4049// Neon Shift-Accumulate vector operations,
4050//   element sizes of 8, 16, 32 and 64 bits:
//
// Eight records (four on N2VDShAdd, four on N2VQShAdd).  All use the
// size-specific shr_imm8/16/32/64 immediate operands and forward ShOp
// unchanged.  There is no itinerary or format parameter here; whatever the
// base classes use for those is defined outside this chunk.
// Element size is pinned through the leading imm6 bits (Inst{21-19},
// Inst{21-20}, Inst{21}); the 64-bit records fix no imm6 bits and pass 1 as
// the fourth argument instead of 0.
4051multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4052                         string OpcodeStr, string Dt, SDNode ShOp> {
4053  // 64-bit vector types.
4054  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
4055                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
4056    let Inst{21-19} = 0b001; // imm6 = 001xxx
4057  }
4058  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
4059                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
4060    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4061  }
4062  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4063                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
4064    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4065  }
4066  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4067                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
4068                             // imm6 = xxxxxx
4069
4070  // 128-bit vector types.
4071  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
4072                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
4073    let Inst{21-19} = 0b001; // imm6 = 001xxx
4074  }
4075  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
4076                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
4077    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4078  }
4079  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4080                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
4081    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4082  }
4083  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4084                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
4085                             // imm6 = xxxxxx
4086}
4087
4088// Neon Shift-Insert vector operations,
4089//   with f of either N2RegVShLFrm or N2RegVShRFrm
4090//   element sizes of 8, 16, 32 and 64 bits:
//
// N2VShInsL_QHSD is the N2RegVShLFrm flavour: i32imm shift operand and the
// NEONvsliImm node on every record.  There is no Dt parameter; the data-type
// suffix is the bare size string "8" / "16" / "32" / "64".
// Element size is pinned through the leading imm6 bits; the 64-bit records
// fix no imm6 bits and pass 1 as the fourth argument instead of 0.
4091multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4092                          string OpcodeStr> {
4093  // 64-bit vector types.
4094  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4095                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
4096    let Inst{21-19} = 0b001; // imm6 = 001xxx
4097  }
4098  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4099                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
4100    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4101  }
4102  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4103                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
4104    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4105  }
4106  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
4107                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
4108                             // imm6 = xxxxxx
4109
4110  // 128-bit vector types.
4111  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4112                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
4113    let Inst{21-19} = 0b001; // imm6 = 001xxx
4114  }
4115  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4116                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
4117    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4118  }
4119  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4120                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
4121    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4122  }
4123  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
4124                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
4125                             // imm6 = xxxxxx
4126}
// Neon Shift-Insert vector operations, N2RegVShRFrm flavour,
//   element sizes of 8, 16, 32 and 64 bits.
//
// Mirrors N2VShInsL_QHSD record for record; the visible differences are the
// N2RegVShRFrm format, the size-specific shr_imm8/16/32/64 immediate
// operands, and the NEONvsriImm node.  The data-type suffix is again the
// bare size string, and the imm6 / fourth-argument scheme is unchanged.
4127multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4128                          string OpcodeStr> {
4129  // 64-bit vector types.
4130  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4131                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
4132    let Inst{21-19} = 0b001; // imm6 = 001xxx
4133  }
4134  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4135                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
4136    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4137  }
4138  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4139                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
4140    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4141  }
4142  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4143                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
4144                             // imm6 = xxxxxx
4145
4146  // 128-bit vector types.
4147  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4148                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
4149    let Inst{21-19} = 0b001; // imm6 = 001xxx
4150  }
4151  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4152                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
4153    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4154  }
4155  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4156                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
4157    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4158  }
4159  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4160                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
4161                             // imm6 = xxxxxx
4162}
4163
4164// Neon Shift Long operations,
4165//   element sizes of 8, 16, 32 bits:
//
// Three records named after the wide type of each pair (v8i16/v8i8,
// v4i32/v4i16, v2i64/v2i32).  The Dt suffix and the imm6 leading-bit
// constraint follow the narrow element size, and the immediate operand is
// imm1_7 / imm1_15 / imm1_31 respectively (operand classes declared outside
// this chunk; the names suggest ranges 1..7, 1..15, 1..31 -- confirm).
// op7 and op6 are explicit parameters here and are forwarded to N2VLSh.
4166multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4167                      bit op4, string OpcodeStr, string Dt,
4168                      SDPatternOperator OpNode> {
4169  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4170              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
4171    let Inst{21-19} = 0b001; // imm6 = 001xxx
4172  }
4173  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4174               OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
4175    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4176  }
4177  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4178               OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
4179    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4180  }
4181}
4182
4183// Neon Shift Narrow operations,
4184//   element sizes of 16, 32, 64 bits:
//
// Three records named after the narrow type of each pair (v8i8/v8i16,
// v4i16/v4i32, v2i32/v2i64).  Note the two different sizes in play:
//   - the Dt suffix is the WIDE element size ("16", "32", "64");
//   - the imm6 leading-bit constraint and the immediate operand
//     (shr_imm8 / shr_imm16 / shr_imm32) follow the NARROW element size.
// A single itinerary `itin` is shared by all three records.
4185multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4186                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
4187                      SDPatternOperator OpNode> {
4188  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4189                    OpcodeStr, !strconcat(Dt, "16"),
4190                    v8i8, v8i16, shr_imm8, OpNode> {
4191    let Inst{21-19} = 0b001; // imm6 = 001xxx
4192  }
4193  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4194                     OpcodeStr, !strconcat(Dt, "32"),
4195                     v4i16, v4i32, shr_imm16, OpNode> {
4196    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4197  }
4198  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4199                     OpcodeStr, !strconcat(Dt, "64"),
4200                     v2i32, v2i64, shr_imm32, OpNode> {
4201    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4202  }
4203}
4204
4205//===----------------------------------------------------------------------===//
4206// Instruction Definitions.
4207//===----------------------------------------------------------------------===//
4208
4209// Vector Add Operations.
//
// Integer families are instantiated with `defm` from size-generic
// multiclasses; the floating-point VADD forms are individual `def`s.  The
// f16 forms (VADDhd, VADDhq) carry an explicit
// Requires<[HasNEON,HasFullFP16]>.
// The final 0/1 argument of each record is presumably the commutable flag of
// the N3V* classes (declared outside this chunk -- confirm there); VADDWs and
// VADDWu are the only records in this group that pass 0.
// VADDHN is given null_frag in place of an intrinsic; its selection patterns
// are the standalone Pat records that follow this group in the file.
4210
4211//   VADD     : Vector Add (integer and floating-point)
4212defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
4213                         add, 1>;
4214def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
4215                     v2f32, v2f32, fadd, 1>;
4216def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
4217                     v4f32, v4f32, fadd, 1>;
4218def  VADDhd   : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
4219                     v4f16, v4f16, fadd, 1>,
4220                Requires<[HasNEON,HasFullFP16]>;
4221def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
4222                     v8f16, v8f16, fadd, 1>,
4223                Requires<[HasNEON,HasFullFP16]>;
4224//   VADDL    : Vector Add Long (Q = D + D)
4225defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4226                            "vaddl", "s", add, sext, 1>;
4227defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4228                            "vaddl", "u", add, zext, 1>;
4229//   VADDW    : Vector Add Wide (Q = Q + D)
4230defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
4231defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
4232//   VHADD    : Vector Halving Add
4233defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
4234                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4235                           "vhadd", "s", int_arm_neon_vhadds, 1>;
4236defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
4237                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4238                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
4239//   VRHADD   : Vector Rounding Halving Add
4240defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
4241                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4242                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
4243defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
4244                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4245                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
4246//   VQADD    : Vector Saturating Add
4247defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
4248                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4249                            "vqadd", "s", int_arm_neon_vqadds, 1>;
4250defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
4251                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4252                            "vqadd", "u", int_arm_neon_vqaddu, 1>;
4253//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
4254defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
4255//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
4256defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
4257                            int_arm_neon_vraddhn, 1>;
4258
// Selection patterns for VADDHN.  Each pattern matches: add two Q-register
// vectors, apply ARMvshruImm with an amount equal to the narrow element
// width (8, 16, 32), then truncate to the half-width vector type -- and
// selects the matching VADDHN record on the two original Q registers.
// (ARMvshruImm is declared outside this chunk; presumably a logical shift
// right by immediate, which makes this "keep the high half" -- confirm.)
4259let Predicates = [HasNEON] in {
4260def : Pat<(v8i8  (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
4261          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
4262def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
4263          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
4264def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
4265          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
4266}
4267
4268// Vector Multiply Operations.
//
// Record naming visible in this group:
//   VMUL<type>          integer register forms (from N3V_QHS)
//   VMULpd / VMULpq     polynomial "p8" forms via int_arm_neon_vmulp
//   VMULf* / VMULh*     f32 / f16 register forms (d = 64-bit, q = 128-bit
//                       vector types, per the v2f32/v4f32 type arguments)
//   VMULsl*             forms built on the N3V*SL classes (presumably
//                       multiply by a scalar lane; classes declared outside
//                       this chunk -- confirm there)
// All f16 records carry Requires<[HasNEON,HasFullFP16]>.
4269
4270//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
4271defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
4272                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
4273def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
4274                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
4275def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
4276                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
4277def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
4278                     v2f32, v2f32, fmul, 1>;
4279def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
4280                     v4f32, v4f32, fmul, 1>;
4281def  VMULhd   : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
4282                     v4f16, v4f16, fmul, 1>,
4283                Requires<[HasNEON,HasFullFP16]>;
4284def  VMULhq   : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
4285                     v8f16, v8f16, fmul, 1>,
4286                Requires<[HasNEON,HasFullFP16]>;
4287defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
4288def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
4289def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
4290                       v2f32, fmul>;
4291def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
4292                Requires<[HasNEON,HasFullFP16]>;
4293def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
4294                       v4f16, fmul>,
4295                Requires<[HasNEON,HasFullFP16]>;
4296
// Selection patterns that route multiplies onto the VMULsl* records.
//
// First four patterns: a Q-register vector multiplied by ARMvduplane of
// another Q register.  The output extracts a D sub-register of $src2 chosen
// by DSubReg_i16_reg / DSubReg_i32_reg and rewrites the lane index with
// SubReg_i16_lane / SubReg_i32_lane (transforms declared outside this chunk;
// presumably "which D half holds the lane" and "lane index within that
// half" -- confirm there).
//
// Last four patterns: a vector multiplied by ARMvdup of a scalar register.
// The scalar is placed into ssub_0 of an IMPLICIT_DEF vector and the
// instruction is given lane index 0.
//
// NOTE(review): the f16 patterns here are guarded by [HasNEON] only, while
// VMULslhd / VMULslhq themselves are declared with
// Requires<[HasNEON,HasFullFP16]> -- confirm the f16 vector types cannot
// reach selection without FullFP16.
4297let Predicates = [HasNEON] in {
4298def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
4299                      (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
4300          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
4301                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
4302                                      (DSubReg_i16_reg imm:$lane))),
4303                              (SubReg_i16_lane imm:$lane)))>;
4304def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
4305                      (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
4306          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
4307                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
4308                                      (DSubReg_i32_reg imm:$lane))),
4309                              (SubReg_i32_lane imm:$lane)))>;
4310def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
4311                       (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
4312          (v4f32 (VMULslfq (v4f32 QPR:$src1),
4313                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
4314                                   (DSubReg_i32_reg imm:$lane))),
4315                           (SubReg_i32_lane imm:$lane)))>;
4316def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
4317                       (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
4318          (v8f16 (VMULslhq(v8f16 QPR:$src1),
4319                           (v4f16 (EXTRACT_SUBREG QPR:$src2,
4320                                   (DSubReg_i16_reg imm:$lane))),
4321                           (SubReg_i16_lane imm:$lane)))>;
4322
4323def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
4324          (VMULslfd DPR:$Rn,
4325            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4326            (i32 0))>;
4327def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
4328          (VMULslhd DPR:$Rn,
4329            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
4330            (i32 0))>;
4331def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
4332          (VMULslfq QPR:$Rn,
4333            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4334            (i32 0))>;
4335def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
4336          (VMULslhq QPR:$Rn,
4337            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
4338            (i32 0))>;
4339}
4340
4341//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
//
// Register forms come from N3VInt_HS and the "sl" forms from N3VIntSL_HS;
// both are bound to int_arm_neon_vqdmulh with the "s" data-type prefix and
// cover 16- and 32-bit elements only.
// The two Pat records handle the Q-register case where the second operand is
// an ARMvduplane of a Q register: a D sub-register of $src2 is extracted and
// the lane index is rewritten via the DSubReg_*_reg / SubReg_*_lane
// transforms (declared outside this chunk).
4342defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
4343                          IIC_VMULi16Q, IIC_VMULi32Q,
4344                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
4345defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
4346                            IIC_VMULi16Q, IIC_VMULi32Q,
4347                            "vqdmulh", "s",  int_arm_neon_vqdmulh>;
4348
4349let Predicates = [HasNEON] in {
4350def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
4351                                       (v8i16 (ARMvduplane (v8i16 QPR:$src2),
4352                                                            imm:$lane)))),
4353          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
4354                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4355                                         (DSubReg_i16_reg imm:$lane))),
4356                                 (SubReg_i16_lane imm:$lane)))>;
4357def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
4358                                       (v4i32 (ARMvduplane (v4i32 QPR:$src2),
4359                                                            imm:$lane)))),
4360          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
4361                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4362                                         (DSubReg_i32_reg imm:$lane))),
4363                                 (SubReg_i32_lane imm:$lane)))>;
4364}
4365
4366//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
//
// Same structure as the VQDMULH group: register forms from N3VInt_HS, "sl"
// forms from N3VIntSL_HS, both bound to int_arm_neon_vqrdmulh.  Relative to
// VQDMULH the first argument of the register form is 1 instead of 0 and the
// "sl" opcode field is 0b1101 instead of 0b1100.
// The two Pat records select the "sl" Q-register form when the second
// operand is an ARMvduplane of a Q register.
4367defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
4368                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
4369                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
4370defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
4371                              IIC_VMULi16Q, IIC_VMULi32Q,
4372                              "vqrdmulh", "s",  int_arm_neon_vqrdmulh>;
4373
4374let Predicates = [HasNEON] in {
4375def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
4376                                        (v8i16 (ARMvduplane (v8i16 QPR:$src2),
4377                                                             imm:$lane)))),
4378          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
4379                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
4380                                          (DSubReg_i16_reg imm:$lane))),
4381                                  (SubReg_i16_lane imm:$lane)))>;
4382def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
4383                                        (v4i32 (ARMvduplane (v4i32 QPR:$src2),
4384                                                             imm:$lane)))),
4385          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
4386                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
4387                                          (DSubReg_i32_reg imm:$lane))),
4388                                  (SubReg_i32_lane imm:$lane)))>;
4389}
4390
4391//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
//
// The `let ... in { }` block applies PostEncoderMethod
// "NEONThumb2DataIPostEncoder" and DecoderNamespace "NEONData" to the four
// records inside it (VMULLs, VMULLu, VMULLp8, VMULLp64).  VMULLsls and
// VMULLslu are defined after the closing brace and do not receive these
// overrides from this block.
// VMULLp64 uses NoItinerary and is additionally gated by
// Requires<[HasV8, HasCrypto]>.
4392let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
4393    DecoderNamespace = "NEONData" in {
4394  defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4395                           "vmull", "s", NEONvmulls, 1>;
4396  defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4397                           "vmull", "u", NEONvmullu, 1>;
4398  def  VMULLp8   :  N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
4399                            v8i16, v8i8, int_arm_neon_vmullp, 1>;
4400  def  VMULLp64  : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
4401                          "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
4402                    Requires<[HasV8, HasCrypto]>;
4403}
4404defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
4405defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
4406
4407//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
//
// Register forms from N3VLInt_HS and "sl" forms from N3VLIntSL_HS, both bound
// to int_arm_neon_vqdmull with the "s" data-type prefix (16- and 32-bit
// element sizes, per the _HS multiclass suffix).  The "sl" form receives a
// single itinerary, IIC_VMULi16D.
4408defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
4409                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
4410defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
4411                             "vqdmull", "s", int_arm_neon_vqdmull>;
4412
4413// Vector Multiply-Accumulate and Multiply-Subtract Operations.
//
// Every floating-point VMLA record in this group is gated by UseFPVMLx in
// addition to HasNEON; the f16 records also require HasFullFP16.  The
// integer records (VMLA, VMLAsl) carry no explicit Requires here.
// NOTE(review): the f32 records and the f16 register forms use the
// fmul_su / fadd_mlx fragments, whereas the f16 by-lane forms (VMLAslhd,
// VMLAslhq) use plain fmul / fadd.  Those fragments are declared outside this
// chunk -- confirm the difference is intentional.
4414
4415//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
4416defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4417                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
4418def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
4419                          v2f32, fmul_su, fadd_mlx>,
4420                Requires<[HasNEON, UseFPVMLx]>;
4421def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
4422                          v4f32, fmul_su, fadd_mlx>,
4423                Requires<[HasNEON, UseFPVMLx]>;
4424def  VMLAhd   : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
4425                          v4f16, fmul_su, fadd_mlx>,
4426                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4427def  VMLAhq   : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
4428                          v8f16, fmul_su, fadd_mlx>,
4429                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4430defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
4431                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
4432def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
4433                            v2f32, fmul_su, fadd_mlx>,
4434                Requires<[HasNEON, UseFPVMLx]>;
4435def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
4436                            v4f32, v2f32, fmul_su, fadd_mlx>,
4437                Requires<[HasNEON, UseFPVMLx]>;
4438def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
4439                            v4f16, fmul, fadd>,
4440                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4441def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
4442                            v8f16, v4f16, fmul, fadd>,
4443                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4444
// Integer multiply-accumulate where the multiplier is a lane duplicated from a
// Q register. The lane source is narrowed to a D register with
// EXTRACT_SUBREG before being handed to the lane-indexed instruction.
// DSubReg_*_reg and SubReg_*_lane are defined elsewhere; presumably they map
// the Q-register lane index to the containing D subregister and to the lane
// number within it -- confirm against their definitions.
4445let Predicates = [HasNEON] in {
4446def : Pat<(v8i16 (add (v8i16 QPR:$src1),
4447                  (mul (v8i16 QPR:$src2),
4448                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
4449          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4450                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
4451                                      (DSubReg_i16_reg imm:$lane))),
4452                              (SubReg_i16_lane imm:$lane)))>;
4453
4454def : Pat<(v4i32 (add (v4i32 QPR:$src1),
4455                  (mul (v4i32 QPR:$src2),
4456                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
4457          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4458                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
4459                                      (DSubReg_i32_reg imm:$lane))),
4460                              (SubReg_i32_lane imm:$lane)))>;
4461}
4462
// f32 counterpart of the integer Q-lane patterns: fadd_mlx of src1 and an
// fmul_su by a lane duplicated from a Q register selects VMLAslfq. Gated on
// UseFPVMLx in addition to HasNEON, matching the VMLAslfq record itself.
4463def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
4464                  (fmul_su (v4f32 QPR:$src2),
4465                        (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
4466          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
4467                           (v4f32 QPR:$src2),
4468                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
4469                                   (DSubReg_i32_reg imm:$lane))),
4470                           (SubReg_i32_lane imm:$lane)))>,
4471          Requires<[HasNEON, UseFPVMLx]>;
4472
4473//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
// Signed and unsigned records pair the long-multiply nodes (NEONvmulls /
// NEONvmullu) with `add` as the accumulate operator.
4474defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4475                              "vmlal", "s", NEONvmulls, add>;
4476defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4477                              "vmlal", "u", NEONvmullu, add>;
4478
// Lane-indexed ("sl") forms.
4479defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
4480defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
4481
4482let Predicates = [HasNEON, HasV8_1a] in {
4483  // v8.1a Neon Rounding Doubling Multiply-Op vector operations.
4484  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
4485  //            Returning High Half (D += D * D, Q += Q * Q)
  // The instruction records are created with null_frag, so they carry no
  // selection pattern of their own. The Pat records that follow select them
  // from a saturating add (int_arm_neon_vqadds) of $src1 and the result of
  // int_arm_neon_vqrdmulh; all operands have the same vector type.
4486  defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
4487                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4488                             null_frag>;
4489  def : Pat<(v4i16 (int_arm_neon_vqadds
4490                     (v4i16 DPR:$src1),
4491                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4492                                                   (v4i16 DPR:$Vm))))),
4493            (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4494  def : Pat<(v2i32 (int_arm_neon_vqadds
4495                     (v2i32 DPR:$src1),
4496                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4497                                                   (v2i32 DPR:$Vm))))),
4498            (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4499  def : Pat<(v8i16 (int_arm_neon_vqadds
4500                     (v8i16 QPR:$src1),
4501                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4502                                                   (v8i16 QPR:$Vm))))),
4503            (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4504  def : Pat<(v4i32 (int_arm_neon_vqadds
4505                     (v4i32 QPR:$src1),
4506                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4507                                                   (v4i32 QPR:$Vm))))),
4508            (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4509
  // Lane-indexed VQRDMLAH. The D-register patterns pass the lane source and
  // index straight through (restricted to DPR_8 / DPR_VFP2); the Q-register
  // patterns first narrow the lane source to a D register with
  // EXTRACT_SUBREG.
4510  defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
4511                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4512                                  null_frag>;
4513  def : Pat<(v4i16 (int_arm_neon_vqadds
4514                     (v4i16 DPR:$src1),
4515                     (v4i16 (int_arm_neon_vqrdmulh
4516                              (v4i16 DPR:$Vn),
4517                              (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4518                                                   imm:$lane)))))),
4519            (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
4520                                    imm:$lane))>;
4521  def : Pat<(v2i32 (int_arm_neon_vqadds
4522                     (v2i32 DPR:$src1),
4523                     (v2i32 (int_arm_neon_vqrdmulh
4524                              (v2i32 DPR:$Vn),
4525                              (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4526                                                   imm:$lane)))))),
4527            (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4528                                    imm:$lane))>;
4529  def : Pat<(v8i16 (int_arm_neon_vqadds
4530                     (v8i16 QPR:$src1),
4531                     (v8i16 (int_arm_neon_vqrdmulh
4532                              (v8i16 QPR:$src2),
4533                              (v8i16 (ARMvduplane (v8i16 QPR:$src3),
4534                                                   imm:$lane)))))),
4535            (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
4536                                    (v8i16 QPR:$src2),
4537                                    (v4i16 (EXTRACT_SUBREG
4538                                             QPR:$src3,
4539                                             (DSubReg_i16_reg imm:$lane))),
4540                                    (SubReg_i16_lane imm:$lane)))>;
4541  def : Pat<(v4i32 (int_arm_neon_vqadds
4542                     (v4i32 QPR:$src1),
4543                     (v4i32 (int_arm_neon_vqrdmulh
4544                              (v4i32 QPR:$src2),
4545                              (v4i32 (ARMvduplane (v4i32 QPR:$src3),
4546                                                   imm:$lane)))))),
4547            (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
4548                                    (v4i32 QPR:$src2),
4549                                    (v2i32 (EXTRACT_SUBREG
4550                                             QPR:$src3,
4551                                             (DSubReg_i32_reg imm:$lane))),
4552                                    (SubReg_i32_lane imm:$lane)))>;
4553
4554  //   VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
4555  //              Returning High Half (D -= D * D, Q -= Q * Q)
  // Same structure as VQRDMLAH, but the patterns match a saturating subtract
  // (int_arm_neon_vqsubs) of the vqrdmulh result from $src1.
4556  defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
4557                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4558                             null_frag>;
4559  def : Pat<(v4i16 (int_arm_neon_vqsubs
4560                     (v4i16 DPR:$src1),
4561                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4562                                                   (v4i16 DPR:$Vm))))),
4563            (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4564  def : Pat<(v2i32 (int_arm_neon_vqsubs
4565                     (v2i32 DPR:$src1),
4566                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4567                                                   (v2i32 DPR:$Vm))))),
4568            (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4569  def : Pat<(v8i16 (int_arm_neon_vqsubs
4570                     (v8i16 QPR:$src1),
4571                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4572                                                   (v8i16 QPR:$Vm))))),
4573            (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4574  def : Pat<(v4i32 (int_arm_neon_vqsubs
4575                     (v4i32 QPR:$src1),
4576                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4577                                                   (v4i32 QPR:$Vm))))),
4578            (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4579
  // Lane-indexed VQRDMLSH; mirrors the VQRDMLAHsl patterns above.
4580  defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
4581                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4582                                  null_frag>;
4583  def : Pat<(v4i16 (int_arm_neon_vqsubs
4584                     (v4i16 DPR:$src1),
4585                     (v4i16 (int_arm_neon_vqrdmulh
4586                              (v4i16 DPR:$Vn),
4587                              (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4588                                                   imm:$lane)))))),
4589            (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
4590  def : Pat<(v2i32 (int_arm_neon_vqsubs
4591                     (v2i32 DPR:$src1),
4592                     (v2i32 (int_arm_neon_vqrdmulh
4593                              (v2i32 DPR:$Vn),
4594                              (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4595                                                   imm:$lane)))))),
4596            (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4597                                    imm:$lane))>;
4598  def : Pat<(v8i16 (int_arm_neon_vqsubs
4599                     (v8i16 QPR:$src1),
4600                     (v8i16 (int_arm_neon_vqrdmulh
4601                              (v8i16 QPR:$src2),
4602                              (v8i16 (ARMvduplane (v8i16 QPR:$src3),
4603                                                   imm:$lane)))))),
4604            (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
4605                                    (v8i16 QPR:$src2),
4606                                    (v4i16 (EXTRACT_SUBREG
4607                                             QPR:$src3,
4608                                             (DSubReg_i16_reg imm:$lane))),
4609                                    (SubReg_i16_lane imm:$lane)))>;
4610  def : Pat<(v4i32 (int_arm_neon_vqsubs
4611                     (v4i32 QPR:$src1),
4612                     (v4i32 (int_arm_neon_vqrdmulh
4613                              (v4i32 QPR:$src2),
4614                              (v4i32 (ARMvduplane (v4i32 QPR:$src3),
4615                                                    imm:$lane)))))),
4616            (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
4617                                    (v4i32 QPR:$src2),
4618                                    (v2i32 (EXTRACT_SUBREG
4619                                             QPR:$src3,
4620                                             (DSubReg_i32_reg imm:$lane))),
4621                                    (SubReg_i32_lane imm:$lane)))>;
4622}
4623//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
// The instruction records use null_frag and are selected by the Pat records
// below, which match a saturating add (int_arm_neon_vqadds) of the Q-register
// accumulator and an int_arm_neon_vqdmull of two D registers.
4624defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4625                            "vqdmlal", "s", null_frag>;
4626defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
4627
4628let Predicates = [HasNEON] in {
4629def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
4630                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4631                                                  (v4i16 DPR:$Vm))))),
4632          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4633def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
4634                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4635                                                  (v2i32 DPR:$Vm))))),
4636          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
// Lane-indexed forms: the second multiplicand is a duplicated lane of $Vm.
4637def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
4638                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4639                                (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4640                                                     imm:$lane)))))),
4641          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4642def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
4643                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4644                                (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4645                                                     imm:$lane)))))),
4646          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4647}
4648
4649//   VMLS     : Vector Multiply Subtract (integer and floating-point)
// The integer records pass `sub` as the accumulate operator. The
// floating-point records additionally require UseFPVMLx (and HasFullFP16 for
// the f16 forms).
4650defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4651                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4652def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
4653                          v2f32, fmul_su, fsub_mlx>,
4654                Requires<[HasNEON, UseFPVMLx]>;
4655def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
4656                          v4f32, fmul_su, fsub_mlx>,
4657                Requires<[HasNEON, UseFPVMLx]>;
// NOTE(review): every f16 VMLS record (plain and lane-indexed) uses fmul/fsub,
// while the f32 records use fmul_su/fsub_mlx -- confirm this is intentional.
4658def  VMLShd   : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
4659                          v4f16, fmul, fsub>,
4660                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4661def  VMLShq   : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
4662                          v8f16, fmul, fsub>,
4663                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// Lane-indexed ("sl") forms of the records above.
4664defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
4665                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4666def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
4667                            v2f32, fmul_su, fsub_mlx>,
4668                Requires<[HasNEON, UseFPVMLx]>;
4669def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
4670                            v4f32, v2f32, fmul_su, fsub_mlx>,
4671                Requires<[HasNEON, UseFPVMLx]>;
4672def  VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
4673                            v4f16, fmul, fsub>,
4674                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4675def  VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
4676                            v8f16, v4f16, fmul, fsub>,
4677                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4678
// Integer multiply-subtract where the multiplier is a lane duplicated from a
// Q register. The lane source is narrowed to a D register with
// EXTRACT_SUBREG before being handed to the lane-indexed instruction.
// DSubReg_*_reg and SubReg_*_lane are defined elsewhere; presumably they map
// the Q-register lane index to the containing D subregister and to the lane
// number within it -- confirm against their definitions.
4679let Predicates = [HasNEON] in {
4680def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
4681                  (mul (v8i16 QPR:$src2),
4682                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
4683          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4684                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
4685                                      (DSubReg_i16_reg imm:$lane))),
4686                              (SubReg_i16_lane imm:$lane)))>;
4687
4688def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
4689                  (mul (v4i32 QPR:$src2),
4690                     (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
4691          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4692                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
4693                                      (DSubReg_i32_reg imm:$lane))),
4694                              (SubReg_i32_lane imm:$lane)))>;
4695}
4696
// f32 counterpart of the integer Q-lane patterns: fsub_mlx of src1 and an
// fmul_su by a lane duplicated from a Q register selects VMLSslfq. Gated on
// UseFPVMLx in addition to HasNEON, matching the VMLSslfq record itself.
4697def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
4698                  (fmul_su (v4f32 QPR:$src2),
4699                        (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
4700          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
4701                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
4702                                   (DSubReg_i32_reg imm:$lane))),
4703                           (SubReg_i32_lane imm:$lane)))>,
4704          Requires<[HasNEON, UseFPVMLx]>;
4705
4706//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
// Signed and unsigned records pair the long-multiply nodes (NEONvmulls /
// NEONvmullu) with `sub` as the accumulate operator.
4707defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4708                              "vmlsl", "s", NEONvmulls, sub>;
4709defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4710                              "vmlsl", "u", NEONvmullu, sub>;
4711
// Lane-indexed ("sl") forms.
4712defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
4713defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
4714
4715//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
// The instruction records use null_frag and are selected by the Pat records
// below, which match a saturating subtract (int_arm_neon_vqsubs) of an
// int_arm_neon_vqdmull of two D registers from the Q-register accumulator.
4716defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
4717                            "vqdmlsl", "s", null_frag>;
4718defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
4719
4720let Predicates = [HasNEON] in {
4721def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
4722                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4723                                                  (v4i16 DPR:$Vm))))),
4724          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4725def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
4726                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4727                                                  (v2i32 DPR:$Vm))))),
4728          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
// Lane-indexed forms: the second multiplicand is a duplicated lane of $Vm.
4729def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
4730                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4731                                (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4732                                                     imm:$lane)))))),
4733          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4734def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
4735                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4736                                (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4737                                                     imm:$lane)))))),
4738          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4739}
4740
4741// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
// All records require UseFusedMAC. The f32 forms additionally require
// HasVFP4; the f16 forms require HasFullFP16 instead.
// NOTE(review): the f16 records use plain fmul/fadd (fmul/fsub) while the f32
// records use fmul_su/fadd_mlx (fmul_su/fsub_mlx) -- confirm this is intended.
4742def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
4743                          v2f32, fmul_su, fadd_mlx>,
4744                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4745
4746def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
4747                          v4f32, fmul_su, fadd_mlx>,
4748                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4749def  VFMAhd   : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
4750                          v4f16, fmul, fadd>,
4751                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4752
4753def  VFMAhq   : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
4754                          v8f16, fmul, fadd>,
4755                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4756
4757//   Fused Vector Multiply Subtract (floating-point)
4758def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
4759                          v2f32, fmul_su, fsub_mlx>,
4760                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4761def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
4762                          v4f32, fmul_su, fsub_mlx>,
4763                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4764def  VFMShd   : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
4765                          v4f16, fmul, fsub>,
4766                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4767def  VFMShq   : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
4768                          v8f16, fmul, fsub>,
4769                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4770
4771// Match @llvm.fma.* intrinsics
// The fma node carries its addend as the third operand ($src1), whereas the
// instructions take the accumulator first, so the operands are reordered.
// Unlike the instruction records, these patterns do not require UseFusedMAC.
4772def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4773          (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4774          Requires<[HasNEON,HasFullFP16]>;
4775def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4776          (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4777          Requires<[HasNEON,HasFullFP16]>;
4778def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4779          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4780          Requires<[HasNEON,HasVFP4]>;
4781def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4782          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4783          Requires<[HasNEON,HasVFP4]>;
// An fma whose first multiplicand is negated selects VFMS. Only f32 forms are
// listed in this group.
4784def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
4785          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4786      Requires<[HasNEON,HasVFP4]>;
4787def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
4788          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4789      Requires<[HasNEON,HasVFP4]>;
4790
4791// ARMv8.2a dot product instructions.
4792// We put them in the VFPV8 decoder namespace because the ARM and Thumb
4793// encodings are the same and thus no further bit twiddling is necessary
4794// in the disassembler.
//
// VDOT is the three-register form. RegTy is used for all of $dst, $Vd, $Vn
// and $Vm; AccumTy types the accumulator/result and InputTy types the two
// multiplicands. The output $dst is tied to the accumulator input $Vd, and
// the record is predicated on HasDotProd.
4795class VDOT<bit op6, bit op4, RegisterClass RegTy, string Asm, string AsmTy,
4796           ValueType AccumTy, ValueType InputTy,
4797           SDPatternOperator OpNode> :
4798      N3Vnp<0b11000, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
4799            (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
4800            Asm, AsmTy,
4801            [(set (AccumTy RegTy:$dst),
4802                  (OpNode (AccumTy RegTy:$Vd),
4803                          (InputTy RegTy:$Vn),
4804                          (InputTy RegTy:$Vm)))]> {
4805  let Predicates = [HasDotProd];
4806  let DecoderNamespace = "VFPV8";
4807  let Constraints = "$dst = $Vd";
4808}
4809
// D- and Q-register dot products. The first argument (op6) is 0 for the DPR
// records and 1 for the QPR records; the second (op4) is 1 for the unsigned
// (udot) records and 0 for the signed (sdot) records.
4809def VUDOTD : VDOT<0, 1, DPR, "vudot", "u8", v2i32, v8i8,  int_arm_neon_udot>;
4810def VSDOTD : VDOT<0, 0, DPR, "vsdot", "s8", v2i32, v8i8,  int_arm_neon_sdot>;
4811def VUDOTQ : VDOT<1, 1, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
4812def VSDOTQ : VDOT<1, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;
4814
4814// Indexed dot product instructions:
// Each instantiation produces one instruction record (named after the defm
// itself) plus one anonymous selection pattern.
//   Q, U       - passed through to the N3Vnp encoding arguments.
//   Ty         - register class of $dst, $Vd and $Vn; $Vm is always DPR_VFP2.
//   AccumType  - value type of the accumulator and result.
//   InputType  - value type of the multiplicands.
//   OpNode     - node matched by the selection pattern.
//   RHS        - dag substituted for the $Vm operand in the pattern's output.
4815multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
4816           ValueType AccumType, ValueType InputType, SDPatternOperator OpNode,
4817           dag RHS> {
  // The instruction has an empty pattern list; the lane index is encoded in
  // Inst{5} and printed directly after $Vm in the assembly string.
4818  def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
4819                 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
4820                 N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
4821    bit lane;
4822    let Inst{5} = lane;
4823    let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
4824    let Constraints = "$dst = $Vd";
4825    let Predicates = [HasDotProd];
4826    let DecoderNamespace = "VFPV8";
4827  }
4828
  // Matches OpNode whose second multiplicand is a bitconvert of an
  // AccumType-typed lane duplicate (ARMvduplane) and selects the instruction
  // defined above.
4829  def : Pat<
4830    (AccumType (OpNode (AccumType Ty:$Vd),
4831                       (InputType Ty:$Vn),
4832                       (InputType (bitconvert (AccumType
4833                                  (ARMvduplane (AccumType Ty:$Vm),
4834                                                 VectorIndex32:$lane)))))),
4835    (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
4836}
4838
// Indexed dot-product instantiations. The D-register records pass $Vm through
// as a v2i32 DPR_VFP2 operand; the Q-register records pass the dsub_0
// subregister of the Q-register lane source instead.
4838defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
4839                    int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
4840defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
4841                    int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;
4842defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
4843                    int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4844defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
4845                    int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4847
4848
4848// ARMv8.3 complex operations
// Base class for complex operations with a tied accumulator: $src1 is
// constrained to $Vd, and the 2-bit rotation operand is encoded in
// Inst{24-23} (left unset, {?,?}, in the N3VCP8 argument).
4849class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
4850                            InstrItinClass itin, dag oops, dag iops,
4851                            string opc, string dt, list<dag> pattern>
4852  : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
4853           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
4854  bits<2> rot;
4855  let Inst{24-23} = rot;
4856}
4858
// Base class for complex operations with a 1-bit rotation operand, encoded in
// Inst{24}; op23 fills the other bit of the same N3VCP8 argument. No operand
// is tied (the constraint string is empty).
4858class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
4859                           InstrItinClass itin, dag oops, dag iops, string opc,
4860                            string dt, list<dag> pattern>
4861  : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
4862           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
4863  bits<1> rot;
4864  let Inst{24} = rot;
4865}
4867
// Lane-indexed complex operation with a tied accumulator ($src1 = $Vd). The
// 2-bit rotation is encoded in Inst{21-20} and the single lane bit in
// Inst{5}.
4867class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
4868                                  dag oops, dag iops, string opc, string dt,
4869                                  list<dag> pattern>
4870  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4871               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4872  bits<2> rot;
4873  bit lane;
4874
4875  let Inst{21-20} = rot;
4876  let Inst{5} = lane;
4877}
4879
// Variant of the lane-indexed complex base class in which the lane operand
// occupies no encoding bits: Inst{5} is taken from Vm{4} instead, and a
// custom decoder method is installed.
4879class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
4880                            dag oops, dag iops, string opc, string dt,
4881                            list<dag> pattern>
4882  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4883               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4884  bits<2> rot;
4885  bit lane;
4886
4887  let Inst{21-20} = rot;
4888  let Inst{5} = Vm{4};
4889  // This is needed because the lane operand does not have any bits in the
4890  // encoding (it only has one possible value), so we need to manually set it
4891  // to its default value.
4892  let DecoderMethod = "DecodeNEONComplexLane64Instruction";
4893}
4895
// Produces the four vector variants (v4f16, v8f16, v2f32, v4f32) of a tied
// complex operation. The f16 variants additionally require HasFullFP16. All
// records have empty pattern lists; the Op parameter is not referenced in
// the body.
4895multiclass N3VCP8ComplexTied<bit op21, bit op4,
4896                       string OpcodeStr, SDPatternOperator Op> {
4897  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4898  def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
4899              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4900              OpcodeStr, "f16", []>;
4901  def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
4902              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4903              OpcodeStr, "f16", []>;
4904  }
4905  let Predicates = [HasNEON,HasV8_3a] in {
4906  def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
4907              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4908              OpcodeStr, "f32", []>;
4909  def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
4910              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4911              OpcodeStr, "f32", []>;
4912  }
4913}
4915
// Produces the four vector variants (v4f16, v8f16, v2f32, v4f32) of an
// untied complex operation that takes a complexrotateopodd rotation operand.
// The f16 variants additionally require HasFullFP16. All records have empty
// pattern lists; the Op parameter is not referenced in the body.
4915multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
4916                       string OpcodeStr, SDPatternOperator Op> {
4917  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4918  def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
4919              (outs DPR:$Vd),
4920              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4921              OpcodeStr, "f16", []>;
4922  def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
4923              (outs QPR:$Vd),
4924              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4925              OpcodeStr, "f16", []>;
4926  }
4927  let Predicates = [HasNEON,HasV8_3a] in {
4928  def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
4929              (outs DPR:$Vd),
4930              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4931              OpcodeStr, "f32", []>;
4932  def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
4933              (outs QPR:$Vd),
4934              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4935              OpcodeStr, "f32", []>;
4936  }
4937}
4939
4939// These instructions index by pairs of lanes, so the VectorIndexes are twice
4940// as wide as the data types.
// The f16 records use the Lane32 base class with a DPR_VFP2 lane source and a
// VectorIndex32 index; the f32 records use the Lane64 base class with a DPR
// lane source and a VectorIndex64 index. Every record name carries an
// _indexed suffix. The Op parameter is not referenced in the body.
4941multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
4942                                 SDPatternOperator Op> {
4943  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4944  def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
4945                      (outs DPR:$Vd),
4946                      (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4947                      VectorIndex32:$lane, complexrotateop:$rot),
4948                      OpcodeStr, "f16", []>;
4949  def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
4950                      (outs QPR:$Vd),
4951                      (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
4952                      VectorIndex32:$lane, complexrotateop:$rot),
4953                      OpcodeStr, "f16", []>;
4954  }
4955  let Predicates = [HasNEON,HasV8_3a] in {
4956  def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
4957                      (outs DPR:$Vd),
4958                      (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
4959                      complexrotateop:$rot),
4960                      OpcodeStr, "f32", []>;
4961  def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
4962                      (outs QPR:$Vd),
4963                      (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
4964                      complexrotateop:$rot),
4965                      OpcodeStr, "f32", []>;
4966  }
4967}
4969
// VCMLA is instantiated twice: once for the plain three-register forms and
// once for the lane-indexed forms, whose records carry an _indexed suffix.
// All three instantiations pass null_frag, so no selection patterns are
// attached here.
4968defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
4969defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
4970defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
4973
4974// Vector Subtract Operations.
4975
4976//   VSUB     : Vector Subtract (integer and floating-point)
4977defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
4978                         "vsub", "i", sub, 0>;
4979def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
4980                     v2f32, v2f32, fsub, 0>;
4981def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
4982                     v4f32, v4f32, fsub, 0>;
4983def  VSUBhd   : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
4984                     v4f16, v4f16, fsub, 0>,
4985                Requires<[HasNEON,HasFullFP16]>;
4986def  VSUBhq   : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
4987                     v8f16, v8f16, fsub, 0>,
4988                Requires<[HasNEON,HasFullFP16]>;
4989//   VSUBL    : Vector Subtract Long (Q = D - D)
4990defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
4991                            "vsubl", "s", sub, sext, 0>;
4992defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
4993                            "vsubl", "u", sub, zext, 0>;
4994//   VSUBW    : Vector Subtract Wide (Q = Q - D)
4995defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
4996defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
4997//   VHSUB    : Vector Halving Subtract
4998defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
4999                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5000                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
5001defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
5002                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5003                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
5004//   VQSUB    : Vector Saturating Subtract
// Selected from the int_arm_neon_vqsubs (signed) and int_arm_neon_vqsubu
// (unsigned) intrinsics. Uses the N3VInt_QHSD multiclass (VHSUB above uses
// N3VInt_QHS) -- presumably the extra "D" adds the 64-bit element size; confirm
// in the multiclass definition.
5005defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
5006                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5007                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
5008defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
5009                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5010                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
5011//   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
// null_frag is passed as the operator here; selection of VSUBHNv8i8/v4i16/v2i32
// is done by the explicit trunc/shift/sub Pat records that follow.
5012defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
5013//   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
// Unlike VSUBHN, this is selected directly from the int_arm_neon_vrsubhn
// intrinsic.
5014defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
5015                            int_arm_neon_vrsubhn, 0>;
5016
// Select VSUBHN for the open-coded sequence: subtract two Q-register vectors,
// shift each element right by half its width (8 of 16, 16 of 32, 32 of 64 bits)
// with ARMvshruImm, then truncate to the half-width element type.
5017let Predicates = [HasNEON] in {
5018def : Pat<(v8i8  (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
5019          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
5020def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
5021          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
5022def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
5023          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
5024}
5025
5026// Vector Comparisons.
5027
5028//   VCEQ     : Vector Compare Equal
// All forms select the NEONvceq node. The trailing 1 is presumably the
// "commutable" flag (the ordered compares below pass 0) -- confirm in the
// N3V* classes.
5029defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5030                        IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
// Floating-point forms: the two type arguments differ (integer vector, then
// float vector of the same element width), e.g. v2i32 / v2f32.
5031def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
5032                     NEONvceq, 1>;
5033def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
5034                     NEONvceq, 1>;
// Half-precision forms additionally require HasFullFP16.
5035def  VCEQhd   : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
5036                     NEONvceq, 1>,
5037                Requires<[HasNEON, HasFullFP16]>;
5038def  VCEQhq   : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
5039                     NEONvceq, 1>,
5040                Requires<[HasNEON, HasFullFP16]>;
5041
// VCEQ against zero: assembly operands are "$Vd, $Vm, #0" and the forms select
// NEONvceqz. TwoOperandAliasConstraint ties $Vm to $Vd for the two-operand
// assembly alias.
5042let TwoOperandAliasConstraint = "$Vm = $Vd" in
5043defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
5044                            "$Vd, $Vm, #0", NEONvceqz>;
5045
5046//   VCGE     : Vector Compare Greater Than or Equal
// Signed integer forms select NEONvcge, unsigned integer forms select
// NEONvcgeu; the floating-point forms below reuse NEONvcge.
5047defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5048                        IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
5049defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5050                        IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
// Floating-point forms: integer-vector / float-vector type pairs of equal
// element width (e.g. v2i32 / v2f32).
5051def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
5052                     NEONvcge, 0>;
5053def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
5054                     NEONvcge, 0>;
// Half-precision forms additionally require HasFullFP16.
5055def  VCGEhd   : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
5056                     NEONvcge, 0>,
5057                Requires<[HasNEON, HasFullFP16]>;
5058def  VCGEhq   : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
5059                     NEONvcge, 0>,
5060                Requires<[HasNEON, HasFullFP16]>;
5061
// Signed compares against zero ("$Vd, $Vm, #0"): VCGEz selects NEONvcgez and
// VCLEz selects NEONvclez. $Vm is tied to $Vd for the two-operand alias.
5062let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5063defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
5064                            "$Vd, $Vm, #0", NEONvcgez>;
5065defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
5066                            "$Vd, $Vm, #0", NEONvclez>;
5067}
5068
5069//   VCGT     : Vector Compare Greater Than
// Signed integer forms select NEONvcgt, unsigned integer forms select
// NEONvcgtu; the floating-point forms below reuse NEONvcgt.
5070defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5071                        IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
5072defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5073                        IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
// Floating-point forms: integer-vector / float-vector type pairs of equal
// element width (e.g. v2i32 / v2f32).
5074def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
5075                     NEONvcgt, 0>;
5076def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
5077                     NEONvcgt, 0>;
// Half-precision forms additionally require HasFullFP16.
5078def  VCGThd   : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
5079                     NEONvcgt, 0>,
5080                Requires<[HasNEON, HasFullFP16]>;
5081def  VCGThq   : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
5082                     NEONvcgt, 0>,
5083                Requires<[HasNEON, HasFullFP16]>;
5084
// Signed compares against zero ("$Vd, $Vm, #0"): VCGTz selects NEONvcgtz and
// VCLTz selects NEONvcltz. $Vm is tied to $Vd for the two-operand alias.
5085let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5086defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
5087                            "$Vd, $Vm, #0", NEONvcgtz>;
5088defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
5089                            "$Vd, $Vm, #0", NEONvcltz>;
5090}
5091
5092//   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
// All four forms select the int_arm_neon_vacge intrinsic. Type pairs are
// integer vector / float vector of equal element width (e.g. v2i32 / v2f32).
5093def  VACGEfd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5094                        "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
5095def  VACGEfq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5096                        "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
// Half-precision forms additionally require HasFullFP16.
5097def  VACGEhd   : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5098                        "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
5099                 Requires<[HasNEON, HasFullFP16]>;
5100def  VACGEhq   : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5101                        "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
5102                 Requires<[HasNEON, HasFullFP16]>;
5103//   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
// All four forms select the int_arm_neon_vacgt intrinsic. Type pairs are
// integer vector / float vector of equal element width (e.g. v2i32 / v2f32).
5104def  VACGTfd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5105                        "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
5106def  VACGTfq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5107                        "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
// Half-precision forms additionally require HasFullFP16.
5108def  VACGThd   : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5109                        "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
5110                 Requires<[HasNEON, HasFullFP16]>;
5111def  VACGThq   : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5112                        "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
5113                 Requires<[HasNEON, HasFullFP16]>;
5114//   VTST     : Vector Test Bits
// Selects the NEONvtst node. The type-suffix string is empty (""), unlike the
// "i"/"s"/"u" prefixes used by the compares above.
5115defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
5116                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
5117
// Three-operand assembler aliases: "vaclt"/"vacle" are accepted and mapped onto
// VACGT/VACGE with the two source operands swapped (the alias text orders them
// $Vn, $Vm while the instruction receives $Vm, $Vn).
5118def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5119                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5120def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5121                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5122def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5123                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5124def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5125                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
// The .f16 aliases are only available with HasFullFP16.
5126let Predicates = [HasNEON, HasFullFP16] in {
5127def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5128                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5129def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5130                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5131def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5132                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5133def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5134                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5135}
5136
5137// +fp16fml Floating Point Multiplication Variants
// Everything in this block requires HasNEON and HasFP16FML and is placed in the
// "VFPV8" decoder namespace. None of the records carries a selection pattern
// (all pattern lists are empty).
5138let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {
5139
// Three-register "f16" form built on N3VCP8, with the literal 1 passed as the
// third argument (presumably the Q bit, per the class name -- confirm in
// N3VCP8). Register classes for $Vd/$Vn/$Vm are supplied by the instantiation.
5140class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
5141                RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
5142  : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
5143           asm, "f16", "$Vd, $Vn, $Vm", "", []>;
5144
// Same shape as N3VCP8F16Q1 but built on N3VCP8Q0 with 0 as the third argument.
5145class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
5146                RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
5147  : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
5148           asm, "f16", "$Vd, $Vn, $Vm", "", []>;
5149
5150// Vd, Vs, Vs[0-15], Idx[0-1]
// Indexed form with a D-register destination: $Vn is an SPR, $Vm is restricted
// to SPR_8, and the index is a single bit. The body places idx in Inst{3} and
// scatters the $Vn / $Vm register numbers across the encoding bits listed.
5151class VFMD<string opc, string type, bits<2> S>
5152  : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
5153               (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
5154               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
5155  bit idx;
5156  let Inst{3} = idx;
5157  let Inst{19-16} = Vn{4-1};
5158  let Inst{7}     = Vn{0};
5159  let Inst{5}     = Vm{0};
5160  let Inst{2-0}   = Vm{3-1};
5161}
5162
5163// Vq, Vd, Vd[0-7], Idx[0-3]
// Indexed form with a Q-register destination: $Vn is a DPR, $Vm is restricted
// to DPR_8, and the index is two bits, split across Inst{5} (high bit) and
// Inst{3} (low bit).
5164class VFMQ<string opc, string type, bits<2> S>
5165  : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
5166               (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
5167               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
5168  bits<2> idx;
5169  let Inst{5} = idx{1};
5170  let Inst{3} = idx{0};
5171}
5172
// Concrete records: vfmal/vfmsl differ only in op1 (0b00 vs 0b01) for the
// register forms and in S (0b00 vs 0b01) for the indexed (...I) forms. All are
// marked hasNoSchedulingInfo.
5173let hasNoSchedulingInfo = 1 in {
5174//                                                op1   op2   op3
5175def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
5176def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
5177def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
5178def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
5179def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
5180def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
5181def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
5182def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
5183}
5184} // HasNEON, HasFP16FML
5185
5186
// Two-operand assembler aliases for "vaclt"/"vacle": "$Vd, $Vm" maps onto
// VACGT/VACGE with $Vm as the first source and $Vd reused as the second source.
5187def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5188                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5189def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5190                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5191def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5192                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5193def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5194                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
// The .f16 aliases are only available with HasFullFP16.
5195let Predicates = [HasNEON, HasFullFP16] in {
5196def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5197                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5198def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5199                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5200def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5201                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5202def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5203                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5204}
5205
5206// Vector Bitwise Operations.
5207
// Pattern fragments for vector bitwise NOT, expressed as XOR with an all-ones
// vector: vnotd uses a v8i8 all-ones constant and vnotq a v16i8 one, each
// bitconverted to the type of the operand.
5208def vnotd : PatFrag<(ops node:$in),
5209                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
5210def vnotq : PatFrag<(ops node:$in),
5211                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
5212
5213
5214//   VAND     : Vector Bitwise AND
// Selects the generic `and` node on v2i32 / v4i32; no type-suffix string is
// passed to the N3VDX / N3VQX classes.
5215def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
5216                      v2i32, v2i32, and, 1>;
5217def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
5218                      v4i32, v4i32, and, 1>;
5219
5220//   VEOR     : Vector Bitwise Exclusive OR
// Selects the generic `xor` node on v2i32 / v4i32. Differs from VAND only in
// the first argument (1 instead of 0), the mnemonic and the operator.
5221def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
5222                      v2i32, v2i32, xor, 1>;
5223def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
5224                      v4i32, v4i32, xor, 1>;
5225
5226//   VORR     : Vector Bitwise OR
// Selects the generic `or` node on v2i32 / v4i32. Differs from VAND only in
// the third argument (0b10 instead of 0b00), the mnemonic and the operator.
5227def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
5228                      v2i32, v2i32, or, 1>;
5229def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
5230                      v4i32, v4i32, or, 1>;
5231
// VORR (immediate): OR a register with a modified-immediate splat, selected
// from NEONvorrImm. The register is read-modify-write: the input $src is tied
// to the output via "$src = $Vd", and only "$Vd, $SIMM" appears in the asm
// string. The '?' entries in the fourth N1ModImm argument are left unset there
// and filled from the corresponding SIMM bits by the `let Inst{...}` in each
// body (bit 9 for i16, bits 10-9 for i32).
5232def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
5233                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5234                          IIC_VMOVImm,
5235                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5236                          [(set DPR:$Vd,
5237                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
5238  let Inst{9} = SIMM{9};
5239}
5240
5241def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
5242                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5243                          IIC_VMOVImm,
5244                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5245                          [(set DPR:$Vd,
5246                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
5247  let Inst{10-9} = SIMM{10-9};
5248}
5249
// Q-register counterparts of the two records above (QPR operands, v8i16/v4i32).
5250def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
5251                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5252                          IIC_VMOVImm,
5253                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5254                          [(set QPR:$Vd,
5255                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
5256  let Inst{9} = SIMM{9};
5257}
5258
5259def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
5260                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5261                          IIC_VMOVImm,
5262                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5263                          [(set QPR:$Vd,
5264                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
5265  let Inst{10-9} = SIMM{10-9};
5266}
5267
5268
5269//   VBIC     : Vector Bitwise Bit Clear (AND NOT)
// Selected from `and $Vn, (not $Vm)`, using the vnotd / vnotq fragments for the
// inverted operand. For the two-operand assembly alias, $Vn is tied to $Vd.
5270let TwoOperandAliasConstraint = "$Vn = $Vd" in {
5271def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5272                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5273                     "vbic", "$Vd, $Vn, $Vm", "",
5274                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
5275                                                 (vnotd DPR:$Vm))))]>;
5276def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5277                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5278                     "vbic", "$Vd, $Vn, $Vm", "",
5279                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
5280                                                 (vnotq QPR:$Vm))))]>;
5281}
5282
// VBIC (immediate): bit-clear a register with a modified-immediate splat,
// selected from NEONvbicImm. Same shape as the VORR immediate records: $src is
// tied to $Vd, and the '?' bits of the fourth N1ModImm argument are filled from
// SIMM in each body. The argument lists differ from VORR's only in the sixth
// argument (1 here, 0 for VORR).
5283def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
5284                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5285                          IIC_VMOVImm,
5286                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5287                          [(set DPR:$Vd,
5288                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
5289  let Inst{9} = SIMM{9};
5290}
5291
5292def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
5293                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5294                          IIC_VMOVImm,
5295                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5296                          [(set DPR:$Vd,
5297                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
5298  let Inst{10-9} = SIMM{10-9};
5299}
5300
// Q-register counterparts of the two records above (QPR operands, v8i16/v4i32).
5301def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
5302                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5303                          IIC_VMOVImm,
5304                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5305                          [(set QPR:$Vd,
5306                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
5307  let Inst{9} = SIMM{9};
5308}
5309
5310def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
5311                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5312                          IIC_VMOVImm,
5313                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5314                          [(set QPR:$Vd,
5315                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
5316  let Inst{10-9} = SIMM{10-9};
5317}
5318
5319//   VORN     : Vector Bitwise OR NOT
// Selected from `or $Vn, (not $Vm)`, using the vnotd / vnotq fragments for the
// inverted operand. Unlike VBIC, no TwoOperandAliasConstraint is set here.
5320def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
5321                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5322                     "vorn", "$Vd, $Vn, $Vm", "",
5323                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
5324                                                (vnotd DPR:$Vm))))]>;
5325def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
5326                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5327                     "vorn", "$Vd, $Vn, $Vm", "",
5328                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
5329                                                (vnotq QPR:$Vm))))]>;
5330
5331//   VMVN     : Vector Bitwise NOT (Immediate)
// Materializes an inverted modified-immediate, selected from ARMvmvnImm. These
// records take only the immediate as input, and are all marked
// isReMaterializable.
5332
5333let isReMaterializable = 1 in {
5334
// i16 forms: operand class nImmSplatI16; only bit 9 of the fourth N1ModImm
// argument is left open ('?') and filled from SIMM{9}.
5335def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
5336                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5337                         "vmvn", "i16", "$Vd, $SIMM", "",
5338                         [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
5339  let Inst{9} = SIMM{9};
5340}
5341
5342def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
5343                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5344                         "vmvn", "i16", "$Vd, $SIMM", "",
5345                         [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
5346  let Inst{9} = SIMM{9};
5347}
5348
// i32 forms: operand class nImmVMOVI32 (not nImmSplatI32); all four bits of the
// fourth N1ModImm argument are open and Inst{11-8} is taken from SIMM{11-8}.
5349def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
5350                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5351                         "vmvn", "i32", "$Vd, $SIMM", "",
5352                         [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
5353  let Inst{11-8} = SIMM{11-8};
5354}
5355
5356def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
5357                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5358                         "vmvn", "i32", "$Vd, $SIMM", "",
5359                         [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
5360  let Inst{11-8} = SIMM{11-8};
5361}
5362}
5363
5364//   VMVN     : Vector Bitwise NOT
// Register forms, selected from the vnotd / vnotq fragments (XOR with all-ones).
// NOTE(review): VMVNq uses the same itinerary as VMVNd (IIC_VSUBiD) although
// other Q-register forms in this file use a ...Q itinerary (e.g. IIC_VSUBiQ for
// VSUB) -- confirm whether IIC_VSUBiQ was intended.
5365def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
5366                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
5367                     "vmvn", "$Vd, $Vm", "",
5368                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
5369def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
5370                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
5371                     "vmvn", "$Vd, $Vm", "",
5372                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
// NOTE(review): these two patterns match the same v2i32 / v4i32 DAGs as the
// patterns already attached to VMVNd / VMVNq above; they look redundant.
5373let Predicates = [HasNEON] in {
5374def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
5375def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
5376}
5377
5378//   VBSL     : Vector Bitwise Select
// D-register form. The first input ($src1) is tied to the destination
// ("$src1 = $Vd") and does not appear in the asm string, which prints only
// "$Vd, $Vn, $Vm". The instruction's own pattern matches NEONvbsl on v2i32.
5379def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5380                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5381                     N3RegFrm, IIC_VCNTiD,
5382                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5383                     [(set DPR:$Vd,
5384                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
// Additional selection patterns for VBSLd: the int_arm_neon_vbsl intrinsic for
// each 64-bit vector type listed (v8i8, v4i16, v2i32, v2f32, v1i64).
5385let Predicates = [HasNEON] in {
5386def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
5387                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
5388          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5389def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
5390                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
5391          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5392def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
5393                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
5394          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5395def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
5396                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
5397          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5398def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
5399                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
5400          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5401
// Open-coded select: (Vn & Vd) | (Vm & ~Vd), where $Vd is the mask and becomes
// the tied first operand of VBSLd. Matched for v2i32 and v1i64.
5402def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
5403                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
5404          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5405
5406def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
5407                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
5408          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5409}
5410
// VBSL, Q-register form. As with VBSLd, $src1 is tied to $Vd and omitted from
// the asm string; the instruction's own pattern matches NEONvbsl on v4i32.
5411def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5412                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5413                     N3RegFrm, IIC_VCNTiQ,
5414                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5415                     [(set QPR:$Vd,
5416                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
5417
// Additional selection patterns for VBSLq: the int_arm_neon_vbsl intrinsic for
// each 128-bit vector type listed (v16i8, v8i16, v4i32, v4f32, v2i64).
5418let Predicates = [HasNEON] in {
5419def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
5420                                   (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
5421          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5422def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
5423                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
5424          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5425def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
5426                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
5427          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5428def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
5429                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
5430          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5431def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
5432                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
5433          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5434
// Open-coded select: (Vn & Vd) | (Vm & ~Vd), where $Vd is the mask and becomes
// the tied first operand of VBSLq. Matched for v4i32 and v2i64.
5435def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
5436                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
5437          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5438def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
5439                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
5440          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5441}
5442
5443//   VBIF     : Vector Bitwise Insert if False
5444//              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
5445// FIXME: This instruction's encoding MAY NOT BE correct.
// Both records have an empty pattern list, so they are never produced by
// instruction selection from these definitions. As written, the operand list
// and tie ("$src1 = $Vd") are the same as VBSL's; only the third N3VX argument
// (0b11) and the mnemonic differ.
5446def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
5447                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5448                     N3RegFrm, IIC_VBINiD,
5449                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5450                     []>;
5451def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
5452                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5453                     N3RegFrm, IIC_VBINiQ,
5454                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5455                     []>;
5456
5457//   VBIT     : Vector Bitwise Insert if True
5458//              like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
5459// FIXME: This instruction's encoding MAY NOT BE correct.
// Both records have an empty pattern list, so they are never produced by
// instruction selection from these definitions. As written, the operand list
// and tie ("$src1 = $Vd") are the same as VBSL's; only the third N3VX argument
// (0b10) and the mnemonic differ.
5460def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
5461                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5462                     N3RegFrm, IIC_VBINiD,
5463                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5464                     []>;
5465def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
5466                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5467                     N3RegFrm, IIC_VBINiQ,
5468                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5469                     []>;
5470
5471// VBIT/VBIF have no instruction-selection patterns yet (their pattern lists
5472// are empty).  The TwoAddress pass will not go looking for equivalent
5473// operations with different register constraints; it just inserts copies.
5474
5475// Vector Absolute Differences.
5476
5477//   VABD     : Vector Absolute Difference
// Signed integer forms select int_arm_neon_vabds and unsigned integer forms
// int_arm_neon_vabdu.
5478defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
5479                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5480                           "vabd", "s", int_arm_neon_vabds, 1>;
5481defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
5482                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5483                           "vabd", "u", int_arm_neon_vabdu, 1>;
// Floating-point forms reuse the int_arm_neon_vabds intrinsic on float vector
// types; the f16 forms additionally require HasFullFP16.
5484def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
5485                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
5486def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
5487                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
5488def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
5489                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
5490                Requires<[HasNEON, HasFullFP16]>;
5491def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
5492                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
5493                Requires<[HasNEON, HasFullFP16]>;
5494
5495//   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
// Both the signed and the unsigned variant pass zext as the extension operator
// (the signed one still uses the signed intrinsic int_arm_neon_vabds).
// Presumably this is because an absolute difference is non-negative, so its
// widening is a zero-extension in both cases -- confirm in N3VLIntExt_QHS.
5496defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
5497                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
5498defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
5499                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
5500
// Select the unsigned VABDL for abs(zext(a) - zext(b)) written with generic
// nodes, for v8i8 -> v8i16 and v4i16 -> v4i32.
5501let Predicates = [HasNEON] in {
5502def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
5503          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
5504def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
5505          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
5506}
5507
5508// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
5509// shift/xor pattern for ABS.
5510
// abd_shr: ARMvshrsImm of (zext(in1) - zext(in2)) by $shift. With shift = 63 on
// v2i64 this is the sign mask `s` of the difference `x`.
5511def abd_shr :
5512    PatFrag<(ops node:$in1, node:$in2, node:$shift),
5513            (ARMvshrsImm (sub (zext node:$in1),
5514                            (zext node:$in2)), (i32 $shift))>;
5515
// Matches s ^ (x + s), with x = zext(opA) - zext(opB) and s = abd_shr(..., 63),
// where both XOR operands are bitconverted from v2i64 to v4i32, and selects
// VABDLuv2i64.
5516let Predicates = [HasNEON] in {
5517def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
5518               (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
5519                                                   (zext (v2i32 DPR:$opB))),
5520                                         (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
5521          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
5522}
5523
5524//   VABA     : Vector Absolute Difference and Accumulate
// Combines the absolute-difference intrinsic (int_arm_neon_vabds / vabdu) with
// the generic `add` node, both passed as operators to N3VIntOp_QHS.
5525defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5526                             "vaba", "s", int_arm_neon_vabds, add>;
5527defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5528                             "vaba", "u", int_arm_neon_vabdu, add>;
5529
5530//   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
// Passes the absolute-difference intrinsic, zext as the extension operator
// (for both the signed and unsigned variant, as VABDL does) and `add` as the
// accumulate operator.
5531defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
5532                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
5533defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
5534                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;
5535
5536// Vector Maximum and Minimum.
5537
5538//   VMAX     : Vector Maximum
// Integer forms select the generic smax / umax nodes.
5539defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
5540                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5541                           "vmax", "s", smax, 1>;
5542defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
5543                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5544                           "vmax", "u", umax, 1>;
// Floating-point forms select fmaximum (VMAXNM below uses fmaxnum instead).
5545def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
5546                        "vmax", "f32",
5547                        v2f32, v2f32, fmaximum, 1>;
5548def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5549                        "vmax", "f32",
5550                        v4f32, v4f32, fmaximum, 1>;
// Half-precision forms additionally require HasFullFP16.
5551def  VMAXhd   : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
5552                        "vmax", "f16",
5553                        v4f16, v4f16, fmaximum, 1>,
5554                Requires<[HasNEON, HasFullFP16]>;
5555def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5556                        "vmax", "f16",
5557                        v8f16, v8f16, fmaximum, 1>,
5558                Requires<[HasNEON, HasFullFP16]>;
5559
5560// VMAXNM
// Selects the fmaxnum node. These records require HasV8 in addition to HasNEON
// (plus HasFullFP16 for the f16 forms), use the "v8NEON" decoder namespace and
// the NEONThumb2V8PostEncoder post-encoder hook, and have no itinerary
// (NoItinerary).
5561let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
5562  def NEON_VMAXNMNDf  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
5563                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
5564                                  v2f32, v2f32, fmaxnum, 1>,
5565                                  Requires<[HasV8, HasNEON]>;
5566  def NEON_VMAXNMNQf  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
5567                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
5568                                  v4f32, v4f32, fmaxnum, 1>,
5569                                  Requires<[HasV8, HasNEON]>;
5570  def NEON_VMAXNMNDh  : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
5571                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
5572                                  v4f16, v4f16, fmaxnum, 1>,
5573                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5574  def NEON_VMAXNMNQh  : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
5575                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
5576                                  v8f16, v8f16, fmaxnum, 1>,
5577                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5578}
5579
5580//   VMIN     : Vector Minimum
// Integer forms select the generic smin / umin nodes. The argument lists mirror
// VMAX's except for the fourth argument (1 here, 0 for VMAX).
5581defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
5582                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5583                           "vmin", "s", smin, 1>;
5584defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
5585                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5586                           "vmin", "u", umin, 1>;
// Floating-point forms select fminimum (VMINNM below uses fminnum instead).
5587def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
5588                        "vmin", "f32",
5589                        v2f32, v2f32, fminimum, 1>;
5590def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5591                        "vmin", "f32",
5592                        v4f32, v4f32, fminimum, 1>;
// Half-precision forms additionally require HasFullFP16.
5593def  VMINhd   : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
5594                        "vmin", "f16",
5595                        v4f16, v4f16, fminimum, 1>,
5596                Requires<[HasNEON, HasFullFP16]>;
5597def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5598                        "vmin", "f16",
5599                        v8f16, v8f16, fminimum, 1>,
5600                Requires<[HasNEON, HasFullFP16]>;
5601
5602// VMINNM
// Selects the fminnum node. Same requirements and settings as VMAXNM (HasV8,
// "v8NEON" decoder namespace, NEONThumb2V8PostEncoder, NoItinerary); the
// argument lists differ from VMAXNM's in the second argument (0b10 / 0b11
// instead of 0b00 / 0b01).
5603let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
5604  def NEON_VMINNMNDf  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
5605                                  N3RegFrm, NoItinerary, "vminnm", "f32",
5606                                  v2f32, v2f32, fminnum, 1>,
5607                                  Requires<[HasV8, HasNEON]>;
5608  def NEON_VMINNMNQf  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
5609                                  N3RegFrm, NoItinerary, "vminnm", "f32",
5610                                  v4f32, v4f32, fminnum, 1>,
5611                                  Requires<[HasV8, HasNEON]>;
5612  def NEON_VMINNMNDh  : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
5613                                  N3RegFrm, NoItinerary, "vminnm", "f16",
5614                                  v4f16, v4f16, fminnum, 1>,
5615                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5616  def NEON_VMINNMNQh  : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
5617                                  N3RegFrm, NoItinerary, "vminnm", "f16",
5618                                  v8f16, v8f16, fminnum, 1>,
5619                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5620}
5621
5622// Vector Pairwise Operations.
5623
5624//   VPADD    : Vector Pairwise Add
// Only 64-bit vector types (v8i8, v4i16, v2i32, v2f32, v4f16) are defined.
// Every variant selects the int_arm_neon_vpadd intrinsic; the variants differ
// in element type and encoding bits. The f16 form requires HasFullFP16.
5625def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5626                        "vpadd", "i8",
5627                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
5628def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5629                        "vpadd", "i16",
5630                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
5631def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5632                        "vpadd", "i32",
5633                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
5634def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
5635                        IIC_VPBIND, "vpadd", "f32",
5636                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
5637def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
5638                        IIC_VPBIND, "vpadd", "f16",
5639                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
5640                Requires<[HasNEON, HasFullFP16]>;
5641
5642//   VPADDL   : Vector Pairwise Add Long
// Signed and unsigned forms expand through N2VPLInt_QHS and select
// int_arm_neon_vpaddls and int_arm_neon_vpaddlu respectively.
5643defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
5644                             int_arm_neon_vpaddls>;
5645defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
5646                             int_arm_neon_vpaddlu>;
5647
5648//   VPADAL   : Vector Pairwise Add and Accumulate Long
// Signed and unsigned forms expand through N2VPLInt2_QHS and select
// int_arm_neon_vpadals and int_arm_neon_vpadalu respectively.
5649defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
5650                              int_arm_neon_vpadals>;
5651defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
5652                              int_arm_neon_vpadalu>;
5653
5654//   VPMAX    : Vector Pairwise Maximum
// Signed-integer and both floating-point forms select int_arm_neon_vpmaxs;
// unsigned-integer forms select int_arm_neon_vpmaxu. Only 64-bit vector types
// are defined. The f16 form additionally requires HasFullFP16.
5655def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5656                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
5657def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5658                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
5659def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5660                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
5661def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5662                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
5663def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5664                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
5665def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5666                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
5667def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
5668                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
5669def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
5670                        "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
5671                Requires<[HasNEON, HasFullFP16]>;
5672
5673//   VPMIN    : Vector Pairwise Minimum
// Signed-integer and both floating-point forms select int_arm_neon_vpmins;
// unsigned-integer forms select int_arm_neon_vpminu. Only 64-bit vector types
// are defined. The f16 form additionally requires HasFullFP16.
5674def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5675                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
5676def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5677                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
5678def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5679                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
5680def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5681                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
5682def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5683                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
5684def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5685                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
5686def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5687                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
5688def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5689                        "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
5690                Requires<[HasNEON, HasFullFP16]>;
5691
5692// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
5693
5694//   VRECPE   : Vector Reciprocal Estimate
// The u32, f32 and f16 forms all select the same int_arm_neon_vrecpe
// intrinsic and are distinguished by vector type. Each has a 64-bit
// (N2VDInt) and a 128-bit (N2VQInt) def; the f16 forms require HasFullFP16.
5695def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
5696                        IIC_VUNAD, "vrecpe", "u32",
5697                        v2i32, v2i32, int_arm_neon_vrecpe>;
5698def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
5699                        IIC_VUNAQ, "vrecpe", "u32",
5700                        v4i32, v4i32, int_arm_neon_vrecpe>;
5701def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
5702                        IIC_VUNAD, "vrecpe", "f32",
5703                        v2f32, v2f32, int_arm_neon_vrecpe>;
5704def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
5705                        IIC_VUNAQ, "vrecpe", "f32",
5706                        v4f32, v4f32, int_arm_neon_vrecpe>;
5707def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
5708                        IIC_VUNAD, "vrecpe", "f16",
5709                        v4f16, v4f16, int_arm_neon_vrecpe>,
5710                Requires<[HasNEON, HasFullFP16]>;
5711def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
5712                        IIC_VUNAQ, "vrecpe", "f16",
5713                        v8f16, v8f16, int_arm_neon_vrecpe>,
5714                Requires<[HasNEON, HasFullFP16]>;
5715
5716//   VRECPS   : Vector Reciprocal Step
// Floating-point only (f32 and f16). All forms select int_arm_neon_vrecps;
// the f16 forms additionally require HasFullFP16.
5717def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
5718                        IIC_VRECSD, "vrecps", "f32",
5719                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
5720def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
5721                        IIC_VRECSQ, "vrecps", "f32",
5722                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
5723def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
5724                        IIC_VRECSD, "vrecps", "f16",
5725                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
5726                Requires<[HasNEON, HasFullFP16]>;
5727def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
5728                        IIC_VRECSQ, "vrecps", "f16",
5729                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
5730                Requires<[HasNEON, HasFullFP16]>;
5731
5732//   VRSQRTE  : Vector Reciprocal Square Root Estimate
// The u32, f32 and f16 forms all select the same int_arm_neon_vrsqrte
// intrinsic and are distinguished by vector type. Each has a 64-bit
// (N2VDInt) and a 128-bit (N2VQInt) def; the f16 forms require HasFullFP16.
5733def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
5734                         IIC_VUNAD, "vrsqrte", "u32",
5735                         v2i32, v2i32, int_arm_neon_vrsqrte>;
5736def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
5737                         IIC_VUNAQ, "vrsqrte", "u32",
5738                         v4i32, v4i32, int_arm_neon_vrsqrte>;
5739def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
5740                         IIC_VUNAD, "vrsqrte", "f32",
5741                         v2f32, v2f32, int_arm_neon_vrsqrte>;
5742def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
5743                         IIC_VUNAQ, "vrsqrte", "f32",
5744                         v4f32, v4f32, int_arm_neon_vrsqrte>;
5745def  VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
5746                         IIC_VUNAD, "vrsqrte", "f16",
5747                         v4f16, v4f16, int_arm_neon_vrsqrte>,
5748                Requires<[HasNEON, HasFullFP16]>;
5749def  VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
5750                         IIC_VUNAQ, "vrsqrte", "f16",
5751                         v8f16, v8f16, int_arm_neon_vrsqrte>,
5752                Requires<[HasNEON, HasFullFP16]>;
5753
5754//   VRSQRTS  : Vector Reciprocal Square Root Step
// Floating-point only (f32 and f16). All forms select int_arm_neon_vrsqrts;
// the f16 forms additionally require HasFullFP16.
5755def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
5756                        IIC_VRECSD, "vrsqrts", "f32",
5757                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
5758def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
5759                        IIC_VRECSQ, "vrsqrts", "f32",
5760                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
5761def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
5762                        IIC_VRECSD, "vrsqrts", "f16",
5763                        v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
5764                Requires<[HasNEON, HasFullFP16]>;
5765def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
5766                        IIC_VRECSQ, "vrsqrts", "f16",
5767                        v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
5768                Requires<[HasNEON, HasFullFP16]>;
5769
5770// Vector Shifts.
5771
5772//   VSHL     : Vector Shift
// Register-shift forms in the N3RegVShFrm format. The signed and unsigned
// multiclass instantiations select int_arm_neon_vshifts and
// int_arm_neon_vshiftu respectively.
5773defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
5774                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
5775                            "vshl", "s", int_arm_neon_vshifts>;
5776defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
5777                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
5778                            "vshl", "u", int_arm_neon_vshiftu>;
5779
// Selection patterns for the ARMvshls / ARMvshlu nodes: each 64-bit (DPR) and
// 128-bit (QPR) integer vector type maps onto the VSHLs* / VSHLu* instruction
// of the same type. The QPR patterns reuse the $Dn/$Dm operand names; they
// only bind operands within each individual pattern.
5780let Predicates = [HasNEON] in {
5781def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
5782          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
5783def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
5784          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
5785def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
5786          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
5787def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
5788          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
5789def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
5790          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
5791def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
5792          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
5793def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
5794          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
5795def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
5796          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;
5797
// Unsigned counterparts.
5798def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
5799          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
5800def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
5801          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
5802def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
5803          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
5804def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
5805          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
5806def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
5807          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
5808def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
5809          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
5810def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
5811          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
5812def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
5813          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
5814
5815}
5816
5817//   VSHL     : Vector Shift Left (Immediate)
// Expands through N2VShL_QHSD and selects the ARMvshlImm node.
5818defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
5819
5820//   VSHR     : Vector Shift Right (Immediate)
// The signed form selects ARMvshrsImm and the unsigned form ARMvshruImm.
// NOTE(review): the "VSHRs"/"VSHRu" strings are passed to N2VShR_QHSD as a
// base name; their exact use is defined by that multiclass -- confirm there.
5821defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
5822                            ARMvshrsImm>;
5823defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
5824                            ARMvshruImm>;
5825
5826//   VSHLL    : Vector Shift Left Long
// The selection fragment first widens the source ($LHS) with sext (signed
// form) or zext (unsigned form) and then applies ARMvshlImm by $RHS.
5827defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
5828  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
5829defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
5830  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;
5831
5832//   VSHLL    : Vector Shift Left Long (with maximum shift count)
// N2VLShMax specializes N2VLSh with no built-in selection pattern (null_frag),
// sets encoding bits 21-16 directly from op21_16, and routes decoding through
// DecodeVSHLMaxInstruction. The three defs below cover the i8, i16 and i32
// source element sizes.
5833class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
5834                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
5835                ValueType OpTy, Operand ImmTy>
5836  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
5837           ResTy, OpTy, ImmTy, null_frag> {
5838  let Inst{21-16} = op21_16;
5839  let DecoderMethod = "DecodeVSHLMaxInstruction";
5840}
5841def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
5842                          v8i16, v8i8, imm8>;
5843def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
5844                          v4i32, v4i16, imm16>;
5845def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
5846                          v2i64, v2i32, imm32>;
5847
// Selection patterns for VSHLL with the maximum shift count. Shifting the
// widened value left by the full source element width (8, 16 or 32) shifts
// out every extension bit, so zext, sext and anyext inputs all map to the
// same VSHLLi8 / VSHLLi16 / VSHLLi32 instruction.
5847let Predicates = [HasNEON] in {
5848def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
5849          (VSHLLi8 DPR:$Rn, 8)>;
5850def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
5851          (VSHLLi16 DPR:$Rn, 16)>;
5852def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
5853          (VSHLLi32 DPR:$Rn, 32)>;
5854def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
5855          (VSHLLi8 DPR:$Rn, 8)>;
5856def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
5857          (VSHLLi16 DPR:$Rn, 16)>;
5858def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
5859          (VSHLLi32 DPR:$Rn, 32)>;
5860def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
5861          (VSHLLi8 DPR:$Rn, 8)>;
5862def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
5863          (VSHLLi16 DPR:$Rn, 16)>;
5864def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
5865          (VSHLLi32 DPR:$Rn, 32)>;
5866}
5868
5868//   VSHRN    : Vector Shift Right and Narrow
// The selection fragment is a truncate of an arithmetic (ARMvshrsImm) right
// shift of $Rn by $amt.
5869defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
5870                           PatFrag<(ops node:$Rn, node:$amt),
5871                                   (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;
5873
// A truncated logical (ARMvshruImm) right shift selects the same VSHRN
// instructions as the arithmetic form.
// NOTE(review): presumably sound because shr_imm8/16/32 bound the shift by the
// narrow element width, so the differing high bits are truncated away --
// confirm against the shr_imm* operand definitions.
5873let Predicates = [HasNEON] in {
5874def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
5875          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
5876def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
5877          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
5878def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
5879          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
5880}
5882
5882//   VRSHL    : Vector Rounding Shift
// Register-shift forms (N3RegVShFrm); signed/unsigned select
// int_arm_neon_vrshifts / int_arm_neon_vrshiftu.
5883defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
5884                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5885                            "vrshl", "s", int_arm_neon_vrshifts>;
5886defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
5887                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5888                            "vrshl", "u", int_arm_neon_vrshiftu>;
5889//   VRSHR    : Vector Rounding Shift Right
// Immediate forms; signed/unsigned select NEONvrshrsImm / NEONvrshruImm.
5890defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
5891                            NEONvrshrsImm>;
5892defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
5893                            NEONvrshruImm>;
5895
5895//   VRSHRN   : Vector Rounding Shift Right and Narrow
// Expands through N2VNSh_HSD and selects the NEONvrshrnImm node.
5896defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
5897                           NEONvrshrnImm>;
5899
5899//   VQSHL    : Vector Saturating Shift
// Register-shift forms (N3RegVShFrm); signed/unsigned select
// int_arm_neon_vqshifts / int_arm_neon_vqshiftu.
5900defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
5901                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5902                            "vqshl", "s", int_arm_neon_vqshifts>;
5903defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
5904                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5905                            "vqshl", "u", int_arm_neon_vqshiftu>;
5906//   VQSHL    : Vector Saturating Shift Left (Immediate)
// Immediate forms; signed/unsigned select NEONvqshlsImm / NEONvqshluImm.
5907defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
5908defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;
5909
5910//   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
// Uses the "vqshlu" mnemonic with an "s" data-type prefix and selects
// NEONvqshlsuImm.
5911defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;
5913
5913//   VQSHRN   : Vector Saturating Shift Right and Narrow
// Signed/unsigned forms select NEONvqshrnsImm / NEONvqshrnuImm.
5914defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
5915                           NEONvqshrnsImm>;
5916defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
5917                           NEONvqshrnuImm>;
5918
5919//   VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
// Uses the "vqshrun" mnemonic with an "s" data-type prefix and selects
// NEONvqshrnsuImm.
5920defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
5921                           NEONvqshrnsuImm>;
5923
5923//   VQRSHL   : Vector Saturating Rounding Shift
// Register-shift forms (N3RegVShFrm); signed/unsigned select
// int_arm_neon_vqrshifts / int_arm_neon_vqrshiftu.
5924defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
5925                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5926                            "vqrshl", "s", int_arm_neon_vqrshifts>;
5927defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
5928                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5929                            "vqrshl", "u", int_arm_neon_vqrshiftu>;
5930
5931//   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
// Signed/unsigned forms select NEONvqrshrnsImm / NEONvqrshrnuImm.
5932defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
5933                           NEONvqrshrnsImm>;
5934defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
5935                           NEONvqrshrnuImm>;
5936
5937//   VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
// Uses the "vqrshrun" mnemonic with an "s" data-type prefix and selects
// NEONvqrshrnsuImm.
5938defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
5939                           NEONvqrshrnsuImm>;
5941
5941//   VSRA     : Vector Shift Right and Accumulate
// The shift node passed to N2VShAdd_QHSD is ARMvshrsImm (signed) or
// ARMvshruImm (unsigned).
5942defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
5943defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
5944//   VRSRA    : Vector Rounding Shift Right and Accumulate
// Same multiclass with the rounding shift nodes NEONvrshrsImm / NEONvrshruImm.
5945defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
5946defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
5947
5948//   VSLI     : Vector Shift Left and Insert
// No selection node is passed here; any patterns come from N2VShInsL_QHSD.
5949defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
5950
5951//   VSRI     : Vector Shift Right and Insert
// No selection node is passed here; any patterns come from N2VShInsR_QHSD.
5952defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
5954
5955// Vector Absolute and Saturating Absolute.
5956
5956//   VABS     : Vector Absolute Value
// The integer forms expand through N2VInt_QHS and select the generic `abs`
// node; the f32/f16 defs select `fabs`. The f16 forms require HasFullFP16.
5957defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
5958                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
5959def  VABSfd   : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
5960                     "vabs", "f32",
5961                     v2f32, v2f32, fabs>;
5962def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
5963                     "vabs", "f32",
5964                      v4f32, v4f32, fabs>;
5965def  VABShd   : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
5966                     "vabs", "f16",
5967                     v4f16, v4f16, fabs>,
5968                Requires<[HasNEON, HasFullFP16]>;
5969def  VABShq   : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
5970                     "vabs", "f16",
5971                      v8f16, v8f16, fabs>,
5972                Requires<[HasNEON, HasFullFP16]>;
5974
5974//   VQABS    : Vector Saturating Absolute Value
// Integer-only; expands through N2VInt_QHS and selects int_arm_neon_vqabs.
5975defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
5976                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
5977                           int_arm_neon_vqabs>;
5979
5980// Vector Negate.
5981
// Integer negation is matched as (0 - x): a `sub` whose left operand is an
// all-zeros vector (NEONimmAllZerosV typed v2i32 for D, v4i32 for Q, then
// bitconverted to the operand type).
5981def vnegd  : PatFrag<(ops node:$in),
5982                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
5983def vnegq  : PatFrag<(ops node:$in),
5984                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
5985
// Instruction classes for integer VNEG, parameterized by the element-size
// field, mnemonic, data-type suffix and vector type. VNEGD operates on DPR
// and VNEGQ on QPR; each carries a vnegd / vnegq selection pattern.
5986class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
5987  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
5988        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
5989        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
5990class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
5991  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
5992        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
5993        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
5995
5995//   VNEG     : Vector Negate (integer)
// One def per element size (size field 0b00/0b01/0b10 for 8/16/32-bit
// elements) in both the D (VNEGD) and Q (VNEGQ) register widths.
5996def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
5997def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
5998def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
5999def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
6000def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
6001def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
6003
6003//   VNEG     : Vector Negate (floating-point)
// These defs instantiate N2V directly and select the generic `fneg` node.
// The f16 forms additionally require HasFullFP16. Note the naming asymmetry:
// the 128-bit f32 def is VNEGf32q, while the others follow VNEG{f,h}{d,q}.
6004def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
6005                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
6006                    "vneg", "f32", "$Vd, $Vm", "",
6007                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
6008def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
6009                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
6010                    "vneg", "f32", "$Vd, $Vm", "",
6011                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
6012def  VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
6013                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
6014                    "vneg", "f16", "$Vd, $Vm", "",
6015                    [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
6016                Requires<[HasNEON, HasFullFP16]>;
6017def  VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
6018                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
6019                    "vneg", "f16", "$Vd, $Vm", "",
6020                    [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
6021                Requires<[HasNEON, HasFullFP16]>;
6023
// Explicit selection patterns mapping the vnegd / vnegq fragments of each
// integer vector type onto the matching VNEGs* instruction.
6023let Predicates = [HasNEON] in {
6024def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
6025def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
6026def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
6027def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
6028def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
6029def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
6030}
6032
6032//   VQNEG    : Vector Saturating Negate
// Integer-only; expands through N2VInt_QHS and selects int_arm_neon_vqneg.
6033defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
6034                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
6035                           int_arm_neon_vqneg>;
6037
6038// Vector Bit Counting Operations.
6039
6039//   VCLS     : Vector Count Leading Sign Bits
// Selects the int_arm_neon_vcls intrinsic.
6040defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
6041                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
6042                           int_arm_neon_vcls>;
6043//   VCLZ     : Vector Count Leading Zeros
// Selects the generic `ctlz` node.
6044defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
6045                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
6046                           ctlz>;
6047//   VCNT     : Vector Count One Bits
// Defined for 8-bit elements only (v8i8 and v16i8); selects the generic
// `ctpop` node. The data-type suffix is the bare size "8".
6048def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
6049                        IIC_VCNTiD, "vcnt", "8",
6050                        v8i8, v8i8, ctpop>;
6051def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
6052                        IIC_VCNTiQ, "vcnt", "8",
6053                        v16i8, v16i8, ctpop>;
6055
6055// Vector Swap
// Both registers are read and written: the instruction has two outputs
// ($Vd, $Vm) and the constraint string ties $in1 to $Vd and $in2 to $Vm.
// No selection pattern is provided (empty pattern list) and no itinerary.
6056def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
6057                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
6058                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
6059                     []>;
6060def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
6061                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
6062                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
6063                     []>;
6065
6066// Vector Move Operations.
6067
6067//   VMOV     : Vector Move (Register)
// "vmov Vd, Vm" is accepted as an assembler alias for VORR with both source
// operands equal to Vm, for D (VORRd) and Q (VORRq) registers.
6068def : NEONInstAlias<"vmov${p} $Vd, $Vm",
6069                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
6070def : NEONInstAlias<"vmov${p} $Vd, $Vm",
6071                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
6073
6073//   VMOV     : Vector Move (Immediate)
6074
6075// Although VMOVs are not, strictly speaking, cheap, they are as expensive
6076// as their copy counterpart (VORR), so we should prefer rematerialization
6077// over splitting when it applies.
//
// Integer forms select ARMvmovImm and the f32 forms select ARMvmovFPImm, each
// taking the encoded immediate as a target immediate (timm:$SIMM). The i16
// and i32 forms leave some cmode bits open (`?`) in the template and fill
// them from the $SIMM operand in the def body.
6078let isReMaterializable = 1, isAsCheapAsAMove=1 in {
6079def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
6080                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
6081                         "vmov", "i8", "$Vd, $SIMM", "",
6082                         [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
6083def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
6084                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
6085                         "vmov", "i8", "$Vd, $SIMM", "",
6086                         [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;
6087
6088def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
6089                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
6090                         "vmov", "i16", "$Vd, $SIMM", "",
6091                         [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
  // Encoding bit 9 is taken from bit 9 of the immediate operand.
6092  let Inst{9} = SIMM{9};
6093}
6094
6095def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
6096                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
6097                         "vmov", "i16", "$Vd, $SIMM", "",
6098                         [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
 // Encoding bit 9 is taken from bit 9 of the immediate operand.
6099 let Inst{9} = SIMM{9};
6100}
6101
6102def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
6103                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
6104                         "vmov", "i32", "$Vd, $SIMM", "",
6105                         [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
  // Encoding bits 11-8 are taken from bits 11-8 of the immediate operand.
6106  let Inst{11-8} = SIMM{11-8};
6107}
6108
6109def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
6110                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
6111                         "vmov", "i32", "$Vd, $SIMM", "",
6112                         [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
  // Encoding bits 11-8 are taken from bits 11-8 of the immediate operand.
6113  let Inst{11-8} = SIMM{11-8};
6114}
6115
6116def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
6117                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
6118                         "vmov", "i64", "$Vd, $SIMM", "",
6119                         [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
6120def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
6121                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
6122                         "vmov", "i64", "$Vd, $SIMM", "",
6123                         [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;
6124
6125def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
6126                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
6127                         "vmov", "f32", "$Vd, $SIMM", "",
6128                         [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
6129def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
6130                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
6131                         "vmov", "f32", "$Vd, $SIMM", "",
6132                         [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
6133} // isReMaterializable, isAsCheapAsAMove
6135
6135// Add support for the byte-replication feature, for GAS compatibility.
// `To` is the element type named in the mnemonic suffix (".i" # To.Size); the
// aliases map such an instruction onto the i8 VMOV forms (VMOVv8i8 for D
// registers, VMOVv16i8 for Q registers). Instantiated below for i16, i32
// and i64.
6136multiclass NEONImmReplicateI8InstAlias<ValueType To> {
6137  // E.g. instructions below:
6138  // "vmov.i32 d0, #0xffffffff"
6139  // "vmov.i32 d0, #0xabababab"
6140  // "vmov.i16 d0, #0xabab"
6141  // are incorrect as written, but we can still handle such cases.
6142  // For the last two instructions, for example, it should emit:
6143  // "vmov.i8 d0, #0xab"
6144  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6145                      (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
6146  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6147                      (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
6148  // Also add the same support for VMVN instructions. So the instruction:
6149  // "vmvn.i32 d0, #0xabababab"
6150  // actually means:
6151  // "vmov.i8 d0, #0x54"
6152  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6153                      (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
6154  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6155                      (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
6156}
6157
6158defm : NEONImmReplicateI8InstAlias<i16>;
6159defm : NEONImmReplicateI8InstAlias<i32>;
6160defm : NEONImmReplicateI8InstAlias<i64>;
6162
6162// Similar to above for types other than i8, e.g.:
6163// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
6164// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
6165// In this case we do not canonicalize VMVN to VMOV
// Parameters: `From` is the narrower element type actually encoded, `To` the
// wider type named in the mnemonic suffix; V8/V16 are the D/Q VMOV
// instructions and NV8/NV16 the D/Q VMVN instructions to alias to. The vmvn
// aliases use the same nImmVMOVIReplicate operand as the vmov aliases and
// target the VMVN instructions directly.
6166multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
6167                                     NeonI NV8, NeonI NV16, ValueType To> {
6168  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6169                      (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6170  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6171                      (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6172  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6173                      (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6174  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6175                      (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6176}
6177
// i16 replicated into i32 and i64, and i32 replicated into i64.
6178defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
6179                                      VMVNv4i16, VMVNv8i16, i32>;
6180defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
6181                                      VMVNv4i16, VMVNv8i16, i64>;
6182defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
6183                                      VMVNv2i32, VMVNv4i32, i64>;
6184// TODO: add "VMOV <-> VMVN" conversion for cases like
6185// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
6186// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0x5500"
6188
6189// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
6190// require zero cycles to execute so they should be used wherever possible for
6191// setting a register to zero.
6192
6193// Even without these pseudo-insts we would probably end up with the correct
6194// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
6195// since they are sometimes rather expensive (in general).
6196
// Zeroing pseudo-instructions, available only with HasZCZ. Each matches an
// all-zeros vector (NEONimmAllZerosV) and expands to VMOVv2i32 / VMOVv4i32
// with immediate 0. AddedComplexity = 50 raises the priority of these
// patterns during selection.
// NOTE(review): `(ops 14, zero_reg)` is presumably the "always" predicate
// operand pair -- confirm against the ARM condition-code encoding.
6196let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
6197  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
6198                               [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
6199                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
6200               Requires<[HasZCZ]>;
6201  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
6202                               [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
6203                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
6204               Requires<[HasZCZ]>;
6205}
6207
6208//   VMOV     : Vector Get Lane (move scalar to ARM core register)
6209
// Get-lane instructions: move one lane of a D register ($V) into a core
// register ($R).  The s8/s16 forms select ARMvgetlanes, the u8/u16 forms
// select ARMvgetlaneu, and the 32-bit form selects a plain extractelt.
// Each def scatters the bits of the lane index into the encoding.

// 8-bit signed: 3-bit lane index in Inst{21} and Inst{6-5}.
6210def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
6211                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6212                          IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
6213                          [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
6214                                           imm:$lane))]> {
6215  let Inst{21}  = lane{2};
6216  let Inst{6-5} = lane{1-0};
6217}
// 16-bit signed: 2-bit lane index in Inst{21} and Inst{6}.
6218def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
6219                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6220                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
6221                          [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
6222                                           imm:$lane))]> {
6223  let Inst{21} = lane{1};
6224  let Inst{6}  = lane{0};
6225}
// 8-bit unsigned: same lane layout as the signed 8-bit form.
6226def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
6227                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6228                          IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
6229                          [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
6230                                           imm:$lane))]> {
6231  let Inst{21}  = lane{2};
6232  let Inst{6-5} = lane{1-0};
6233}
// 16-bit unsigned: same lane layout as the signed 16-bit form.
6234def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
6235                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6236                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
6237                          [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
6238                                           imm:$lane))]> {
6239  let Inst{21} = lane{1};
6240  let Inst{6}  = lane{0};
6241}
// 32-bit: single lane bit in Inst{21}.  Only selected when the subtarget has
// FP registers and HasFastVGETLNi32.
6242def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
6243                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
6244                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
6245                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
6246                                           imm:$lane))]>,
6247                Requires<[HasFPRegs, HasFastVGETLNi32]> {
6248  let Inst{21} = lane{0};
6249}
// Get-lane from a Q register: extract a D sub-register and apply the D-register
// get-lane instruction to it.  DSubReg_*_reg / SubReg_*_lane transform the
// Q-register lane index (presumably into the sub-register index and the lane
// within that sub-register -- the transforms are defined elsewhere).
6250let Predicates = [HasNEON] in {
6251// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
6252def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
6253          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6254                           (DSubReg_i8_reg imm:$lane))),
6255                     (SubReg_i8_lane imm:$lane))>;
6256def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
6257          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6258                             (DSubReg_i16_reg imm:$lane))),
6259                     (SubReg_i16_lane imm:$lane))>;
6260def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
6261          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6262                           (DSubReg_i8_reg imm:$lane))),
6263                     (SubReg_i8_lane imm:$lane))>;
6264def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
6265          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6266                             (DSubReg_i16_reg imm:$lane))),
6267                     (SubReg_i16_lane imm:$lane))>;
6268}
// 32-bit integer lane extraction.
// With HasFastVGETLNi32, a Q-register extract uses VGETLNi32 on the D
// sub-register that holds the lane.
6269def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6270          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
6271                             (DSubReg_i32_reg imm:$lane))),
6272                     (SubReg_i32_lane imm:$lane))>,
6273      Requires<[HasNEON, HasFastVGETLNi32]>;
// With HasSlowVGETLNi32, the lane is instead taken as an S sub-register of the
// D or Q register and copied to the GPR register class.
6274def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
6275          (COPY_TO_REGCLASS
6276            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6277      Requires<[HasNEON, HasSlowVGETLNi32]>;
6278def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6279          (COPY_TO_REGCLASS
6280            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6281      Requires<[HasNEON, HasSlowVGETLNi32]>;
// Floating-point lane extraction needs no instruction: the element is a
// sub-register.  For f32 the source is first copied to the DPR_VFP2 / QPR_VFP2
// register class before the S sub-register is taken; for f64 the D
// sub-register of the Q register is taken directly.  The v2i64 variant is
// left commented out.
6282let Predicates = [HasNEON] in {
6283def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
6284          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
6285                          (SSubReg_f32_reg imm:$src2))>;
6286def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
6287          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
6288                          (SSubReg_f32_reg imm:$src2))>;
6289//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
6290//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6291def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
6292          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6293}
6294
// i32 immediate predicates that test the low bit of the value; they split the
// f16 lane-extraction patterns into even and odd lane indices.
6295def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
6296def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;
6297
// f16 lane extraction.  Even lanes are selected as a plain S sub-register
// extract.  Odd lanes additionally pass the extracted S sub-register through
// VMOVH and copy the result to the HPR register class.
6298let Predicates = [HasNEON] in {
6299def : Pat<(extractelt (v4f16 DPR:$src), imm_even:$lane),
6300            (EXTRACT_SUBREG
6301                (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
6302                (SSubReg_f16_reg imm_even:$lane))>;
6303
6304def : Pat<(extractelt (v4f16 DPR:$src), imm_odd:$lane),
6305            (COPY_TO_REGCLASS
6306              (VMOVH (EXTRACT_SUBREG
6307                  (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
6308                  (SSubReg_f16_reg imm_odd:$lane))),
6309              HPR)>;
6310
6311def : Pat<(extractelt (v8f16 QPR:$src), imm_even:$lane),
6312            (EXTRACT_SUBREG
6313                (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
6314                (SSubReg_f16_reg imm_even:$lane))>;
6315
6316def : Pat<(extractelt (v8f16 QPR:$src), imm_odd:$lane),
6317            (COPY_TO_REGCLASS
6318              (VMOVH (EXTRACT_SUBREG
6319                  (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
6320                  (SSubReg_f16_reg imm_odd:$lane))),
6321              HPR)>;
6322}
6323
6324//   VMOV     : Vector Set Lane (move ARM core register to scalar)
6325
// Set-lane instructions: write core register $R into one lane of a D register.
// The input vector $src1 is tied to the output $V, so the other lanes are
// carried through unchanged.  Lane-index bits are placed in the same encoding
// positions as in the corresponding get-lane instructions.
6326let Constraints = "$src1 = $V" in {
6327def VSETLNi8  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
6328                          (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
6329                          IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
6330                          [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
6331                                           GPR:$R, imm:$lane))]> {
6332  let Inst{21}  = lane{2};
6333  let Inst{6-5} = lane{1-0};
6334}
6335def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
6336                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
6337                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
6338                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
6339                                           GPR:$R, imm:$lane))]> {
6340  let Inst{21} = lane{1};
6341  let Inst{6}  = lane{0};
6342}
6343def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
6344                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
6345                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
6346                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
6347                                           GPR:$R, imm:$lane))]>,
6348                Requires<[HasVFP2]> {
6349  let Inst{21} = lane{0};
6350  // This instruction is equivalent to
6351  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
6352  let isInsertSubreg = 1;
6353}
6354}
6355
// Selection patterns for inserting a scalar into a vector and for
// scalar_to_vector, all gated on HasNEON.
6356let Predicates = [HasNEON] in {
// Integer insert into a Q register: extract the D sub-register that holds the
// lane, apply the D-register VSETLN to it, and insert the result back.
6357def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
6358          (v16i8 (INSERT_SUBREG QPR:$src1,
6359                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
6360                                   (DSubReg_i8_reg imm:$lane))),
6361                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
6362                  (DSubReg_i8_reg imm:$lane)))>;
6363def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
6364          (v8i16 (INSERT_SUBREG QPR:$src1,
6365                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
6366                                     (DSubReg_i16_reg imm:$lane))),
6367                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
6368                  (DSubReg_i16_reg imm:$lane)))>;
6369def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
6370          (v4i32 (INSERT_SUBREG QPR:$src1,
6371                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
6372                                     (DSubReg_i32_reg imm:$lane))),
6373                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
6374                  (DSubReg_i32_reg imm:$lane)))>;
6375
// f32 insert: the scalar already lives in an S register, so it is inserted as
// a sub-register after copying the vector to the *_VFP2 register class.
6376def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
6377          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
6378                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
6379def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
6380          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
6381                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
6382
// f16 insert: the half value is passed through VMOVRH and then written with
// the 16-bit integer set-lane instruction.
6383def : Pat<(insertelt (v4f16 DPR:$src1), HPR:$src2, imm:$lane),
6384          (v4f16 (VSETLNi16 DPR:$src1, (VMOVRH $src2), imm:$lane))>;
6385def : Pat<(insertelt (v8f16 QPR:$src1), HPR:$src2, imm:$lane),
6386          (v8f16 (INSERT_SUBREG QPR:$src1,
6387                   (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
6388                                      (DSubReg_i16_reg imm:$lane))),
6389                             (VMOVRH $src2), (SubReg_i16_lane imm:$lane))),
6390                   (DSubReg_i16_reg imm:$lane)))>;
6391
// f64 insert into a Q register is a D sub-register insert (the v2i64 form is
// left commented out).
6392//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6393//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
6394def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6395          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
6396
// Floating-point scalar_to_vector: place the scalar in sub-register 0 of an
// otherwise undefined vector.
6397def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
6398          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6399def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
6400          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
6401def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
6402          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6403
// Integer scalar_to_vector into a D register: set lane 0 of an undefined
// vector.
6404def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
6405          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6406def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
6407          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6408def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
6409          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6410
// Integer scalar_to_vector into a Q register: build the D-register value as
// above and insert it as dsub_0 of an undefined Q register.
6411def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
6412          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6413                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6414                         dsub_0)>;
6415def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
6416          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6417                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6418                         dsub_0)>;
6419def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
6420          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6421                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6422                         dsub_0)>;
6423}
6424
6425//   VDUP     : Vector Duplicate (from ARM core register to all elements)
6426
// Instruction classes for "vdup" from a core register ($R) into every element
// of a D (VDUPD) or Q (VDUPQ) register.  Dt is the data-type suffix and Ty the
// result vector type matched by the ARMvdup selection pattern; opcod1/opcod3
// are forwarded to NVDup.
6427class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6428  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
6429          IIC_VMOVIS, "vdup", Dt, "$V, $R",
6430          [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
6431class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6432  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
6433          IIC_VMOVIS, "vdup", Dt, "$V, $R",
6434          [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
6435
// Concrete vdup-from-GPR instructions for 8/16/32-bit elements in D and Q
// form.  Only the 32-bit D form carries an explicit HasFastVDUP32 requirement.
6436def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
6437def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
6438def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
6439                Requires<[HasNEON, HasFastVDUP32]>;
6440def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
6441def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
6442def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;
6443
6444// ARMvdup patterns for uarchs with fast VDUP.32.
// An f32 splat whose scalar is a bitcast from a GPR is selected directly on
// the GPR, skipping the bitcast.
6445def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
6446      Requires<[HasNEON,HasFastVDUP32]>;
6447def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
6448      Requires<[HasNEON]>;
6449
6450// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
// VMOVDRR receives the same GPR for both of its source operands.
6451def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
6452      Requires<[HasNEON,HasSlowVDUP32]>;
6453def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
6454      Requires<[HasNEON,HasSlowVDUP32]>;
6455
6456//   VDUP     : Vector Duplicate Lane (from scalar to all elements)
6457
// D-register "vdup" from a lane: replicates lane $lane of D register $Vm into
// every element of $Vd.  Ty is both the source and result vector type of the
// ARMvduplane pattern; IdxTy is the operand type of the lane index.
6458class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
6459              ValueType Ty, Operand IdxTy>
6460  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6461              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
6462              [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;
6463
// Q-register "vdup" from a lane: replicates lane $lane of D register $Vm into
// every element of Q register $Vd.  ResTy is the Q result type, OpTy the D
// source type and IdxTy the operand type of the lane index.
// The selection pattern uses IdxTy for the lane as well, so the lane leaf in
// the pattern agrees with the operand type declared in the ins list for every
// element size (it was previously hard-coded to VectorIndex32).
6464class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
6465              ValueType ResTy, ValueType OpTy, Operand IdxTy>
6466  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6467              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
6468              [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
6469                                      IdxTy:$lane)))]>;
6470
6471// Inst{19-16} is partially specified depending on the element size.
6472
// Concrete vdup-lane instructions.  For each element size the low bits of
// op19_16 are fixed and the remaining high bits of Inst{19-16} are filled from
// the lane index: 3 lane bits for 8-bit, 2 for 16-bit, 1 for 32-bit elements.
6473def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
6474  bits<3> lane;
6475  let Inst{19-17} = lane{2-0};
6476}
6477def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
6478  bits<2> lane;
6479  let Inst{19-18} = lane{1-0};
6480}
6481def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
6482  bits<1> lane;
6483  let Inst{19} = lane{0};
6484}
6485def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
6486  bits<3> lane;
6487  let Inst{19-17} = lane{2-0};
6488}
6489def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
6490  bits<2> lane;
6491  let Inst{19-18} = lane{1-0};
6492}
6493def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
6494  bits<1> lane;
6495  let Inst{19} = lane{0};
6496}
6497
// Additional vdup-lane / vdup selection patterns, all gated on HasNEON.
6498let Predicates = [HasNEON] in {
// v4f16 has 16-bit elements, so it must use the 16-bit lane instruction
// (VDUPLN16d), matching the v8f16 pattern below which uses VDUPLN16q.  The
// 32-bit form would replicate a pair of halves and only encodes one lane bit.
6499def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
6500          (VDUPLN16d DPR:$Vm, imm:$lane)>;
6501
// f32 vectors reuse the 32-bit integer lane instructions.
6502def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
6503          (VDUPLN32d DPR:$Vm, imm:$lane)>;
6504
6505def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
6506          (VDUPLN32q DPR:$Vm, imm:$lane)>;
6507
// Lane dup whose source is a Q register: extract the D sub-register selected
// by the lane index and use the transformed lane index with the Q-result
// instruction.
6508def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
6509          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
6510                                  (DSubReg_i8_reg imm:$lane))),
6511                           (SubReg_i8_lane imm:$lane)))>;
6512def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
6513          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
6514                                    (DSubReg_i16_reg imm:$lane))),
6515                            (SubReg_i16_lane imm:$lane)))>;
6516def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
6517          (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
6518                                    (DSubReg_i16_reg imm:$lane))),
6519                            (SubReg_i16_lane imm:$lane)))>;
6520def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
6521          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
6522                                    (DSubReg_i32_reg imm:$lane))),
6523                            (SubReg_i32_lane imm:$lane)))>;
6524def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
6525          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
6526                                   (DSubReg_i32_reg imm:$lane))),
6527                           (SubReg_i32_lane imm:$lane)))>;
6528
// Splat of a floating-point scalar register: insert the scalar as ssub_0 of an
// undefined D register, then duplicate lane 0.
6529def : Pat<(v4f16 (ARMvdup HPR:$src)),
6530          (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
6531                             HPR:$src, ssub_0), (i32 0)))>;
6532def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
6533          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6534                             SPR:$src, ssub_0), (i32 0)))>;
6535def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
6536          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6537                             SPR:$src, ssub_0), (i32 0)))>;
6538def : Pat<(v8f16 (ARMvdup HPR:$src)),
6539          (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
6540                             HPR:$src, ssub_0), (i32 0)))>;
6541}
6542
6543//   VMOVN    : Vector Narrowing Move
// VMOVN selects the generic trunc node.
6544defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
6545                         "vmovn", "i", trunc>;
6546//   VQMOVN   : Vector Saturating Narrowing Move
// The signed, unsigned and signed-to-unsigned ("vqmovun") forms select the
// int_arm_neon_vqmovns / vqmovnu / vqmovnsu intrinsics respectively.
6547defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
6548                            "vqmovn", "s", int_arm_neon_vqmovns>;
6549defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
6550                            "vqmovn", "u", int_arm_neon_vqmovnu>;
6551defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
6552                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
6553//   VMOVL    : Vector Lengthening Move
// The signed form selects sext and the unsigned form selects zext.
6554defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
6555defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
6556
// anyext leaves the new high bits unspecified, so it is selected as the
// unsigned lengthening move.
6557let Predicates = [HasNEON] in {
6558def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
6559def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
6560def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
6561}
6562
6563// Vector Conversions.
6564
6565//   VCVT     : Vector Convert Between Floating-Point and Integers
// Naming: f2s / f2u = f32 to signed / unsigned i32, s2f / u2f = the reverse;
// the h variants are the f16 <-> i16 equivalents.  A trailing d or q marks the
// D- or Q-register form.  Each def selects fp_to_sint, fp_to_uint, sint_to_fp
// or uint_to_fp as shown.

// f32 <-> i32, D registers.
6566def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6567                     v2i32, v2f32, fp_to_sint>;
6568def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6569                     v2i32, v2f32, fp_to_uint>;
6570def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6571                     v2f32, v2i32, sint_to_fp>;
6572def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6573                     v2f32, v2i32, uint_to_fp>;
6574
// f32 <-> i32, Q registers.
6575def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6576                     v4i32, v4f32, fp_to_sint>;
6577def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6578                     v4i32, v4f32, fp_to_uint>;
6579def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6580                     v4f32, v4i32, sint_to_fp>;
6581def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6582                     v4f32, v4i32, uint_to_fp>;
6583
// f16 <-> i16, D registers; these additionally require HasFullFP16.
6584def  VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6585                     v4i16, v4f16, fp_to_sint>,
6586                Requires<[HasNEON, HasFullFP16]>;
6587def  VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6588                     v4i16, v4f16, fp_to_uint>,
6589                Requires<[HasNEON, HasFullFP16]>;
6590def  VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6591                     v4f16, v4i16, sint_to_fp>,
6592                Requires<[HasNEON, HasFullFP16]>;
6593def  VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6594                     v4f16, v4i16, uint_to_fp>,
6595                Requires<[HasNEON, HasFullFP16]>;
6596
// f16 <-> i16, Q registers; these additionally require HasFullFP16.
6597def  VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6598                     v8i16, v8f16, fp_to_sint>,
6599                Requires<[HasNEON, HasFullFP16]>;
6600def  VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6601                     v8i16, v8f16, fp_to_uint>,
6602                Requires<[HasNEON, HasFullFP16]>;
6603def  VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6604                     v8f16, v8i16, sint_to_fp>,
6605                Requires<[HasNEON, HasFullFP16]>;
6606def  VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6607                     v8f16, v8i16, uint_to_fp>,
6608                Requires<[HasNEON, HasFullFP16]>;
6609
6610// VCVT{A, N, P, M}
// Multiclass for the "vcvt<op>" float-to-integer conversions.
//   op     - suffix appended to "vcvt" to form the mnemonic.
//   op10_8 - encoding bits forwarded to N2VDIntnp / N2VQIntnp.
//   IntS   - pattern operator for the signed-result forms.
//   IntU   - pattern operator for the unsigned-result forms.
// Eight records are produced: {S,U} x {D,Q} x {f (f32), h (f16)}.  All require
// HasV8 and HasNEON; the f16 forms additionally require HasFullFP16.
6611multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
6612                    SDPatternOperator IntU> {
6613  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
6614    def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6615                       "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
6616    def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6617                       "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
6618    def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6619                       "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
6620    def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6621                       "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
6622    def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6623                       "s16.f16", v4i16, v4f16, IntS>,
6624              Requires<[HasV8, HasNEON, HasFullFP16]>;
6625    def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6626                       "s16.f16", v8i16, v8f16, IntS>,
6627              Requires<[HasV8, HasNEON, HasFullFP16]>;
6628    def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6629                       "u16.f16", v4i16, v4f16, IntU>,
6630              Requires<[HasV8, HasNEON, HasFullFP16]>;
6631    def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6632                       "u16.f16", v8i16, v8f16, IntU>,
6633              Requires<[HasV8, HasNEON, HasFullFP16]>;
6634  }
6635}
6636
// Instantiate VCVT_FPI for the a/n/p/m mnemonic suffixes, each bound to its
// signed and unsigned int_arm_neon_vcvt* intrinsic.
6637defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
6638defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
6639defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
6640defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
6641
6642//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
// D-register fixed-point conversions, all decoded by DecodeVCVTD.  The
// f2x* / h2x* defs select the int_arm_neon_vcvtfp2fx{s,u} intrinsics and the
// x*2f / x*2h defs select int_arm_neon_vcvtfx{s,u}2fp.  The f16 forms are
// additionally gated on HasFullFP16.
6643let DecoderMethod = "DecodeVCVTD" in {
6644def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6645                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
6646def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6647                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
6648def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6649                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
6650def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6651                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
6652let Predicates = [HasNEON, HasFullFP16] in {
6653def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6654                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
6655def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6656                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
6657def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6658                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
6659def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6660                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
6661} // Predicates = [HasNEON, HasFullFP16]
6662}
6663
// Q-register fixed-point conversions, all decoded by DecodeVCVTQ.  These
// mirror the D-register definitions with Q-sized vector types; the f16 forms
// are additionally gated on HasFullFP16.
6664let DecoderMethod = "DecodeVCVTQ" in {
6665def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6666                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
6667def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6668                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
6669def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6670                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
6671def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6672                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
6673let Predicates = [HasNEON, HasFullFP16] in {
6674def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6675                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
6676def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6677                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
6678def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6679                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
6680def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6681                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
6682} // Predicates = [HasNEON, HasFullFP16]
6683}
6684
// Assembler aliases: a "vcvt" written with a literal "#0" third operand is
// mapped to the corresponding plain integer <-> floating-point VCVT
// instruction, which has no such operand.

// f32 <-> i32, D registers.
6685def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
6686                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6687def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
6688                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6689def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
6690                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6691def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
6692                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6693
// f32 <-> i32, Q registers.
6694def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
6695                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6696def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
6697                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6698def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
6699                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6700def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
6701                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6702
// f16 <-> i16, D registers.
6703def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
6704                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6705def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
6706                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6707def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
6708                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6709def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
6710                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6711
// f16 <-> i16, Q registers.
6712def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
6713                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6714def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
6715                    (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6716def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
6717                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6718def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
6719                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6720
6721
6722//   VCVT     : Vector Convert Between Half-Precision and Single-Precision.
// VCVTf2h narrows v4f32 to half precision and VCVTh2f widens back to v4f32.
// The half-precision vector is typed as v4i16 in both patterns.  Both require
// HasNEON and HasFP16.
6723def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
6724                        IIC_VUNAQ, "vcvt", "f16.f32",
6725                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
6726                Requires<[HasNEON, HasFP16]>;
6727def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
6728                        IIC_VUNAQ, "vcvt", "f32.f16",
6729                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
6730                Requires<[HasNEON, HasFP16]>;
6731
6732// Vector Reverse.
6733
6734//   VREV64   : Vector Reverse elements within 64-bit doublewords
6735
// Instruction classes for vrev64 on D (VREV64D) and Q (VREV64Q) registers.
// op19_18 is forwarded to N2V as the element-size field, Dt is the data-type
// suffix and Ty the vector type matched by the ARMvrev64 pattern.
6736class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6737  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
6738        (ins DPR:$Vm), IIC_VMOVD,
6739        OpcodeStr, Dt, "$Vd, $Vm", "",
6740        [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
6741class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6742  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
6743        (ins QPR:$Vm), IIC_VMOVQ,
6744        OpcodeStr, Dt, "$Vd, $Vm", "",
6745        [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;
6746
// vrev64 instructions for 8/16/32-bit elements.  The floating-point vector
// types reuse the integer instruction of the same element width.
6747def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
6748def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
6749def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
6750let Predicates = [HasNEON] in {
6751def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
6752}
6753
6754def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
6755def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
6756def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
6757
6758let Predicates = [HasNEON] in {
6759def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
6760def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
6761def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
6762}
6763
6764//   VREV32   : Vector Reverse elements within 32-bit words
6765
// Instruction classes and defs for vrev32.  Same shape as the vrev64 classes
// but with a different opcode field (0b00001) and matching ARMvrev32; only
// 8- and 16-bit element sizes are instantiated.
6766class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6767  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
6768        (ins DPR:$Vm), IIC_VMOVD,
6769        OpcodeStr, Dt, "$Vd, $Vm", "",
6770        [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
6771class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6772  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
6773        (ins QPR:$Vm), IIC_VMOVQ,
6774        OpcodeStr, Dt, "$Vd, $Vm", "",
6775        [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;
6776
6777def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
6778def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
6779
6780def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
6781def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
6782
6783//   VREV16   : Vector Reverse elements within 16-bit halfwords
6784
// Instruction classes and defs for vrev16 (opcode field 0b00010, matching
// ARMvrev16); only the 8-bit element size is instantiated.
6785class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6786  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
6787        (ins DPR:$Vm), IIC_VMOVD,
6788        OpcodeStr, Dt, "$Vd, $Vm", "",
6789        [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
6790class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6791  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
6792        (ins QPR:$Vm), IIC_VMOVQ,
6793        OpcodeStr, Dt, "$Vd, $Vm", "",
6794        [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;
6795
6796def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
6797def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;
6798
6799// Other Vector Shuffles.
6800
6801//  Aligned extractions: really just dropping registers
6802
// Pattern class: a vector_extract_subvec of a Q register is selected as an
// EXTRACT_SUBREG, i.e. no instruction.  LaneCVT transforms the start index
// into the sub-register operand (presumably the D sub-register index -- the
// transforms are defined elsewhere).
6803class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
6804      : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
6805             (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
6806        Requires<[HasNEON]>;
6807
// One instantiation per Q-to-D vector type pair; each picks the transform for
// its element width.
6808def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
6809
6810def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
6811
6812def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
6813
6814def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
6815
6816def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
6817
6818def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16
6819
6820//   VEXT     : Vector Extract
6821
6822
6823// All of these have a two-operand InstAlias.
// Both classes carry TwoOperandAliasConstraint = "$Vn = $Vd" (see the comment
// above about the two-operand InstAlias).
6824let TwoOperandAliasConstraint = "$Vn = $Vd" in {
// VEXTd: D-register VEXT. immTy is the operand class used for $index, so each
// instantiation can restrict the accepted index range to its element size.
// The class default places the 3-bit index in Inst{10-8} and clears Inst{11};
// instantiations may override these bits.
6825class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6826  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
6827        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
6828        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6829        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
6830                                     (Ty DPR:$Vm), imm:$index)))]> {
6831  bits<3> index;
6832  let Inst{11} = 0b0;
6833  let Inst{10-8} = index{2-0};
6834}
6835
// VEXTq: Q-register VEXT. The $index operand uses immTy, exactly like VEXTd,
// so that the per-element-size range passed by each instantiation (imm0_15,
// imm0_7, imm0_3, imm0_1) is actually applied to the operand. Previously the
// operand was hard-coded to imm0_15 and immTy was ignored, so the wider
// element forms, which encode only the low bits of the index, could accept an
// index outside their range.
// The class default places the 4-bit index in Inst{11-8}; instantiations may
// override these bits.
6836class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6837  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
6838        (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
6839        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6840        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
6841                                     (Ty QPR:$Vm), imm:$index)))]> {
6842  bits<4> index;
6843  let Inst{11-8} = index{3-0};
6844}
6845}
6846
// VEXT instantiations. Each def passes an index operand class sized to its
// element count and overrides where $index lands in the encoding: the index
// occupies the high bits of the field and the remaining low bits are forced to
// zero for element sizes wider than 8 bits.
// NOTE(review): this looks like the encoded immediate being a byte offset;
// confirm against the ARM ARM.
6847def VEXTd8  : VEXTd<"vext", "8",  v8i8, imm0_7> {
6848  let Inst{10-8} = index{2-0};
6849}
6850def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
6851  let Inst{10-9} = index{1-0};
6852  let Inst{8}    = 0b0;
6853}
// v4f16 NEONvext is selected to the 16-bit integer instruction.
6854let Predicates = [HasNEON] in {
6855def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
6856          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
6857}
6858
6859def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
6860  let Inst{10}     = index{0};
6861  let Inst{9-8}    = 0b00;
6862}
// v2f32 NEONvext is selected to the 32-bit integer instruction.
6863let Predicates = [HasNEON] in {
6864def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
6865          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
6866}
6867
// Q-register forms: the index field is Inst{11-8}.
6868def VEXTq8  : VEXTq<"vext", "8",  v16i8, imm0_15> {
6869  let Inst{11-8} = index{3-0};
6870}
6871def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
6872  let Inst{11-9} = index{2-0};
6873  let Inst{8}    = 0b0;
6874}
// v8f16 NEONvext is selected to the 16-bit integer instruction.
6875let Predicates = [HasNEON] in {
6876def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
6877          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
6878}
6879
6880def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
6881  let Inst{11-10} = index{1-0};
6882  let Inst{9-8}    = 0b00;
6883}
6884def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
6885  let Inst{11} = index{0};
6886  let Inst{10-8}    = 0b000;
6887}
// v4f32 NEONvext is selected to the 32-bit integer instruction.
6888let Predicates = [HasNEON] in {
6889def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
6890          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
6891}
6892
6893//   VTRN     : Vector Transpose
6894
// VTRN instantiations. N2VDShuffle / N2VQShuffle are defined outside this
// section. The first argument follows the element size (0b00 = "8",
// 0b01 = "16", 0b10 = "32"); the second is 0b00001 for every VTRN.
6895def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
6896def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
6897def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
6898
6899def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
6900def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
6901def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
6902
6903//   VUZP     : Vector Unzip (Deinterleave)
6904
// Second argument is 0b00010 for every VUZP. No VUZPd32 instruction is
// defined; the 32-bit D-register mnemonic is accepted through the alias below.
6905def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
6906def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
6907// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
6908def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
6909                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
6910
// The Q-register VUZP forms use the IIC_VPERMQ3 itinerary (VTRN uses
// IIC_VPERMQ).
6911def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
6912def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
6913def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
6914
6915//   VZIP     : Vector Zip (Interleave)
6916
// Second argument is 0b00011 for every VZIP. As with VUZP, there is no
// VZIPd32 instruction; the mnemonic is an alias for VTRNd32.
6917def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
6918def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
6919// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
6920def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
6921                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
6922
6923def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
6924def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
6925def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
6926
6927// Vector Table Lookup and Table Extension.
6928
6929//   VTBL     : Vector Table Lookup
// Every record inside this let, the VTBL/VTBX instructions and their pseudos,
// uses the custom decoder method DecodeTBLInstruction.
6930let DecoderMethod = "DecodeTBLInstruction" in {
// VTBL1 is the only table lookup with an inline selection pattern: NEONvtbl1
// on a one-register list and an index vector, producing v8i8.
6931def  VTBL1
6932  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
6933        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
6934        "vtbl", "8", "$Vd, $Vn, $Vm", "",
6935        [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
6936
// VTBL2..VTBL4 take multi-register table lists, have an empty pattern list
// and set hasExtraSrcRegAllocReq. The fourth N3V argument steps
// 0b1000 -> 0b1011 with the number of table registers.
6937let hasExtraSrcRegAllocReq = 1 in {
6938def  VTBL2
6939  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
6940        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
6941        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6942def  VTBL3
6943  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
6944        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
6945        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6946def  VTBL4
6947  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
6948        (ins VecListFourD:$Vn, DPR:$Vm),
6949        NVTBLFrm, IIC_VTB4,
6950        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6951} // hasExtraSrcRegAllocReq = 1
6952
// Pseudos taking the whole table as a single QQPR operand.
// NOTE(review): presumably expanded to VTBL3/VTBL4 after selection; the
// expansion is not visible in this section.
6953def  VTBL3Pseudo
6954  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
6955def  VTBL4Pseudo
6956  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
6957
6958//   VTBX     : Vector Table Extension
// The VTBX forms mirror VTBL with the fifth N3V argument set to 1, plus an
// extra $orig input tied to the destination ("$orig = $Vd").
6959def  VTBX1
6960  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
6961        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
6962        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
6963        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
6964                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
6965let hasExtraSrcRegAllocReq = 1 in {
6966def  VTBX2
6967  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
6968        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
6969        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
6970def  VTBX3
6971  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
6972        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
6973        NVTBLFrm, IIC_VTBX3,
6974        "vtbx", "8", "$Vd, $Vn, $Vm",
6975        "$orig = $Vd", []>;
6976def  VTBX4
6977  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
6978        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
6979        "vtbx", "8", "$Vd, $Vn, $Vm",
6980        "$orig = $Vd", []>;
6981} // hasExtraSrcRegAllocReq = 1
6982
// QQPR-table pseudos for VTBX3/VTBX4; $orig is tied to $dst.
6983def  VTBX3Pseudo
6984  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
6985                IIC_VTBX3, "$orig = $dst", []>;
6986def  VTBX4Pseudo
6987  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
6988                IIC_VTBX4, "$orig = $dst", []>;
6989} // DecoderMethod = "DecodeTBLInstruction"
6990
// Selection patterns for the multi-register table lookups (HasNEON only).
// The separate v8i8 table operands are packed into one register tuple with
// REG_SEQUENCE before being handed to the instruction or pseudo.
6991let Predicates = [HasNEON] in {
// Two-register tables: packed into a DPair and passed to VTBL2 / VTBX2
// directly.
6992def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
6993          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
6994                                            v8i8:$Vn1, dsub_1),
6995                       v8i8:$Vm))>;
6996def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
6997                                    v8i8:$Vm)),
6998          (v8i8 (VTBX2 v8i8:$orig,
6999                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
7000                                            v8i8:$Vn1, dsub_1),
7001                       v8i8:$Vm))>;
7002
// Three-register tables: packed into a QQPR for the pseudo. Only dsub_0..2
// hold table data; dsub_3 is filled with IMPLICIT_DEF.
7003def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
7004                                    v8i8:$Vn2, v8i8:$Vm)),
7005          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7006                                                 v8i8:$Vn1, dsub_1,
7007                                                 v8i8:$Vn2, dsub_2,
7008                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
7009                             v8i8:$Vm))>;
7010def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
7011                                    v8i8:$Vn2, v8i8:$Vm)),
7012          (v8i8 (VTBX3Pseudo v8i8:$orig,
7013                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7014                                                 v8i8:$Vn1, dsub_1,
7015                                                 v8i8:$Vn2, dsub_2,
7016                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
7017                             v8i8:$Vm))>;
7018
// Four-register tables: all four QQPR sub-registers carry table data.
7019def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
7020                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
7021          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7022                                                 v8i8:$Vn1, dsub_1,
7023                                                 v8i8:$Vn2, dsub_2,
7024                                                 v8i8:$Vn3, dsub_3),
7025                             v8i8:$Vm))>;
7026def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
7027                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
7028          (v8i8 (VTBX4Pseudo v8i8:$orig,
7029                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7030                                                 v8i8:$Vn1, dsub_1,
7031                                                 v8i8:$Vn2, dsub_2,
7032                                                 v8i8:$Vn3, dsub_3),
7033                             v8i8:$Vm))>;
7034}
7035
7036// VRINT      : Vector Rounding
// VRINT_FPI: for one rounding-mode suffix `op`, defines four instructions
// (Df/Qf for f32 and Dh/Qh for f16, each in D and Q register form) named
// "vrint<op>" and selected from the operator Int. Each one overrides Inst{9-7}
// with op9_7. The multiclass also defines assembly aliases that accept the
// doubled type suffix (".f32.f32" / ".f16.f16").
7037multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
7038  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
7039    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
7040                      !strconcat("vrint", op), "f32",
7041                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
7042      let Inst{9-7} = op9_7;
7043    }
7044    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
7045                      !strconcat("vrint", op), "f32",
7046                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
7047      let Inst{9-7} = op9_7;
7048    }
    // The f16 forms differ from the f32 forms in the first argument
    // (0b01 vs 0b10) and additionally require HasFullFP16.
7049    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
7050                      !strconcat("vrint", op), "f16",
7051                      v4f16, v4f16, Int>,
7052             Requires<[HasV8, HasNEON, HasFullFP16]> {
7053      let Inst{9-7} = op9_7;
7054    }
7055    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
7056                      !strconcat("vrint", op), "f16",
7057                      v8f16, v8f16, Int>,
7058             Requires<[HasV8, HasNEON, HasFullFP16]> {
7059      let Inst{9-7} = op9_7;
7060    }
7061  }
7062
  // Aliases resolve to the instructions defined above via NAME#"<suffix>".
7063  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
7064                  (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
7065  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
7066                  (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
  // The f16 aliases are additionally gated on HasFullFP16.
7067  let Predicates = [HasNEON, HasFullFP16] in {
7068  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
7069                  (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
7070  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
7071                  (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
7072  }
7073}
7074
// One defm per rounding-mode suffix. The op9_7 values 0b100 and 0b110 are not
// used by any instantiation here.
7075defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
7076defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
7077defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
7078defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
7079defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
7080defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
7081
7082// Cryptography instructions
// Helper classes for the crypto instructions. All share the
// NEONThumb2DataIPostEncoder post-encoder, the "v8Crypto" decoder namespace
// and hasSideEffects = 0, and all require HasV8 and HasCrypto.
//   AES / AES2Op       : "aes<op>" on v16i8, built on N2VQIntXnp / N2VQIntX2np.
//   N2SHA / N2SHA2Op   : "sha<op>" on v4i32, built on N2VQIntXnp / N2VQIntX2np.
//   N3SHA3Op           : "sha<op>" on v4i32, built on N3VQInt3np.
// Note the parameter order: the classes take (op7, op6) but forward them to
// the base class as (op6, op7).
7083let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
7084    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
7085  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
7086    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
7087                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
7088      Requires<[HasV8, HasCrypto]>;
7089  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
7090    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
7091                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
7092      Requires<[HasV8, HasCrypto]>;
7093  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
7094              SDPatternOperator Int>
7095    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
7096                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
7097      Requires<[HasV8, HasCrypto]>;
7098  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
7099              SDPatternOperator Int>
7100    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
7101                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
7102      Requires<[HasV8, HasCrypto]>;
7103  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
7104    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
7105                !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
7106      Requires<[HasV8, HasCrypto]>;
7107}
7108
// AES instructions: aesd/aese use the X2 base (AES2Op); aesimc/aesmc use the
// single-operand base (AES).
7109def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
7110def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
7111def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
7112def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
7113
// SHA instructions. SHA1H, SHA1C, SHA1M and SHA1P are defined with null_frag,
// i.e. no intrinsic is attached here; they are selected through explicit Pat
// records elsewhere in this file.
7114def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
7115def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
7116def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
7117def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
7118def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
7119def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
7120def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
7121def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
7122def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
7123def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
7124
// Patterns for the SHA1 intrinsics that take or return a scalar i32. The
// scalar is copied into an SPR and placed in ssub_0 of a wider register with
// SUBREG_TO_REG before being passed to the instruction.
7125let Predicates = [HasNEON] in {
// sha1h: i32 in, i32 out. The result is read back from ssub_0 of the
// instruction result and copied to a GPR.
7126def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
7127          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
7128              (SHA1H (SUBREG_TO_REG (i64 0),
7129                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
7130                                    ssub_0)),
7131              ssub_0)), GPR)>;
7132
// sha1c / sha1m / sha1p: only the middle operand ($hash_e) is scalar; the
// result and the other two operands are v4i32.
7133def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7134          (SHA1C v4i32:$hash_abcd,
7135                 (SUBREG_TO_REG (i64 0),
7136                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7137                                ssub_0),
7138                 v4i32:$wk)>;
7139
7140def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7141          (SHA1M v4i32:$hash_abcd,
7142                 (SUBREG_TO_REG (i64 0),
7143                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7144                                ssub_0),
7145                 v4i32:$wk)>;
7146
7147def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7148          (SHA1P v4i32:$hash_abcd,
7149                 (SUBREG_TO_REG (i64 0),
7150                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7151                                ssub_0),
7152                 v4i32:$wk)>;
7153}
7154
7155//===----------------------------------------------------------------------===//
7156// NEON instructions for single-precision FP math
7157//===----------------------------------------------------------------------===//
7158
// Pattern classes that select a scalar FP operation to a D-register NEON
// instruction. Each scalar operand is inserted at ssub_0 of an IMPLICIT_DEF
// vector, the vector instruction Inst is applied, and ssub_0 of the result is
// extracted. NEONFPPat is defined outside this section.
// NOTE(review): the DPR_VFP2 constraint presumably restricts allocation to D
// registers that have S sub-registers; confirm against the register file
// definition.

// N2VSPat: unary f32 operation.
7159class N2VSPat<SDNode OpNode, NeonI Inst>
7160  : NEONFPPat<(f32 (OpNode SPR:$a)),
7161              (EXTRACT_SUBREG
7162               (v2f32 (COPY_TO_REGCLASS (Inst
7163                (INSERT_SUBREG
7164                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7165                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
7166
// N3VSPat: binary f32 operation; each operand gets its own IMPLICIT_DEF
// vector.
7167class N3VSPat<SDNode OpNode, NeonI Inst>
7168  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
7169              (EXTRACT_SUBREG
7170               (v2f32 (COPY_TO_REGCLASS (Inst
7171                (INSERT_SUBREG
7172                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7173                 SPR:$a, ssub_0),
7174                (INSERT_SUBREG
7175                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7176                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
7177
// N3VSPatFP16: same shape as N3VSPat for f16 scalars in HPR, using v4f16
// vectors.
7178class N3VSPatFP16<SDNode OpNode, NeonI Inst>
7179  : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
7180              (EXTRACT_SUBREG
7181               (v4f16 (COPY_TO_REGCLASS (Inst
7182                (INSERT_SUBREG
7183                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
7184                 HPR:$a, ssub_0),
7185                (INSERT_SUBREG
7186                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
7187                 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
7188
// N3VSMulOpPat: matches OpNode(acc, MulNode(a, b)) and passes acc, a, b to a
// three-operand instruction, in that order.
7189class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
7190  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
7191              (EXTRACT_SUBREG
7192               (v2f32 (COPY_TO_REGCLASS (Inst
7193                (INSERT_SUBREG
7194                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7195                 SPR:$acc, ssub_0),
7196                (INSERT_SUBREG
7197                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7198                 SPR:$a, ssub_0),
7199                (INSERT_SUBREG
7200                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7201                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
7202
// NVCVTIFPat: integer (GPR) -> f32 conversion. The GPR value is first copied
// to the SPR class, then inserted at ssub_0. Unlike the classes above, no
// DPR_VFP2 constraint is applied.
7203class NVCVTIFPat<SDNode OpNode, NeonI Inst>
7204  : NEONFPPat<(f32 (OpNode GPR:$a)),
7205              (f32 (EXTRACT_SUBREG
7206                     (v2f32 (Inst
7207                       (INSERT_SUBREG
7208                         (v2f32 (IMPLICIT_DEF)),
7209                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
7210                     ssub_0))>;
// NVCVTFIPat: f32 (SPR) -> i32 conversion; the i32 result is taken from
// ssub_0 of the instruction result.
7211class NVCVTFIPat<SDNode OpNode, NeonI Inst>
7212  : NEONFPPat<(i32 (OpNode SPR:$a)),
7213              (i32 (EXTRACT_SUBREG
7214                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
7215                                                 SPR:$a, ssub_0))),
7216                     ssub_0))>;
7217
// Instantiations of the scalar-on-NEON pattern classes. Records without an
// explicit Requires<> rely on whatever predicates NEONFPPat itself carries
// (defined outside this section).
7218def : N3VSPat<fadd, VADDfd>;
7219def : N3VSPat<fsub, VSUBfd>;
7220def : N3VSPat<fmul, VMULfd>;
// Multiply-accumulate forms: fadd/fsub of an fmul. The VMLA/VMLS variants are
// gated on UseFPVMLx; the fused VFMA/VFMS variants on HasVFP4 and UseFusedMAC.
7221def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
7222      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
7223def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
7224      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
7225def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
7226      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7227def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
7228      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7229def : N2VSPat<fabs, VABSfd>;
7230def : N2VSPat<fneg, VNEGfd>;
// fmaximum/fminimum: the f16 forms require HasFullFP16; the f32 forms require
// only HasNEON (no UseNEONForFP in their explicit predicate list).
7231def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
7232def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
7233def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
7234def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
// Scalar f32 <-> i32 conversions, signed and unsigned.
7235def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
7236def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
7237def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
7238def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
7239
7240// NEON doesn't have any f64 conversions, so provide patterns to make
7241// sure the VFP conversions match when extracting from a vector.
// Each pattern converts one i32 lane of a vector to f64: the lane is taken as
// an S sub-register (SSubReg_f32_reg maps the lane immediate to the
// sub-register index) and fed to VSITOD (signed) or VUITOD (unsigned).
// Covered sources are v2i32 in a DPR and v4i32 in a QPR.
7242def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7243             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7244def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7245             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7246def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7247             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7248def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7249             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7250
7251
7252// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
// Both patterns build a D register with VMOVDRR, passing $a for both halves,
// and then take ssub_0 as the f32 result. They apply only when HasNEON and
// DontUseVMOVSR hold. The first matches a generic i32 -> f32 bitconvert, the
// second the arm_vmovsr node.
7253def : Pat<(f32 (bitconvert GPR:$a)),
7254          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7255        Requires<[HasNEON, DontUseVMOVSR]>;
7256def : Pat<(arm_vmovsr GPR:$a),
7257          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7258        Requires<[HasNEON, DontUseVMOVSR]>;
7259
7260//===----------------------------------------------------------------------===//
7261// Non-Instruction Patterns or Endianness - Revert Patterns
7262//===----------------------------------------------------------------------===//
7263
7264// bit_convert
7265// 64 bit conversions
// Bitconverts between types with the same element width and count (integer
// <-> floating point). These are gated on HasNEON only, with no endianness
// predicate: the result is simply the source register re-typed.
7266let Predicates = [HasNEON] in {
7267def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
7268def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
7269
7270def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
7271def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
7272
7273def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16  DPR:$src)>;
7274def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16  DPR:$src)>;
7275
7276// 128 bit conversions
7277def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
7278def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
7279
7280def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
7281def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
7282
7283def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16  QPR:$src)>;
7284def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16  QPR:$src)>;
7285}
7286
// Little-endian bitconverts between types of equal total width but different
// element width. Every pattern's result is the source register re-typed, so
// the conversion itself selects to no instruction. The patterns are grouped by
// destination type; each group lists the source types with a different
// element width, and the 8-bit destination groups list both 16-bit source
// types.
7287let Predicates = [IsLE,HasNEON] in {
7288  // 64 bit conversions
7289  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
7290  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
7291  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (f64   DPR:$src)>;
7292  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
7293  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
7294
7295  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
7296  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
7297  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
7298  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
7299  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
7300
7301  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
7302  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
7303  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
7304  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
7305  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;
7306
7307  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
7308  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
7309  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
7310  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
7311  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
7312
7313  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (v4f16 DPR:$src)>;
7314  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
7315  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
7316  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
7317  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (v4f16 DPR:$src)>;
7318
7319  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
7320  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
7321  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
7322  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
7323  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
7324
7325  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
7326  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
7327  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
7328  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
7329  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (v8i8  DPR:$src)>;
7330  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
7331
7332  // 128 bit conversions
7333  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
7334  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
7335  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
7336  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
7337  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
7338
7339  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
7340  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
7341  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
7342  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
7343  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
7344
7345  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
7346  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
7347  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
7348  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
7349  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
7350
7351  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
7352  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
7353  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
7354  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
7355  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
7356
7357  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
7358  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
7359  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
7360  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
7361  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
7362
7363  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
7364  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
7365  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
7366  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
7367  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
7368
7369  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
7370  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
7371  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
7372  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
7373  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
7374  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
7375}
7376
// Big-endian bitconverts between types of equal total width but different
// element width. Unlike the little-endian case, each conversion is selected to
// a VREV instruction. In every pattern below the instruction is
// VREV<A>{d,q}<B>, where A is the larger and B the smaller of the two element
// widths involved (f64 and v1i64 count as 64-bit elements), and d/q follows
// the register class. Conversions between types with the same element width
// are not listed here; they are covered by the HasNEON-only patterns earlier
// in the file.
7377let Predicates = [IsBE,HasNEON] in {
7378  // 64 bit conversions
7379  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7380  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7381  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
7382  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7383  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
7384
7385  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7386  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7387  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
7388  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7389  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
7390
7391  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
7392  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7393  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
7394  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7395  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
7396
7397  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
7398  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7399  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
7400  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7401  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
7402
7403  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
7404  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7405  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7406  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7407  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
7408
7409  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
7410  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7411  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7412  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7413  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
7414
7415  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (VREV64d8  DPR:$src)>;
7416  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (VREV64d8  DPR:$src)>;
7417  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (VREV32d8  DPR:$src)>;
7418  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (VREV32d8  DPR:$src)>;
7419  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (VREV16d8  DPR:$src)>;
7420  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (VREV16d8  DPR:$src)>;
7421
7422  // 128 bit conversions
7423  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7424  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7425  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
7426  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7427  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
7428
7429  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7430  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7431  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
7432  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7433  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
7434
7435  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7436  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7437  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
7438  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7439  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
7440
7441  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7442  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7443  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
7444  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7445  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
7446
7447  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7448  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7449  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7450  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7451  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
7452
7453  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7454  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7455  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7456  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7457  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
7458
7459  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8  QPR:$src)>;
7460  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8  QPR:$src)>;
7461  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8  QPR:$src)>;
7462  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8  QPR:$src)>;
7463  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8  QPR:$src)>;
7464  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8  QPR:$src)>;
7465}
7466
7467// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian.
// Byte-aligned accesses use the 8-bit VLD1q8/VST1q8 paired with VREV64q8;
// halfword-aligned accesses use VLD1q16/VST1q16 paired with VREV64q16.
// For loads the VREV is applied to the loaded value; for stores it is applied
// to the value before it is handed to VST1.
7468let Predicates = [IsBE,HasNEON] in {
7469def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
7470          (VREV64q8 (VLD1q8 addrmode6:$addr))>;
7471def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7472          (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
7473def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
7474          (VREV64q16 (VLD1q16 addrmode6:$addr))>;
7475def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7476          (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
7477}
7478
7479// Fold extracting an element out of a v2i32 into a vfp register.
// An f32 bitconvert of an i32 lane extracted from a v2i32 D register is
// selected as an EXTRACT_SUBREG of that D register. The subregister index is
// produced from the lane immediate by SSubReg_f32_reg (defined elsewhere;
// presumably maps the lane number to the matching S subregister -- confirm).
7480def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
7481          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
7482      Requires<[HasNEON]>;
7483
7484// Vector lengthening move with load, matching extending loads.
7485
7486// extload, zextload and sextload for a standard lengthening load. Example:
7487// Lengthen_Single<"8", "i16", "8"> =
7488//     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
7489//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
7490// (_Any and _Z select the unsigned VMOVLu form; _S selects VMOVLs.)
//
// Template arguments (all are pasted into record names with !cast):
//   DestLanes, DestTy - result type "v" # DestLanes # DestTy, and the suffix of
//                       the VMOVLuv/VMOVLsv instruction that is selected.
//   SrcTy             - source element width; picks the "extloadvi" # SrcTy
//                       family of PatFrags and the "VLD1d" # SrcTy load.
7491multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  // All three patterns carry AddedComplexity = 10 (presumably so they are
  // preferred over competing load patterns -- confirm).
7492  let AddedComplexity = 10 in {
  // Any-extending load: selected with the unsigned lengthening move.
7493  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7494                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
7495                  (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
7496                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
7497             Requires<[HasNEON]>;
7498
  // Zero-extending load: unsigned lengthening move.
7499  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7500                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
7501                (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
7502                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
7503           Requires<[HasNEON]>;
7504
  // Sign-extending load: signed lengthening move.
7505  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7506                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
7507                (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
7508                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
7509           Requires<[HasNEON]>;
7510  }
7511}
7512
7513// extload, zextload and sextload for a lengthening load which only uses
7514// half the lanes available. Example:
7515// Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
7516//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
7517//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
7518//                                      (f64 (IMPLICIT_DEF)), (i32 0))),
7519//                         dsub_0)>;
//
// Template arguments:
//   DestLanes, DestTy - result type "v" # DestLanes # DestTy.
//   SrcTy             - source element type including the "i" (e.g. "i8");
//                       selects the "extloadv" # SrcTy family of PatFrags.
//   InsnLanes, InsnTy - suffix of the VMOVLuv/VMOVLsv instruction that is used;
//                       only its dsub_0 half is kept.
// The data is loaded with VLD1LNd32 into lane 0 of an IMPLICIT_DEF register.
7520multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
7521                               string InsnLanes, string InsnTy> {
  // Any-extending load: unsigned lengthening move.
7522  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7523                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7524       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7525         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7526         dsub_0)>,
7527             Requires<[HasNEON]>;
  // Zero-extending load: unsigned lengthening move.
7528  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7529                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7530       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7531         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7532         dsub_0)>,
7533             Requires<[HasNEON]>;
  // Sign-extending load: signed lengthening move.
7534  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7535                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7536       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
7537         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7538         dsub_0)>,
7539             Requires<[HasNEON]>;
7540}
7541
7542// The following multiclass is basically a copy of the
7543// Lengthen_HalfSingle definition above, however with an additional parameter
7544// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
7545// data loaded by VLD1LN into proper vector format in big endian mode.
// The "VREV32d" # RevLanes instruction is applied to the VLD1LNd32 result
// before the lengthening move; everything else matches Lengthen_HalfSingle.
7546multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7547                               string InsnLanes, string InsnTy, string RevLanes> {
  // Any-extending load: unsigned lengthening move.
7548  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7549                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7550       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7551         (!cast<Instruction>("VREV32d" # RevLanes)
7552           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7553         dsub_0)>,
7554             Requires<[HasNEON]>;
  // Zero-extending load: unsigned lengthening move.
7555  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7556                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7557       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7558         (!cast<Instruction>("VREV32d" # RevLanes)
7559           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7560         dsub_0)>,
7561             Requires<[HasNEON]>;
  // Sign-extending load: signed lengthening move.
7562  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7563                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7564       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
7565         (!cast<Instruction>("VREV32d" # RevLanes)
7566           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7567         dsub_0)>,
7568             Requires<[HasNEON]>;
7569}
7570
7571// extload, zextload and sextload for a load followed by two successive
7572// lengthening moves, to quadruple the initial element width.
7573//
7574// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
7575//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
7576//         (VMOVLuv4i32
7577//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
7578//                                                   (f64 (IMPLICIT_DEF)),
7579//                                                   (i32 0))),
7580//                           dsub_0))>;
7581// (There is no outer EXTRACT_SUBREG: the second VMOVL's result is used whole.)
//
// Template arguments:
//   DestLanes, DestTy   - result type "v" # DestLanes # DestTy.
//   SrcTy               - source element type (e.g. "i8"); selects the
//                         "extloadv" # SrcTy family of PatFrags.
//   Insn1Lanes, Insn1Ty - suffix of the first (inner) VMOVL; only its dsub_0
//                         half feeds the second one.
//   Insn2Lanes, Insn2Ty - suffix of the second (outer) VMOVL.
7582multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
7583                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7584                           string Insn2Ty> {
  // Any-extending load: both steps use the unsigned lengthening move.
7585  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7586                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7587         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7588           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7589             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7590             dsub_0))>,
7591             Requires<[HasNEON]>;
  // Zero-extending load: both steps unsigned.
7592  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7593                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7594         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7595           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7596             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7597             dsub_0))>,
7598             Requires<[HasNEON]>;
  // Sign-extending load: both steps signed.
7599  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7600                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7601         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7602           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7603             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7604             dsub_0))>,
7605             Requires<[HasNEON]>;
7606}
7607
7608// The following multiclass is basically a copy of the
7609// Lengthen_Double definition above, however with an additional parameter
7610// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
7611// data loaded by VLD1LN into proper vector format in big endian mode.
// The "VREV32d" # RevLanes instruction is applied to the VLD1LNd32 result
// before the first lengthening move; the rest matches Lengthen_Double.
7612multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7613                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7614                           string Insn2Ty, string RevLanes> {
  // Any-extending load: both steps use the unsigned lengthening move.
7615  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7616                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7617         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7618           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7619            (!cast<Instruction>("VREV32d" # RevLanes)
7620             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7621             dsub_0))>,
7622             Requires<[HasNEON]>;
  // Zero-extending load: both steps unsigned.
7623  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7624                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7625         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7626           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7627            (!cast<Instruction>("VREV32d" # RevLanes)
7628             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7629             dsub_0))>,
7630             Requires<[HasNEON]>;
  // Sign-extending load: both steps signed.
7631  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7632                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7633         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7634           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7635            (!cast<Instruction>("VREV32d" # RevLanes)
7636             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7637             dsub_0))>,
7638             Requires<[HasNEON]>;
7639}
7640
7641// extload, zextload and sextload for a load followed by two successive
7642// lengthening moves, to quadruple the initial element width, but which ends
7643// up only requiring half the available lanes (a 64-bit outcome, not 128-bit).
7644//
7645// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
7646// Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
7647//     (EXTRACT_SUBREG (VMOVLuv4i32
7648//       (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
7649//                                               (f64 (IMPLICIT_DEF)), (i32 0))),
7650//                       dsub_0)),
7651//       dsub_0)>;
//
// Template arguments have the same roles as in Lengthen_Double. The
// differences visible here are that the data is loaded with VLD1LNd16 through
// addrmode6, and that only the dsub_0 half of the second VMOVL is kept.
7652multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
7653                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7654                           string Insn2Ty> {
  // Any-extending load: both steps use the unsigned lengthening move.
7655  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7656                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7657         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7658           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7659             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7660             dsub_0)),
7661          dsub_0)>,
7662             Requires<[HasNEON]>;
  // Zero-extending load: both steps unsigned.
7663  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7664                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7665         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7666           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7667             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7668             dsub_0)),
7669          dsub_0)>,
7670              Requires<[HasNEON]>;
  // Sign-extending load: both steps signed.
7671  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7672                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7673         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7674           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7675             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7676             dsub_0)),
7677          dsub_0)>,
7678             Requires<[HasNEON]>;
7679}
7680
7681// The following multiclass is basically a copy of the
7682// Lengthen_HalfDouble definition above, however with an additional VREV16d8
7683// instruction to convert data loaded by VLD1LN into proper vector format
7684// in big endian mode.
// Unlike the other *_Big_Endian multiclasses there is no RevLanes parameter:
// VREV16d8 is hard-coded and applied to the VLD1LNd16 result.
7685multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7686                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7687                           string Insn2Ty> {
  // Any-extending load: both steps use the unsigned lengthening move.
7688  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7689                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7690         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7691           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7692            (!cast<Instruction>("VREV16d8")
7693             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7694             dsub_0)),
7695          dsub_0)>,
7696             Requires<[HasNEON]>;
  // Zero-extending load: both steps unsigned.
7697  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7698                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7699         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7700           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7701            (!cast<Instruction>("VREV16d8")
7702             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7703             dsub_0)),
7704          dsub_0)>,
7705             Requires<[HasNEON]>;
  // Sign-extending load: both steps signed.
7706  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7707                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7708         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7709           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7710            (!cast<Instruction>("VREV16d8")
7711             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7712             dsub_0)),
7713          dsub_0)>,
7714             Requires<[HasNEON]>;
7715}
7716
// Single-step lengthening loads. These are instantiated outside the IsLE/IsBE
// blocks, so the same patterns serve both endiannesses.
7717defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
7718defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
7719defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
7720
// Little-endian instantiations: the plain multiclasses (no VREV) are used.
7721let Predicates = [HasNEON,IsLE] in {
7722  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
7723  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
7724
7725  // Double lengthening - v4i8 -> v4i16 -> v4i32
7726  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
7727  // v2i8 -> v2i16 -> v2i32
7728  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
7729  // v2i16 -> v2i32 -> v2i64
7730  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
7731}
7732
// Big-endian instantiations: same shapes as the little-endian ones, using the
// *_Big_Endian multiclasses. The trailing argument of the HalfSingle/Double
// variants is RevLanes ("8" -> VREV32d8 for i8 sources, "16" -> VREV32d16 for
// i16 sources).
7733let Predicates = [HasNEON,IsBE] in {
7734  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
7735  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
7736
7737  // Double lengthening - v4i8 -> v4i16 -> v4i32
7738  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
7739  // v2i8 -> v2i16 -> v2i32
7740  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
7741  // v2i16 -> v2i32 -> v2i64
7742  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
7743}
7744
7745// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
// The two bytes are loaded with VLD1LNd16 into lane 0 of an IMPLICIT_DEF
// register, then widened by three chained VMOVLs (8i16, 4i32, 2i64), taking
// the dsub_0 half between steps. extload and zextload use the unsigned
// VMOVLu forms; sextload uses the signed VMOVLs forms.
7746let Predicates = [HasNEON,IsLE] in {
7747  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7748        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7749           (VLD1LNd16 addrmode6:$addr,
7750                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7751  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7752        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7753           (VLD1LNd16 addrmode6:$addr,
7754                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7755  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7756        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7757           (VLD1LNd16 addrmode6:$addr,
7758                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7759}
7760// The following patterns are basically a copy of the little-endian triple
7761// lengthening patterns, however with an additional VREV16d8 instruction to
7762// convert data loaded by VLD1LN into proper vector format in big endian mode.
7763let Predicates = [HasNEON,IsBE] in {
7764  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7765        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7766           (!cast<Instruction>("VREV16d8")
7767             (VLD1LNd16 addrmode6:$addr,
7768                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7769  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7770        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7771           (!cast<Instruction>("VREV16d8")
7772             (VLD1LNd16 addrmode6:$addr,
7773                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7774  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7775        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7776           (!cast<Instruction>("VREV16d8")
7777             (VLD1LNd16 addrmode6:$addr,
7778                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7779}
7780
// concat_vectors of two D registers into a Q-register result type is selected
// as a REG_SEQUENCE over QPR: $Dn is placed in dsub_0 and $Dm in dsub_1.
// One pattern per result type: v2i64, v4i32, v8i16, v16i8, v4f32, v8f16.
7781let Predicates = [HasNEON] in {
7782def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
7783          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7784def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
7785          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7786def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
7787          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7788def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
7789          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7790def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
7791          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7792def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
7793          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7794}
7795
7796//===----------------------------------------------------------------------===//
7797// Assembler aliases
7798//
7799
7800def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
7801                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
7802def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
7803                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
7804
7805// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
// Each mnemonic gets one alias for the D-register form and one for the
// Q-register form of the three-operand instruction.
7806defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
7807                         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7808defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
7809                         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7810defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
7811                         (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7812defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
7813                         (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7814defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
7815                         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7816defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
7817                         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7818defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
7819                         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7820defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
7821                         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7822// ... two-operand aliases: "$Vdn, $Vm" uses $Vdn as both destination and
// first source. Only vand, veor and vorr are listed here (no vbic).
7823defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
7824                         (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7825defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
7826                         (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
7827defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
7828                         (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7829defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
7830                         (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
7831defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
7832                         (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7833defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
7834                         (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
7835// ... immediates: "vand.i16/.i32 $Vd, $imm" is mapped onto the VBIC-immediate
// instructions, with the immediate taken through the nImmSplatNotI16 /
// nImmSplatNotI32 operand classes. D and Q register forms are both provided.
7836def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
7837                    (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
7838def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
7839                    (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
7840def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
7841                    (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
7842def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
7843                    (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
7844
7845
7846// VLD1 single-lane pseudo-instructions. These need special handling for
7847// the lane index that an InstAlias can't handle, so we use these instead.
// Each element size (.8/.16/.32) comes in three syntactic forms:
//   *Asm_N              "$list, $addr"
//   *WB_fixed_Asm_N     "$list, $addr!"
//   *WB_register_Asm_N  "$list, $addr, $Rm"  (extra rGPR:$Rm operand)
// The address operand class follows the element size: addrmode6alignNone for
// .8, addrmode6align16 for .16 and addrmode6align32 for .32.
7848def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
7849                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7850                      pred:$p)>;
7851def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
7852                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7853                      pred:$p)>;
7854def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
7855                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7856                      pred:$p)>;
7857
7858def VLD1LNdWB_fixed_Asm_8 :
7859        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
7860                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7861                      pred:$p)>;
7862def VLD1LNdWB_fixed_Asm_16 :
7863        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
7864                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7865                      pred:$p)>;
7866def VLD1LNdWB_fixed_Asm_32 :
7867        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
7868                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7869                      pred:$p)>;
7870def VLD1LNdWB_register_Asm_8 :
7871        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
7872                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7873                       rGPR:$Rm, pred:$p)>;
7874def VLD1LNdWB_register_Asm_16 :
7875        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
7876                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7877                       rGPR:$Rm, pred:$p)>;
7878def VLD1LNdWB_register_Asm_32 :
7879        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
7880                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7881                       rGPR:$Rm, pred:$p)>;
7882
7883
7884// VST1 single-lane pseudo-instructions. These need special handling for
7885// the lane index that an InstAlias can't handle, so we use these instead.
// Same layout as the VLD1 single-lane pseudos: three syntactic forms
// ("$list, $addr", "$list, $addr!", "$list, $addr, $Rm") for each of
// .8/.16/.32, using addrmode6alignNone / addrmode6align16 / addrmode6align32.
7886def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
7887                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7888                      pred:$p)>;
7889def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
7890                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7891                      pred:$p)>;
7892def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
7893                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7894                      pred:$p)>;
7895
7896def VST1LNdWB_fixed_Asm_8 :
7897        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
7898                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7899                      pred:$p)>;
7900def VST1LNdWB_fixed_Asm_16 :
7901        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
7902                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7903                      pred:$p)>;
7904def VST1LNdWB_fixed_Asm_32 :
7905        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
7906                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7907                      pred:$p)>;
7908def VST1LNdWB_register_Asm_8 :
7909        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
7910                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7911                       rGPR:$Rm, pred:$p)>;
7912def VST1LNdWB_register_Asm_16 :
7913        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
7914                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7915                       rGPR:$Rm, pred:$p)>;
7916def VST1LNdWB_register_Asm_32 :
7917        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
7918                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7919                       rGPR:$Rm, pred:$p)>;
7920
7921// VLD2 single-lane pseudo-instructions. These need special handling for
7922// the lane index that an InstAlias can't handle, so we use these instead.
// "d" variants take VecListTwoD*Indexed lists and exist for .8/.16/.32;
// "q" variants take VecListTwoQ*Indexed lists and exist for .16/.32 only.
// Address operand classes: addrmode6align16 for .8, addrmode6align32 for .16
// and addrmode6align64 for .32. As with VLD1, each comes in a plain form, a
// "$addr!" (WB_fixed) form and a "$addr, $Rm" (WB_register) form.
7923def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
7924                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7925                  pred:$p)>;
7926def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
7927                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7928                      pred:$p)>;
7929def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
7930                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
7931def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
7932                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7933                      pred:$p)>;
7934def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
7935                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7936                      pred:$p)>;
7937
7938def VLD2LNdWB_fixed_Asm_8 :
7939        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
7940                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7941                      pred:$p)>;
7942def VLD2LNdWB_fixed_Asm_16 :
7943        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
7944                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7945                      pred:$p)>;
7946def VLD2LNdWB_fixed_Asm_32 :
7947        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
7948                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7949                      pred:$p)>;
7950def VLD2LNqWB_fixed_Asm_16 :
7951        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
7952                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7953                      pred:$p)>;
7954def VLD2LNqWB_fixed_Asm_32 :
7955        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
7956                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7957                      pred:$p)>;
7958def VLD2LNdWB_register_Asm_8 :
7959        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
7960                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7961                       rGPR:$Rm, pred:$p)>;
7962def VLD2LNdWB_register_Asm_16 :
7963        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
7964                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7965                       rGPR:$Rm, pred:$p)>;
7966def VLD2LNdWB_register_Asm_32 :
7967        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
7968                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7969                       rGPR:$Rm, pred:$p)>;
7970def VLD2LNqWB_register_Asm_16 :
7971        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
7972                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7973                       rGPR:$Rm, pred:$p)>;
7974def VLD2LNqWB_register_Asm_32 :
7975        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
7976                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7977                       rGPR:$Rm, pred:$p)>;
7978
7979
7980// VST2 single-lane pseudo-instructions. These need special handling for
7981// the lane index that an InstAlias can't handle, so we use these instead.
// VST2 single-lane assembler pseudos, plain form (asm syntax "$list, $addr").
// Operands are an indexed two-register list, an addrmode6 address whose
// alignment class follows the element size (align16/32/64 for .8/.16/.32),
// and the predicate.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".8", "$list, $addr",
    (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".16", "$list, $addr",
    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".32", "$list, $addr",
    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".16", "$list, $addr",
    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".32", "$list, $addr",
    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
7997
// VST2 single-lane assembler pseudos, "fixed" writeback form: the asm string
// ends in '!' ("$list, $addr!") and no extra register operand is taken.
def VST2LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".8", "$list, $addr!",
    (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".16", "$list, $addr!",
    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".32", "$list, $addr!",
    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".16", "$list, $addr!",
    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".32", "$list, $addr!",
    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
// VST2 single-lane assembler pseudos, "register" writeback form: the asm
// string is "$list, $addr, $Rm" and an rGPR:$Rm operand follows the address.
def VST2LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".8", "$list, $addr, $Rm",
    (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
         rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".16", "$list, $addr, $Rm",
    (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
         rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".32", "$list, $addr, $Rm",
    (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".16", "$list, $addr, $Rm",
    (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
         rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst2${p}", ".32", "$list, $addr, $Rm",
    (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
8038
8039// VLD3 all-lanes pseudo-instructions. These need special handling for
8040// the lane index that an InstAlias can't handle, so we use these instead.
// VLD3 all-lanes assembler pseudos, plain form (asm syntax "$list, $addr").
// Every element size uses the same operands: a VecListThree{D,Q}AllLanes
// list, an addrmode6dupalignNone address and the predicate.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
8059
// VLD3 all-lanes assembler pseudos, "fixed" writeback form: the asm string
// ends in '!' ("$list, $addr!") and no extra register operand is taken.
def VLD3DUPdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, pred:$p)>;
// VLD3 all-lanes assembler pseudos, "register" writeback form: the asm
// string is "$list, $addr, $Rm" and an rGPR:$Rm operand follows the address.
def VLD3DUPdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
         rGPR:$Rm, pred:$p)>;
8108
8109
8110// VLD3 single-lane pseudo-instructions. These need special handling for
8111// the lane index that an InstAlias can't handle, so we use these instead.
// VLD3 single-lane assembler pseudos, plain form (asm syntax "$list, $addr").
// Operands are an indexed three-register list, an addrmode6alignNone address
// and the predicate.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
8127
// VLD3 single-lane assembler pseudos, "fixed" writeback form: the asm string
// ends in '!' ("$list, $addr!") and no extra register operand is taken.
def VLD3LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
// VLD3 single-lane assembler pseudos, "register" writeback form: the asm
// string is "$list, $addr, $Rm" and an rGPR:$Rm operand follows the address.
def VLD3LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
8168
8169// VLD3 multiple structure pseudo-instructions. These need special handling for
8170// the vector operands that the normal instructions don't yet model.
8171// FIXME: Remove these when the register classes and instructions are updated.
// VLD3 multiple-structure assembler pseudos, plain form (asm syntax
// "$list, $addr"). All sizes share a VecListThree{D,Q} list operand and an
// addrmode6align64 address.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8184
// VLD3 multiple-structure assembler pseudos, "fixed" writeback form: the asm
// string ends in '!' ("$list, $addr!") and no extra register operand is taken.
def VLD3dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// VLD3 multiple-structure assembler pseudos, "register" writeback form: the
// asm string is "$list, $addr, $Rm" and an rGPR:$Rm operand follows the
// address.
def VLD3dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
8227
8228// VST3 single-lane pseudo-instructions. These need special handling for
8229// the lane index that an InstAlias can't handle, so we use these instead.
// VST3 single-lane assembler pseudos, plain form (asm syntax "$list, $addr").
// Operands are an indexed three-register list, an addrmode6alignNone address
// and the predicate.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr",
    (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr",
    (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr",
    (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr",
    (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr",
    (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
8245
// VST3 single-lane assembler pseudos, "fixed" writeback form: the asm string
// ends in '!' ("$list, $addr!") and no extra register operand is taken.
def VST3LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr!",
    (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr!",
    (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr!",
    (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, pred:$p)>;
// VST3 single-lane assembler pseudos, "register" writeback form: the asm
// string is "$list, $addr, $Rm" and an rGPR:$Rm operand follows the address.
def VST3LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
         rGPR:$Rm, pred:$p)>;
8286
8287
8288// VST3 multiple structure pseudo-instructions. These need special handling for
8289// the vector operands that the normal instructions don't yet model.
8290// FIXME: Remove these when the register classes and instructions are updated.
// VST3 multiple-structure assembler pseudos, plain form (asm syntax
// "$list, $addr"). All sizes share a VecListThree{D,Q} list operand and an
// addrmode6align64 address.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8303
// VST3 multiple-structure assembler pseudos, "fixed" writeback form: the asm
// string ends in '!' ("$list, $addr!") and no extra register operand is taken.
def VST3dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr!",
    (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr!",
    (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// VST3 multiple-structure assembler pseudos, "register" writeback form: the
// asm string is "$list, $addr, $Rm" and an rGPR:$Rm operand follows the
// address.
def VST3dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeD:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".8", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".16", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vst3${p}", ".32", "$list, $addr, $Rm",
    (ins VecListThreeQ:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
8346
8347// VLD4 all-lanes pseudo-instructions. These need special handling for
8348// the lane index that an InstAlias can't handle, so we use these instead.
// VLD4 all-lanes assembler pseudos, plain form (asm syntax "$list, $addr").
// The list operand is VecListFour{D,Q}AllLanes; the address operand class
// varies with element size: dupalign32 (.8), dupalign64 (.16) and
// dupalign64or128 (.32).
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, pred:$p)>;
8367
// VLD4 all-lanes assembler pseudos, "fixed" writeback form: the asm string
// ends in '!' ("$list, $addr!") and no extra register operand is taken.
def VLD4DUPdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, pred:$p)>;
// VLD4 all-lanes assembler pseudos, "register" writeback form: the asm
// string is "$list, $addr, $Rm" and an rGPR:$Rm operand follows the address.
def VLD4DUPdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
         rGPR:$Rm, pred:$p)>;
8416
8417
8418// VLD4 single-lane pseudo-instructions. These need special handling for
8419// the lane index that an InstAlias can't handle, so we use these instead.
// VLD4 single-lane assembler pseudos, plain form (asm syntax "$list, $addr").
// Operands are an indexed four-register list and an addrmode6 address whose
// class varies with element size: align32 (.8), align64 (.16) and
// align64or128 (.32).
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr",
    (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr",
    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr",
    (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr",
    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr",
    (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, pred:$p)>;
8435
// VLD4 single-lane assembler pseudos, "fixed" writeback form: the asm string
// ends in '!' ("$list, $addr!") and no extra register operand is taken.
def VLD4LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, pred:$p)>;
// VLD4 single-lane assembler pseudos, "register" writeback form: the asm
// string is "$list, $addr, $Rm" and an rGPR:$Rm operand follows the address.
def VLD4LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
         rGPR:$Rm, pred:$p)>;
8476
8477
8478
8479// VLD4 multiple structure pseudo-instructions. These need special handling for
8480// the vector operands that the normal instructions don't yet model.
8481// FIXME: Remove these when the register classes and instructions are updated.
// VLD4 multiple-structure assembler pseudos, plain form (asm syntax
// "$list, $addr"). All sizes share a VecListFour{D,Q} list operand and an
// addrmode6align64or128or256 address.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
8500
// VLD4 multiple-structure assembler pseudos, "fixed" writeback form: the asm
// string ends in '!' ("$list, $addr!") and no extra register operand is taken.
def VLD4dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr!",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr!",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr!",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, pred:$p)>;
// VLD4 multiple-structure assembler pseudos, "register" writeback form: the
// asm string is "$list, $addr, $Rm" and an rGPR:$Rm operand follows the
// address.
def VLD4dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".8", "$list, $addr, $Rm",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".16", "$list, $addr, $Rm",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<
    "vld4${p}", ".32", "$list, $addr, $Rm",
    (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
         rGPR:$Rm, pred:$p)>;
8549
8550// VST4 single-lane pseudo-instructions. These need special handling for
8551// the lane index that an InstAlias can't handle, so we use these instead.
// Forms with asm operand string "$list, $addr" (no '!' and no $Rm).
// The list operand class and the address operand class both depend on the
// element size:
//   .8  -> VecListFourDByteIndexed,        addrmode6align32
//   .16 -> VecListFour{D,Q}HWordIndexed,   addrmode6align64
//   .32 -> VecListFour{D,Q}WordIndexed,    addrmode6align64or128
// Only the D-list variant is defined for .8; there is no VST4LNqAsm_8 here.
8552def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
8553               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
8554                    pred:$p)>;
8555def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
8556               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
8557                    pred:$p)>;
8558def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
8559               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
8560                    pred:$p)>;
8561def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
8562               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
8563                    pred:$p)>;
8564def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
8565               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
8566                    pred:$p)>;
8567
// Single-lane "vst4" pseudo-instructions whose asm operand string is
// "$list, $addr!" (address followed by '!', no offset register). Operand
// classes follow the element size: ByteIndexed list with addrmode6align32 for
// .8, HWordIndexed list with addrmode6align64 for .16, and WordIndexed list
// with addrmode6align64or128 for .32. The Q-list variants exist only for .16
// and .32.
8568def VST4LNdWB_fixed_Asm_8 :
8569        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
8570               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
8571                    pred:$p)>;
8572def VST4LNdWB_fixed_Asm_16 :
8573        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
8574               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
8575                    pred:$p)>;
8576def VST4LNdWB_fixed_Asm_32 :
8577        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
8578               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
8579                    pred:$p)>;
8580def VST4LNqWB_fixed_Asm_16 :
8581        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
8582               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
8583                    pred:$p)>;
8584def VST4LNqWB_fixed_Asm_32 :
8585        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
8586               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
8587                    pred:$p)>;
// Single-lane "vst4" pseudo-instructions whose asm operand string is
// "$list, $addr, $Rm": an rGPR:$Rm operand follows the address. List and
// address operand classes follow the element size exactly as in the other
// single-lane vst4 pseudos (.8: ByteIndexed/align32, .16: HWordIndexed/align64,
// .32: WordIndexed/align64or128); Q-list variants exist only for .16 and .32.
// NOTE(review): $Rm is presumably the post-index offset register -- confirm
// against the asm parser's expansion of these pseudos.
8588def VST4LNdWB_register_Asm_8 :
8589        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
8590                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
8591                       rGPR:$Rm, pred:$p)>;
8592def VST4LNdWB_register_Asm_16 :
8593        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
8594                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
8595                       rGPR:$Rm, pred:$p)>;
8596def VST4LNdWB_register_Asm_32 :
8597        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
8598                  (ins VecListFourDWordIndexed:$list,
8599                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
8600def VST4LNqWB_register_Asm_16 :
8601        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
8602                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
8603                       rGPR:$Rm, pred:$p)>;
8604def VST4LNqWB_register_Asm_32 :
8605        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
8606                  (ins VecListFourQWordIndexed:$list,
8607                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
8608
8609
8610// VST4 multiple structure pseudo-instructions. These need special handling for
8611// the vector operands that the normal instructions don't yet model.
8612// FIXME: Remove these when the register classes and instructions are updated.
// Forms with asm operand string "$list, $addr" (no '!' and no $Rm). One def
// per data-type suffix (.8/.16/.32) and per register-list operand class
// (VecListFourD, VecListFourQ); every variant uses the
// addrmode6align64or128or256 address operand regardless of element size.
8613def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
8614               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8615                    pred:$p)>;
8616def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
8617               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8618                    pred:$p)>;
8619def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
8620               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8621                    pred:$p)>;
8622def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
8623               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8624                    pred:$p)>;
8625def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
8626               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8627                    pred:$p)>;
8628def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
8629               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8630                    pred:$p)>;
8631
// Multiple-structure "vst4" pseudo-instructions whose asm operand string is
// "$list, $addr!" (address followed by '!', no offset register operand).
// One def per data-type suffix (.8/.16/.32) and per register-list operand
// class (VecListFourD, VecListFourQ); all use addrmode6align64or128or256.
// NOTE(review): "fixed" presumably refers to a fixed post-increment amount --
// confirm against the asm parser's expansion of these pseudos.
8632def VST4dWB_fixed_Asm_8 :
8633        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
8634               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8635                    pred:$p)>;
8636def VST4dWB_fixed_Asm_16 :
8637        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
8638               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8639                    pred:$p)>;
8640def VST4dWB_fixed_Asm_32 :
8641        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
8642               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8643                    pred:$p)>;
8644def VST4qWB_fixed_Asm_8 :
8645        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
8646               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8647                    pred:$p)>;
8648def VST4qWB_fixed_Asm_16 :
8649        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
8650               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8651                    pred:$p)>;
8652def VST4qWB_fixed_Asm_32 :
8653        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
8654               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8655                    pred:$p)>;
// Multiple-structure "vst4" pseudo-instructions whose asm operand string is
// "$list, $addr, $Rm": an rGPR:$Rm operand follows the address. One def per
// data-type suffix (.8/.16/.32) and per register-list operand class
// (VecListFourD, VecListFourQ); all use addrmode6align64or128or256.
// NOTE(review): $Rm is presumably the post-index offset register -- confirm
// against the asm parser's expansion of these pseudos.
8656def VST4dWB_register_Asm_8 :
8657        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
8658                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8659                       rGPR:$Rm, pred:$p)>;
8660def VST4dWB_register_Asm_16 :
8661        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
8662                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8663                       rGPR:$Rm, pred:$p)>;
8664def VST4dWB_register_Asm_32 :
8665        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
8666                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
8667                       rGPR:$Rm, pred:$p)>;
8668def VST4qWB_register_Asm_8 :
8669        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
8670                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8671                       rGPR:$Rm, pred:$p)>;
8672def VST4qWB_register_Asm_16 :
8673        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
8674                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8675                       rGPR:$Rm, pred:$p)>;
8676def VST4qWB_register_Asm_32 :
8677        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
8678                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
8679                       rGPR:$Rm, pred:$p)>;
8680
8681// VMOV/VMVN takes an optional datatype suffix
// Register-to-register "vmov Vd, Vm" is expressed as VORR with $Vm supplied
// for both source operands (D and Q register forms).
8682defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
8683                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
8684defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
8685                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
8686
// Register-to-register "vmvn Vd, Vm" maps directly onto VMVNd / VMVNq.
8687defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
8688                         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
8689defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
8690                         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
8691
8692// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
8693// D-register versions.
// Each "vcle.<dt> d, n, m" alias emits the matching VCGE* instruction with the
// two source operands swapped: the result pattern lists $..m before $..n,
// while the asm string lists $..n before $..m. Signed, unsigned and f32
// variants are unconditional NEON aliases; the .f16 variants additionally
// require HasFullFP16 via the enclosing "let Predicates".
8694def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
8695                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8696def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
8697                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8698def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
8699                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8700def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
8701                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8702def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
8703                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8704def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
8705                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8706def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
8707                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8708let Predicates = [HasNEON, HasFullFP16] in
8709def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
8710                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8711// Q-register versions.
8712def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
8713                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8714def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
8715                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8716def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
8717                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8718def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
8719                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8720def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
8721                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8722def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
8723                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8724def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
8725                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8726let Predicates = [HasNEON, HasFullFP16] in
8727def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
8728                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8729
8730// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
8731// D-register versions.
// Each "vclt.<dt> d, n, m" alias emits the matching VCGT* instruction with the
// two source operands swapped: the result pattern lists $..m before $..n,
// while the asm string lists $..n before $..m. Signed, unsigned and f32
// variants are unconditional NEON aliases; the .f16 variants additionally
// require HasFullFP16 via the enclosing "let Predicates".
8732def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
8733                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8734def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
8735                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8736def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
8737                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8738def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
8739                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8740def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
8741                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8742def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
8743                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8744def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
8745                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8746let Predicates = [HasNEON, HasFullFP16] in
8747def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
8748                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
8749// Q-register versions.
8750def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
8751                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8752def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
8753                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8754def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
8755                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8756def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
8757                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8758def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
8759                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8760def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
8761                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8762def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
8763                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8764let Predicates = [HasNEON, HasFullFP16] in
8765def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
8766                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8767
8768// VSWP allows, but does not require, a type suffix.
// "vswp Vd, Vm" aliases for the D-register (VSWPd) and Q-register (VSWPq)
// instructions, instantiated through NEONDTAnyInstAlias.
// NOTE(review): NEONDTAnyInstAlias presumably expands to one alias per accepted
// data-type suffix -- confirm against its multiclass definition.
8769defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
8770                         (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
8771defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
8772                         (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
8773
8774// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
// Three-operand "Vd, Vn, Vm" aliases for VBIF, VBIT and VBSL. The first three
// entries target the D-register instructions (VBIFd/VBITd/VBSLd), the last
// three the Q-register instructions (VBIFq/VBITq/VBSLq). Operand order in the
// result pattern matches the asm string (no swapping).
8775defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
8776                         (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
8777defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
8778                         (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
8779defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
8780                         (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
8781defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
8782                         (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
8783defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
8784                         (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
8785defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
8786                         (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
8787
8788// "vmov Vd, #-imm" can be handled via "vmvn", and "vmvn Vd, #-imm" via "vmov".
// Cross-mapping aliases for .i32 immediates matched by the nImmVMOVI32Neg
// operand: "vmov.i32" is emitted as VMVNv2i32/VMVNv4i32 and "vmvn.i32" as
// VMOVv2i32/VMOVv4i32, for D and Q destinations respectively.
// NOTE(review): nImmVMOVI32Neg presumably matches immediates whose complement
// is encodable (and encodes that complement) -- confirm against the operand's
// parser/render methods in the ARM asm parser.
8789def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
8790                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
8791def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
8792                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
8793def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
8794                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
8795def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
8796                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
8797
8798// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
8799// these should restrict to just the Q register variants, but the register
8800// classes are enough to match correctly regardless, so we keep it simple
8801// and just use MnemonicAlias.
// Each entry maps a 'q'-suffixed mnemonic spelling onto the plain NEON
// mnemonic. Only the mnemonics listed here are covered; most entries alias the
// bare mnemonic, while vmovq and vrecpeq also (or only) have entries that
// include an explicit data-type suffix.
8802def : NEONMnemonicAlias<"vbicq", "vbic">;
8803def : NEONMnemonicAlias<"vandq", "vand">;
8804def : NEONMnemonicAlias<"veorq", "veor">;
8805def : NEONMnemonicAlias<"vorrq", "vorr">;
8806
8807def : NEONMnemonicAlias<"vmovq", "vmov">;
8808def : NEONMnemonicAlias<"vmvnq", "vmvn">;
8809// Explicit versions for floating point so that the FPImm variants get
8810// handled early. The parser gets confused otherwise.
8811def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
8812def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
8813
8814def : NEONMnemonicAlias<"vaddq", "vadd">;
8815def : NEONMnemonicAlias<"vsubq", "vsub">;
8816
8817def : NEONMnemonicAlias<"vminq", "vmin">;
8818def : NEONMnemonicAlias<"vmaxq", "vmax">;
8819
8820def : NEONMnemonicAlias<"vmulq", "vmul">;
8821
8822def : NEONMnemonicAlias<"vabsq", "vabs">;
8823
8824def : NEONMnemonicAlias<"vshlq", "vshl">;
8825def : NEONMnemonicAlias<"vshrq", "vshr">;
8826
8827def : NEONMnemonicAlias<"vcvtq", "vcvt">;
8828
8829def : NEONMnemonicAlias<"vcleq", "vcle">;
8830def : NEONMnemonicAlias<"vceqq", "vceq">;
8831
8832def : NEONMnemonicAlias<"vzipq", "vzip">;
8833def : NEONMnemonicAlias<"vswpq", "vswp">;
8834
// vrecpeq is aliased only with an explicit .f32 / .u32 suffix.
8835def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
8836def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
8837
8838
8839// Alias for loading floating point immediates that aren't representable
8840// using the vmov.f32 encoding but the bitpattern is representable using
8841// the .i32 encoding.
// "vmov.f32 Vd, #imm" is accepted when the immediate matches the nImmVMOVI32
// operand class and is then emitted as the integer VMOVv4i32 (Q destination)
// or VMOVv2i32 (D destination) instruction.
8842def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
8843                     (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
8844def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
8845                     (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
8846