xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td (revision 7a6dacaca14b62ca4b74406814becb87a3fefac0)
1//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// AArch64 Instruction definitions.
10//
11//===----------------------------------------------------------------------===//
12
13//===----------------------------------------------------------------------===//
14// ARM Instruction Predicate Definitions.
15//
// Architecture-revision predicates. Each record pairs a selection-time check
// on the Subtarget with an assembler predicate over the matching *Ops feature
// and a short name string.
// NOTE(review): HasV8_0a is the only record in this group built on plain
// AssemblerPredicate rather than AssemblerPredicateWithAll. That difference is
// preserved here as written -- confirm it is deliberate before normalizing it.
def HasV8_0a : Predicate<"Subtarget->hasV8_0aOps()">,
    AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
    AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
    AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
    AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
    AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
    AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
    AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
    AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV8_8a : Predicate<"Subtarget->hasV8_8aOps()">,
    AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
def HasV8_9a : Predicate<"Subtarget->hasV8_9aOps()">,
    AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
    AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
    AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
    AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
    AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">,
    AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
    AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;
48
// Single-feature predicates. Each record checks one Subtarget query at
// selection time and requires the matching Feature* record in the assembler,
// with the quoted string as the predicate's name.
def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
    AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;
def HasEL3 : Predicate<"Subtarget->hasEL3()">,
    AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;
def HasVH : Predicate<"Subtarget->hasVH()">,
    AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;
def HasLOR : Predicate<"Subtarget->hasLOR()">,
    AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;
def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
    AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;
def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">,
    AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;
def HasJS : Predicate<"Subtarget->hasJS()">,
    AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;
def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
    AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;
def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
    AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;
def HasNV : Predicate<"Subtarget->hasNV()">,
    AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;
def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
    AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;
def HasDIT : Predicate<"Subtarget->hasDIT()">,
    AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;
def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
    AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;
def HasAM : Predicate<"Subtarget->hasAM()">,
    AssemblerPredicateWithAll<(all_of FeatureAM), "am">;
def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
    AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;
def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
    AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;
def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
    AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;
def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
    AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
102
// Single-feature predicates, continued. HasNoCSSC and HasNoLSE are the
// selection-time negations of HasCSSC and HasLSE and carry no assembler
// predicate.
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
    AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
    AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
    AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
    AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
    AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES : Predicate<"Subtarget->hasAES()">,
    AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
    AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
    AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasCSSC : Predicate<"Subtarget->hasCSSC()">,
    AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">;
def HasLSE : Predicate<"Subtarget->hasLSE()">,
    AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
    AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM : Predicate<"Subtarget->hasRDM()">,
    AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
    AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
    AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
    AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
    AssemblerPredicateWithAll<(all_of FeatureFuseAES), "fuse-aes">;
// SVE, SME and FP8 related predicates.
// The HasSSVE_* records accept either the SSVE feature itself or the
// combination of SVE2 with the corresponding FP8 feature; the selection-time
// expression and the assembler dag spell out the same either-or condition.
def HasSVE : Predicate<"Subtarget->hasSVE()">,
    AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
    AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1 : Predicate<"Subtarget->hasSVE2p1()">,
    AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">,
    AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
    AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
    AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
    AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasB16B16 : Predicate<"Subtarget->hasB16B16()">,
    AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">;
def HasSME : Predicate<"Subtarget->hasSME()">,
    AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSMEF64F64 : Predicate<"Subtarget->hasSMEF64F64()">,
    AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
def HasSMEF16F16 : Predicate<"Subtarget->hasSMEF16F16()">,
    AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
def HasSMEFA64 : Predicate<"Subtarget->hasSMEFA64()">,
    AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
def HasSMEI16I64 : Predicate<"Subtarget->hasSMEI16I64()">,
    AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
def HasSME2 : Predicate<"Subtarget->hasSME2()">,
    AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">,
    AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
def HasFPMR : Predicate<"Subtarget->hasFPMR()">,
    AssemblerPredicateWithAll<(all_of FeatureFPMR), "fpmr">;
def HasFP8 : Predicate<"Subtarget->hasFP8()">,
    AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">,
    AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">;
def HasFP8FMA : Predicate<"Subtarget->hasFP8FMA()">,
    AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">;
def HasSSVE_FP8FMA
    : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
                "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
      AssemblerPredicateWithAll<
          (any_of FeatureSSVE_FP8FMA, (all_of FeatureSVE2, FeatureFP8FMA)),
          "ssve-fp8fma or (sve2 and fp8fma)">;
def HasFP8DOT2 : Predicate<"Subtarget->hasFP8DOT2()">,
    AssemblerPredicateWithAll<(all_of FeatureFP8DOT2), "fp8dot2">;
def HasSSVE_FP8DOT2
    : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
                "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
      AssemblerPredicateWithAll<
          (any_of FeatureSSVE_FP8DOT2, (all_of FeatureSVE2, FeatureFP8DOT2)),
          "ssve-fp8dot2 or (sve2 and fp8dot2)">;
def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">,
    AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">;
def HasSSVE_FP8DOT4
    : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
                "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
      AssemblerPredicateWithAll<
          (any_of FeatureSSVE_FP8DOT4, (all_of FeatureSVE2, FeatureFP8DOT4)),
          "ssve-fp8dot4 or (sve2 and fp8dot4)">;
def HasLUT : Predicate<"Subtarget->hasLUT()">,
    AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
def HasSME_LUTv2 : Predicate<"Subtarget->hasSME_LUTv2()">,
    AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
def HasSMEF8F16 : Predicate<"Subtarget->hasSMEF8F16()">,
    AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
def HasSMEF8F32 : Predicate<"Subtarget->hasSMEF8F32()">,
    AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
203
// A subset of SVE(2) instructions is legal in Streaming SVE execution mode, so
// the predicates below are satisfied when either of the two named features has
// been specified.
def HasSVEorSME : Predicate<"Subtarget->hasSVEorSME()">,
    AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME), "sve or sme">;
def HasSVE2orSME : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
    AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME), "sve2 or sme">;
def HasSVE2orSME2 : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">,
    AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2), "sve2 or sme2">;
def HasSVE2p1_or_HasSME
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1),
                                "sme or sve2p1">;
def HasSVE2p1_or_HasSME2
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1),
                                "sme2 or sve2p1">;
def HasSVE2p1_or_HasSME2p1
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1),
                                "sme2p1 or sve2p1">;

// A subset of NEON instructions is likewise legal in Streaming SVE execution
// mode; enable them if either NEON or SME has been specified.
def HasNEONorSME : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
    AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME), "neon or sme">;
// Remaining single-feature predicates. Note that a few records query the
// Subtarget under a different spelling than the Feature* record they require
// (HasAltNZCV, HasCCDP, HasBTI); both spellings are kept exactly as written.
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
    AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
    AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
    AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB : Predicate<"Subtarget->hasSB()">,
    AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
    AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
    AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI : Predicate<"Subtarget->hasBTI()">,
    AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE : Predicate<"Subtarget->hasMTE()">,
    AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME : Predicate<"Subtarget->hasTME()">,
    AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE : Predicate<"Subtarget->hasETE()">,
    AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
    AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
    AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
    AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
    AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
    AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS : Predicate<"Subtarget->hasXS()">,
    AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
    AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64 : Predicate<"Subtarget->hasLS64()">,
    AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
    AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
    AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC : Predicate<"Subtarget->hasHBC()">,
    AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
    AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">,
    AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">,
    AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE : Predicate<"Subtarget->hasITE()">,
    AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE : Predicate<"Subtarget->hasTHE()">,
    AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">,
    AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128 : Predicate<"Subtarget->hasLSE128()">,
    AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128 : Predicate<"Subtarget->hasD128()">,
    AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def HasCHK : Predicate<"Subtarget->hasCHK()">,
    AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS : Predicate<"Subtarget->hasGCS()">,
    AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def HasCPA : Predicate<"Subtarget->hasCPA()">,
    AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
// Selection-time predicates on target properties and tuning switches. Apart
// from UseNegativeImmediates, none of these carries an assembler predicate.

// Endianness: IsBE is simply the negation of IsLE.
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;

def IsWindows : Predicate<"Subtarget->isTargetWindows()">;

def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

// The selection-time condition is the constant "false"; only the assembler
// side is meaningful, and it holds when FeatureNoNegativeImmediates is NOT set.
def UseNegativeImmediates
    : Predicate<"false">,
      AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                         "NegativeImmediates">;

def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

// True unless the subtarget reports noSVEFPLD1R().
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;
314
// DAG node bound to ISD::LOCAL_RECOVER: one result and one operand, both of
// the same type, and that type is an integer.
def AArch64LocalRecover
    : SDNode<"ISD::LOCAL_RECOVER",
             SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisInt<1>]>>;
318
319
320//===----------------------------------------------------------------------===//
321// AArch64-specific DAG Nodes.
322//
323
// Type profiles for binary integer arithmetic that produces and/or consumes a
// flags value. In every profile the flags value is constrained to i32 and the
// arithmetic result has the same integer type as both LHS and RHS.

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, [
  SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisVT<1, i32>
]>;

// SDTBinaryArithWithFlagsIn - RES1 = op LHS, RHS, FLAGS
// (a single result: the incoming flags are consumed, none are produced)
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisVT<3, i32>
]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, [
  SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisVT<1, i32>,
  SDTCisVT<4, i32>
]>;
344
// Branch type profiles; none of them has a result.
// Brcond: an OtherVT operand followed by two i32 operands.
def SDT_AArch64Brcond : SDTypeProfile<0, 3, [
  SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, SDTCisVT<2, i32>
]>;
// cbz: an integer operand, then an OtherVT operand.
def SDT_AArch64cbz : SDTypeProfile<0, 2, [
  SDTCisInt<0>, SDTCisVT<1, OtherVT>
]>;
// tbz: two integer operands, then an OtherVT operand.
def SDT_AArch64tbz : SDTypeProfile<0, 3, [
  SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, OtherVT>
]>;
351
352
// Conditional-select profile: the result has the type of the first two
// operands; operand 3 is an integer and operand 4 is i32.
def SDT_AArch64CSel : SDTypeProfile<1, 4, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisVT<4, i32>
]>;

// Conditional-compare profiles: an i32 result, two same-typed values to
// compare (integer for CCMP, floating point for FCCMP), two further integer
// operands and a trailing i32 operand.
def SDT_AArch64CCMP : SDTypeProfile<1, 5, [
  SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisSameAs<1, 2>, SDTCisInt<3>,
  SDTCisInt<4>, SDTCisVT<5, i32>
]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5, [
  SDTCisVT<0, i32>, SDTCisFP<1>, SDTCisSameAs<1, 2>, SDTCisInt<3>,
  SDTCisInt<4>, SDTCisVT<5, i32>
]>;

// Floating-point compare profile: no result, two same-typed FP operands.
def SDT_AArch64FCmp : SDTypeProfile<0, 2, [
  SDTCisFP<0>, SDTCisSameAs<0, 1>
]>;
// Vector type profiles. Index 0 is the result; operands are numbered from 1.
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>
]>;
def SDT_AArch64UnaryVec : SDTypeProfile<1, 1, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>
]>;
def SDT_AArch64ExtVec : SDTypeProfile<1, 3, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>
]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [
  SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;
// Dot: the result matches operand 1; operands 2 and 3 are vectors that share
// one type, which is not tied to the result type.
def SDT_AArch64Dot : SDTypeProfile<1, 3, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVec<2>, SDTCisSameAs<2, 3>
]>;
391
// Shift-and-insert: vector result matching operands 1 and 2, with an integer
// third operand.
def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [
  SDTCisVec<0>, SDTCisInt<3>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
]>;

// Unary / binary / ternary vector operations whose operands all share the
// result type, plus two compare profiles.
def SDT_AArch64unvec : SDTypeProfile<1, 1, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>
]>;
// fcmpz places no type constraints at all on its result or operand.
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
// fcmp only requires its two operands to agree with each other.
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>
]>;

// No-result profiles: TCRET takes a pointer first; PREFETCH takes an i32 and
// then a pointer.
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [
  SDTCisVT<0, i32>, SDTCisPtrTy<1>
]>;

// ITOF: floating-point result with an operand of that same type.
def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
408
// TLS descriptor call: no results, operand count given as -2, and the first
// two operands are pointers.
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [
  SDTCisPtrTy<0>, SDTCisPtrTy<1>
]>;

// uaddlp: vector in, vector out; the two types are not tied together.
def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

// Pair loads: two results of one type (i64, or v4i32 for ldnp) from a single
// pointer operand.
def SDT_AArch64ldp : SDTypeProfile<2, 1, [
  SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>
]>;
def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [
  SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>
]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [
  SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>
]>;

// Pair stores: no results; two values of one type (i64, or v4i32 for stnp)
// followed by a pointer operand.
def SDT_AArch64stp : SDTypeProfile<0, 3, [
  SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>
]>;
def SDT_AArch64stilp : SDTypeProfile<0, 3, [
  SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>
]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [
  SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>
]>;
420
// Type profile for the node that generates the general dynamic sequence, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1
//
// Results:  none (the TPIDR_EL0 offset is put directly in X0, hence no
//           "result" here).
// Operands: one, the variable, constrained to pointer type.
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
432
// WrapperLarge: an i64 result built from four operands that are all i32
// (operand 1 is pinned to i32 and operands 2-4 must match it).
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, [
  SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
  SDTCisSameAs<1, 4>
]>;

// TBL: vector result matching operand 1, with an integer-typed operand 2.
def SDT_AArch64TBL
    : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
441
// Masked load that is non-extending, unindexed and not non-temporal.
// The offset operand of masked_ld is matched as undef.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  auto *Ld = cast<MaskedLoadSDNode>(N);
  return Ld->getExtensionType() == ISD::NON_EXTLOAD && Ld->isUnindexed() &&
         !Ld->isNonTemporal();
}]>;
// Unindexed masked loads whose extension kind is any-extend (EXTLOAD) or
// zero-extend (ZEXTLOAD).
def azext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  auto *Ld = cast<MaskedLoadSDNode>(N);
  auto Ext = Ld->getExtensionType();
  return (Ext == ISD::EXTLOAD || Ext == ISD::ZEXTLOAD) && Ld->isUnindexed();
}]>;

// Refinements of azext_masked_load keyed on the scalar type of the in-memory
// value: i8, i16 and i32 respectively.
def azext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  EVT MemVT = cast<MaskedLoadSDNode>(N)->getMemoryVT();
  return MemVT.getScalarType() == MVT::i8;
}]>;
def azext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  EVT MemVT = cast<MaskedLoadSDNode>(N)->getMemoryVT();
  return MemVT.getScalarType() == MVT::i16;
}]>;
def azext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  EVT MemVT = cast<MaskedLoadSDNode>(N)->getMemoryVT();
  return MemVT.getScalarType() == MVT::i32;
}]>;
// Unindexed masked loads whose extension kind is sign-extend (SEXTLOAD).
def sext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  auto *Ld = cast<MaskedLoadSDNode>(N);
  return Ld->getExtensionType() == ISD::SEXTLOAD && Ld->isUnindexed();
}]>;

// Refinements of sext_masked_load keyed on the scalar type of the in-memory
// value: i8, i16 and i32 respectively.
def sext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  EVT MemVT = cast<MaskedLoadSDNode>(N)->getMemoryVT();
  return MemVT.getScalarType() == MVT::i8;
}]>;
def sext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  EVT MemVT = cast<MaskedLoadSDNode>(N)->getMemoryVT();
  return MemVT.getScalarType() == MVT::i16;
}]>;
def sext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  EVT MemVT = cast<MaskedLoadSDNode>(N)->getMemoryVT();
  return MemVT.getScalarType() == MVT::i32;
}]>;

495
// Non-temporal counterpart of nonext_masked_load: a non-extending, unindexed
// masked load for which isNonTemporal() holds.
def non_temporal_load :
   PatFrag<(ops node:$ptr, node:$pred, node:$def),
           (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
   auto *Ld = cast<MaskedLoadSDNode>(N);
   return Ld->getExtensionType() == ISD::NON_EXTLOAD && Ld->isUnindexed() &&
          Ld->isNonTemporal();
}]>;
503
// Masked store that is non-truncating, unindexed and not non-temporal.
// The offset operand of masked_st is matched as undef.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  auto *St = cast<MaskedStoreSDNode>(N);
  return !St->isTruncatingStore() && St->isUnindexed() &&
         !St->isNonTemporal();
}]>;
// Unindexed masked stores that truncate the stored value.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  auto *St = cast<MaskedStoreSDNode>(N);
  return St->isTruncatingStore() && St->isUnindexed();
}]>;

// Refinements of trunc_masked_store keyed on the scalar type of the in-memory
// value: i8, i16 and i32 respectively.
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  EVT MemVT = cast<MaskedStoreSDNode>(N)->getMemoryVT();
  return MemVT.getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  EVT MemVT = cast<MaskedStoreSDNode>(N)->getMemoryVT();
  return MemVT.getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  EVT MemVT = cast<MaskedStoreSDNode>(N)->getMemoryVT();
  return MemVT.getScalarType() == MVT::i32;
}]>;
534
// Masked store that is non-truncating, unindexed and flagged non-temporal.
def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  auto *St = cast<MaskedStoreSDNode>(N);
  return !St->isTruncatingStore() &&
         St->isUnindexed() &&
         St->isNonTemporal();
}]>;
542
// Expands a masked gather/scatter fragment into four variants, selected by how
// the index vector is interpreted: signed vs. unsigned, and scaled vs.
// unscaled.
//
// An index whose element type is i64 is always classified as signed by these
// predicates, whatever isIndexSigned() reports (presumably because a 64-bit
// index needs no extension -- confirm against the lowering code).
multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
  // offsets = (signed)Index * sizeof(elt)
  def NAME#_signed_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto *MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (signed)Index
  def NAME#_signed_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto *MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && !MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index * sizeof(elt)
  def NAME#_unsigned_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto *MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index
  def NAME#_unsigned_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto *MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && !MGS->isIndexScaled();
  }]>;
}
581
// Gather fragments, each expanded into
// {signed,unsigned} x {scaled,unscaled} index variants.
defm nonext_masked_gather : masked_gather_scatter<nonext_masked_gather>;
defm azext_masked_gather_i8 : masked_gather_scatter<azext_masked_gather_i8>;
defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
defm sext_masked_gather_i8 : masked_gather_scatter<sext_masked_gather_i8>;
defm sext_masked_gather_i16 : masked_gather_scatter<sext_masked_gather_i16>;
defm sext_masked_gather_i32 : masked_gather_scatter<sext_masked_gather_i32>;

// Scatter fragments, expanded the same way.
defm nontrunc_masked_scatter : masked_gather_scatter<nontrunc_masked_scatter>;
defm trunc_masked_scatter_i8 : masked_gather_scatter<trunc_masked_scatter_i8>;
defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;
594
// top16Zero - true when $src is an i32 whose upper 16 bits are known zero.
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
  SDValue Src(N, 0);
  if (Src.getValueType() != MVT::i32)
    return false;
  return CurDAG->MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 16));
  }]>;

// top32Zero - true when $src is an i64 whose upper 32 bits are known zero.
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
  SDValue Src(N, 0);
  if (Src.getValueType() != MVT::i64)
    return false;
  return CurDAG->MaskedValueIsZero(Src, APInt::getHighBitsSet(64, 32));
  }]>;

// topbitsallzero - true when every bit except the lowest is known zero.
def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
  SDValue Src(N, 0);
  if (Src.getValueType() != MVT::i32)
    return false;
  return CurDAG->MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 31));
  }]>;
def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
  SDValue Src(N, 0);
  if (Src.getValueType() != MVT::i64)
    return false;
  return CurDAG->MaskedValueIsZero(Src, APInt::getHighBitsSet(64, 63));
  }]>;
616
// Node definitions.

// Address materialisation.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;

// Call-sequence markers; both operands are i32.
def AArch64callseq_start
    : SDNode<"ISD::CALLSEQ_START",
             SDCallSeqStart<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>,
             [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end
    : SDNode<"ISD::CALLSEQ_END",
             SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>,
             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

// Call nodes: no results, variadic operands, operand 0 is pointer-typed.
def AArch64call
    : SDNode<"AArch64ISD::CALL",
             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;

def AArch64call_bti
    : SDNode<"AArch64ISD::CALL_BTI",
             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;

def AArch64call_rvmarker
    : SDNode<"AArch64ISD::CALL_RVMARKER",
             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;

def AArch64call_arm64ec_to_x64
    : SDNode<"AArch64ISD::CALL_ARM64EC_TO_X64",
             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;

// Branch nodes; all carry a chain.
def AArch64brcond
    : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, [SDNPHasChain]>;
def AArch64cbz
    : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, [SDNPHasChain]>;
def AArch64cbnz
    : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, [SDNPHasChain]>;
def AArch64tbz
    : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz, [SDNPHasChain]>;
def AArch64tbnz
    : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz, [SDNPHasChain]>;
660
661
// Conditional-select family; all share SDT_AArch64CSel.
def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;

def AArch64retglue
    : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
             [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

// Arithmetic that consumes and/or produces flags.
def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >;
def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
def AArch64add_flag
    : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
             [SDNPCommutative]>;
def AArch64sub_flag
    : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
def AArch64and_flag
    : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
             [SDNPCommutative]>;
def AArch64adc_flag
    : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag
    : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;

// Conditional compares.
def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

// Floating-point compares. The strict forms carry a chain.
def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64strict_fcmp
    : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp, [SDNPHasChain]>;
def AArch64strict_fcmpe
    : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp, [SDNPHasChain]>;
// Matches either the strict or the non-strict fcmp node.
def AArch64any_fcmp
    : PatFrags<(ops node:$lhs, node:$rhs),
               [(AArch64strict_fcmp node:$lhs, node:$rhs),
                (AArch64fcmp node:$lhs, node:$rhs)]>;
692
// Duplicate a scalar or a vector lane.
def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;

def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;

// Permutes; all share SDT_AArch64Zip.
def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

// Vector move-immediate forms.
def AArch64movi_edit
    : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift
    : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl
    : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift
    : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl
    : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi
    : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov
    : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

// Element reversal and extraction.
def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;
721
// Vector shifts by immediate.
def AArch64vashr   : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr   : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl    : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli  : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli  : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri  : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri  : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
// Shift-and-insert forms.
def AArch64vsli    : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri    : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;

// Three-operand bitwise selects.
def AArch64bit : SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsp : SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

// Integer vector compares (two vector operands).
def AArch64cmeq : SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge : SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt : SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi : SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs : SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

// Floating-point vector compares (two vector operands).
def AArch64fcmeq : SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge : SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt : SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

// Integer vector compares against zero.
def AArch64cmeqz : SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez : SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz : SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez : SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz : SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
// cmtst is expressed as NOT(cmeqz(LHS & RHS)).
def AArch64cmtst
    : PatFrag<(ops node:$LHS, node:$RHS),
              (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;

// Floating-point vector compares against zero.
def AArch64fcmeqz : SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez : SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz : SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez : SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz : SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

// Vector bitwise ops with an immediate.
def AArch64bici : SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri : SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

// Tail-call return.
def AArch64tcret
    : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
             [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;
765
// Prefetch is chained and marked as having side effects.
def AArch64Prefetch
    : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
             [SDNPHasChain, SDNPSideEffect]>;

// Integer-to-floating-point conversions (signed / unsigned).
def AArch64sitof : SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof : SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

// TLS descriptor call sequence.
def AArch64tlsdesc_callseq
    : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", SDT_AArch64TLSDescCallSeq,
             [SDNPInGlue, SDNPOutGlue, SDNPHasChain, SDNPVariadic]>;


def AArch64WrapperLarge
    : SDNode<"AArch64ISD::WrapperLarge", SDT_AArch64WrapperLarge>;

def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
782
// Multiply-long profile: integer result, two integer operands of the same
// type as each other.
def SDT_AArch64mull
    : SDTypeProfile<1, 2,
                    [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>]>;
// All three multiply-long nodes are commutative.
def AArch64pmull
    : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull, [SDNPCommutative]>;
def AArch64smull
    : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull, [SDNPCommutative]>;
def AArch64umull
    : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull, [SDNPCommutative]>;

// Reciprocal and reciprocal-square-root estimate/step nodes.
def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;

// Dot products.
def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;

// Across-vector reductions.
def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;
def AArch64saddlv : SDNode<"AArch64ISD::SADDLV", SDT_AArch64uaddlp>;
808
// The fragments below each accept either a DAG node or the corresponding
// NEON intrinsic, so that one instruction pattern covers both.

// Absolute difference (unsigned / signed).
def AArch64uabd
    : PatFrags<(ops node:$lhs, node:$rhs),
               [(abdu node:$lhs, node:$rhs),
                (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
def AArch64sabd
    : PatFrags<(ops node:$lhs, node:$rhs),
               [(abds node:$lhs, node:$rhs),
                (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;

// Pairwise add nodes, and fragments that also accept the intrinsic form.
def AArch64addp_n : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
def AArch64addp
    : PatFrags<(ops node:$Rn, node:$Rm),
               [(AArch64addp_n node:$Rn, node:$Rm),
                (int_aarch64_neon_addp node:$Rn, node:$Rm)]>;
def AArch64uaddlp
    : PatFrags<(ops node:$src),
               [(AArch64uaddlp_n node:$src),
                (int_aarch64_neon_uaddlp node:$src)]>;
def AArch64saddlp
    : PatFrags<(ops node:$src),
               [(AArch64saddlp_n node:$src),
                (int_aarch64_neon_saddlp node:$src)]>;
// The floating-point pairwise add reuses the AArch64addp_n node.
def AArch64faddp
    : PatFrags<(ops node:$Rn, node:$Rm),
               [(AArch64addp_n node:$Rn, node:$Rm),
                (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;

// Rounding shift-right-narrow: a truncated rounding logical shift right
// (recognised by SelectRoundingVLShr) or the rshrn intrinsic.
def AArch64roundingvlshr
    : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
def AArch64rshrn
    : PatFrags<(ops node:$LHS, node:$RHS),
               [(trunc (AArch64roundingvlshr node:$LHS, node:$RHS)),
                (int_aarch64_neon_rshrn node:$LHS, node:$RHS)]>;

// Absolute compares: fcmge/fcmgt applied to fabs of both operands, or the
// facge/facgt intrinsics.
def AArch64facge
    : PatFrags<(ops node:$Rn, node:$Rm),
               [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)),
                (int_aarch64_neon_facge node:$Rn, node:$Rm)]>;
def AArch64facgt
    : PatFrags<(ops node:$Rn, node:$Rm),
               [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)),
                (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>;

// Floating-point min/max reductions.
def AArch64fmaxnmv
    : PatFrags<(ops node:$Rn),
               [(vecreduce_fmax node:$Rn),
                (int_aarch64_neon_fmaxnmv node:$Rn)]>;
def AArch64fminnmv
    : PatFrags<(ops node:$Rn),
               [(vecreduce_fmin node:$Rn),
                (int_aarch64_neon_fminnmv node:$Rn)]>;
def AArch64fmaxv
    : PatFrags<(ops node:$Rn),
               [(vecreduce_fmaximum node:$Rn),
                (int_aarch64_neon_fmaxv node:$Rn)]>;
def AArch64fminv
    : PatFrags<(ops node:$Rn),
               [(vecreduce_fminimum node:$Rn),
                (int_aarch64_neon_fminv node:$Rn)]>;
854
// Tag-store nodes: no results, two pointer-typed operands. Each is chained,
// may store, and carries a memory operand.
def SDT_AArch64SETTAG
    : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg
    : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg
    : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g
    : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g
    : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

// Unpack nodes: integer operand and result, operand smaller than the result.
def SDT_AArch64unpk : SDTypeProfile<1, 1, [
    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
]>;
def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;

// Pair loads: chained, may load, carry a memory operand.
def AArch64ldp
    : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp,
             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldiapp
    : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp,
             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldnp
    : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp,
             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Pair stores: chained, may store, carry a memory operand.
def AArch64stp
    : SDNode<"AArch64ISD::STP", SDT_AArch64stp,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stilp
    : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp
    : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
876def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
877
878def AArch64probedalloca
879    : SDNode<"AArch64ISD::PROBED_ALLOCA",
880             SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
881             [SDNPHasChain, SDNPMayStore]>;
882
883def AArch64mrs : SDNode<"AArch64ISD::MRS",
884                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
885                        [SDNPHasChain, SDNPOutGlue]>;
886
887def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>;
888def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>;
889def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
890                            [(AArch64rshrnb node:$rs, node:$i),
891                            (int_aarch64_sve_rshrnb node:$rs, node:$i)]>;
892
893def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1,
894                             [SDTCisInt<0>, SDTCisVec<1>]>, []>;
895
// Match an 'add' node, and also treat an 'or' node as an 'add' when the or'ed
// operands have no common bits set.
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
                         [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
   // An ADD always qualifies; anything else must have disjoint operand bits.
   return N->getOpcode() == ISD::ADD ||
          CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
}]> {
  let GISelPredicateCode = [{
     // Only G_ADD is handled for now.
     // FIXME: build the capability to compute whether the operands of a G_OR
     // have common bits set.
     return MI.getOpcode() == TargetOpcode::G_ADD;
  }];
}
910
// Match a mul whose operands both have more than 32 known sign bits, so it can
// be reduced to a mul with smaller operands.
def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
  // Bail out early so the second query only runs when the first one passes.
  if (CurDAG->ComputeNumSignBits(N->getOperand(0)) <= 32)
    return false;
  return CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
}]>;
916
917//===----------------------------------------------------------------------===//
918
919//===----------------------------------------------------------------------===//
920
// AArch64 Instruction Predicate Definitions.
// These predicates are recomputed for every function. Computing them per
// module would require reaching the Function object through the
// <Target>Subtarget, and objections were raised to that (see the post-commit
// review comments for r301750).
let RecomputePerFunction = 1 in {
  // Code-size optimisation on / off.
  def ForCodeSize : Predicate<"shouldOptForSize(MF)">;
  def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">;

  // Avoid generating STRQro if it is slow, unless we're optimizing for code
  // size.
  def UseSTRQro
      : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;

  // Branch target enforcement on / off for the current function.
  def UseBTI
      : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
  def NotUseBTI
      : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;

  // SLS BLR hardening on / off.
  def SLSBLRMitigation
      : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  def NoSLSBLRMitigation
      : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;

  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
  // optimizing. This lets patterns be used selectively without impacting
  // SelectionDAG's behaviour.
  // FIXME: One day there will probably be a nicer way to check for this, but
  // today is not that day.
  def OptimizedGISelOrOtherSelector
      : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
}
943
944include "AArch64InstrFormats.td"
945include "SVEInstrFormats.td"
946include "SMEInstrFormats.td"
947
948//===----------------------------------------------------------------------===//
949
950//===----------------------------------------------------------------------===//
951// Miscellaneous instructions.
952//===----------------------------------------------------------------------===//
953
// Stack-adjusting pseudos: codegen-only, with side effects.
let hasSideEffects = 1, isCodeGenOnly = 1 in {

// Call-frame setup and teardown markers; both read and write SP.
let Defs = [SP], Uses = [SP] in {
// Sched is an empty list because these instructions are expected to simply
// get removed in most cases.
def ADJCALLSTACKDOWN
    : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
             [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
      Sched<[]>;
def ADJCALLSTACKUP
    : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
             [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
      Sched<[]>;

}

// Probed stack allocation pseudos; these also define NZCV.
let Defs = [SP, NZCV], Uses = [SP] in {
// Probed stack allocation of a constant size, used in function prologues when
// stack-clash protection is enabled.
def PROBED_STACKALLOC
    : Pseudo<(outs GPR64:$scratch),
             (ins i64imm:$stacksize, i64imm:$fixed_offset,
              i64imm:$scalable_offset),
             []>,
      Sched<[]>;

// Probed stack allocation of a variable size, used in function prologues when
// stack-clash protection is enabled.
def PROBED_STACKALLOC_VAR
    : Pseudo<(outs), (ins GPR64sp:$target), []>,
      Sched<[]>;

// Probed stack allocations of a variable size, used for allocas of unknown size
// when stack-clash protection is enabled.
let usesCustomInserter = 1 in
def PROBED_STACKALLOC_DYN
    : Pseudo<(outs), (ins GPR64common:$target),
             [(AArch64probedalloca GPR64common:$target)]>,
      Sched<[]>;

} // Defs = [SP, NZCV], Uses = [SP] in
} // hasSideEffects = 1, isCodeGenOnly = 1
993
let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
             [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
      Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
// One variant per symbol kind: each matches an adrp of the symbol combined
// with its low part through AArch64addlow.
def MOVaddr
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst,
                   (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                  tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst,
                   (AArch64addlow (AArch64adrp tjumptable:$hi),
                                  tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst,
                   (AArch64addlow (AArch64adrp tconstpool:$hi),
                                  tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst,
                   (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                  tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst,
                   (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                  tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst,
                   (AArch64addlow (AArch64adrp texternalsym:$hi),
                                  texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
// or together with an adrp into MOVaddr above. For cases with TLS, it
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
             [(set GPR64sp:$dst,
                   (AArch64addlow GPR64sp:$src, tglobaltlsaddr:$low))]>,
      Sched<[WriteAdr]>;

} // isReMaterializable, isCodeGenOnly
1047
// Extra LOADgot selection patterns for operand kinds other than the
// tglobaladdr handled by the LOADgot pseudo's own pattern.
def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr), (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr), (LOADgot tconstpool:$addr)>;
1056
// In general these get lowered into a sequence of three 4-byte instructions.
// A 32-bit jump table destination is actually only 2 instructions, since the
// table itself can be used as a PC-relative base. But that optimization occurs
// after branch relaxation, so the size here is pessimistic.
let Size = 12,
    Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
    isNotDuplicable = 1 in {
def JumpTableDest32
    : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
      Sched<[]>;
def JumpTableDest16
    : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
      Sched<[]>;
def JumpTableDest8
    : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
      Sched<[]>;
}
1073
// Space-consuming pseudo that helps test placement and reachability
// algorithms. The immediate operand is the number of bytes this "instruction"
// occupies; the register operands can be used to enforce a dependency and to
// constrain the scheduler.
let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
def SPACE
    : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
             [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
      Sched<[]>;
1082
// Speculation-safe value pseudos for 64-bit (X) and 32-bit (W) registers.
// They have no selection pattern.
let hasSideEffects = 1, isCodeGenOnly = 1 in {
  def SpeculationSafeValueX : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>,
                              Sched<[]>;
  def SpeculationSafeValueW : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>,
                              Sched<[]>;
}
1089
// SpeculationBarrierEndBB must only be used after unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
  // Lowered to a pair of 4-byte instructions.
  let Size = 8 in
  def SpeculationBarrierISBDSBEndBB : Pseudo<(outs), (ins), []>, Sched<[]>;
  // Lowered to a single 4-byte instruction.
  let Size = 4 in
  def SpeculationBarrierSBEndBB : Pseudo<(outs), (ins), []>, Sched<[]>;
}
1102
1103//===----------------------------------------------------------------------===//
1104// System instructions.
1105//===----------------------------------------------------------------------===//
1106
1107def HINT : HintI<"hint">;
1108def : InstAlias<"nop",  (HINT 0b000)>;
1109def : InstAlias<"yield",(HINT 0b001)>;
1110def : InstAlias<"wfe",  (HINT 0b010)>;
1111def : InstAlias<"wfi",  (HINT 0b011)>;
1112def : InstAlias<"sev",  (HINT 0b100)>;
1113def : InstAlias<"sevl", (HINT 0b101)>;
1114def : InstAlias<"dgh",  (HINT 0b110)>;
1115def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
1116def : InstAlias<"csdb", (HINT 20)>;
1117// In order to be able to write readable assembly, LLVM should accept assembly
1118// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
1119// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1120// should not emit these mnemonics unless BTI is enabled.
1121def : InstAlias<"bti",  (HINT 32), 0>;
1122def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
1123def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
1124def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
1125
1126// v8.2a Statistical Profiling extension
1127def : InstAlias<"psb $op",  (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
1128
1129// As far as LLVM is concerned this writes to the system's exclusive monitors.
1130let mayLoad = 1, mayStore = 1 in
1131def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
1132
1133// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
1134// model patterns with sufficiently fine granularity.
1135let mayLoad = ?, mayStore = ? in {
1136def DMB   : CRmSystemI<barrier_op, 0b101, "dmb",
1137                       [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;
1138
1139def DSB   : CRmSystemI<barrier_op, 0b100, "dsb",
1140                       [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;
1141
1142def ISB   : CRmSystemI<barrier_op, 0b110, "isb",
1143                       [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
1144
1145def TSB   : CRmSystemI<barrier_op, 0b010, "tsb", []> {
1146  let CRm        = 0b0010;
1147  let Inst{12}   = 0;
1148  let Predicates = [HasTRACEV8_4];
1149}
1150
1151def DSBnXS  : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
1152  let CRm{1-0}   = 0b11;
1153  let Inst{9-8}  = 0b10;
1154  let Predicates = [HasXS];
1155}
1156
1157let Predicates = [HasWFxT] in {
1158def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
1159def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
1160}
1161
1162// Branch Record Buffer two-word mnemonic instructions
// BRBEI: operand-less "brb" instruction gated on HasBRBE.
//   op2     - 3-bit field placed in bits 7-5; bits 31-8 are fixed.
//   keyword - string passed after the "brb" mnemonic. The instantiations
//             below include the leading "\t" separator in this string
//             themselves.
1163class BRBEI<bits<3> op2, string keyword>
1164    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
1165  let Inst{31-8} = 0b110101010000100101110010;
1166  let Inst{7-5} = op2;
1167  let Predicates = [HasBRBE];
1168}
1169def BRB_IALL: BRBEI<0b100, "\tiall">;
1170def BRB_INJ:  BRBEI<0b101, "\tinj">;
1171
1172}
1173
1174// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
1175def : TokenAlias<"INJ", "inj">;
1176def : TokenAlias<"IALL", "iall">;
1177
1178
1179// ARMv9.4-A Guarded Control Stack
// GCSNoOp: Guarded Control Stack instruction with no operands and an empty
// operand string, gated on HasGCS.
//   op2      - 3-bit field placed in bits 7-5; bits 20-8 are fixed.
//   mnemonic - assembly mnemonic.
// No scheduling resources are listed (Sched<[]>).
1180class GCSNoOp<bits<3> op2, string mnemonic>
1181    : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> {
1182  let Inst{20-8} = 0b0100001110111;
1183  let Inst{7-5} = op2;
1184  let Predicates = [HasGCS];
1185}
1186def GCSPUSHX : GCSNoOp<0b100, "gcspushx">;
1187def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">;
1188def GCSPOPX  : GCSNoOp<0b110, "gcspopx">;
1189
// GCSRtIn: Guarded Control Stack instruction that consumes a single GPR64
// register $Rt and defines nothing; gated on HasGCS.
//   op1      - 3-bit field placed in bits 18-16.
//   op2      - 3-bit field placed in bits 7-5.
//   mnemonic - assembly mnemonic; the operand string is "\t$Rt".
//   pattern  - optional selection pattern, empty by default.
// Bits 20-19 and 15-8 are fixed.
1190class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
1191            list<dag> pattern = []>
1192    : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> {
1193  let Inst{20-19} = 0b01;
1194  let Inst{18-16} = op1;
1195  let Inst{15-8} = 0b01110111;
1196  let Inst{7-5} = op2;
1197  let Predicates = [HasGCS];
1198}
1199
1200def GCSSS1   : GCSRtIn<0b011, 0b010, "gcsss1">;
1201def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;
1202
// GCSRtOut: Guarded Control Stack instruction that defines a single GPR64
// register $Rt and takes no inputs; gated on HasGCS. It uses the same field
// layout as GCSRtIn (op1 in bits 18-16, op2 in bits 7-5, bits 20-19 and 15-8
// fixed) but passes 1 rather than 0 as the first RtSystemI argument.
// NOTE(review): that first argument is presumably the read/"L" bit of the
// system-instruction encoding -- confirm against RtSystemI.
1203class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
1204            list<dag> pattern = []>
1205    : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
1206  let Inst{20-19} = 0b01;
1207  let Inst{18-16} = op1;
1208  let Inst{15-8} = 0b01110111;
1209  let Inst{7-5} = op2;
1210  let Predicates = [HasGCS];
1211}
1212
1213def GCSSS2  : GCSRtOut<0b011, 0b011, "gcsss2">;
1214def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
1215def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
1216
1217def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
1218def GCSB_DSYNC         : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>;
1219
1220def : TokenAlias<"DSYNC", "dsync">;
1221
1222let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
1223  def CHKFEAT   : SystemNoOperands<0b000, "hint\t#40">;
1224}
1225def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
1226def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;
1227
// GCSSt: Guarded Control Stack instruction taking $Rt (GPR64) and $Rn
// (GPR64sp) as inputs, with no outputs, no selection pattern and no
// scheduling resources; gated on HasGCS.
//   mnemonic - assembly mnemonic; the operand string is "\t$Rt, $Rn".
//   op       - 3-bit field placed in bits 14-12.
// Encoding: bits 31-15 and 11-10 are fixed, Rn occupies bits 9-5 and Rt
// occupies bits 4-0.
1228class GCSSt<string mnemonic, bits<3> op>
1229    : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, $Rn", "", []>, Sched<[]> {
1230  bits<5> Rt;
1231  bits<5> Rn;
1232  let Inst{31-15} = 0b11011001000111110;
1233  let Inst{14-12} = op;
1234  let Inst{11-10} = 0b11;
1235  let Inst{9-5} = Rn;
1236  let Inst{4-0} = Rt;
1237  let Predicates = [HasGCS];
1238}
1239def GCSSTR  : GCSSt<"gcsstr",  0b000>;
1240def GCSSTTR : GCSSt<"gcssttr", 0b001>;
1241
1242
1243// ARMv8.2-A Dot Product
1244let Predicates = [HasDotProd] in {
1245defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
1246defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
1247defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
1248defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
1249}
1250
1251// ARMv8.6-A BFloat
1252let Predicates = [HasNEON, HasBF16] in {
1253defm BFDOT       : SIMDThreeSameVectorBFDot<1, "bfdot">;
1254defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
1255def BFMMLA       : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
1256def BFMLALB      : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
1257def BFMLALT      : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1258def BFMLALBIdx   : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
1259def BFMLALTIdx   : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1260def BFCVTN       : SIMD_BFCVTN;
1261def BFCVTN2      : SIMD_BFCVTN2;
1262
1263// Vector-scalar BFDOT:
1264// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
1265// register (the instruction uses a single 32-bit lane from it), so the pattern
1266// is a bit tricky.
1267def : Pat<(v2f32 (int_aarch64_neon_bfdot
1268                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
1269                    (v4bf16 (bitconvert
1270                      (v2i32 (AArch64duplane32
1271                        (v4i32 (bitconvert
1272                          (v8bf16 (insert_subvector undef,
1273                            (v4bf16 V64:$Rm),
1274                            (i64 0))))),
1275                        VectorIndexS:$idx)))))),
1276          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
1277                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
1278                             VectorIndexS:$idx)>;
1279}
1280
1281let Predicates = [HasNEONorSME, HasBF16] in {
1282def BFCVT : BF16ToSinglePrecision<"bfcvt">;
1283}
1284
1285// ARMv8.6A AArch64 matrix multiplication
1286let Predicates = [HasMatMulInt8] in {
1287def  SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
1288def  UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
1289def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
1290defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
1291defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;
1292
1293// sudot lane has a pattern where usdot is expected (there is no sudot).
1294// The second operand is used in the dup operation to repeat the indexed
1295// element.
1296class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
1297                         string rhs_kind, RegisterOperand RegType,
1298                         ValueType AccumType, ValueType InputType>
1299      : BaseSIMDThreeSameVectorIndexS<Q, 0, 0b00, 0b1111, "sudot", dst_kind,
1300                                        lhs_kind, rhs_kind, RegType, AccumType,
1301                                        InputType, null_frag> {
  // The base class is given null_frag, so the selection pattern is supplied
  // explicitly here. It matches int_aarch64_neon_usdot with the accumulator
  // $Rd first, the 32-bit lane $idx of $Rm (duplicated via AArch64duplane32
  // and bitconverted to InputType) second, and $Rn third.
1302  let Pattern = [(set (AccumType RegType:$dst),
1303                      (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
1304                                 (InputType (bitconvert (AccumType
1305                                    (AArch64duplane32 (v4i32 V128:$Rm),
1306                                        VectorIndexS:$idx)))),
1307                                 (InputType RegType:$Rn))))];
1308}
1309
1310multiclass SIMDSUDOTIndex {
1311  def v8i8  : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
1312  def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
1313}
1314
1315defm SUDOTlane : SIMDSUDOTIndex;
1316
1317}
1318
1319// ARMv8.2-A FP16 Fused Multiply-Add Long
1320let Predicates = [HasNEON, HasFP16FML] in {
1321defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
1322defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
1323defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
1324defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
1325defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
1326defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
1327defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
1328defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
1329}
1330
1331// Armv8.2-A Crypto extensions
1332let Predicates = [HasSHA3] in {
1333def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
1334def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
1335def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
1336def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
1337def RAX1      : CryptoRRR_2D<0b0,0b11, "rax1">;
1338def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
1339def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
1340def XAR       : CryptoRRRi6<"xar">;
1341
// SHA3_pattern: selects INST for a three-operand intrinsic OpNode whose
// result and all three operands ($Vd, $Vn, $Vm) are 128-bit vectors of type
// VecTy. Operands are forwarded to INST in the same order.
1342class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
1343  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
1344        (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;
1345
1346def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
1347          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1348
1349def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
1350def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
1351def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;
1352
1353def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
1354def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
1355def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
1356def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
1357
// EOR3_pattern: selects EOR3 for the generic DAG (xor (xor $Vn, $Vm), $Va) on
// 128-bit vectors of type VecTy, passing the operands as ($Vn, $Vm, $Va).
1358class EOR3_pattern<ValueType VecTy>
1359  : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
1360        (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
1361
1362def : EOR3_pattern<v16i8>;
1363def : EOR3_pattern<v8i16>;
1364def : EOR3_pattern<v4i32>;
1365def : EOR3_pattern<v2i64>;
1366
// BCAX_pattern: selects BCAX for the generic DAG
// (xor $Vn, (and $Vm, (vnot $Va))) on 128-bit vectors of type VecTy, passing
// the operands as ($Vn, $Vm, $Va).
1367class BCAX_pattern<ValueType VecTy>
1368  : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
1369        (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
1370
1371def : BCAX_pattern<v16i8>;
1372def : BCAX_pattern<v8i16>;
1373def : BCAX_pattern<v4i32>;
1374def : BCAX_pattern<v2i64>;
1375
1376def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
1377def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
1378def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
1379def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;
1380
1381def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
1382def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
1383def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
1384def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;
1385
1386def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
1387def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
1388def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
1389def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;
1390
1391def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
1392          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1393
1394def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
1395          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
1396
1397def : Pat<(xor  (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))),
1398          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1399
1400} // HasSHA3
1401
1402let Predicates = [HasSM4] in {
1403def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
1404def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
1405def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
1406def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
1407def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
1408def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
1409def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
1410def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
1411def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;
1412
1413def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
1414          (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;
1415
// SM3PARTW_pattern: selects INST for a three-operand v4i32 intrinsic OpNode,
// forwarding ($Vd, $Vn, $Vm) unchanged.
1416class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
1417  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
1418        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
1419
// SM3TT_pattern: selects INST for a v4i32 intrinsic OpNode taking three
// vector operands ($Vd, $Vn, $Vm) plus an i64 immediate $imm matched with
// VectorIndexS_timm; all four are forwarded to INST in order.
1420class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
1421  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )),
1422        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>;
1423
// SM4_pattern: selects INST for a two-operand v4i32 intrinsic OpNode,
// forwarding ($Vn, $Vm) unchanged.
1424class SM4_pattern<Instruction INST, Intrinsic OpNode>
1425  : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
1426        (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
1427
1428def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>;
1429def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>;
1430
1431def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>;
1432def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>;
1433def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>;
1434def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>;
1435
1436def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>;
1437def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>;
1438} // HasSM4
1439
1440let Predicates = [HasRCPC] in {
1441  // v8.3 Release Consistent Processor Consistent support, optional in v8.2.
1442  def LDAPRB  : RCPCLoad<0b00, "ldaprb", GPR32>;
1443  def LDAPRH  : RCPCLoad<0b01, "ldaprh", GPR32>;
1444  def LDAPRW  : RCPCLoad<0b10, "ldapr", GPR32>;
1445  def LDAPRX  : RCPCLoad<0b11, "ldapr", GPR64>;
1446}
1447
1448// v8.3a complex add and multiply-accumulate. No predicate here, that is done
1449// inside the multiclass as the FP16 versions need different predicates.
1450defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
1451                                               "fcmla", null_frag>;
1452defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
1453                                           "fcadd", null_frag>;
1454defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;
1455
1456let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
1457  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
1458            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
1459  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
1460            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
1461  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
1462            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
1463  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
1464            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
1465}
1466
1467let Predicates = [HasComplxNum, HasNEON] in {
1468  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
1469            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
1470  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
1471            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
1472  foreach Ty = [v4f32, v2f64] in {
1473    def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
1474              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
1475    def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
1476              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
1477  }
1478}
1479
// FCMLA_PATS: selection patterns for the vector (non-indexed) form.
//   ty  - vector value type; also the name suffix used to look up the
//         instruction record "FCMLA" # ty.
//   Reg - register operand class used for $Rd, $Rn and $Rm.
// The rot0 / rot90 / rot180 / rot270 intrinsics are selected to that
// instruction with a trailing immediate operand of 0 / 1 / 2 / 3 respectively.
1480multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> {
1481  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1482            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
1483  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1484            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
1485  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1486            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
1487  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1488            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
1489}
1490
// FCMLA_LANE_PATS: selection patterns for the indexed (by-lane) form.
//   ty     - vector value type; the instruction record looked up is
//            "FCMLA" # ty # "_indexed".
//   Reg    - register operand class used for $Rd and $Rn.
//   RHSDup - dag matched as the third intrinsic operand. The output dags
//            reference $Rm and $idx, so RHSDup is expected to bind both.
// The rot0 / rot90 / rot180 / rot270 intrinsics map to a trailing immediate
// operand of 0 / 1 / 2 / 3 respectively.
1491multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> {
1492  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1493            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
1494  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1495            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
1496  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1497            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
1498  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1499            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
1500}
1501
1502
1503let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
1504  defm : FCMLA_PATS<v4f16, V64>;
1505  defm : FCMLA_PATS<v8f16, V128>;
1506
1507  defm : FCMLA_LANE_PATS<v4f16, V64,
1508                         (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
1509  defm : FCMLA_LANE_PATS<v8f16, V128,
1510                         (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
1511}
1512let Predicates = [HasComplxNum, HasNEON] in {
1513  defm : FCMLA_PATS<v2f32, V64>;
1514  defm : FCMLA_PATS<v4f32, V128>;
1515  defm : FCMLA_PATS<v2f64, V128>;
1516
1517  defm : FCMLA_LANE_PATS<v4f32, V128,
1518                         (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
1519}
1520
1521// v8.3a Pointer Authentication
1522// These instructions inhabit part of the hint space and so can be used for
1523// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
1524// important for compatibility with other assemblers (e.g. GAS) when building
1525// software compatible with both CPUs that do or don't implement PA.
1526let Uses = [LR], Defs = [LR] in {
1527  def PACIAZ   : SystemNoOperands<0b000, "hint\t#24">;
1528  def PACIBZ   : SystemNoOperands<0b010, "hint\t#26">;
1529  let isAuthenticated = 1 in {
1530    def AUTIAZ   : SystemNoOperands<0b100, "hint\t#28">;
1531    def AUTIBZ   : SystemNoOperands<0b110, "hint\t#30">;
1532  }
1533}
1534let Uses = [LR, SP], Defs = [LR] in {
1535  def PACIASP  : SystemNoOperands<0b001, "hint\t#25">;
1536  def PACIBSP  : SystemNoOperands<0b011, "hint\t#27">;
1537  let isAuthenticated = 1 in {
1538    def AUTIASP  : SystemNoOperands<0b101, "hint\t#29">;
1539    def AUTIBSP  : SystemNoOperands<0b111, "hint\t#31">;
1540  }
1541}
1542let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
1543  def PACIA1716  : SystemNoOperands<0b000, "hint\t#8">;
1544  def PACIB1716  : SystemNoOperands<0b010, "hint\t#10">;
1545  let isAuthenticated = 1 in {
1546    def AUTIA1716  : SystemNoOperands<0b100, "hint\t#12">;
1547    def AUTIB1716  : SystemNoOperands<0b110, "hint\t#14">;
1548  }
1549}
1550
1551let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
1552  def XPACLRI   : SystemNoOperands<0b111, "hint\t#7">;
1553}
1554
1555// In order to be able to write readable assembly, LLVM should accept assembly
1556// inputs that use pointer authentication mnemonics, even with PA disabled.
1557// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1558// should not emit these mnemonics unless PA is enabled.
1559def : InstAlias<"paciaz", (PACIAZ), 0>;
1560def : InstAlias<"pacibz", (PACIBZ), 0>;
1561def : InstAlias<"autiaz", (AUTIAZ), 0>;
1562def : InstAlias<"autibz", (AUTIBZ), 0>;
1563def : InstAlias<"paciasp", (PACIASP), 0>;
1564def : InstAlias<"pacibsp", (PACIBSP), 0>;
1565def : InstAlias<"autiasp", (AUTIASP), 0>;
1566def : InstAlias<"autibsp", (AUTIBSP), 0>;
1567def : InstAlias<"pacia1716", (PACIA1716), 0>;
1568def : InstAlias<"pacib1716", (PACIB1716), 0>;
1569def : InstAlias<"autia1716", (AUTIA1716), 0>;
1570def : InstAlias<"autib1716", (AUTIB1716), 0>;
1571def : InstAlias<"xpaclri", (XPACLRI), 0>;
1572
1573// Pseudos
1574
1575let Uses = [LR, SP], Defs = [LR] in {
1576// Insertion point of LR signing code.
1577def PAUTH_PROLOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
1578// Insertion point of LR authentication code.
1579// The RET terminator of the containing machine basic block may be replaced
1580// with a combined RETA(A|B) instruction when rewriting this Pseudo.
1581def PAUTH_EPILOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
1582}
1583
1584// These pointer authentication instructions require armv8.3a
1585let Predicates = [HasPAuth] in {
1586
1587  // When PA is enabled, a better mnemonic should be emitted.
1588  def : InstAlias<"paciaz", (PACIAZ), 1>;
1589  def : InstAlias<"pacibz", (PACIBZ), 1>;
1590  def : InstAlias<"autiaz", (AUTIAZ), 1>;
1591  def : InstAlias<"autibz", (AUTIBZ), 1>;
1592  def : InstAlias<"paciasp", (PACIASP), 1>;
1593  def : InstAlias<"pacibsp", (PACIBSP), 1>;
1594  def : InstAlias<"autiasp", (AUTIASP), 1>;
1595  def : InstAlias<"autibsp", (AUTIBSP), 1>;
1596  def : InstAlias<"pacia1716", (PACIA1716), 1>;
1597  def : InstAlias<"pacib1716", (PACIB1716), 1>;
1598  def : InstAlias<"autia1716", (AUTIA1716), 1>;
1599  def : InstAlias<"autib1716", (AUTIB1716), 1>;
1600  def : InstAlias<"xpaclri", (XPACLRI), 1>;
1601
  // SignAuth: defines eight records per instantiation.
  //   prefix   - 3-bit value passed to the four SignAuthOneData records
  //              (IA, IB, DA, DB).
  //   prefix_z - 3-bit value passed to the four SignAuthZero records
  //              (IZA, DZA, IZB, DZB).
  //   asm      - mnemonic stem; each record's mnemonic is asm followed by the
  //              lower-case record suffix ("ia", "izb", ...).
  //   op       - pattern operator forwarded to every record.
  // The 2-bit second argument is 00/01/10/11 for the ia/ib/da/db suffixes
  // and, likewise, for the iza/izb/dza/dzb suffixes.
1602  multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm,
1603                      SDPatternOperator op> {
1604    def IA   : SignAuthOneData<prefix, 0b00, !strconcat(asm,  "ia"), op>;
1605    def IB   : SignAuthOneData<prefix, 0b01, !strconcat(asm,  "ib"), op>;
1606    def DA   : SignAuthOneData<prefix, 0b10, !strconcat(asm,  "da"), op>;
1607    def DB   : SignAuthOneData<prefix, 0b11, !strconcat(asm,  "db"), op>;
1608    def IZA  : SignAuthZero<prefix_z,  0b00, !strconcat(asm, "iza"), op>;
1609    def DZA  : SignAuthZero<prefix_z,  0b10, !strconcat(asm, "dza"), op>;
1610    def IZB  : SignAuthZero<prefix_z,  0b01, !strconcat(asm, "izb"), op>;
1611    def DZB  : SignAuthZero<prefix_z,  0b11, !strconcat(asm, "dzb"), op>;
1612  }
1613
1614  defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
1615  defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
1616
1617  def XPACI : ClearAuth<0, "xpaci">;
1618  def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>;
1619  def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>;
1620
1621  def XPACD : ClearAuth<1, "xpacd">;
1622  def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>;
1623  def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>;
1624
1625  def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>;
1626
1627  // Combined Instructions
1628  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1  in {
1629    def BRAA    : AuthBranchTwoOperands<0, 0, "braa">;
1630    def BRAB    : AuthBranchTwoOperands<0, 1, "brab">;
1631  }
1632  let isCall = 1, Defs = [LR], Uses = [SP] in {
1633    def BLRAA   : AuthBranchTwoOperands<1, 0, "blraa">;
1634    def BLRAB   : AuthBranchTwoOperands<1, 1, "blrab">;
1635  }
1636
1637  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1  in {
1638    def BRAAZ   : AuthOneOperand<0b000, 0, "braaz">;
1639    def BRABZ   : AuthOneOperand<0b000, 1, "brabz">;
1640  }
1641  let isCall = 1, Defs = [LR], Uses = [SP] in {
1642    def BLRAAZ  : AuthOneOperand<0b001, 0, "blraaz">;
1643    def BLRABZ  : AuthOneOperand<0b001, 1, "blrabz">;
1644  }
1645
1646  let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1647    def RETAA   : AuthReturn<0b010, 0, "retaa">;
1648    def RETAB   : AuthReturn<0b010, 1, "retab">;
1649    def ERETAA  : AuthReturn<0b100, 0, "eretaa">;
1650    def ERETAB  : AuthReturn<0b100, 1, "eretab">;
1651  }
1652
1653  defm LDRAA  : AuthLoad<0, "ldraa", simm10Scaled>;
1654  defm LDRAB  : AuthLoad<1, "ldrab", simm10Scaled>;
1655
1656}
1657
1658// v9.5-A pointer authentication extensions
1659
1660// Always accept "pacm" as an alias for "hint #39", but don't emit it when
1661// disassembling if we don't have the pauth-lr feature.
1662let CRm = 0b0100 in {
1663  def PACM : SystemNoOperands<0b111, "hint\t#39">;
1664}
1665def : InstAlias<"pacm", (PACM), 0>;
1666
1667let Predicates = [HasPAuthLR] in {
1668  let Defs = [LR], Uses = [LR, SP] in {
1669    //                                opcode2, opcode,   asm
1670    def PACIASPPC : SignAuthFixedRegs<0b00001, 0b101000, "paciasppc">;
1671    def PACIBSPPC : SignAuthFixedRegs<0b00001, 0b101001, "pacibsppc">;
1672    def PACNBIASPPC : SignAuthFixedRegs<0b00001, 0b100000, "pacnbiasppc">;
1673    def PACNBIBSPPC : SignAuthFixedRegs<0b00001, 0b100001, "pacnbibsppc">;
1674    //                             opc,  asm
1675    def AUTIASPPCi : SignAuthPCRel<0b00, "autiasppc">;
1676    def AUTIBSPPCi : SignAuthPCRel<0b01, "autibsppc">;
1677    //                              opcode2, opcode,   asm
1678    def AUTIASPPCr : SignAuthOneReg<0b00001, 0b100100, "autiasppc">;
1679    def AUTIBSPPCr : SignAuthOneReg<0b00001, 0b100101, "autibsppc">;
1680    //                                  opcode2, opcode,   asm
1681    def PACIA171615 : SignAuthFixedRegs<0b00001, 0b100010, "pacia171615">;
1682    def PACIB171615 : SignAuthFixedRegs<0b00001, 0b100011, "pacib171615">;
1683    def AUTIA171615 : SignAuthFixedRegs<0b00001, 0b101110, "autia171615">;
1684    def AUTIB171615 : SignAuthFixedRegs<0b00001, 0b101111, "autib171615">;
1685  }
1686
1687  let Uses = [LR, SP], isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1688    //                                   opc,   op2,     asm
1689    def RETAASPPCi : SignAuthReturnPCRel<0b000, 0b11111, "retaasppc">;
1690    def RETABSPPCi : SignAuthReturnPCRel<0b001, 0b11111, "retabsppc">;
1691    //                                 op3,      asm
1692    def RETAASPPCr : SignAuthReturnReg<0b000010, "retaasppc">;
1693    def RETABSPPCr : SignAuthReturnReg<0b000011, "retabsppc">;
1694  }
1695  def : InstAlias<"pacm", (PACM), 1>;
1696}
1697
1698
1699// v8.3a floating point conversion for javascript
1700let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
1701def FJCVTZS  : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
1702                                      "fjcvtzs",
1703                                      [(set GPR32:$Rd,
1704                                         (int_aarch64_fjcvtzs FPR64:$Rn))]> {
1705  let Inst{31} = 0;
1706} // HasJS, HasFPARMv8
1707
1708// v8.4 Flag manipulation instructions
// Every instruction in this group requires HasFlagM and is modelled as both
// reading and writing NZCV.
1709let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
// CFINV takes no operands; bits 20-5 of its encoding are fixed.
1710def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
1711  let Inst{20-5} = 0b0000001000000000;
1712}
// SETF8 and SETF16 each take a single GPR32 source and differ only in the
// second template argument (0 vs 1).
1713def SETF8  : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
1714def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
// RMIF takes a GPR64 source, a uimm6 immediate and an imm0_15 mask.
1715def RMIF   : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
1716                        "{\t$Rn, $imm, $mask}">;
1717} // HasFlagM
1718
1719// v8.5 flag manipulation instructions
// XAFLAG and AXFLAG require HasAltNZCV and are modelled as both reading and
// writing NZCV. Neither takes operands. Their encoding overrides are identical
// except for bits 7-5 (0b001 for XAFLAG, 0b010 for AXFLAG); bits 11-8 are set
// to zero and flagged in the Unpredictable mask.
1720let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
1721
1722def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
1723  let Inst{18-16} = 0b000;
1724  let Inst{11-8} = 0b0000;
1725  let Unpredictable{11-8} = 0b1111;
1726  let Inst{7-5} = 0b001;
1727}
1728
1729def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
1730  let Inst{18-16} = 0b000;
1731  let Inst{11-8} = 0b0000;
1732  let Unpredictable{11-8} = 0b1111;
1733  let Inst{7-5} = 0b010;
1734}
1735} // HasAltNZCV
1736
1737
1738// Armv8.5-A speculation barrier
// SB takes no operands and requires HasSB. Bits 20-5 of the encoding are
// fixed, bits 11-8 are flagged in the Unpredictable mask, and the instruction
// is marked as having side effects. No scheduling resources are listed.
1739def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
1740  let Inst{20-5} = 0b0001100110000111;
1741  let Unpredictable{11-8} = 0b1111;
1742  let Predicates = [HasSB];
1743  let hasSideEffects = 1;
1744}
1745
1746def : InstAlias<"clrex", (CLREX 0xf)>;
1747def : InstAlias<"isb", (ISB 0xf)>;
1748def : InstAlias<"ssbb", (DSB 0)>;
1749def : InstAlias<"pssbb", (DSB 4)>;
1750def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>;
1751
1752def MRS    : MRSI;
1753def MSR    : MSRI;
1754def MSRpstateImm1 : MSRpstateImm0_1;
1755def MSRpstateImm4 : MSRpstateImm0_15;
1756
1757def : Pat<(AArch64mrs imm:$id),
1758          (MRS imm:$id)>;
1759
1760// The thread pointer (on Linux, at least, where this has been implemented) is
1761// TPIDR_EL0.
// Pseudo with no inputs that is selected for the AArch64threadpointer node
// and defines a GPR64 result.
1762def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
1763                       [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
1764
1765// This gets lowered into a 24-byte instruction sequence
// KCFI_CHECK takes a GPR64 pointer and an i32 immediate type value, has no
// outputs and no selection pattern. X9, X16, X17 and NZCV are declared as
// implicitly defined, and the declared size is 24 bytes.
1766let Defs = [ X9, X16, X17, NZCV ], Size = 24 in {
1767def KCFI_CHECK : Pseudo<
1768  (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>;
1769}
1770
1771let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
// Selected from int_hwasan_check_memaccess when the intrinsic's first operand
// is the fixed register X9; $ptr is constrained to the GPR64noip class and
// $accessinfo is a target immediate. The pseudo has no outputs and lists no
// scheduling resources.
1772def HWASAN_CHECK_MEMACCESS : Pseudo<
1773  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
1774  [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
1775  Sched<[]>;
1776}
1777
1778let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
// Selected from int_hwasan_check_memaccess_shortgranules when the intrinsic's
// first operand is the fixed register X20; $ptr is constrained to the
// GPR64noip class and $accessinfo is a target immediate. The pseudo has no
// outputs and lists no scheduling resources.
1779def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
1780  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
1781  [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
1782  Sched<[]>;
1783}
1784
// readcyclecounter reads the virtual cycle counter register, CNTVCT_EL0,
// via MRS with operand 0xdf02.
def : Pat<(readcyclecounter), (MRS 0xdf02)>;

// FPCR access pseudos.  Each one is selected from the matching get/set
// intrinsic and expands to MRS / MSR with system-register operand 0xda20.
// The read pseudo lists FPCR in Uses, the write pseudo lists it in Defs.
let Uses = [FPCR] in
def MRS_FPCR
    : Pseudo<(outs GPR64:$dst), (ins),
             [(set GPR64:$dst, (int_aarch64_get_fpcr))]>,
      PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>,
      Sched<[WriteSys]>;

let Defs = [FPCR] in
def MSR_FPCR
    : Pseudo<(outs), (ins GPR64:$val),
             [(int_aarch64_set_fpcr i64:$val)]>,
      PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>,
      Sched<[WriteSys]>;
1799
// Generic system instructions: "sys" and "sysl".
def SYSxt  : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;

// Four-operand "sys" spelling: the register operand is filled in with XZR.
def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
                (SYSxt imm0_7:$op1,
                       sys_cr_op:$Cn, sys_cr_op:$Cm,
                       imm0_7:$op2, XZR)>;
1807
1808
// Transactional-memory instructions; everything in this scope requires HasTME.
let Predicates = [HasTME] in {

// tstart: result register is set from the tstart intrinsic.
def TSTART
    : TMSystemI<0b0000, "tstart", [(set GPR64:$Rt, (int_aarch64_tstart))]>;

// tcommit: no operands.
def TCOMMIT
    : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;

// tcancel: takes an immediate matched by timm64_0_65535.
def TCANCEL
    : TMSystemException<0b011, "tcancel",
                        [(int_aarch64_tcancel timm64_0_65535:$imm)]>;

// ttest: same format as tstart, but explicitly neither loads nor stores.
let mayLoad = 0, mayStore = 0 in
def TTEST
    : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]>;

} // HasTME
1824
1825//===----------------------------------------------------------------------===//
1826// Move immediate instructions.
1827//===----------------------------------------------------------------------===//
1828
// Wide move-immediate instructions.  MOVZ additionally runs the "fixMOVZ"
// post-encoder hook.
defm MOVK : InsertImmediate<0b11, "movk">;
defm MOVN : MoveImmediate<0b00, "movn">;
let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;

// Aliases without a shift operand: an implicit "lsl #0" is supplied.
// The movk forms pass 0 as the trailing InstAlias argument.
def : InstAlias<"movk $dst, $imm",
                (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>;
def : InstAlias<"movk $dst, $imm",
                (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>;
def : InstAlias<"movn $dst, $imm",
                (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm",
                (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm",
                (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm",
                (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
1842
// ELF relocation forms using the ":XYZ_g0:sym" syntax.  The symbol operand
// class (g0..g3) determines the shift that is supplied (0, 16, 32, 48).

// 64-bit destination: all four groups are available.
def : InstAlias<"movz $Rd, $sym",
                (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
def : InstAlias<"movz $Rd, $sym",
                (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
def : InstAlias<"movz $Rd, $sym",
                (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym",
                (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym",
                (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
def : InstAlias<"movn $Rd, $sym",
                (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
def : InstAlias<"movn $Rd, $sym",
                (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym",
                (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym",
                (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>;
def : InstAlias<"movk $Rd, $sym",
                (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>;
def : InstAlias<"movk $Rd, $sym",
                (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym",
                (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>;

// 32-bit destination: only groups g1 and g0.
def : InstAlias<"movz $Rd, $sym",
                (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym",
                (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym",
                (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym",
                (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym",
                (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym",
                (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>;
1867
// Final group of aliases: true "mov $Rd, $imm" cases.
//
// For one (instruction, register width, shift) combination this multiclass
// creates:
//   * an AsmOperandClass whose predicate and render method names are built
//     from basename, width and shift,
//   * an i32 Operand parsed through that class, and
//   * a "mov" InstAlias onto INST with the given fixed shift.
multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR,
                          int width, int shift> {
  def _asmoperand : AsmOperandClass {
    let Name            = basename # width # "_lsl" # shift # "MovAlias";
    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
                               # shift # ">";
    let RenderMethod    = "add" # basename # "MovAliasOperands<" # shift # ">";
  }

  // Operand that is matched through the asm operand class defined above.
  def _movimm : Operand<i32> {
    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
  }

  def : InstAlias<"mov $Rd, $imm",
                  (INST GPR:$Rd,
                        !cast<Operand>(NAME # "_movimm"):$imm,
                        shift)>;
}
1885
// "mov" aliases onto MOVZ: shifts 0/16 for 32-bit, 0/16/32/48 for 64-bit.
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32,  0>;
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64,  0>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;

// "mov" aliases onto MOVN, with the same width/shift combinations.
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32,  0>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64,  0>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;
1901
// Immediate-materialization pseudos.
// FIXME: These pseudo instructions exist only because rematerialization cannot
// handle multiple instructions.  Once it can, select directly to the real
// instructions and remove these pseudos.
let isCodeGenOnly = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
    isMoveImm = 1 in {

def MOVi32imm : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
                       [(set GPR32:$dst, imm:$src)]>,
                Sched<[WriteImm]>;

def MOVi64imm : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
                       [(set GPR64:$dst, imm:$src)]>,
                Sched<[WriteImm]>;

} // isReMaterializable, isCodeGenOnly
1917
// Where possible MOVi32imm is preferred even for 64-bit moves, so that the
// eventual expansion code has fewer bits to get right.  The helpers below do
// the type marshalling this requires.

// i64 immediate whose upper 32 bits are all zero.
def i64imm_32bit : ImmLeaf<i64, [{
  return (static_cast<uint64_t>(Imm) >> 32) == 0;
}]>;

// i64 immediate whose value lies within the int32_t range.
def s64imm_32bit : ImmLeaf<i64, [{
  const int64_t Value = static_cast<int64_t>(Imm);
  if (Value < std::numeric_limits<int32_t>::min())
    return false;
  return Value <= std::numeric_limits<int32_t>::max();
}]>;

// Re-emit the constant's zero-extended value as an i32 target constant.
def trunc_imm : SDNodeXForm<imm, [{
  SDLoc DL(N);
  return CurDAG->getTargetConstant(N->getZExtValue(), DL, MVT::i32);
}]>;

// GlobalISel counterpart of trunc_imm.
def gi_trunc_imm
    : GICustomOperandRenderer<"renderTruncImm">,
      GISDNodeXFormEquiv<trunc_imm>;
1937
// Materialize a 64-bit immediate that fits in 32 unsigned bits with MOVi32imm
// wrapped in SUBREG_TO_REG.  The SUBREG_TO_REG isn't eliminated at -O0, which
// can result in pointless copies, hence the predicate.
let Predicates = [OptimizedGISelOrOtherSelector] in
def : Pat<(i64 i64imm_32bit:$src),
          (SUBREG_TO_REG (i64 0),
                         (MOVi32imm (trunc_imm imm:$src)),
                         sub_32)>;
1944
// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).

// Bit pattern of the FP constant as an i32 target constant.
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
  uint64_t Bits = N->getValueAPF().bitcastToAPInt().getZExtValue();
  return CurDAG->getTargetConstant(Bits, SDLoc(N), MVT::i32);
}]>;

// Bit pattern of the FP constant as an i64 target constant.
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
  uint64_t Bits = N->getValueAPF().bitcastToAPInt().getZExtValue();
  return CurDAG->getTargetConstant(Bits, SDLoc(N), MVT::i64);
}]>;

// Move the integer bit pattern into a GPR, then copy it to the FP register
// class of the matching width.
def : Pat<(f32 fpimm:$in),
          (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)),
                            FPR32)>;
def : Pat<(f64 fpimm:$in),
          (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)),
                            FPR64)>;
1961
1962
// (ELF) large addressing: an AArch64WrapperLarge of four address pieces is
// built with one MOVZ (piece g0, shift 0) followed by three MOVKs (g1, g2, g3
// at shifts 16, 32, 48).  The same shape is emitted, in this order, for
// global addresses, block addresses, constant-pool entries and jump tables.
foreach addr = [tglobaladdr, tblockaddress, tconstpool, tjumptable] in
def : Pat<(AArch64WrapperLarge addr:$g3, addr:$g2, addr:$g1, addr:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi addr:$g0, 0),
                                  addr:$g1, 16),
                          addr:$g2, 32),
                  addr:$g3, 48)>;
1992
1993
1994//===----------------------------------------------------------------------===//
1995// Arithmetic instructions.
1996//===----------------------------------------------------------------------===//
1997
// Add / subtract with carry, each with a flag-setting variant.
defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;

// "ngc"/"ngcs" are SBC/SBCS with the zero register as the first source.
def : InstAlias<"ngc $dst, $src",
                (SBCWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngc $dst, $src",
                (SBCXr GPR64:$dst, XZR, GPR64:$src)>;
def : InstAlias<"ngcs $dst, $src",
                (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngcs $dst, $src",
                (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
2006
// Plain add / subtract.  Only ADD is given a selection node here.
defm ADD : AddSub<0, "add", "sub", add>;
defm SUB : AddSub<1, "sub", "add">;

// "mov" to or from the stack pointer is an ADD of immediate 0; one alias per
// direction (SP as destination, SP as source) and per register width.
def : InstAlias<"mov $dst, $src", (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src", (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src", (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src", (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;

// Flag-setting add / subtract, together with their compare spellings.
defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
2022
// Matches an i64 value that is a CopyFromReg whose register operand is SP.
def copyFromSP: PatLeaf<(i64 GPR64:$src), [{
  if (N->getOpcode() != ISD::CopyFromReg)
    return false;
  return cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP;
}]>;
2027
// Plain "sub" is selected to the flag-setting SUBS forms so that SUBS and SUB
// can be CSE'd.

// Shifted-immediate operand.
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
          (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
          (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;

// Register operand.
def : Pat<(sub GPR32:$Rn, GPR32:$Rm), (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(sub GPR64:$Rn, GPR64:$Rm), (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;

// Shifted-register operand.
def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
          (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
          (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;

// Extended-register operand, at slightly raised complexity.  The last pattern
// covers a value copied from SP and is selected to the non-flag-setting
// SUBXrx64.
let AddedComplexity = 1 in {
  def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
            (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
  def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
            (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
  def : Pat<(sub copyFromSP:$R2,
                 (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)),
            (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>;
}
2049
// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// The patterns below capture that transformation, first for the plain
// add/sub nodes and then for the flag-producing nodes.
let AddedComplexity = 1 in {
  // Plain nodes: add of a negated immediate -> SUBS, sub of one -> ADD.
  def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
            (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
  def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
            (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
  def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
            (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
  def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
            (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;

  // Flag-producing nodes: add_flag -> SUBS, sub_flag -> ADDS.
  def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
            (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
  def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
            (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
  def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
            (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
  def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
            (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}
2077
// "neg" is SUB (shifted register) with the zero register as first source.
// The unshifted spelling is given priority 3, the shifted spelling 2.
def : InstAlias<"neg $dst, $src",
                (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"neg $dst, $src",
                (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;

// "negs" is the same mapping onto SUBS.
def : InstAlias<"negs $dst, $src",
                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"negs $dst, $src",
                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
2091
2092
// Unsigned and signed integer divide.
defm UDIV : Div<0, "udiv", udiv>;
defm SDIV : Div<1, "sdiv", sdiv>;

// The divide intrinsics select to the same instructions.
def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm),
          (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm),
          (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm),
          (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm),
          (SDIVXr GPR64:$Rn, GPR64:$Rm)>;
2101
// Shift / rotate by a register amount.
defm ASRV : Shift<0b10, "asr", sra>;
defm LSLV : Shift<0b00, "lsl", shl>;
defm LSRV : Shift<0b01, "lsr", srl>;
defm RORV : Shift<0b11, "ror", rotr>;

// The "...v" mnemonics are aliases for the instructions above, one per
// register width.
def : ShiftAlias<"asrv", ASRVWr, GPR32>;
def : ShiftAlias<"asrv", ASRVXr, GPR64>;

def : ShiftAlias<"lslv", LSLVWr, GPR32>;
def : ShiftAlias<"lslv", LSLVXr, GPR64>;

def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
def : ShiftAlias<"lsrv", LSRVXr, GPR64>;

def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;
2116
// Multiply-add / multiply-subtract.  Everything in this scope gets
// AddedComplexity = 5.
let AddedComplexity = 5 in {

defm MADD : MulAccum<0, "madd">;
defm MSUB : MulAccum<1, "msub">;

// A plain multiply is MADD with the zero register as the addend.
def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;

// A negated product is MSUB with the zero register, whether the negation is
// applied to the product ...
def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
// ... or to the first factor.
def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;

} // AddedComplexity = 5
2136
// Widening (32x32 -> 64) multiply-accumulate instructions and the patterns
// that select them.  Everything in this scope gets AddedComplexity = 5.
let AddedComplexity = 5 in {

def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;

// --- Plain widening multiply (accumulator is XZR) -------------------------
// Signed: operands are sext_inreg-from-i32 values or explicit sext nodes.
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32),
                    (sext_inreg GPR64:$Rm, i32))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32),
                     (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
// Unsigned: operands are masked with 0xFFFFFFFF or explicit zext nodes.
def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF),
                    (and GPR64:$Rm, 0xFFFFFFFF))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32),
                     (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

// --- Negated widening multiply ---------------------------------------------
def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

// --- Multiply by a constant that fits in 32 bits ---------------------------
// The constant is truncated and materialized with MOVi32imm.
def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;

// Negated product with such a constant.
def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;

// Product with such a constant added to an accumulator.
def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
                    GPR64:$Ra)),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;

// Product with such a constant subtracted from an accumulator.
def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
                                    (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;

// --- smullwithsignbits: multiply / accumulate / negate / subtract ----------
def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32),
                     (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;

def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32),
                     (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)),
                    GPR64:$Ra)),
          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;

def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32),
                     (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;

def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32),
                     (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra,
                    (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;

// --- top32Zero operands: multiply / accumulate / negate / subtract ---------
def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32),
                     (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;

def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32),
                     (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)),
          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;

def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))),
          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32),
                     (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;

def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))),
          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32),
                     (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;

} // AddedComplexity = 5
2235
// Multiply mnemonics expressed as aliases of the multiply-accumulate
// instructions.
def : MulAccumWAlias<"mul",  MADDWrrr>;
def : MulAccumXAlias<"mul",  MADDXrrr>;
def : MulAccumWAlias<"mneg", MSUBWrrr>;
def : MulAccumXAlias<"mneg", MSUBXrrr>;

def : WideMulAccumAlias<"smull",  SMADDLrrr>;
def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
def : WideMulAccumAlias<"umull",  UMADDLrrr>;
def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;

// Multiply-high, signed and unsigned.
def SMULHrr : MulHi<0b010, "smulh", mulhs>;
def UMULHrr : MulHi<0b110, "umulh", mulhu>;
2248
// CRC32 family: byte / halfword / word forms take a GPR32 operand class, the
// doubleword form takes GPR64.
def CRC32Brr
    : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
def CRC32Hrr
    : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
def CRC32Wrr
    : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
def CRC32Xrr
    : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;

// CRC32C family: same layout with the third argument set to 1.
def CRC32CBrr
    : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
def CRC32CHrr
    : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
def CRC32CWrr
    : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
def CRC32CXrr
    : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
2259
// v8.1 atomics.  Each operation comes in four variants: no suffix, "a", "l"
// and "al"; the two leading bit arguments are set for "a" and "l"
// respectively.

// Compare and swap.
defm CAS    : CompareAndSwap<0, 0, "">;
defm CASA   : CompareAndSwap<1, 0, "a">;
defm CASL   : CompareAndSwap<0, 1, "l">;
defm CASAL  : CompareAndSwap<1, 1, "al">;

// Compare and swap pair.
defm CASP   : CompareAndSwapPair<0, 0, "">;
defm CASPA  : CompareAndSwapPair<1, 0, "a">;
defm CASPL  : CompareAndSwapPair<0, 1, "l">;
defm CASPAL : CompareAndSwapPair<1, 1, "al">;

// Swap.
defm SWP    : Swap<0, 0, "">;
defm SWPA   : Swap<1, 0, "a">;
defm SWPL   : Swap<0, 1, "l">;
defm SWPAL  : Swap<1, 1, "al">;
2277
// v8.1 atomic LD<OP>(register): performs a load and then ST<OP>(register).
// Each operation has four variants (no suffix, "a", "l", "al"); the 3-bit
// first argument distinguishes the operation.

// add (0b000)
defm LDADD    : LDOPregister<0b000, "add", 0, 0, "">;
defm LDADDA   : LDOPregister<0b000, "add", 1, 0, "a">;
defm LDADDL   : LDOPregister<0b000, "add", 0, 1, "l">;
defm LDADDAL  : LDOPregister<0b000, "add", 1, 1, "al">;

// clr (0b001)
defm LDCLR    : LDOPregister<0b001, "clr", 0, 0, "">;
defm LDCLRA   : LDOPregister<0b001, "clr", 1, 0, "a">;
defm LDCLRL   : LDOPregister<0b001, "clr", 0, 1, "l">;
defm LDCLRAL  : LDOPregister<0b001, "clr", 1, 1, "al">;

// eor (0b010)
defm LDEOR    : LDOPregister<0b010, "eor", 0, 0, "">;
defm LDEORA   : LDOPregister<0b010, "eor", 1, 0, "a">;
defm LDEORL   : LDOPregister<0b010, "eor", 0, 1, "l">;
defm LDEORAL  : LDOPregister<0b010, "eor", 1, 1, "al">;

// set (0b011)
defm LDSET    : LDOPregister<0b011, "set", 0, 0, "">;
defm LDSETA   : LDOPregister<0b011, "set", 1, 0, "a">;
defm LDSETL   : LDOPregister<0b011, "set", 0, 1, "l">;
defm LDSETAL  : LDOPregister<0b011, "set", 1, 1, "al">;

// smax (0b100)
defm LDSMAX   : LDOPregister<0b100, "smax", 0, 0, "">;
defm LDSMAXA  : LDOPregister<0b100, "smax", 1, 0, "a">;
defm LDSMAXL  : LDOPregister<0b100, "smax", 0, 1, "l">;
defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;

// smin (0b101)
defm LDSMIN   : LDOPregister<0b101, "smin", 0, 0, "">;
defm LDSMINA  : LDOPregister<0b101, "smin", 1, 0, "a">;
defm LDSMINL  : LDOPregister<0b101, "smin", 0, 1, "l">;
defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;

// umax (0b110)
defm LDUMAX   : LDOPregister<0b110, "umax", 0, 0, "">;
defm LDUMAXA  : LDOPregister<0b110, "umax", 1, 0, "a">;
defm LDUMAXL  : LDOPregister<0b110, "umax", 0, 1, "l">;
defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;

// umin (0b111)
defm LDUMIN   : LDOPregister<0b111, "umin", 0, 0, "">;
defm LDUMINA  : LDOPregister<0b111, "umin", 1, 0, "a">;
defm LDUMINL  : LDOPregister<0b111, "umin", 0, 1, "l">;
defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
2318
// v8.1 atomic ST<OP>(register): aliases of "LD<OP>(register)" with Rt = xZR.
// The second argument names the LD<OP> instruction family being aliased.
defm : STOPregister<"stadd",  "LDADD">;   // STADDx
defm : STOPregister<"stclr",  "LDCLR">;   // STCLRx
defm : STOPregister<"steor",  "LDEOR">;   // STEORx
defm : STOPregister<"stset",  "LDSET">;   // STSETx
defm : STOPregister<"stsmax", "LDSMAX">;  // STSMAXx
defm : STOPregister<"stsmin", "LDSMIN">;  // STSMINx
defm : STOPregister<"stumax", "LDUMAX">;  // STUMAXx
defm : STOPregister<"stumin", "LDUMIN">;  // STUMINx
2328
// v8.5 Memory Tagging Extension.  Everything in this scope requires HasMTE.
let Predicates = [HasMTE] in {

// --- Tag arithmetic --------------------------------------------------------
def IRG : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg",
                               int_aarch64_irg, GPR64sp, GPR64>,
          Sched<[]>;

// GMI is marked as not duplicable.
let isNotDuplicable = 1 in
def GMI : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi",
                               int_aarch64_gmi, GPR64sp>,
          Sched<[]>;

// ADDG / SUBG carry no selection fragment of their own (null_frag).
def ADDG : AddSubG<0, "addg", null_frag>;
def SUBG : AddSubG<1, "subg", null_frag>;

// Two-operand "irg": the third operand is XZR.
def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;

def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;
// SUBPS additionally defines NZCV.
let Defs = [NZCV] in
def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>;

// "cmpp" is SUBPS writing to XZR.
def : InstAlias<"cmpp $lhs, $rhs",
                (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;

// --- Tag loads -------------------------------------------------------------
def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;

def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6),
                            imm0_15:$imm4),
          (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>;
def : Pat<(int_aarch64_ldg GPR64:$Rt,
                           (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
          (LDG GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;

// "ldg" without an offset uses offset 0.
def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;

// --- Bulk tag load / store -------------------------------------------------
def LDGM  : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
                         (outs GPR64:$Rt), (ins GPR64sp:$Rn)>;
def STGM  : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",
                         (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>;
// STZGM has the same operands as STGM but encodes bit 23 as 0.
def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",
                         (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {
  let Inst{23} = 0;
}

// --- Tag stores ------------------------------------------------------------
defm STG   : MemTagStore<0b00, "stg">;
defm STZG  : MemTagStore<0b01, "stzg">;
defm ST2G  : MemTagStore<0b10, "st2g">;
defm STZ2G : MemTagStore<0b11, "stz2g">;

// Tag-store nodes with a base + scaled signed 9-bit offset address select the
// immediate-offset ("i") form of the matching instruction.
def : Pat<(AArch64stg GPR64sp:$Rn,
                      (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STGi $Rn, $Rm, $imm)>;
def : Pat<(AArch64stzg GPR64sp:$Rn,
                       (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STZGi $Rn, $Rm, $imm)>;
def : Pat<(AArch64st2g GPR64sp:$Rn,
                       (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (ST2Gi $Rn, $Rm, $imm)>;
def : Pat<(AArch64stz2g GPR64sp:$Rn,
                        (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STZ2Gi $Rn, $Rm, $imm)>;

// Store tag and pair: offset, pre-indexed and post-indexed forms.
defm STGP     : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
def  STGPpre  : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;

def : Pat<(int_aarch64_stg GPR64:$Rt,
                           (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
          (STGi GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;

def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm),
                            GPR64:$Rt, GPR64:$Rt2),
          (STGPi $Rt, $Rt2, $Rn, $imm)>;

// --- Stack tagging pseudos -------------------------------------------------
def IRGstack
    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
      Sched<[]>;
def TAGPstack
    : Pseudo<(outs GPR64sp:$Rd),
             (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4),
             []>,
      Sched<[]>;

// Explicit SP in the first operand prevents ShrinkWrap optimization
// from leaving this instruction out of the stack frame. When IRGstack
// is transformed into IRG, this operand is replaced with the actual
// register / expression for the tagged base pointer of the current function.
def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;

// --- Large tag stores expanded into a loop ---------------------------------
// $sz is the size and $Rn the start address.  $Rn_wback is one past the end
// of the range and $Rm is the loop counter.  All four pseudos are
// codegen-only, may store, and define NZCV.
let isCodeGenOnly = 1, mayStore = 1, Defs = [NZCV] in {

// Write-back forms: $Rn is tied to $Rn_wback; both outputs are earlyclobber.
def STGloop_wback
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback),
             (ins i64imm:$sz, GPR64sp:$Rn),
             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

def STZGloop_wback
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback),
             (ins i64imm:$sz, GPR64sp:$Rn),
             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

// Variants of the above in which $Rn2 is an independent register that is not
// tied to the input register $Rn.  They exist so that a FrameIndex operand
// (which of course cannot be written back) can be used as $Rn.
def STGloop
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2),
             (ins i64imm:$sz, GPR64sp:$Rn),
             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

def STZGloop
    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2),
             (ins i64imm:$sz, GPR64sp:$Rn),
             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
      Sched<[WriteAdr, WriteST]>;

} // isCodeGenOnly, mayStore, Defs = [NZCV]

} // Predicates = [HasMTE]
2433
2434//===----------------------------------------------------------------------===//
2435// Logical instructions.
2436//===----------------------------------------------------------------------===//
2437
2438// (immediate)
// Arguments: 2-bit opcode, mnemonic, the DAG node to select from, and the
// mnemonic of the complementary instruction ("bics", "bic", "eon", "orn").
// NOTE(review): the last argument is presumably used to accept the
// complementary mnemonic with an inverted immediate -- confirm in the
// LogicalImm / LogicalImmS multiclasses.
2439defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
2440defm AND  : LogicalImm<0b00, "and", and, "bic">;
2441defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
2442defm ORR  : LogicalImm<0b01, "orr", or, "orn">;
2443
2444// FIXME: these aliases *are* canonical sometimes (when movz can't be
2445// used). Actually, it seems to be working right now, but putting logical_immXX
2446// here is a bit dodgy on the AsmParser side too.
// "mov Rd, #imm" spelled as ORR of the zero register with a logical
// immediate. The trailing 0 is InstAlias's Emit argument, so these aliases
// are parse-only and are never chosen by the generated alias printer.
2447def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
2448                                          logical_imm32:$imm), 0>;
2449def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
2450                                          logical_imm64:$imm), 0>;
2451
2452
2453// (register)
// Arguments: 2-bit opcode, a 0/1 flag, mnemonic, and the DAG fragment to
// select from. The flag is 1 for exactly the variants whose fragment contains
// a `not` (BICS, BIC, EON, ORN) and 0 for the plain AND/ANDS/EOR/ORR forms.
// BIC passes one extra trailing argument (3); its meaning is defined by the
// LogicalReg multiclass, which is not visible here.
2454defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
2455defm BICS : LogicalRegS<0b11, 1, "bics",
2456                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
2457defm AND  : LogicalReg<0b00, 0, "and", and>;
2458defm BIC  : LogicalReg<0b00, 1, "bic",
2459                       BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>;
2460defm EON  : LogicalReg<0b10, 1, "eon",
2461                       BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
2462defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
2463defm ORN  : LogicalReg<0b01, 1, "orn",
2464                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
2465defm ORR  : LogicalReg<0b01, 0, "orr", or>;
2466
// Register-form aliases built on the zero register:
//   mov Rd, Rm       -> ORR  Rd, ZR, Rm
//   mvn Rd, Rm{, sh} -> ORN  Rd, ZR, Rm{, sh}
//   tst Rn, op2      -> ANDS ZR, Rn, op2   (immediate or register op2)
// The final integer is the InstAlias Emit priority. Where an unshifted and a
// shifted form share a mnemonic (mvn, tst), the unshifted form carries the
// higher value (3 vs 2).
2467def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
2468def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;
2469
2470def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
2471def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;
2472
2473def : InstAlias<"mvn $Wd, $Wm$sh",
2474                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
2475def : InstAlias<"mvn $Xd, $Xm$sh",
2476                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;
2477
2478def : InstAlias<"tst $src1, $src2",
2479                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
2480def : InstAlias<"tst $src1, $src2",
2481                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;
2482
2483def : InstAlias<"tst $src1, $src2",
2484                        (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
2485def : InstAlias<"tst $src1, $src2",
2486                        (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;
2487
2488def : InstAlias<"tst $src1, $src2$sh",
2489               (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
2490def : InstAlias<"tst $src1, $src2$sh",
2491               (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;
2492
2493
// Select a plain bitwise NOT as ORN with the zero register as first source.
2494def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
2495def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
2496
2497
2498//===----------------------------------------------------------------------===//
2499// One operand data processing instructions.
2500//===----------------------------------------------------------------------===//
2501
// CLS is instantiated without a DAG node; it is selected by the explicit
// patterns that follow. CLZ and RBIT select ctlz and bitreverse directly.
2502defm CLS    : OneOperandData<0b000101, "cls">;
2503defm CLZ    : OneOperandData<0b000100, "clz", ctlz>;
2504defm RBIT   : OneOperandData<0b000000, "rbit", bitreverse>;
2505
// The 32-bit REV16 selects a byte swap followed by a rotate right by 16.
// The 64-bit REV16 has no pattern attached here (null_frag).
2506def  REV16Wr : OneWRegData<0b000001, "rev16",
2507                                     UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
2508def  REV16Xr : OneXRegData<0b000001, "rev16", null_frag>;
2509
// Count trailing zeros: bit-reverse the input, then count leading zeros.
2510def : Pat<(cttz GPR32:$Rn),
2511          (CLZWr (RBITWr GPR32:$Rn))>;
2512def : Pat<(cttz GPR64:$Rn),
2513          (CLZXr (RBITXr GPR64:$Rn))>;
// Recognise the expanded idiom ctlz((((x >>s (N-1)) ^ x) << 1) | 1) and
// select it as a single CLS.
2514def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
2515                (i32 1))),
2516          (CLSWr GPR32:$Rn)>;
2517def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
2518                (i64 1))),
2519          (CLSXr GPR64:$Rn)>;
// Intrinsic forms. The 64-bit intrinsic's result is taken from the low 32
// bits (sub_32) of the 64-bit CLS result.
2520def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
2521def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;
2522
2523// Unlike the other one operand instructions, the instructions with the "rev"
2524// mnemonic do *not* just differ in the size bit, but actually use different
2525// opcode bits for the different sizes.
2526def REVWr   : OneWRegData<0b000010, "rev", bswap>;
2527def REVXr   : OneXRegData<0b000011, "rev", bswap>;
2528def REV32Xr : OneXRegData<0b000010, "rev32",
2529                                    UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
2530
// "rev64" is accepted as another spelling of the 64-bit REV. Emit = 0 makes
// it parse-only.
2531def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
2532
2533// The bswap commutes with the rotr so we want a pattern for both possible
2534// orders.
2535def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
2536def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
2537
2538// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
2539def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
2540def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
2541
// Shift-and-mask form that swaps the two bytes of every 16-bit lane of a
// 64-bit value; selected as the 64-bit REV16.
2542def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
2543              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
2544          (REV16Xr GPR64:$Rn)>;
2545
2546//===----------------------------------------------------------------------===//
2547// Bitfield immediate extraction instruction.
2548//===----------------------------------------------------------------------===//
// EXTR is declared free of side effects.
2549let hasSideEffects = 0 in
2550defm EXTR : ExtractImm<"extr">;
// Rotate-right by immediate is EXTR with the same register supplied for both
// sources; this holds for the "ror" assembly alias and for the rotr DAG node.
2551def : InstAlias<"ror $dst, $src, $shift",
2552            (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
2553def : InstAlias<"ror $dst, $src, $shift",
2554            (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
2555
2556def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
2557          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
2558def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
2559          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
2560
2561//===----------------------------------------------------------------------===//
2562// Other bitfield immediate instructions.
2563//===----------------------------------------------------------------------===//
// The three bitfield-move families, all declared free of side effects. BFM
// uses a different multiclass (BitfieldImmWith2RegArgs) from SBFM/UBFM.
// NOTE(review): presumably because BFM also reads its destination register --
// confirm in the multiclass definition.
2564let hasSideEffects = 0 in {
2565defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
2566defm SBFM : BitfieldImm<0b00, "sbfm">;
2567defm UBFM : BitfieldImm<0b10, "ubfm">;
2568}
2569
// (32 - shift_amt) mod 32, returned as an i64 target constant.
def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
  const uint64_t ShAmt = N->getZExtValue();
  return CurDAG->getTargetConstant((32 - ShAmt) & 0x1f, SDLoc(N), MVT::i64);
}]>;
2574
// 31 - shift_amt, returned as an i64 target constant.
def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
  const uint64_t ShAmt = N->getZExtValue();
  return CurDAG->getTargetConstant(31 - ShAmt, SDLoc(N), MVT::i64);
}]>;
2579
// 31 - shift_amt, clamped so that it never exceeds 7.
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t TopBit = 31 - N->getZExtValue();
  if (TopBit > 7)
    TopBit = 7;
  return CurDAG->getTargetConstant(TopBit, SDLoc(N), MVT::i64);
}]>;
2586
// 31 - shift_amt, clamped so that it never exceeds 15.
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t TopBit = 31 - N->getZExtValue();
  if (TopBit > 15)
    TopBit = 15;
  return CurDAG->getTargetConstant(TopBit, SDLoc(N), MVT::i64);
}]>;
2593
// (64 - shift_amt) mod 64, returned as an i64 target constant.
def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
  const uint64_t ShAmt = N->getZExtValue();
  return CurDAG->getTargetConstant((64 - ShAmt) & 0x3f, SDLoc(N), MVT::i64);
}]>;
2598
// 63 - shift_amt, returned as an i64 target constant.
def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
  const uint64_t ShAmt = N->getZExtValue();
  return CurDAG->getTargetConstant(63 - ShAmt, SDLoc(N), MVT::i64);
}]>;
2603
// 63 - shift_amt, clamped so that it never exceeds 7.
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t TopBit = 63 - N->getZExtValue();
  if (TopBit > 7)
    TopBit = 7;
  return CurDAG->getTargetConstant(TopBit, SDLoc(N), MVT::i64);
}]>;
2610
// 63 - shift_amt, clamped so that it never exceeds 15.
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t TopBit = 63 - N->getZExtValue();
  if (TopBit > 15)
    TopBit = 15;
  return CurDAG->getTargetConstant(TopBit, SDLoc(N), MVT::i64);
}]>;
2617
// 63 - shift_amt, clamped so that it never exceeds 31.
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t TopBit = 63 - N->getZExtValue();
  if (TopBit > 31)
    TopBit = 31;
  return CurDAG->getTargetConstant(TopBit, SDLoc(N), MVT::i64);
}]>;
2624
// Shift left by a constant c is selected as UBFM with the two immediates
// computed by the transforms above: (size - c) mod size and size - 1 - c.
2625def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
2626          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
2627                              (i64 (i32shift_b imm0_31:$imm)))>;
2628def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
2629          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
2630                              (i64 (i64shift_b imm0_63:$imm)))>;
2631
// Arithmetic shift right by a constant is SBFM with the shift amount as the
// first immediate and size - 1 as the second. AddedComplexity raises the
// priority of these patterns over competing ones.
2632let AddedComplexity = 10 in {
2633def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
2634          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
2635def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
2636          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
2637}
2638
// Assembly aliases expressed as SBFM: asr uses (shift, size - 1); the sign
// extensions sxtb/sxth/sxtw use (0, 7), (0, 15) and (0, 31).
2639def : InstAlias<"asr $dst, $src, $shift",
2640                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
2641def : InstAlias<"asr $dst, $src, $shift",
2642                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
2643def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
2644def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
2645def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
2646def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
2647def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
2648
// Logical shift right by a constant is UBFM with the shift amount as the
// first immediate and size - 1 as the second.
2649def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
2650          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
2651def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
2652          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
2653
// Assembly aliases expressed as UBFM: lsr uses (shift, size - 1); the zero
// extensions uxtb/uxth/uxtw use (0, 7), (0, 15) and (0, 31).
2654def : InstAlias<"lsr $dst, $src, $shift",
2655                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
2656def : InstAlias<"lsr $dst, $src, $shift",
2657                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
2658def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
2659def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
2660def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
2661def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
2662def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
2663
2664//===----------------------------------------------------------------------===//
2665// Conditional comparison instructions.
2666//===----------------------------------------------------------------------===//
// Arguments: a one-bit selector (0 for ccmn, 1 for ccmp), the mnemonic, and
// the target DAG node each family is selected from.
2667defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
2668defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
2669
2670//===----------------------------------------------------------------------===//
2671// Conditional select instructions.
2672//===----------------------------------------------------------------------===//
// Plain conditional select.
2673defm CSEL  : CondSelect<0, 0b00, "csel">;
2674
// Variants that take a unary fragment: inc (x + 1), not, and ineg.
// NOTE(review): presumably the fragment is applied to the operand chosen when
// the condition is false -- confirm in CondSelectOp.
2675def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
2676defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
2677defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
2678defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
2679
// Direct selection of the target-specific csinv / csneg / csinc nodes. The
// operands are forwarded unchanged; the NZCV input of the node is matched but
// not passed as an explicit operand.
2680def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2681          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2682def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2683          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2684def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2685          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2686def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2687          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2688def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2689          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2690def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2691          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2692
// csel with constant arms, avoiding materialising the constant:
//   a false arm of  1 becomes CSINC with the zero register as second source;
//   a false arm of -1 becomes CSINV with the zero register as second source.
// When the constant is the true arm instead, the register operand moves to
// the first source and the condition is inverted with inv_cond_XFORM.
2693def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
2694          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
2695def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
2696          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
2697def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
2698          (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
2699def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
2700          (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
2701def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
2702          (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2703def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
2704          (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2705def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
2706          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
2707def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
2708          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
2709def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
2710          (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
2711def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
2712          (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
2713def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
2714          (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2715def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
2716          (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2717
// Fold arithmetic on a 0/1 csel result into one conditional instruction.
// add val, (cc ? 0 : 1) becomes CSINC val, val, cc. The 64-bit form also
// looks through a zext of the 32-bit csel.
2718def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
2719          (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
2720def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
2721          (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;
2722
// or / and of the same csel with a value matched by topbitsallzero32/64
// (defined elsewhere in this file).
// NOTE(review): presumably that leaf only matches values known to be 0 or 1,
// which is what makes the CSINC / CSEL replacements exact -- confirm.
2723def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
2724          (CSINCWr GPR32:$val, WZR, imm:$cc)>;
2725def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
2726          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
2727def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
2728          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
2729
2730def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
2731          (CSELWr WZR, GPR32:$val, imm:$cc)>;
2732def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
2733          (CSELXr XZR, GPR64:$val, imm:$cc)>;
2734def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
2735          (CSELXr XZR, GPR64:$val, imm:$cc)>;
2736
2737// The inverse of the condition code from the alias instruction is what is used
2738// in the aliased instruction. The parser already inverts the condition code
2739// for these aliases.
// cset / csetm use the zero register for both sources of CSINC / CSINV;
// cinc / cinv / cneg pass the same source register twice to
// CSINC / CSINV / CSNEG.
2740def : InstAlias<"cset $dst, $cc",
2741                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
2742def : InstAlias<"cset $dst, $cc",
2743                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
2744
2745def : InstAlias<"csetm $dst, $cc",
2746                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
2747def : InstAlias<"csetm $dst, $cc",
2748                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
2749
2750def : InstAlias<"cinc $dst, $src, $cc",
2751                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
2752def : InstAlias<"cinc $dst, $src, $cc",
2753                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
2754
2755def : InstAlias<"cinv $dst, $src, $cc",
2756                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
2757def : InstAlias<"cinv $dst, $src, $cc",
2758                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
2759
2760def : InstAlias<"cneg $dst, $src, $cc",
2761                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
2762def : InstAlias<"cneg $dst, $src, $cc",
2763                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
2764
2765//===----------------------------------------------------------------------===//
2766// PC-relative instructions.
2767//===----------------------------------------------------------------------===//
// Both ADR and ADRP are rematerializable. Only ADR is additionally declared
// free of side effects and memory accesses; ADRP sits outside that inner
// `let`. The built-in patterns cover global addresses only.
2768let isReMaterializable = 1 in {
2769let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
2770def ADR  : ADRI<0, "adr", adrlabel,
2771                [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
2772} // hasSideEffects = 0
2773
2774def ADRP : ADRI<1, "adrp", adrplabel,
2775                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
2776} // isReMaterializable = 1
2777
2778// Address (ADR) or page address (ADRP) of a constant pool entry, block
// address or external symbol; ADR additionally handles jump tables.
2779def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
2780def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
2781def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
2782def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
2783def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
2784def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
2785def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
2786
2787//===----------------------------------------------------------------------===//
2788// Unconditional branch (register) instructions.
2789//===----------------------------------------------------------------------===//
2790
// Return-class instructions: each is a return, a terminator and a barrier.
// RET has no selection pattern here.
2791let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
2792def RET  : BranchReg<0b0010, "ret", []>;
2793def DRPS : SpecialReturn<0b0101, "drps">;
2794def ERET : SpecialReturn<0b0100, "eret">;
2795} // isReturn = 1, isTerminator = 1, isBarrier = 1
2796
2797// Default to the LR register.
2798def : InstAlias<"ret", (RET LR)>;
2799
// Indirect calls. Everything in this block is a call that defines LR and
// uses SP.
//  - BLR:          the real instruction, with no pattern attached here.
//  - BLRNoIP:      pseudo taking a GPR64noip operand, expanded to BLR.
//  - BLR_RVMARKER, BLR_BTI: pseudos with variable operands and no pattern;
//                  they are selected by the Pat records that follow.
//  - BLR_X16:      pseudo with no explicit operands; it also uses X16,
//                  selects AArch64call_arm64ec_to_x64 and expands to BLR X16.
2800let isCall = 1, Defs = [LR], Uses = [SP] in {
2801  def BLR : BranchReg<0b0001, "blr", []>;
2802  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
2803                Sched<[WriteBrReg]>,
2804                PseudoInstExpansion<(BLR GPR64:$Rn)>;
2805  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
2806                     Sched<[WriteBrReg]>;
2807  def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
2808                Sched<[WriteBrReg]>;
2809  let Uses = [X16, SP] in
2810  def BLR_X16 : Pseudo<(outs), (ins), [(AArch64call_arm64ec_to_x64 X16)]>,
2811                Sched<[WriteBrReg]>,
2812                PseudoInstExpansion<(BLR X16)>;
2813} // isCall
2814
// Indirect call selection depends on the SLS BLR mitigation predicates:
// without it a call uses BLR with any GPR64; with it the call uses BLRNoIP,
// whose operand is restricted to the GPR64noip class.
2815def : Pat<(AArch64call GPR64:$Rn),
2816          (BLR GPR64:$Rn)>,
2817      Requires<[NoSLSBLRMitigation]>;
2818def : Pat<(AArch64call GPR64noip:$Rn),
2819          (BLRNoIP GPR64noip:$Rn)>,
2820      Requires<[SLSBLRMitigation]>;
2821
// The rvmarker call is only given a pattern for the non-mitigated case here.
2822def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
2823          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
2824      Requires<[NoSLSBLRMitigation]>;
2825
// The BTI call always selects BLR_BTI; only the operand register class
// differs between the two predicates.
2826def : Pat<(AArch64call_bti GPR64:$Rn),
2827          (BLR_BTI GPR64:$Rn)>,
2828      Requires<[NoSLSBLRMitigation]>;
2829def : Pat<(AArch64call_bti GPR64noip:$Rn),
2830          (BLR_BTI GPR64noip:$Rn)>,
2831      Requires<[SLSBLRMitigation]>;
2832
// Indirect branch through a register; selected from the generic brind node.
2833let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
2834def BR  : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
2835} // isBranch, isTerminator, isBarrier, isIndirectBranch
2836
2837// Create a separate pseudo-instruction for codegen to use so that we don't
2838// flag lr as used in every function. It'll be restored before the RET by the
2839// epilogue if it's legitimately used.
// It has no explicit operands and is selected from AArch64retglue.
2840def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
2841                   Sched<[WriteBrReg]> {
2842  let isTerminator = 1;
2843  let isBarrier = 1;
2844  let isReturn = 1;
2845}
2846
2847// This is a directive-like pseudo-instruction. The purpose is to insert an
2848// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
2849// (which in the usual case is a BLR).
// It is marked as having side effects and prints as ".tlsdesccall <sym>".
2850let hasSideEffects = 1 in
2851def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
2852  let AsmString = ".tlsdesccall $sym";
2853}
2854
2855// Pseudo instruction to tell the streamer to emit a 'B' character into the
2856// augmentation string. It has no operands, pattern or scheduling resources.
2857def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
2858
2859// Pseudo instruction to tell the streamer to emit a 'G' character into the
2860// augmentation string. It has no operands, pattern or scheduling resources.
2861def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}
2862
2863// FIXME: maybe the scratch register used shouldn't be fixed to X1?
2864// FIXME: can "hasSideEffects" be dropped?
2865// This gets lowered to an instruction sequence which takes 16 bytes
// (hence Size = 16). It is modelled as a call that defines NZCV, LR, X0 and
// X1, and is selected for a global TLS address operand.
2866let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
2867    isCodeGenOnly = 1 in
2868def TLSDESC_CALLSEQ
2869    : Pseudo<(outs), (ins i64imm:$sym),
2870             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
2871      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
// Same selection when the operand is an external symbol.
2872def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
2873          (TLSDESC_CALLSEQ texternalsym:$sym)>;
2874
2875//===----------------------------------------------------------------------===//
2876// Conditional branch (immediate) instruction.
2877//===----------------------------------------------------------------------===//
// Ordinary conditional branch: BranchCond with its first argument set to 0.
2878def Bcc : BranchCond<0, "b">;
2879
2880// Armv8.8-A variant form which hints to the branch predictor that
2881// this branch is very likely to go the same way nearly all the time
2882// (even though it is not known at compile time _which_ way that is).
// It uses the same class with the first argument set to 1 and requires HasHBC.
2883def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;
2884
2885//===----------------------------------------------------------------------===//
2886// Compare-and-branch instructions.
2887//===----------------------------------------------------------------------===//
// Arguments: a one-bit selector (0 for cbz, 1 for cbnz), the mnemonic, and
// the target DAG node each family is selected from.
2888defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
2889defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;
2890
2891//===----------------------------------------------------------------------===//
2892// Test-bit-and-branch instructions.
2893//===----------------------------------------------------------------------===//
// Arguments: a one-bit selector (0 for tbz, 1 for tbnz), the mnemonic, and
// the target DAG node each family is selected from.
2894defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
2895defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;
2896
2897//===----------------------------------------------------------------------===//
2898// Unconditional branch (immediate) instructions.
2899//===----------------------------------------------------------------------===//
// Direct unconditional branch to a basic block.
2900let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
2901def B  : BranchImm<0, "b", [(br bb:$addr)]>;
2902} // isBranch, isTerminator, isBarrier
2903
// Direct call: defines LR and uses SP. The built-in pattern handles global
// addresses; the separate Pat adds external symbols.
2904let isCall = 1, Defs = [LR], Uses = [SP] in {
2905def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
2906} // isCall
2907def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;
2908
2909//===----------------------------------------------------------------------===//
2910// Exception generation instructions.
2911//===----------------------------------------------------------------------===//
// Only BRK is marked isTrap and carries a selection pattern (the
// int_aarch64_break intrinsic with a timm32_0_65535 operand). The remaining
// instructions are assembler/disassembler-only here. DCPS3 and SMC
// additionally require HasEL3.
2912let isTrap = 1 in {
2913def BRK   : ExceptionGeneration<0b001, 0b00, "brk",
2914                                [(int_aarch64_break timm32_0_65535:$imm)]>;
2915}
2916def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
2917def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
2918def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
2919def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
2920def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
2921def SMC   : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
2922def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;
2923
2924// DCPSn defaults to an immediate operand of zero if unspecified.
2925def : InstAlias<"dcps1", (DCPS1 0)>;
2926def : InstAlias<"dcps2", (DCPS2 0)>;
2927def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;
2928
// UDF is defined through its own class (UDFType), not ExceptionGeneration.
2929def UDF : UDFType<0, "udf">;
2930
2931//===----------------------------------------------------------------------===//
2932// Load instructions.
2933//===----------------------------------------------------------------------===//
2934
2935// Pair (indexed, offset)
// Arguments: 2-bit opcode, a 0/1 flag that is 1 for the FPR forms, the
// register operand class, the scaled 7-bit signed offset operand, and the
// mnemonic. The FPR forms are only available with HasFPARMv8.
2936defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
2937defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
2938let Predicates = [HasFPARMv8] in {
2939defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
2940defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
2941defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;
2942}
2943
// LDPSW pairs 64-bit destination registers with the 4-byte-scaled offset.
2944defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;
2945
2946// Pair (pre-indexed)
// Same argument layout as the offset forms above; the FPR forms again
// require HasFPARMv8.
2947def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
2948def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
2949let Predicates = [HasFPARMv8] in {
2950def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
2951def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
2952def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
2953}
2954
2955def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
2956
2957// Pair (post-indexed)
// Same argument layout as the offset forms above; the FPR forms again
// require HasFPARMv8.
2958def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
2959def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
2960let Predicates = [HasFPARMv8] in {
2961def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
2962def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
2963def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
2964}
2965
2966def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
2967
2968
2969// Pair (no allocate)
// Same argument layout as the other pair loads. No sign-extending-word
// variant is defined in this group.
2970defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
2971defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
2972let Predicates = [HasFPARMv8] in {
2973defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
2974defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
2975defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
2976}
2977
// Select the target ldp node, addressed as base plus 8-byte-scaled offset,
// to the 64-bit GPR pair load.
2978def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
2979          (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;
2980
// Select the target ldnp node, addressed as base plus 16-byte-scaled offset,
// to the 128-bit no-allocate pair load.
2981def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
2982          (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>;
2983//---
2984// (register offset)
2985//---
2986
2987// Integer
// Arguments: size field, a 0/1 flag that is 1 for the FPR forms, opcode
// field, destination register class, mnemonic, result value type, and the
// load fragment to select from. Byte and halfword GPR loads select the
// zero-extending load fragments.
2988defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
2989defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
2990defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
2991defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;
2992
2993// Floating-point
// Only available with HasFPARMv8. All use the plain "ldr" mnemonic and the
// plain `load` fragment.
2994let Predicates = [HasFPARMv8] in {
2995defm LDRB : Load8RO<0b00,   1, 0b01, FPR8Op,   "ldr", i8, load>;
2996defm LDRH : Load16RO<0b01,  1, 0b01, FPR16Op,  "ldr", f16, load>;
2997defm LDRS : Load32RO<0b10,  1, 0b01, FPR32Op,  "ldr", f32, load>;
2998defm LDRD : Load64RO<0b11,  1, 0b01, FPR64Op,  "ldr", f64, load>;
2999defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;
3000}
3001
3002// Load sign-extended half-word
// The W and X forms of each sign-extending load differ in the opcode field
// (0b11 vs 0b10) and in the destination class and result type.
3003defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
3004defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;
3005
3006// Load sign-extended byte
3007defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
3008defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;
3009
3010// Load sign-extended word
3011defm LDRSW  : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
3012
3013// Pre-fetch.
3014defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
3015
3016// For regular load, we do not have any alignment requirement.
3017// Thus, it is safe to directly map the vector loads with interesting
3018// addressing modes.
3019// FIXME: We could do the same for bitconvert to floating point vectors.
//
// ScalToVecROLoadPat selects scalar_to_vector of a register-offset load as
// the load itself, inserted into an otherwise undefined vector register.
//   ro           - addressing-mode record supplying Wpat/Wext and Xpat/Xext
//   loadop       - load fragment to match
//   ScalTy       - scalar type produced by the load
//   VecTy        - resulting vector type
//   LOADW, LOADX - instructions for a 32-bit and a 64-bit offset register
//   sub          - subregister index the loaded value is inserted at
3020multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
3021                              ValueType ScalTy, ValueType VecTy,
3022                              Instruction LOADW, Instruction LOADX,
3023                              SubRegIndex sub> {
3024  def : Pat<(VecTy (scalar_to_vector (ScalTy
3025              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
3026            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
3027                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
3028                           sub)>;
3029
3030  def : Pat<(VecTy (scalar_to_vector (ScalTy
3031              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
3032            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
3033                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
3034                           sub)>;
3035}
3036
// Instantiations for each element size. The subregister index follows the
// loaded width: bsub, hsub, ssub, dsub. Integer byte/halfword elements match
// the any-extending load fragments with an i32 scalar type.
3037let AddedComplexity = 10 in {
3038defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
3039defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;
3040
3041defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
3042defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;
3043
3044defm : ScalToVecROLoadPat<ro16, load,       i32, v4f16, LDRHroW, LDRHroX, hsub>;
3045defm : ScalToVecROLoadPat<ro16, load,       i32, v8f16, LDRHroW, LDRHroX, hsub>;
3046
3047defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
3048defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;
3049
3050defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
3051defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;
3052
3053defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;
3054
3055defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;
3056
3057
// v1i64 is written out by hand: the 64-bit load result is used directly as
// the vector, with no INSERT_SUBREG.
3058def : Pat <(v1i64 (scalar_to_vector (i64
3059                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
3060                                           ro_Wextend64:$extend))))),
3061           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
3062
3063def : Pat <(v1i64 (scalar_to_vector (i64
3064                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
3065                                           ro_Xextend64:$extend))))),
3066           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
3067}
3068
3069// Match all 64-bit-wide loads whose type is compatible with FPR64.
//
// VecROLoadPat selects a plain `load` of VecTy through a register-offset
// address directly to the given load instruction.
//   ro           - addressing-mode record supplying Wpat/Wext and Xpat/Xext
//   VecTy        - value type being loaded
//   LOADW, LOADX - instructions for a 32-bit and a 64-bit offset register
3070multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
3071                        Instruction LOADW, Instruction LOADX> {
3072
3073  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
3074            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
3075
3076  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
3077            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
3078}
3079
// 64-bit vector types. Multi-element types are restricted to little-endian;
// the single-element v1i64 / v1f64 types are not.
3080let AddedComplexity = 10 in {
3081let Predicates = [IsLE] in {
3082  // We must do vector loads with LD1 in big-endian.
3083  defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
3084  defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
3085  defm : VecROLoadPat<ro64, v8i8,  LDRDroW, LDRDroX>;
3086  defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
3087  defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
3088  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
3089}
3090
3091defm : VecROLoadPat<ro64, v1i64,  LDRDroW, LDRDroX>;
3092defm : VecROLoadPat<ro64, v1f64,  LDRDroW, LDRDroX>;
3093
3094// Match all 128-bit-wide loads whose type is compatible with FPR128.
3095let Predicates = [IsLE] in {
3096  // We must do vector loads with LD1 in big-endian.
3097  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
3098  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
3099  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
3100  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
3101  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
3102  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
3103  defm : VecROLoadPat<ro128, v8bf16,  LDRQroW, LDRQroX>;
3104  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
3105}
3106} // AddedComplexity = 10
3107
// zextload -> i64
//
// ExtLoadTo64ROPat selects an extending register-offset load `loadop` that
// produces an i64. The load itself is performed by INSTW (32-bit index
// register) or INSTX (64-bit index register); its result is then placed in
// the sub_32 subregister of the i64 value through SUBREG_TO_REG.
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  // 32-bit (W) index register.
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG
                (i64 0),
                (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                sub_32)>;

  // 64-bit (X) index register.
  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG
                (i64 0),
                (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                sub_32)>;
}
3121
// i64 results of zero-extending and any-extending register-offset loads,
// grouped by access width. Any-extending loads (extload) are selected to the
// same instruction as the zero-extending load of that width, and the i1
// variants are selected to the byte load.
let AddedComplexity = 10 in {
  // Byte access: zextloadi8, zextloadi1, extloadi8, extloadi1.
  foreach LoadOp = [zextloadi8, zextloadi1, extloadi8, extloadi1] in
    defm : ExtLoadTo64ROPat<ro8, LoadOp, LDRBBroW, LDRBBroX>;

  // Half-word access: zextloadi16, extloadi16.
  foreach LoadOp = [zextloadi16, extloadi16] in
    defm : ExtLoadTo64ROPat<ro16, LoadOp, LDRHHroW, LDRHHroX>;

  // Word access: zextloadi32, extloadi32.
  foreach LoadOp = [zextloadi32, extloadi32] in
    defm : ExtLoadTo64ROPat<ro32, LoadOp, LDRWroW, LDRWroX>;
}
3138
3139
// extload/zextload -> i32
// ExtLoadTo32ROPat selects an extending register-offset load `loadop` that
// produces an i32 directly to INSTW (32-bit index register) or INSTX (64-bit
// index register); no subregister wrapping is applied to the result.
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  // 32-bit (W) index register.
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                  ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  // 64-bit (X) index register.
  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                  ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
3150
// i32 results of register-offset extending loads, grouped by access width.
let AddedComplexity = 10 in {
  // Byte access: extloadi8 -> zextload, zextloadi1 -> zextloadi8.
  foreach LoadOp = [extloadi8, zextloadi1] in
    defm : ExtLoadTo32ROPat<ro8, LoadOp, LDRBBroW, LDRBBroX>;

  // Half-word and word access: extload -> zextload.
  defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;
}
3160
3161//---
3162// (unsigned immediate)
3163//---
// General-purpose register loads (64-bit and 32-bit destination).
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
    [(set GPR64z:$Rt,
          (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
    [(set GPR32z:$Rt,
          (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;

// FP/SIMD register loads (8-bit through 128-bit destination); these are only
// defined under the HasFPARMv8 predicate.
let Predicates = [HasFPARMv8] in {
  defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
      [(set FPR8Op:$Rt,
            (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
  defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
      [(set (f16 FPR16Op:$Rt),
            (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
  defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
      [(set (f32 FPR32Op:$Rt),
            (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
  defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
      [(set (f64 FPR64Op:$Rt),
            (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
  defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
      [(set (f128 FPR128Op:$Rt),
            (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
}
3187
// bf16 load pattern: a bf16 load through the 16-bit indexed addressing mode
// is selected to LDRHui, the same instruction that carries the f16 pattern.
def : Pat<(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
3191
// Regular loads carry no alignment requirement, so it is safe to map a
// scalar load feeding scalar_to_vector directly onto an FP/SIMD register
// load that uses the immediate-offset addressing mode. Where the loaded
// register is narrower than the vector, it is inserted into an IMPLICIT_DEF
// vector at the matching subregister index.
// FIXME: We could do the same for bitconvert to floating point vectors.

// Byte element (loaded as an any-extended i32).
foreach VT = [v8i8, v16i8] in
  def : Pat<(VT (scalar_to_vector
                    (i32 (extloadi8 (am_indexed8 GPR64sp:$Rn,
                                                 uimm12s1:$offset))))),
            (INSERT_SUBREG (VT (IMPLICIT_DEF)),
                           (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;

// Half-word element (loaded as an any-extended i32).
foreach VT = [v4i16, v8i16] in
  def : Pat<(VT (scalar_to_vector
                    (i32 (extloadi16 (am_indexed16 GPR64sp:$Rn,
                                                   uimm12s2:$offset))))),
            (INSERT_SUBREG (VT (IMPLICIT_DEF)),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;

// Word element.
foreach VT = [v2i32, v4i32] in
  def : Pat<(VT (scalar_to_vector
                    (i32 (load (am_indexed32 GPR64sp:$Rn,
                                             uimm12s4:$offset))))),
            (INSERT_SUBREG (VT (IMPLICIT_DEF)),
                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;

// Double-word element: v1i64 is the loaded register itself, v2i64 needs the
// insertion at dsub.
def : Pat<(v1i64 (scalar_to_vector
                     (i64 (load (am_indexed64 GPR64sp:$Rn,
                                              uimm12s8:$offset))))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v2i64 (scalar_to_vector
                     (i64 (load (am_indexed64 GPR64sp:$Rn,
                                              uimm12s8:$offset))))),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                         (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
3227
// Match all loads of 64-bit width whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // Multi-element vectors: little-endian only, because we must use LD1 to
  // perform vector loads in big-endian.
  foreach VT = [v2f32, v8i8, v4i16, v2i32, v4f16, v4bf16] in
    def : Pat<(VT (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
              (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
// Single-element vectors are matched without the endianness predicate.
foreach VT = [v1f64, v1i64] in
  def : Pat<(VT (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3248
// Match all loads of 128-bit width whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // Vector types: little-endian only, because we must use LD1 to perform
  // vector loads in big-endian.
  foreach VT = [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16] in
    def : Pat<(VT (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
              (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
// The scalar f128 load is matched without the endianness predicate.
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3271
// Zero-extending half-word and byte loads into a 32-bit register.
defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
    [(set GPR32:$Rt,
          (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
    [(set GPR32:$Rt,
          (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
// zextload -> i64: perform the 32-bit zero-extending load and place its
// result in the sub_32 subregister of the i64 value.
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload, i32 results.
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
foreach LoadOp = [extloadi8, extloadi1] in
  def : Pat<(i32 (LoadOp (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
            (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;

// extload -> zextload, i64 results.
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
foreach LoadOp = [extloadi8, extloadi1] in
  def : Pat<(i64 (LoadOp (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
            (SUBREG_TO_REG (i64 0),
                           (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
3307
// Sign-extending half-word loads, into a 32-bit (W) or 64-bit (X) register.
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
    [(set GPR32:$Rt,
          (sextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
    [(set GPR64:$Rt,
          (sextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;

// Sign-extending byte loads, into a 32-bit (W) or 64-bit (X) register.
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
    [(set GPR32:$Rt,
          (sextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
    [(set GPR64:$Rt,
          (sextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;

// Sign-extending word load into a 64-bit register.
defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
    [(set GPR64:$Rt,
          (sextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;

// Zero-extending word load into a 64-bit register: use the 32-bit load and
// place its result in the sub_32 subregister of the i64 value.
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
3337
// Pre-fetch using the 64-bit indexed addressing mode.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
    [(AArch64Prefetch timm:$Rt,
                      (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;

// Assembly form without an explicit offset; it is encoded with offset 0.
def : InstAlias<"prfm $Rt, [$Rn]",
                (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
3345
3346//---
3347// (literal)
3348
// Pattern leaf that accepts a label operand only when it is known to be
// 4-byte aligned:
//   * a global address whose pointer alignment is at least 4 and whose
//     offset is a multiple of 4, or
//   * a constant-pool entry whose alignment is at least 4 and whose offset is
//     a multiple of 4.
// Any other node kind is rejected.
def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
  if (auto *GA = dyn_cast<GlobalAddressSDNode>(N)) {
    const DataLayout &Layout = MF->getDataLayout();
    const Align PtrAlign = GA->getGlobal()->getPointerAlignment(Layout);
    return PtrAlign >= 4 && GA->getOffset() % 4 == 0;
  }
  if (auto *CP = dyn_cast<ConstantPoolSDNode>(N))
    return CP->getAlign() >= 4 && CP->getOffset() % 4 == 0;
  return false;
}]>;
3359
// Literal loads into general-purpose registers. Each pattern only matches an
// AArch64adr of a label accepted by alignedglobal.
def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
    [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
    [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;

// Literal loads into FP/SIMD registers; only defined under HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
      [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
  def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
      [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
  def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
      [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
}

// Sign-extending word literal load into a 64-bit register.
def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
    [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;

// Zero-extending word literal load into a 64-bit register: use the 32-bit
// literal load and place its result in the sub_32 subregister.
let AddedComplexity = 20 in {
  def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
            (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}
3381
// Literal prefetch. The instruction is defined with an empty selection
// pattern list; the pattern below is intentionally left commented out.
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;
3385
3386//---
3387// (unscaled immediate)
// General-purpose register loads (64-bit and 32-bit destination) with a
// signed 9-bit offset.
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
    [(set GPR64z:$Rt,
          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
    [(set GPR32z:$Rt,
          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// FP/SIMD register loads (8-bit through 128-bit destination); only defined
// under the HasFPARMv8 predicate.
let Predicates = [HasFPARMv8] in {
  defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
      [(set FPR8Op:$Rt,
            (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
  defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
      [(set (f16 FPR16Op:$Rt),
            (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
  defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
      [(set (f32 FPR32Op:$Rt),
            (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
  defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
      [(set (f64 FPR64Op:$Rt),
            (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
  defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
      [(set (f128 FPR128Op:$Rt),
            (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
}
3411
// Zero-extending half-word load into a 32-bit register, signed 9-bit offset.
defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
             [(set GPR32:$Rt,
                    (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
// Zero-extending byte load into a 32-bit register, signed 9-bit offset.
// The address is matched with the byte-sized addressing mode (am_unscaled8),
// consistent with every other byte-sized unscaled load pattern in this file;
// the half-word mode (am_unscaled16) does not describe a byte access.
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
             [(set GPR32:$Rt,
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
3420
// bf16 load pattern: a bf16 load through the 16-bit unscaled addressing mode
// is selected to LDURHi, the same instruction that carries the f16 pattern.
def : Pat<(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHi GPR64sp:$Rn, simm9:$offset)>;
3424
// Match all loads of 64-bit width whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // Multi-element vectors are restricted to little-endian, like the scaled
  // and register-offset forms of the same loads. v4bf16 is included so that
  // this list covers the same types as those forms.
  foreach VT = [v2f32, v2i32, v4i16, v8i8, v4f16, v4bf16] in
    def : Pat<(VT (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
              (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
// Single-element vectors are matched without the endianness predicate.
foreach VT = [v1f64, v1i64] in
  def : Pat<(VT (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3442
// Match all loads of 128-bit width whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // Vector types are restricted to little-endian, like the scaled and
  // register-offset forms of the same loads. v8bf16 is included so that this
  // list covers the same types as those forms.
  foreach VT = [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8, v8f16, v8bf16] in
    def : Pat<(VT (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
              (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}
3460
// anyext -> zext, i32 results.
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
foreach LoadOp = [extloadi8, extloadi1] in
  def : Pat<(i32 (LoadOp (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
            (LDURBBi GPR64sp:$Rn, simm9:$offset)>;

// anyext -> zext, i64 results: the 32-bit load result is placed in the
// sub_32 subregister of the i64 value.
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
foreach LoadOp = [extloadi8, extloadi1] in
  def : Pat<(i64 (LoadOp (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
            (SUBREG_TO_REG (i64 0),
                           (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;

// unscaled zext, i32 results.
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
foreach LoadOp = [zextloadi8, zextloadi1] in
  def : Pat<(i32 (LoadOp (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
            (LDURBBi GPR64sp:$Rn, simm9:$offset)>;

// unscaled zext, i64 results.
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
foreach LoadOp = [zextloadi8, zextloadi1] in
  def : Pat<(i64 (LoadOp (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
            (SUBREG_TO_REG (i64 0),
                           (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;

3491
3492
3493//---
3494// LDR mnemonics fall back to LDUR for negative or unaligned offsets.
3495
3496// Define new assembler match classes as we want to only match these when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
3498// associate a DiagnosticType either, as we want the diagnostic for the
3499// canonical form (the scaled operand) to take precedence.
// Assembler operand class for the LDR/STR -> LDUR/STUR fallback offset of an
// access that is Width bits wide. Width is pasted into both the class name
// and the template argument of the predicate method; the operand is rendered
// with addImmOperands.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name            = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod    = "addImmOperands";
}
3505
// One assembler operand class per access width (8 to 128 bits).
def SImm9OffsetFB8Operand   : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand  : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand  : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand  : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

// The matching i64 operands; each one is parsed through the operand class of
// the same width.
def simm9_offset_fb8   : Operand<i64> { let ParserMatchClass = SImm9OffsetFB8Operand; }
def simm9_offset_fb16  : Operand<i64> { let ParserMatchClass = SImm9OffsetFB16Operand; }
def simm9_offset_fb32  : Operand<i64> { let ParserMatchClass = SImm9OffsetFB32Operand; }
def simm9_offset_fb64  : Operand<i64> { let ParserMatchClass = SImm9OffsetFB64Operand; }
def simm9_offset_fb128 : Operand<i64> { let ParserMatchClass = SImm9OffsetFB128Operand; }
3527
// "ldr" spelled with a fallback offset is accepted as the corresponding LDUR
// instruction. The trailing 0 is passed as the last InstAlias argument for
// every alias.

// General-purpose destination registers.
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
    (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
    (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// FP/SIMD destination registers.
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
    (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
    (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
    (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
    (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
    (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
3542
// zextload -> i64 for the unscaled byte and half-word loads.
// NOTE(review): the same two source/result pairs also appear in the
// "unscaled zext" pattern group earlier in this file.
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3548
// Sign-extending half-word loads, into a 32-bit (W) or 64-bit (X) register.
defm LDURSHW : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
    [(set GPR32:$Rt,
          (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
    [(set GPR64:$Rt,
          (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// Sign-extending byte loads, into a 32-bit (W) or 64-bit (X) register.
defm LDURSBW : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
    [(set GPR32:$Rt,
          (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
    [(set GPR64:$Rt,
          (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// Sign-extending word load into a 64-bit register.
defm LDURSW : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
    [(set GPR64:$Rt,
          (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
3574
// Zero- and sign-extending aliases from the generic LDR* mnemonics to the
// corresponding LDUR* instructions, using the fallback offset operands.

// Zero-extending byte / half-word.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
    (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
    (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;

// Sign-extending byte, 32-bit and 64-bit destination.
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
    (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
    (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;

// Sign-extending half-word, 32-bit and 64-bit destination.
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
    (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
    (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;

// Sign-extending word.
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
    (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
3590
// A LDR will implicitly zero the rest of the vector, so
// vector_insert(zeros, load, 0) can use a single load.
//
// For each of the full vector type VT, the half-vector type HVT and the SVE
// type SVT (in that order) this multiclass emits two patterns: one matching
// the scaled address (Addr with an AddrImm offset, selected to LoadInst) and
// one matching the unscaled address (UnscaledAddr with a simm9 offset,
// selected to UnscaledLoadInst). In both cases the loaded register is placed
// at SubReg through SUBREG_TO_REG.
multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
                                  ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
                                  ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm,
                                  SubRegIndex SubReg> {
  foreach VecTy = [VT, HVT, SVT] in {
    // Scaled
    def : Pat<(vector_insert
                  (VecTy immAllZerosV),
                  (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))),
                  (i64 0)),
              (SUBREG_TO_REG (i64 0),
                             (LoadInst GPR64sp:$Rn, AddrImm:$offset),
                             SubReg)>;
    // Unscaled
    def : Pat<(vector_insert
                  (VecTy immAllZerosV),
                  (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))),
                  (i64 0)),
              (SUBREG_TO_REG (i64 0),
                             (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset),
                             SubReg)>;
  }
}
3624
// Integer elements. Byte and half-word elements are loaded as any-extended
// i32 scalars; word and double-word elements use a plain load.
defm : LoadInsertZeroPatterns<extloadi8, v16i8, v8i8, nxv16i8, i32,
                              LDRBui, LDURBi,
                              am_indexed8, am_unscaled8, uimm12s1, bsub>;
defm : LoadInsertZeroPatterns<extloadi16, v8i16, v4i16, nxv8i16, i32,
                              LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load, v4i32, v2i32, nxv4i32, i32,
                              LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load, v2i64, v1i64, nxv2i64, i64,
                              LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;

// Floating-point elements (f16, bf16, f32, f64), all using a plain load.
defm : LoadInsertZeroPatterns<load, v8f16, v4f16, nxv8f16, f16,
                              LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load, v8bf16, v4bf16, nxv8bf16, bf16,
                              LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load, v4f32, v2f32, nxv4f32, f32,
                              LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load, v2f64, v1f64, nxv2f64, f64,
                              LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;
3641
// Pre-fetch using the 64-bit unscaled addressing mode.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
    [(AArch64Prefetch timm:$Rt,
                      (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
3646
3647//---
3648// (unscaled immediate, unprivileged)
// Register-width loads (64-bit and 32-bit destination).
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

// Half-word and byte loads into a 32-bit register.
defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// Sign-extending half-word loads, 32-bit and 64-bit destination.
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// Sign-extending byte loads, 32-bit and 64-bit destination.
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// Sign-extending word load into a 64-bit register.
defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;
3665
3666//---
3667// (immediate pre-indexed)
// General-purpose register loads (32-bit and 64-bit destination).
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;

// FP/SIMD register loads; only defined under the HasFPARMv8 predicate.
let Predicates = [HasFPARMv8] in {
  def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
  def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
  def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
  def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
  def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}

// Sign-extending half-word loads, 32-bit and 64-bit destination.
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// Sign-extending byte loads, 32-bit and 64-bit destination.
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// Zero-extending byte ("ldrb") and half-word ("ldrh") loads.
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// Sign-extending word load into a 64-bit register.
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
3692
3693//---
3694// (immediate post-indexed)
// General-purpose register loads (32-bit and 64-bit destination).
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;

// FP/SIMD register loads; only defined under the HasFPARMv8 predicate.
let Predicates = [HasFPARMv8] in {
  def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
  def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
  def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
  def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
  def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}

// Sign-extending half-word loads, 32-bit and 64-bit destination.
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// Sign-extending byte loads, 32-bit and 64-bit destination.
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// Zero-extending byte ("ldrb") and half-word ("ldrh") loads.
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// Sign-extending word load into a 64-bit register.
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
3719
3720//===----------------------------------------------------------------------===//
3721// Store instructions.
3722//===----------------------------------------------------------------------===//
3723
3724// Pair (indexed, offset)
3725// FIXME: Use dedicated range-checked addressing mode operand here.
3726defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
3727defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
3728let Predicates = [HasFPARMv8] in {
3729defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
3730defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
3731defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
3732}
3733
3734// Pair (pre-indexed)
3735def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
3736def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
3737let Predicates = [HasFPARMv8] in {
3738def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
3739def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
3740def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
3741}
3742
3743// Pair (post-indexed)
3744def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
3745def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
3746let Predicates = [HasFPARMv8] in {
3747def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
3748def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
3749def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
3750}
3751
3752// Pair (no allocate)
3753defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
3754defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
3755let Predicates = [HasFPARMv8] in {
3756defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
3757defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
3758defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
3759}
3760
3761def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
3762          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;
3763
3764def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
3765          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;
3766
3767
//---
// (Register offset)

// Integer stores. The byte and half-word forms select truncating stores of
// an i32 value; the word and double-word forms select plain stores.
defm STRBB : Store8RO <0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;


// Floating-point stores; only available with HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  defm STRB : Store8RO  <0b00, 1, 0b00, FPR8Op,   "str", i8,  store>;
  defm STRH : Store16RO <0b01, 1, 0b00, FPR16Op,  "str", f16, store>;
  defm STRS : Store32RO <0b10, 1, 0b00, FPR32Op,  "str", f32, store>;
  defm STRD : Store64RO <0b11, 1, 0b00, FPR64Op,  "str", f64, store>;
  defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
}
3786
// f128 register-offset stores, selected only when UseSTRQro holds.
// One pattern per index register width: a 32-bit (W) index with its extend
// operand, and a 64-bit (X) index with its extend operand.
let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                        ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  // The result names $extend with the same X-form operand class that the
  // source pattern binds it with (previously written as ro_Wextend128).
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                        ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}
3797
// Truncating register-offset store of a 64-bit GPR value.
//   ro      - register-offset addressing-mode bundle (Wpat/Xpat/Wext/Xext).
//   storeop - truncating store operator to match.
//   STRW    - instruction to emit when the index register is 32-bit.
//   STRX    - instruction to emit when the index register is 64-bit.
// Both patterns store the sub_32 sub-register of the 64-bit source.
multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {
  // 32-bit index register.
  def : Pat<(storeop GPR64:$Rt,
              (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
              GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  // 64-bit index register.
  def : Pat<(storeop GPR64:$Rt,
              (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
              GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
3811
// i64 truncating stores to 8, 16 and 32 bits with register-offset addressing.
let AddedComplexity = 10 in {
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}
3818
// Register-offset store of a whole vector value.
//   ro    - register-offset addressing-mode bundle (Wpat/Xpat/Wext/Xext).
//   VecTy - vector value type being stored.
//   FPR   - register class holding the value.
//   STRW  - instruction for a 32-bit index register.
//   STRX  - instruction for a 64-bit index register.
multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  // 32-bit index register.
  def : Pat<(store (VecTy FPR:$Rt),
              (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  // 64-bit index register.
  def : Pat<(store (VecTy FPR:$Rt),
              (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
3829
let AddedComplexity = 10 in {
  // Match all store 64 bits width whose type is compatible with FPR64.
  // Multi-element vectors are little-endian only: we must use ST1 to store
  // vectors in big-endian.
  let Predicates = [IsLE] in {
    foreach VT = [v2i32, v2f32, v4i16, v8i8, v4f16, v4bf16] in
      defm : VecROStorePat<ro64, VT, FPR64, STRDroW, STRDroX>;
  }

  // Single-element 64-bit vectors carry no endianness predicate.
  foreach VT = [v1i64, v1f64] in
    defm : VecROStorePat<ro64, VT, FPR64, STRDroW, STRDroX>;

  // Match all store 128 bits width whose type is compatible with FPR128.
  // Little-endian only (ST1 is required in big-endian), and only when
  // UseSTRQro holds.
  let Predicates = [IsLE, UseSTRQro] in {
    foreach VT = [v2i64, v2f64, v4i32, v4f32, v8i16, v16i8, v8f16, v8bf16] in
      defm : VecROStorePat<ro128, VT, FPR128, STRQroW, STRQroX>;
  }
} // AddedComplexity = 10
3858
// Match stores from lane 0 to the appropriate subreg's store
// (register-offset addressing).
//   ro        - register-offset addressing-mode bundle.
//   storeop   - store operator to match.
//   VecTy     - 128-bit vector type the element is extracted from.
//   STy       - type of the extracted (stored) scalar.
//   SubRegTy  - type given to the extracted sub-register.
//   SubRegIdx - sub-register index that corresponds to lane 0.
//   STRW/STRX - instructions for a 32-bit / 64-bit index register.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
                              ValueType VecTy, ValueType STy,
                              ValueType SubRegTy,
                              SubRegIndex SubRegIdx,
                              Instruction STRW, Instruction STRX> {
  // 32-bit index register.
  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
              (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
              GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  // 64-bit index register.
  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
              (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
              GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
3876
// Lane-0 stores with register-offset addressing for 16-, 32- and 64-bit
// elements, given a higher complexity than the generic store patterns.
let AddedComplexity = 19 in {
  // 16-bit elements (hsub).
  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro16, store,         v8f16, f16, f16, hsub, STRHroW, STRHroX>;
  // 32-bit elements (ssub).
  defm : VecROStoreLane0Pat<ro32, store,         v4i32, i32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro32, store,         v4f32, f32, i32, ssub, STRSroW, STRSroX>;
  // 64-bit elements (dsub).
  defm : VecROStoreLane0Pat<ro64, store,         v2i64, i64, i64, dsub, STRDroW, STRDroX>;
  defm : VecROStoreLane0Pat<ro64, store,         v2f64, f64, i64, dsub, STRDroW, STRDroX>;
}
3885
//---
// (unsigned immediate)

// 64- and 32-bit general-purpose register stores.
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
              [(store GPR64z:$Rt,
                      (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
              [(store GPR32z:$Rt,
                      (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;

// FP/SIMD register stores; only available with HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
                [(store FPR8Op:$Rt,
                        (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
  defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
                [(store (f16 FPR16Op:$Rt),
                        (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
  defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
                [(store (f32 FPR32Op:$Rt),
                        (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
  defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
                [(store (f64 FPR64Op:$Rt),
                        (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
  // STRQ is defined with an empty pattern list here.
  defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;
}

// Truncating half-word and byte stores from a 32-bit register.
defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
               [(truncstorei16 GPR32z:$Rt,
                               (am_indexed16 GPR64sp:$Rn,
                                             uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
               [(truncstorei8 GPR32z:$Rt,
                              (am_indexed8 GPR64sp:$Rn,
                                           uimm12s1:$offset))]>;
3918
// bf16 store pattern: a scaled-offset bf16 store is selected to STRHui, the
// same instruction the f16 pattern on STRH uses.
def : Pat<(store (bf16 FPR16Op:$Rt),
            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;
3923
let AddedComplexity = 10 in {

// Match all store 64 bits width whose type is compatible with FPR64.
// Single-element vectors carry no endianness predicate.
foreach VT = [v1i64, v1f64] in
  def : Pat<(store (VT FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  foreach VT = [v2f32, v8i8, v4i16, v2i32, v4f16, v4bf16] in
    def : Pat<(store (VT FPR64:$Rt),
                     (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
              (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Match all store 128 bits width whose type is compatible with FPR128.
// The scalar f128 store carries no endianness predicate.
def : Pat<(store (f128 FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  foreach VT = [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16] in
    def : Pat<(store (VT FPR128:$Rt),
                     (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
              (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}

// truncstore i64: store the sub_32 sub-register with the narrower store.
def : Pat<(truncstorei32 GPR64:$Rt,
            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
          (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32),
            GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32),
            GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt,
            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
          (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32),
            GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10
4000
// Match stores from lane 0 to the appropriate subreg's store
// (immediate-offset addressing).
//   UIAddrMode - address ComplexPattern yielding a base and an offset.
//   storeop    - store operator to match.
//   VTy        - 128-bit vector type the element is extracted from.
//   STy        - type of the extracted (stored) scalar.
//   SubRegTy   - type given to the extracted sub-register.
//   SubRegIdx  - sub-register index that corresponds to lane 0.
//   IndexType  - operand class of the immediate offset.
//   STR        - store instruction to emit.
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
                            ValueType VTy, ValueType STy,
                            ValueType SubRegTy,
                            SubRegIndex SubRegIdx, Operand IndexType,
                            Instruction STR> {
  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), (i64 0))),
              (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
            (STR (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
              GPR64sp:$Rn, IndexType:$offset)>;
}
4012
// Lane-0 stores with scaled unsigned-immediate addressing for 16-, 32- and
// 64-bit elements, given a higher complexity than the generic store patterns.
let AddedComplexity = 19 in {
  // 16-bit elements (hsub).
  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed16, store,         v8f16, f16, f16, hsub, uimm12s2, STRHui>;
  // 32-bit elements (ssub).
  defm : VecStoreLane0Pat<am_indexed32, store,         v4i32, i32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed32, store,         v4f32, f32, i32, ssub, uimm12s4, STRSui>;
  // 64-bit elements (dsub).
  defm : VecStoreLane0Pat<am_indexed64, store,         v2i64, i64, i64, dsub, uimm12s8, STRDui>;
  defm : VecStoreLane0Pat<am_indexed64, store,         v2f64, f64, i64, dsub, uimm12s8, STRDui>;
}
4021
//---
// (unscaled immediate)

// 64- and 32-bit general-purpose register stores.
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
               [(store GPR64z:$Rt,
                       (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
               [(store GPR32z:$Rt,
                       (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;

// FP/SIMD register stores; only available with HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
                 [(store FPR8Op:$Rt,
                         (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
  defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
                 [(store (f16 FPR16Op:$Rt),
                         (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
  defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
                 [(store (f32 FPR32Op:$Rt),
                         (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
  defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
                 [(store (f64 FPR64Op:$Rt),
                         (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
  defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
                 [(store (f128 FPR128Op:$Rt),
                         (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
}

// Truncating half-word and byte stores from a 32-bit register.
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
                [(truncstorei16 GPR32z:$Rt,
                                (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
                [(truncstorei8 GPR32z:$Rt,
                               (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
4053
// bf16 store pattern: an unscaled-offset bf16 store is selected to STURHi,
// the same instruction the f16 pattern on STURH uses.
def : Pat<(store (bf16 FPR16Op:$Rt),
            (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4058
// Armv8.4 Weaker Release Consistency enhancements
//         LDAPR & STLR with Immediate Offset instructions
// Everything in this group is gated on HasRCPC_IMMO.
let Predicates = [HasRCPC_IMMO] in {
  // Stores (stlur*).
  defm STLURB    : BaseStoreUnscaleV84<"stlurb",   0b00, 0b00, GPR32>;
  defm STLURH    : BaseStoreUnscaleV84<"stlurh",   0b01, 0b00, GPR32>;
  defm STLURW    : BaseStoreUnscaleV84<"stlur",    0b10, 0b00, GPR32>;
  defm STLURX    : BaseStoreUnscaleV84<"stlur",    0b11, 0b00, GPR64>;
  // Loads (ldapur*), including the sign-extending "s" forms.
  defm LDAPURB   : BaseLoadUnscaleV84 <"ldapurb",  0b00, 0b01, GPR32>;
  defm LDAPURSBW : BaseLoadUnscaleV84 <"ldapursb", 0b00, 0b11, GPR32>;
  defm LDAPURSBX : BaseLoadUnscaleV84 <"ldapursb", 0b00, 0b10, GPR64>;
  defm LDAPURH   : BaseLoadUnscaleV84 <"ldapurh",  0b01, 0b01, GPR32>;
  defm LDAPURSHW : BaseLoadUnscaleV84 <"ldapursh", 0b01, 0b11, GPR32>;
  defm LDAPURSHX : BaseLoadUnscaleV84 <"ldapursh", 0b01, 0b10, GPR64>;
  defm LDAPUR    : BaseLoadUnscaleV84 <"ldapur",   0b10, 0b01, GPR32>;
  defm LDAPURSW  : BaseLoadUnscaleV84 <"ldapursw", 0b10, 0b10, GPR64>;
  defm LDAPURX   : BaseLoadUnscaleV84 <"ldapur",   0b11, 0b01, GPR64>;
}
4076
// Match all store 64 bits width whose type is compatible with FPR64.
// The single-element vector types are handled here, with no endianness
// predicate and no added complexity.
foreach VT = [v1f64, v1i64] in
  def : Pat<(store (VT FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4082
let AddedComplexity = 10 in {

// Unscaled-offset stores of multi-element 64-bit vectors held in FPR64.
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  foreach VT = [v2f32, v8i8, v4i16, v2i32, v4f16, v4bf16] in
    def : Pat<(store (VT FPR64:$Rt),
                     (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
              (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// Match all store 128 bits width whose type is compatible with FPR128.
// The scalar f128 store carries no endianness predicate.
def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  // Each 128-bit vector type is listed exactly once; the v2f64 pattern used
  // to be written out twice, and the second copy was redundant.
  foreach VT = [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16] in
    def : Pat<(store (VT FPR128:$Rt),
                     (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
              (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10
4143
// unscaled i64 truncating stores: store the sub_32 sub-register of the
// 64-bit source with the narrower unscaled store.
def : Pat<(truncstorei32 GPR64:$Rt,
            (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
          (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
            GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
            (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
            GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt,
            (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
          (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
            GPR64sp:$Rn, simm9:$offset)>;
4151
// Match stores from lane 0 to the appropriate subreg's store
// (unscaled-immediate addressing). Thin wrapper that instantiates
// VecStoreLane0Pat with the simm9 offset operand.
// NOTE(review): the address pattern passed down is am_unscaled128 for every
// element size instantiated with this multiclass -- confirm that is intended
// rather than a per-size am_unscaledN.
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
                             ValueType VTy, ValueType STy,
                             ValueType SubRegTy,
                             SubRegIndex SubRegIdx, Instruction STR> {
  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy,
                          SubRegTy, SubRegIdx, simm9, STR>;
}
4159
// Lane-0 stores with unscaled-immediate addressing for 16-, 32- and 64-bit
// elements, given a higher complexity than the generic store patterns.
let AddedComplexity = 19 in {
  // 16-bit elements (hsub).
  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v8f16, f16, f16, hsub, STURHi>;
  // 32-bit elements (ssub).
  defm : VecStoreULane0Pat<store,         v4i32, i32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v4f32, f32, i32, ssub, STURSi>;
  // 64-bit elements (dsub).
  defm : VecStoreULane0Pat<store,         v2i64, i64, i64, dsub, STURDi>;
  defm : VecStoreULane0Pat<store,         v2f64, f64, i64, dsub, STURDi>;
}
4168
//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.
// Each alias maps the "str"/"strb"/"strh" spelling onto the unscaled
// instruction for one register class, using the simm9_offset_fbN operand
// for that access size.

// General-purpose registers.
def : InstAlias<"str $Rt, [$Rn, $offset]",
        (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
        (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// FP/SIMD registers.
def : InstAlias<"str $Rt, [$Rn, $offset]",
        (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
        (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
        (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
        (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
        (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

// Byte and half-word stores from a 32-bit register.
def : InstAlias<"strb $Rt, [$Rn, $offset]",
        (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"strh $Rt, [$Rn, $offset]",
        (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
4190
//---
// (unscaled immediate, unprivileged)

// Word and double-word stores.
defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;

// Half-word and byte stores from a 32-bit register.
defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
4198
//---
// (immediate pre-indexed)

// General-purpose register stores.
def STRWpre   : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>;
def STRXpre   : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>;

// FP/SIMD register stores; only available with HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op,   "str", pre_store, i8>;
  def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op,  "str", pre_store, f16>;
  def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op,  "str", pre_store, f32>;
  def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op,  "str", pre_store, f64>;
  def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;
}

// Truncating byte and half-word stores from a 32-bit register.
def STRBBpre  : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8,  i32>;
def STRHHpre  : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;
4213
// truncstore i64 (pre-indexed): store the sub_32 sub-register of the 64-bit
// source with the narrower pre-indexed store.
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32),
            GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32),
            GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32),
            GPR64sp:$addr, simm9:$off)>;
4224
// Pre-indexed bf16 store, mirroring the post-indexed bf16 pattern on STRHpost.
def : Pat<(pre_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpre FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;

// Pre-indexed stores of every 64-bit vector type held in FPR64.
// v4bf16 is included so this list matches the post-indexed one.
foreach VT = [v8i8, v4i16, v2i32, v2f32, v1i64, v1f64, v4f16, v4bf16] in
  def : Pat<(pre_store (VT FPR64:$Rt), GPR64sp:$addr, simm9:$off),
            (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

// Pre-indexed stores of every 128-bit vector type held in FPR128.
// v8bf16 is included so this list matches the post-indexed one.
foreach VT = [v16i8, v8i16, v4i32, v4f32, v2i64, v2f64, v8f16, v8bf16] in
  def : Pat<(pre_store (VT FPR128:$Rt), GPR64sp:$addr, simm9:$off),
            (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4254
//---
// (immediate post-indexed)

// General-purpose register stores.
def STRWpost   : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>;
def STRXpost   : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>;

// FP/SIMD register stores; only available with HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op,   "str", post_store, i8>;
  def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op,  "str", post_store, f16>;
  def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op,  "str", post_store, f32>;
  def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op,  "str", post_store, f64>;
  def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;
}

// Truncating byte and half-word stores from a 32-bit register.
def STRBBpost  : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8,  i32>;
def STRHHpost  : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;
4269
// truncstore i64 (post-indexed): store the sub_32 sub-register of the 64-bit
// source with the narrower post-indexed store.
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32),
            GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32),
            GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32),
            GPR64sp:$addr, simm9:$off)>;
4280
// Post-indexed bf16 store, selected to the half-precision STRHpost.
def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;

// Post-indexed stores of every 64-bit vector type held in FPR64.
foreach VT = [v8i8, v4i16, v2i32, v2f32, v1i64, v1f64, v4f16, v4bf16] in
  def : Pat<(post_store (VT FPR64:$Rt), GPR64sp:$addr, simm9:$off),
            (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

// Post-indexed stores of every 128-bit vector type held in FPR128.
foreach VT = [v16i8, v8i16, v4i32, v4f32, v2i64, v2f64, v8f16, v8bf16] in
  def : Pat<(post_store (VT FPR128:$Rt), GPR64sp:$addr, simm9:$off),
            (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4317
4318//===----------------------------------------------------------------------===//
4319// Load/store exclusive instructions.
4320//===----------------------------------------------------------------------===//
4321
4322def LDARW  : LoadAcquire   <0b10, 1, 1, 0, 1, GPR32, "ldar">;
4323def LDARX  : LoadAcquire   <0b11, 1, 1, 0, 1, GPR64, "ldar">;
4324def LDARB  : LoadAcquire   <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
4325def LDARH  : LoadAcquire   <0b01, 1, 1, 0, 1, GPR32, "ldarh">;
4326
4327def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
4328def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
4329def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
4330def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;
4331
4332def LDXRW  : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
4333def LDXRX  : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
4334def LDXRB  : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
4335def LDXRH  : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;
4336
4337def STLRW  : StoreRelease  <0b10, 1, 0, 0, 1, GPR32, "stlr">;
4338def STLRX  : StoreRelease  <0b11, 1, 0, 0, 1, GPR64, "stlr">;
4339def STLRB  : StoreRelease  <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
4340def STLRH  : StoreRelease  <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
4341
4342/*
4343Aliases for when offset=0. Note that in contrast to LoadAcquire which has a $Rn
4344of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and add an
4345alias for the case of immediate #0. This is because new STLR versions (from
4346LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is not
4347appropriate anymore (it parses and discards the optional zero). This is not the
4348case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed,
4349and the immediate values are not inside the [] brackets and thus not accepted
4350by GPR64sp0 parser.
4351*/
4352def STLRW0  : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRW   GPR32: $Rt, GPR64sp:$Rn)>;
4353def STLRX0  : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRX   GPR64: $Rt, GPR64sp:$Rn)>;
4354def STLRB0  : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB   GPR32: $Rt, GPR64sp:$Rn)>;
4355def STLRH0  : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH   GPR32: $Rt, GPR64sp:$Rn)>;
4356
4357def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
4358def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
4359def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
4360def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;
4361
4362def STXRW  : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
4363def STXRX  : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
4364def STXRB  : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
4365def STXRH  : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;
4366
4367def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
4368def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;
4369
4370def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
4371def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;
4372
4373def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
4374def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
4375
4376def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
4377def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
4378
4379let Predicates = [HasLOR] in {
4380  // v8.1a "Limited Order Region" extension load-acquire instructions
4381  def LDLARW  : LoadAcquire   <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
4382  def LDLARX  : LoadAcquire   <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
4383  def LDLARB  : LoadAcquire   <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
4384  def LDLARH  : LoadAcquire   <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
4385
4386  // v8.1a "Limited Order Region" extension store-release instructions
4387  def STLLRW  : StoreRelease   <0b10, 1, 0, 0, 0, GPR32, "stllr">;
4388  def STLLRX  : StoreRelease   <0b11, 1, 0, 0, 0, GPR64, "stllr">;
4389  def STLLRB  : StoreRelease   <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
4390  def STLLRH  : StoreRelease   <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
4391
4392  // Aliases for when offset=0
4393  def STLLRW0 : InstAlias<"stllr\t$Rt,  [$Rn, #0]",  (STLLRW   GPR32: $Rt, GPR64sp:$Rn)>;
4394  def STLLRX0 : InstAlias<"stllr\t$Rt,  [$Rn, #0]",  (STLLRX   GPR64: $Rt, GPR64sp:$Rn)>;
4395  def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]",  (STLLRB   GPR32: $Rt, GPR64sp:$Rn)>;
4396  def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]",  (STLLRH   GPR32: $Rt, GPR64sp:$Rn)>;
4397}
4398
4399//===----------------------------------------------------------------------===//
// Floating point to integer conversion instructions (unscaled and scaled).
4401//===----------------------------------------------------------------------===//
4402
4403defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
4404defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
4405defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
4406defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
4407defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
4408defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
4409defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
4410defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
4411defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
4412defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
4413defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
4414defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
4415
// AArch64's FCVT instructions saturate when out of range.
//
// FPToIntegerSatPats - Select the saturating conversion node `to_int_sat`
// directly to the instruction family named by INST.
//   to_int_sat - node to match; in every pattern below its second operand is
//                the same integer type (i32/i64) as the result.
//   INST       - record-name prefix (e.g. "FCVTZS"); a suffix is appended here
//                to pick the concrete instruction.
// Suffix scheme as used below: W/X goes with an i32/i64 result and H/S/D with
// an f16/f32/f64 source. The "U..r" records are given only $Rn; the "S..ri"
// records are additionally given the $scale immediate.
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
  // Plain conversions. The f16 sources are only matched with FullFP16.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  // Conversions of a value multiplied by a fixedpoint_* scale operand: the
  // fmul is folded away and the scale is passed to the instruction instead.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}
4448
4449defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
4450defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
4451
// FPToIntegerIntPats - Select the conversion intrinsic `round` to the
// instruction family named by INST.
//   round - intrinsic taking a single FP operand and producing i32 or i64.
//   INST  - record-name prefix (e.g. "FCVTZS"); a suffix is appended here to
//           pick the concrete instruction.
// The suffixes follow the result/source types of each pattern: W/X for an
// i32/i64 result, H/S/D for an f16/f32/f64 source.
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
  // Intrinsic applied directly to the source value. f16 needs FullFP16.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
  def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
  }
  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

  // Intrinsic applied to a value multiplied by a fixedpoint_* scale operand:
  // the fmul is folded and the scale becomes the instruction's immediate.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}
4477
4478defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
4479defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
4480
// FPToIntegerPats - Fold "round to integral, then convert to integer" into
// the single instruction family named by INST.
//   to_int     - plain FP-to-integer conversion node.
//   to_int_sat - saturating FP-to-integer conversion node; its second operand
//                is the same integer type as the result in every pattern.
//   round      - FP rounding node applied to the source before conversion.
//   INST       - record-name prefix (e.g. "FCVTPS"); a suffix is appended here
//                to pick the concrete instruction.
multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
  // Non-saturating conversion of a rounded value. Only f32 and f64 sources
  // are covered here; there are no f16 patterns for to_int.
  def : Pat<(i32 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  // These instructions saturate like fp_to_[su]int_sat.
  // The f16 sources are only matched with FullFP16.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
}
4507
4508defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
4509defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
4510defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
4511defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
4512defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
4513defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
4514defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
4515defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
4516
4517
4518
4519let Predicates = [HasFullFP16] in {
4520  def : Pat<(i32 (any_lround f16:$Rn)),
4521            (FCVTASUWHr f16:$Rn)>;
4522  def : Pat<(i64 (any_lround f16:$Rn)),
4523            (FCVTASUXHr f16:$Rn)>;
4524  def : Pat<(i64 (any_llround f16:$Rn)),
4525            (FCVTASUXHr f16:$Rn)>;
4526}
4527def : Pat<(i32 (any_lround f32:$Rn)),
4528          (FCVTASUWSr f32:$Rn)>;
4529def : Pat<(i32 (any_lround f64:$Rn)),
4530          (FCVTASUWDr f64:$Rn)>;
4531def : Pat<(i64 (any_lround f32:$Rn)),
4532          (FCVTASUXSr f32:$Rn)>;
4533def : Pat<(i64 (any_lround f64:$Rn)),
4534          (FCVTASUXDr f64:$Rn)>;
4535def : Pat<(i64 (any_llround f32:$Rn)),
4536          (FCVTASUXSr f32:$Rn)>;
4537def : Pat<(i64 (any_llround f64:$Rn)),
4538          (FCVTASUXDr f64:$Rn)>;
4539
4540//===----------------------------------------------------------------------===//
4541// Scaled integer to floating point conversion instructions.
4542//===----------------------------------------------------------------------===//
4543
4544defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
4545defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
4546
// Fold an integer-to-FP conversion whose result is divided by a fixedpoint_*
// scale operand into the scaled (immediate) form of SCVTF/UCVTF.

// Half-precision results select the *Hri instructions, so these patterns must
// only be active when FullFP16 is available, matching the guard used for the
// other f16 conversion patterns in this file.
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
          (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
          (SCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
          (UCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
          (UCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
} // HasFullFP16

// Signed, 32-bit integer source.
def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
          (SCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
          (SCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;

// Signed, 64-bit integer source.
def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
          (SCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
          (SCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;

// Unsigned, 64-bit integer source.
def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
          (UCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
          (UCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;

// Unsigned, 32-bit integer source.
def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
          (UCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
          (UCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;
4574
4575//===----------------------------------------------------------------------===//
4576// Unscaled integer to floating point conversion instruction.
4577//===----------------------------------------------------------------------===//
4578
4579defm FMOV : UnscaledConversion<"fmov">;
4580
4581// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
4582let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1,
4583    Predicates = [HasFPARMv8] in {
4584def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
4585    Sched<[WriteF]>;
4586def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
4587    Sched<[WriteF]>;
4588def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
4589    Sched<[WriteF]>;
4590}
4591
4592// Similarly add aliases
4593def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
4594    Requires<[HasFullFP16]>;
4595def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
4596def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
4597
4598def : Pat<(bf16 fpimm0),
4599          (FMOVH0)>;
4600
4601// Pattern for FP16 and BF16 immediates
4602let Predicates = [HasFullFP16] in {
4603  def : Pat<(f16 fpimm:$in),
4604            (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>;
4605
4606  def : Pat<(bf16 fpimm:$in),
4607            (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 bf16:$in)))>;
4608}
4609
4610//===----------------------------------------------------------------------===//
4611// Floating point conversion instruction.
4612//===----------------------------------------------------------------------===//
4613
4614defm FCVT : FPConversion<"fcvt">;
4615
4616//===----------------------------------------------------------------------===//
4617// Floating point single operand instructions.
4618//===----------------------------------------------------------------------===//
4619
4620defm FABS   : SingleOperandFPDataNoException<0b0001, "fabs", fabs>;
4621defm FMOV   : SingleOperandFPDataNoException<0b0000, "fmov">;
4622defm FNEG   : SingleOperandFPDataNoException<0b0010, "fneg", fneg>;
4623defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>;
4624defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>;
4625defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>;
4626defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>;
4627defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>;
4628
4629defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>;
4630defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>;
4631
4632let SchedRW = [WriteFDiv] in {
4633defm FSQRT  : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>;
4634}
4635
4636let Predicates = [HasFRInt3264] in {
4637  defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
4638  defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
4639  defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
4640  defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
4641} // HasFRInt3264
4642
// Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions.
// The scalar FRINT32*/FRINT64* instructions selected here are only defined
// under HasFRInt3264 (see the defm's above), so the patterns carry the same
// predicate; otherwise they could select an instruction the subtarget lacks.
let Predicates = [HasFRInt3264] in {
def : Pat<(v1f64 (int_aarch64_neon_frint32z (v1f64 FPR64:$Rn))),
          (FRINT32ZDr FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frint64z (v1f64 FPR64:$Rn))),
          (FRINT64ZDr FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frint32x (v1f64 FPR64:$Rn))),
          (FRINT32XDr FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frint64x (v1f64 FPR64:$Rn))),
          (FRINT64XDr FPR64:$Rn)>;
} // HasFRInt3264
4652
4653// Emitting strict_lrint as two instructions is valid as any exceptions that
4654// occur will happen in exactly one of the instructions (e.g. if the input is
4655// not an integer the inexact exception will happen in the FRINTX but not then
4656// in the FCVTZS as the output of FRINTX is an integer).
4657let Predicates = [HasFullFP16] in {
4658  def : Pat<(i32 (any_lrint f16:$Rn)),
4659            (FCVTZSUWHr (FRINTXHr f16:$Rn))>;
4660  def : Pat<(i64 (any_lrint f16:$Rn)),
4661            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
4662  def : Pat<(i64 (any_llrint f16:$Rn)),
4663            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
4664}
4665def : Pat<(i32 (any_lrint f32:$Rn)),
4666          (FCVTZSUWSr (FRINTXSr f32:$Rn))>;
4667def : Pat<(i32 (any_lrint f64:$Rn)),
4668          (FCVTZSUWDr (FRINTXDr f64:$Rn))>;
4669def : Pat<(i64 (any_lrint f32:$Rn)),
4670          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
4671def : Pat<(i64 (any_lrint f64:$Rn)),
4672          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
4673def : Pat<(i64 (any_llrint f32:$Rn)),
4674          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
4675def : Pat<(i64 (any_llrint f64:$Rn)),
4676          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
4677
4678//===----------------------------------------------------------------------===//
4679// Floating point two operand instructions.
4680//===----------------------------------------------------------------------===//
4681
4682defm FADD   : TwoOperandFPData<0b0010, "fadd", any_fadd>;
4683let SchedRW = [WriteFDiv] in {
4684defm FDIV   : TwoOperandFPData<0b0001, "fdiv", any_fdiv>;
4685}
4686defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>;
4687defm FMAX   : TwoOperandFPData<0b0100, "fmax", any_fmaximum>;
4688defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>;
4689defm FMIN   : TwoOperandFPData<0b0101, "fmin", any_fminimum>;
4690let SchedRW = [WriteFMul] in {
4691defm FMUL   : TwoOperandFPData<0b0000, "fmul", any_fmul>;
4692defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
4693}
4694defm FSUB   : TwoOperandFPData<0b0011, "fsub", any_fsub>;
4695
// FMULScalarFromIndexedLane0Patterns - Select OpNode applied to a scalar and
// to element 0 of a 128-bit vector as a plain scalar instruction, reading the
// vector element through a subregister extract instead of a lane move.
//   inst            - record-name prefix of the scalar instruction.
//   inst_f16_suffix - suffix appended to `inst` for the f16 form.
//   inst_f32_suffix - suffix appended to `inst` for the f32 form.
//   inst_f64_suffix - suffix appended to `inst` for the f64 form.
//   OpNode          - two-operand node to match.
//   preds           - extra predicates for all patterns (default: none).
multiclass FMULScalarFromIndexedLane0Patterns<string inst,
                                              string inst_f16_suffix,
                                              string inst_f32_suffix,
                                              string inst_f64_suffix,
                                              SDPatternOperator OpNode,
                                              list<Predicate> preds = []> {
  // The f16 form needs HasFullFP16 in addition to the caller's predicates.
  let Predicates = !listconcat(preds, [HasFullFP16]) in {
  def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
                         (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f16_suffix)
              FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)))>;
  }
  // f32 and f64 forms: lane 0 is taken via the ssub / dsub subregister.
  let Predicates = preds in {
  def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
                         (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f32_suffix)
              FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>;
  def : Pat<(f64 (OpNode (f64 FPR64:$Rn),
                         (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f64_suffix)
              FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>;
  }
}
4719
4720defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr",
4721                                          any_fmul>;
4722
4723// Match reassociated forms of FNMUL.
4724def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
4725          (FNMULHrr FPR16:$a, FPR16:$b)>,
4726          Requires<[HasFullFP16]>;
4727def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)),
4728          (FNMULSrr FPR32:$a, FPR32:$b)>;
4729def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)),
4730          (FNMULDrr FPR64:$a, FPR64:$b)>;
4731
4732def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4733          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
4734def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4735          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
4736def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4737          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
4738def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4739          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
4740
4741//===----------------------------------------------------------------------===//
4742// Floating point three operand instructions.
4743//===----------------------------------------------------------------------===//
4744
4745defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
4746defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
4747     TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
4748defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
4749     TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
4750defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
4751     TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
4752
4753// The following def pats catch the case where the LHS of an FMA is negated.
4754// The TriOpFrag above catches the case where the middle operand is negated.
4755
// N.b. FMSUB etc have the accumulator at the *end* of (ins), unlike
// the NEON variant.
4758
// Here we handle first "a + (-b)*c" for FMSUB:
4760
4761let Predicates = [HasNEON, HasFullFP16] in
4762def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
4763          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
4764
4765def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
4766          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
4767
4768def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
4769          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
4770
4771// Now it's time for "(-a) + (-b)*c"
4772
4773let Predicates = [HasNEON, HasFullFP16] in
4774def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
4775          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
4776
4777def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
4778          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
4779
4780def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
4781          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
4782
4783//===----------------------------------------------------------------------===//
4784// Floating point comparison instructions.
4785//===----------------------------------------------------------------------===//
4786
4787defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
4788defm FCMP  : FPComparison<0, "fcmp", AArch64any_fcmp>;
4789
4790//===----------------------------------------------------------------------===//
4791// Floating point conditional comparison instructions.
4792//===----------------------------------------------------------------------===//
4793
4794defm FCCMPE : FPCondComparison<1, "fccmpe">;
4795defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;
4796
4797//===----------------------------------------------------------------------===//
4798// Floating point conditional select instruction.
4799//===----------------------------------------------------------------------===//
4800
4801defm FCSEL : FPCondSelect<"fcsel">;
4802
4803let Predicates = [HasFullFP16] in
4804def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)),
4805          (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>;
4806
// CSEL instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
//
// F128CSEL matches an AArch64csel of two FPR128 values under condition code
// $cond and produces the chosen value in $Rd.
let Predicates = [HasFPARMv8] in
def F128CSEL : Pseudo<(outs FPR128:$Rd),
                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
                      [(set (f128 FPR128:$Rd),
                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
                                       (i32 imm:$cond), NZCV))]> {
  // The selection depends on the condition flags.
  let Uses = [NZCV];
  // Expanded by target code rather than emitted directly (see comment above).
  let usesCustomInserter = 1;
  // Pseudo only, so it carries no scheduling information of its own.
  let hasNoSchedulingInfo = 1;
}
4820
4821//===----------------------------------------------------------------------===//
4822// Instructions used for emitting unwind opcodes on ARM64 Windows.
4823//===----------------------------------------------------------------------===//
4824let isPseudo = 1 in {
4825  def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
4826  def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
4827  def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
4828  def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4829  def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4830  def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4831  def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4832  def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4833  def SEH_SaveFReg_X :  Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4834  def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4835  def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4836  def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
4837  def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
4838  def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
4839  def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
4840  def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
4841  def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
4842  def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>;
4843  def SEH_SaveAnyRegQP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4844  def SEH_SaveAnyRegQPX : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4845}
4846
//===----------------------------------------------------------------------===//
// Pseudo instructions for Windows EH
//===----------------------------------------------------------------------===//
4849let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
4850    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
4851   def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
4852   let usesCustomInserter = 1 in
4853     def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
4854                    Sched<[]>;
4855}
4856
4857// Pseudo instructions for homogeneous prolog/epilog
4858let isPseudo = 1 in {
4859  // Save CSRs in order, {FPOffset}
4860  def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
4861  // Restore CSRs in order
4862  def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
4863}
4864
4865//===----------------------------------------------------------------------===//
4866// Floating point immediate move.
4867//===----------------------------------------------------------------------===//
4868
4869let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
4870defm FMOV : FPMoveImmediate<"fmov">;
4871}
4872
4873let Predicates = [HasFullFP16] in {
4874  def : Pat<(bf16 fpimmbf16:$in),
4875            (FMOVHi (fpimm16XForm bf16:$in))>;
4876}
4877
4878//===----------------------------------------------------------------------===//
4879// Advanced SIMD two vector instructions.
4880//===----------------------------------------------------------------------===//
4881
4882defm UABDL   : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
4883                                          AArch64uabd>;
4884// Match UABDL in log2-shuffle patterns: abs(zext(a) - zext(b)), low-half and extract_high forms.
4885def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
4886                           (zext (v8i8 V64:$opB))))),
4887          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
4888def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), // appears to be abs expanded as xor(s, add(x, s))
4889               (v8i16 (add (sub (zext (v8i8 V64:$opA)),
4890                                (zext (v8i8 V64:$opB))),
4891                           (AArch64vashr v8i16:$src, (i32 15))))), // NOTE(review): $src is not tied to the sub result - confirm
4892          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
4893def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
4894                           (zext (extract_high_v16i8 (v16i8 V128:$opB)))))),
4895          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
4896def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), // same xor/add/ashr form, on the high halves
4897               (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
4898                                (zext (extract_high_v16i8 (v16i8 V128:$opB)))),
4899                           (AArch64vashr v8i16:$src, (i32 15))))),
4900          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
4901def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
4902                           (zext (v4i16 V64:$opB))))),
4903          (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
4904def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))),
4905                           (zext (extract_high_v8i16 (v8i16 V128:$opB)))))),
4906          (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
4907def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
4908                           (zext (v2i32 V64:$opB))))),
4909          (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
4910def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))),
4911                           (zext (extract_high_v4i32 (v4i32 V128:$opB)))))),
4912          (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
4913
4914defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
4915defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
4916defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
4917defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
4918defm CMGE   : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
4919defm CMGT   : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
4920defm CMLE   : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
4921defm CMLT   : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
4922defm CNT    : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
4923defm FABS   : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>;
4924
4925def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))), // ashr by (lane bits - 1) is selected as the CMLT rz form
4926          (CMLTv8i8rz V64:$Rn)>;
4927def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
4928          (CMLTv4i16rz V64:$Rn)>;
4929def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
4930          (CMLTv2i32rz V64:$Rn)>;
4931def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))), // 128-bit vector forms
4932          (CMLTv16i8rz V128:$Rn)>;
4933def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
4934          (CMLTv8i16rz V128:$Rn)>;
4935def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
4936          (CMLTv4i32rz V128:$Rn)>;
4937def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
4938          (CMLTv2i64rz V128:$Rn)>;
4939
4940defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
4941defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
4942defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
4943defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
4944defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
4945defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
4946defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
4947defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
4948def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
4949          (FCVTLv4i16 V64:$Rn)>;
4950def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
4951                                                                (i64 4)))),
4952          (FCVTLv8i16 V128:$Rn)>;
4953def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
4954          (FCVTLv2i32 V64:$Rn)>;
4955def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
4956          (FCVTLv4i32 V128:$Rn)>;
4957def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))),
4958          (FCVTLv4i16 V64:$Rn)>;
4959def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))),
4960          (FCVTLv8i16 V128:$Rn)>;
4961
4962defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
4963defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
4964defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
4965defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
4966defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
4967def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
4968          (FCVTNv4i16 V128:$Rn)>;
4969def : Pat<(concat_vectors V64:$Rd,
4970                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
4971          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
4972def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
4973          (FCVTNv2i32 V128:$Rn)>;
4974def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),
4975          (FCVTNv4i16 V128:$Rn)>;
4976def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))),
4977          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
4978def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))),
4979          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
4980defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
4981defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
4982defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
4983                                        int_aarch64_neon_fcvtxn>;
4984defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
4985defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
4986
4987// AArch64's FCVT instructions saturate when out of range, so the *_sat nodes select them directly.
4988multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> { // INST: instruction name prefix
4989  let Predicates = [HasFullFP16] in { // f16 vector forms
4990  def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
4991            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
4992  def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
4993            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
4994  } // HasFullFP16
4995  def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)), // second operand always equals the result lane type
4996            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
4997  def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
4998            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
4999  def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
5000            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
5001} // SIMDTwoVectorFPToIntSatPats
5002defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
5003defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
5004
5005def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; // NEON fcvtzs intrinsic forms
5006def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
5007def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
5008def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
5009def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;
5010
5011def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>; // NEON fcvtzu intrinsic forms
5012def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
5013def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
5014def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
5015def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;
5016
5017defm FNEG   : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
5018defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
5019defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
5020defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
5021defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>;
5022defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>;
5023defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
5024defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
5025defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;
5026
5027let Predicates = [HasFRInt3264] in {
5028  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
5029  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
5030  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
5031  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
5032} // HasFRInt3264
5033
5034defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
5035defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
5036defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
5037                               UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
5038defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
5039// Aliases for MVN -> NOT.
5040def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
5041                (NOTv8i8 V64:$Vd, V64:$Vn)>;
5042def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
5043                (NOTv16i8 V128:$Vd, V128:$Vn)>;
5044
5045def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>; // wider-lane vnot is selected as the byte-vector NOT
5046def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
5047def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
5048def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
5049def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
5050def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
5051
5052defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
5053defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
5054defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
5055defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
5056defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
5057       BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >;
5058defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>;
5059defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
5060defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
5061defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
5062defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
5063defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
5064defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
5065defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
5066defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
5067       BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
5068defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
5069defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
5070defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
5071defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
5072defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
5073defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
5074defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
5075
5076def : Pat<(v4f16  (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>; // f16/bf16/f32 vectors reuse the integer REV
5077def : Pat<(v4f16  (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>; // instruction with the same lane width
5078def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
5079def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
5080def : Pat<(v8f16  (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
5081def : Pat<(v8f16  (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
5082def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
5083def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
5084def : Pat<(v2f32  (AArch64rev64 V64:$Rn)),  (REV64v2i32 V64:$Rn)>;
5085def : Pat<(v4f32  (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
5086
5087// Patterns for vector long shift (by element width). These need to match all
5088// three of zext, sext and anyext so it's easier to pull the patterns out of the
5089// definition. `ext` is the extension operator each instantiation supplies.
5090multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
5091  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), // shift amount == source lane width
5092            (SHLLv8i8 V64:$Rn)>;
5093  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)), // high-half form
5094            (SHLLv16i8 V128:$Rn)>;
5095  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
5096            (SHLLv4i16 V64:$Rn)>;
5097  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
5098            (SHLLv8i16 V128:$Rn)>;
5099  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
5100            (SHLLv2i32 V64:$Rn)>;
5101  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
5102            (SHLLv4i32 V128:$Rn)>;
5103} // SIMDVectorLShiftLongBySizeBHSPats
5104
5105defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
5106defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
5107defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
5108
5109// Constant vector values (saturation bounds), used in the S/UQXTN patterns below.
5110def VImmFF:   PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>; // 255, used as v8i16
5111def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>; // 65535, used as v4i32
5112def VImm7F:   PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>; // 127, used as v8i16
5113def VImm80:   PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>; // -128, used as v8i16
5114def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>; // 32767, used as v4i32
5115def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>; // -32768, used as v4i32
5116
5117// trunc(umin(X, 255)) -> UQXTN v8i8
5118def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
5119          (UQXTNv8i8 V128:$Vn)>;
5120// trunc(umin(X, 65535)) -> UQXTN v4i16
5121def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
5122          (UQXTNv4i16 V128:$Vn)>;
5123// trunc(smin(smax(X, -128), 127)) -> SQXTN v8i8
5124//  and the same with min/max reversed
5125def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
5126                             (v8i16 VImm7F)))),
5127          (SQXTNv8i8 V128:$Vn)>;
5128def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
5129                             (v8i16 VImm80)))),
5130          (SQXTNv8i8 V128:$Vn)>;
5131// trunc(smin(smax(X, -32768), 32767)) -> SQXTN v4i16
5132//  and the same with min/max reversed
5133def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
5134                              (v4i32 VImm7FFF)))),
5135          (SQXTNv4i16 V128:$Vn)>;
5136def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
5137                              (v4i32 VImm8000)))),
5138          (SQXTNv4i16 V128:$Vn)>;
5139
5140// concat_vectors(Vd, trunc(smin(smax(Vn, -128), 127))) ~> SQXTN2(Vd, Vn)
5141// and the same with min/max reversed
5142def : Pat<(v16i8 (concat_vectors
5143                 (v8i8 V64:$Vd),
5144                 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
5145                                          (v8i16 VImm7F)))))),
5146          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5147def : Pat<(v16i8 (concat_vectors
5148                 (v8i8 V64:$Vd),
5149                 (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
5150                                          (v8i16 VImm80)))))),
5151          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5152
5153// concat_vectors(Vd, trunc(smin(smax(Vn, -32768), 32767))) ~> SQXTN2(Vd, Vn)
5154// and the same with min/max reversed
5155def : Pat<(v8i16 (concat_vectors
5156                 (v4i16 V64:$Vd),
5157                 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
5158                                           (v4i32 VImm7FFF)))))),
5159          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5160def : Pat<(v8i16 (concat_vectors
5161                 (v4i16 V64:$Vd),
5162                 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
5163                                           (v4i32 VImm8000)))))),
5164          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5165
5166//===----------------------------------------------------------------------===//
5167// Advanced SIMD three vector instructions.
5168//===----------------------------------------------------------------------===//
5169
5170defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
5171defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>;
5172defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
5173defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
5174defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
5175defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
5176defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
5177defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
5178foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in { // not(cmeqz x) is selected as CMTST x, x
5179def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
5180} // foreach VT
5181defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
5182let Predicates = [HasNEON] in { // fabs(a - b) is selected as FABD
5183foreach VT = [ v2f32, v4f32, v2f64 ] in
5184def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
5185} // HasNEON
5186let Predicates = [HasNEON, HasFullFP16] in { // f16 vector forms additionally require HasFullFP16
5187foreach VT = [ v4f16, v8f16 ] in
5188def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
5189} // HasNEON, HasFullFP16
5190defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>;
5191defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>;
5192defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
5193defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
5194defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
5195defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
5196defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
5197defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>;
5198defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
5199defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>;
5200defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
5201defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>;
5202defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
5203defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
5204defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
5205defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;
5206
5207// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
5208// instruction expects the addend first, while the fma intrinsic puts it last.
5209defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
5210            TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
5211defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
5212            TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
5213
5214defm FMULX    : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
5215defm FMUL     : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
5216defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
5217defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
5218defm FSUB     : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;
5219
5220// MLA and MLS have no ISel pattern (null_frag); they are generated by the MachineCombiner.
5221defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
5222defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
5223
5224defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
5225defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
5226defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
5227      TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
5228defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
5229defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
5230defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
5231defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
5232defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
5233defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
5234defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
5235defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
5236defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
5237defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
5238defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
5239defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
5240defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
5241defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
5242defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
5243defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
5244defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
5245defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
5246      TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
5247defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
5248defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
5249defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
5250defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
5251defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
5252defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
5253defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
5254defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
5255defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
5256defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
5257defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
5258defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
5259defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
5260defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
5261defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
5262                                                  int_aarch64_neon_sqrdmlah>;
5263defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
5264                                                    int_aarch64_neon_sqrdmlsh>;
5265
5266// Extra saturate patterns, other than the intrinsics matches above
5267defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
5268defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
5269defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
5270defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
5271
5272defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
5273defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
5274                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
5275defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
5276defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
5277                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
5278defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
5279
5280// Pseudo bitwise select pattern BSP.
5281// It is expanded into BSL/BIT/BIF after register allocation.
5282defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
5283                                                      (and (vnot node:$LHS), node:$RHS))>>;
5284defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
5285defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
5286defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;
5287
5288def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), // every 64-bit integer type selects BSPv8i8
5289          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5290def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
5291          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5292def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
5293          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5294def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
5295          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5296
5297def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm), // every 128-bit integer type selects BSPv16i8
5298          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5299def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
5300          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5301def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
5302          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5303def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
5304          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5305
5306def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
5307                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
5308def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
5309                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5310def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
5311                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5312def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
5313                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5314
5315def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
5316                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
5317def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
5318                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5319def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
5320                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5321def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
5322                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5323
5324def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
5325                "|cmls.8b\t$dst, $src1, $src2}",
5326                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5327def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
5328                "|cmls.16b\t$dst, $src1, $src2}",
5329                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5330def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
5331                "|cmls.4h\t$dst, $src1, $src2}",
5332                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5333def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
5334                "|cmls.8h\t$dst, $src1, $src2}",
5335                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5336def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
5337                "|cmls.2s\t$dst, $src1, $src2}",
5338                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5339def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
5340                "|cmls.4s\t$dst, $src1, $src2}",
5341                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5342def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
5343                "|cmls.2d\t$dst, $src1, $src2}",
5344                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5345
5346def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
5347                "|cmlo.8b\t$dst, $src1, $src2}",
5348                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5349def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
5350                "|cmlo.16b\t$dst, $src1, $src2}",
5351                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5352def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
5353                "|cmlo.4h\t$dst, $src1, $src2}",
5354                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5355def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
5356                "|cmlo.8h\t$dst, $src1, $src2}",
5357                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5358def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
5359                "|cmlo.2s\t$dst, $src1, $src2}",
5360                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5361def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
5362                "|cmlo.4s\t$dst, $src1, $src2}",
5363                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5364def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
5365                "|cmlo.2d\t$dst, $src1, $src2}",
5366                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5367
5368def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
5369                "|cmle.8b\t$dst, $src1, $src2}",
5370                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5371def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
5372                "|cmle.16b\t$dst, $src1, $src2}",
5373                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5374def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
5375                "|cmle.4h\t$dst, $src1, $src2}",
5376                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5377def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
5378                "|cmle.8h\t$dst, $src1, $src2}",
5379                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5380def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
5381                "|cmle.2s\t$dst, $src1, $src2}",
5382                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5383def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
5384                "|cmle.4s\t$dst, $src1, $src2}",
5385                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5386def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
5387                "|cmle.2d\t$dst, $src1, $src2}",
5388                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5389
5390def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
5391                "|cmlt.8b\t$dst, $src1, $src2}",
5392                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5393def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
5394                "|cmlt.16b\t$dst, $src1, $src2}",
5395                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5396def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
5397                "|cmlt.4h\t$dst, $src1, $src2}",
5398                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5399def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
5400                "|cmlt.8h\t$dst, $src1, $src2}",
5401                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5402def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
5403                "|cmlt.2s\t$dst, $src1, $src2}",
5404                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5405def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
5406                "|cmlt.4s\t$dst, $src1, $src2}",
5407                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5408def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
5409                "|cmlt.2d\t$dst, $src1, $src2}",
5410                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5411
// fcmle (vector, register form): aliases onto FCMGE with the source operands
// swapped. The half-precision (.4h/.8h) arrangements are additionally gated
// on HasFullFP16; the .2s/.4s/.2d forms carry no explicit predicate here.
5412let Predicates = [HasNEON, HasFullFP16] in {
5413def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
5414                "|fcmle.4h\t$dst, $src1, $src2}",
5415                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5416def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
5417                "|fcmle.8h\t$dst, $src1, $src2}",
5418                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5419}
5420def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
5421                "|fcmle.2s\t$dst, $src1, $src2}",
5422                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5423def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
5424                "|fcmle.4s\t$dst, $src1, $src2}",
5425                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5426def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
5427                "|fcmle.2d\t$dst, $src1, $src2}",
5428                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5429
// fcmlt (vector, register form): aliases onto FCMGT with the source operands
// swapped. The .4h/.8h arrangements require HasFullFP16 in addition to NEON.
5430let Predicates = [HasNEON, HasFullFP16] in {
5431def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
5432                "|fcmlt.4h\t$dst, $src1, $src2}",
5433                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5434def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
5435                "|fcmlt.8h\t$dst, $src1, $src2}",
5436                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5437}
5438def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
5439                "|fcmlt.2s\t$dst, $src1, $src2}",
5440                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5441def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
5442                "|fcmlt.4s\t$dst, $src1, $src2}",
5443                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5444def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
5445                "|fcmlt.2d\t$dst, $src1, $src2}",
5446                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5447
// facle (vector): aliases onto FACGE with the source operands swapped.
// The .4h/.8h arrangements require HasFullFP16 in addition to NEON.
5448let Predicates = [HasNEON, HasFullFP16] in {
5449def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
5450                "|facle.4h\t$dst, $src1, $src2}",
5451                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5452def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
5453                "|facle.8h\t$dst, $src1, $src2}",
5454                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5455}
5456def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
5457                "|facle.2s\t$dst, $src1, $src2}",
5458                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5459def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
5460                "|facle.4s\t$dst, $src1, $src2}",
5461                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5462def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
5463                "|facle.2d\t$dst, $src1, $src2}",
5464                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5465
// faclt (vector): aliases onto FACGT with the source operands swapped.
// The .4h/.8h arrangements require HasFullFP16 in addition to NEON.
5466let Predicates = [HasNEON, HasFullFP16] in {
5467def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
5468                "|faclt.4h\t$dst, $src1, $src2}",
5469                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5470def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
5471                "|faclt.8h\t$dst, $src1, $src2}",
5472                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5473}
5474def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
5475                "|faclt.2s\t$dst, $src1, $src2}",
5476                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5477def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
5478                "|faclt.4s\t$dst, $src1, $src2}",
5479                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5480def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
5481                "|faclt.2d\t$dst, $src1, $src2}",
5482                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5483
5484//===----------------------------------------------------------------------===//
5485// Advanced SIMD three scalar instructions.
5486//===----------------------------------------------------------------------===//
5487
// Scalar three-operand SIMD instructions. Each defm instantiates a multiclass
// defined elsewhere; the arguments visible here are the encoding fields, the
// assembly mnemonic, and the DAG node or intrinsic the instruction selects.
// The D / HS / BHSD suffix on the multiclass name presumably indicates which
// scalar element sizes are generated -- confirm against the multiclass
// definitions.
5488defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
5489defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
5490defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
5491defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
5492defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
5493defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
5494defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
5495defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
// The v1f64 form of the neon fabd intrinsic is also selected as scalar FABD64.
5496def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5497          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
// fabs(fsub a, b) is selected as a single FABD; the f16 form additionally
// requires HasFullFP16.
5498let Predicates = [HasNEON, HasFullFP16] in {
5499def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
5500}
5501let Predicates = [HasNEON] in {
5502def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
5503def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
5504}
5505defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
5506                                     int_aarch64_neon_facge>;
5507defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
5508                                     int_aarch64_neon_facgt>;
5509defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
5510defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
5511defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
// FMULX / FRECPS / FRSQRTS pass HasNEONorSME as an extra argument to the
// multiclass, unlike the other FP entries in this list.
5512defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>;
5513defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>;
5514defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>;
5515defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
5516defm SQDMULH  : SIMDThreeScalarHS<  0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
5517defm SQRDMULH : SIMDThreeScalarHS<  1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
5518defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
5519defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
5520defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
5521defm SRSHL    : SIMDThreeScalarD<   0, 0b01010, "srshl", int_aarch64_neon_srshl>;
5522defm SSHL     : SIMDThreeScalarD<   0, 0b01000, "sshl", int_aarch64_neon_sshl>;
5523defm SUB      : SIMDThreeScalarD<   1, 0b10000, "sub", sub>;
5524defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
5525defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
5526defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
5527defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
5528defm URSHL    : SIMDThreeScalarD<   1, 0b01010, "urshl", int_aarch64_neon_urshl>;
5529defm USHL     : SIMDThreeScalarD<   1, 0b01000, "ushl", int_aarch64_neon_ushl>;
// Scalar SQRDMLAH / SQRDMLSH, available only when HasRDM holds. The explicit
// patterns select the i32 intrinsic forms, passing the accumulator $Rd as the
// first operand of the v1i32 instruction.
5530let Predicates = [HasRDM] in {
5531  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
5532  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
5533  def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
5534                                            (i32 FPR32:$Rm))),
5535            (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5536  def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
5537                                            (i32 FPR32:$Rm))),
5538            (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5539}
5540
// Instantiates the lane-0 indexed patterns (multiclass defined elsewhere) for
// FMULX with the "16"/"32"/"64" name suffixes, using the fmulx intrinsic and
// the HasNEONorSME predicate list.
5541defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
5542                                          int_aarch64_neon_fmulx,
5543                                          [HasNEONorSME]>;
5544
// Scalar "less-than style" compare aliases. Each resolves to the
// corresponding "greater-than style" instruction with $src1 and $src2
// exchanged: cmls->CMHS, cmle->CMGE, cmlo->CMHI, cmlt->CMGT, fcmle->FCMGE,
// fcmlt->FCMGT, facle->FACGE, faclt->FACGT. The FP aliases are provided for
// the 32- and 64-bit register classes only.
5545def : InstAlias<"cmls $dst, $src1, $src2",
5546                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5547def : InstAlias<"cmle $dst, $src1, $src2",
5548                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5549def : InstAlias<"cmlo $dst, $src1, $src2",
5550                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5551def : InstAlias<"cmlt $dst, $src1, $src2",
5552                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5553def : InstAlias<"fcmle $dst, $src1, $src2",
5554                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5555def : InstAlias<"fcmle $dst, $src1, $src2",
5556                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5557def : InstAlias<"fcmlt $dst, $src1, $src2",
5558                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5559def : InstAlias<"fcmlt $dst, $src1, $src2",
5560                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5561def : InstAlias<"facle $dst, $src1, $src2",
5562                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5563def : InstAlias<"facle $dst, $src1, $src2",
5564                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5565def : InstAlias<"faclt $dst, $src1, $src2",
5566                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5567def : InstAlias<"faclt $dst, $src1, $src2",
5568                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5569
5570//===----------------------------------------------------------------------===//
5571// Advanced SIMD three scalar instructions (mixed operands).
5572//===----------------------------------------------------------------------===//
// Scalar SQDMULL / SQDMLAL / SQDMLSL (result and source operands differ in
// width). Only SQDMULL is given an intrinsic directly; the accumulating forms
// are selected by the two explicit patterns below.
5573defm SQDMULL  : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
5574                                       int_aarch64_neon_sqdmulls_scalar>;
5575defm SQDMLAL  : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
5576defm SQDMLSL  : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;
5577
// sqadd/sqsub of an i64 accumulator with the i64 result of a scalar sqdmull
// on two i32 operands folds into a single SQDMLALi32 / SQDMLSLi32.
5578def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
5579                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
5580                                                        (i32 FPR32:$Rm))))),
5581          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5582def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
5583                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
5584                                                        (i32 FPR32:$Rm))))),
5585          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5586
5587//===----------------------------------------------------------------------===//
5588// Advanced SIMD two scalar instructions.
5589//===----------------------------------------------------------------------===//
5590
// Scalar two-operand SIMD instructions. As with the three-operand list, each
// entry supplies encoding fields, the mnemonic and (where given) the DAG node
// or intrinsic to a multiclass defined elsewhere. The compare entries use the
// "...z" nodes (e.g. AArch64cmeqz), presumably the compare-against-zero forms.
// ABS additionally passes the predicate list [HasNoCSSC].
5591defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", abs, [HasNoCSSC]>;
5592defm CMEQ   : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
5593defm CMGE   : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
5594defm CMGT   : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
5595defm CMLE   : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
5596defm CMLT   : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
5597defm FCMEQ  : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
5598defm FCMGE  : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
5599defm FCMGT  : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
5600defm FCMLE  : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
5601defm FCMLT  : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
5602defm FCVTAS : SIMDFPTwoScalar<   0, 0, 0b11100, "fcvtas">;
5603defm FCVTAU : SIMDFPTwoScalar<   1, 0, 0b11100, "fcvtau">;
5604defm FCVTMS : SIMDFPTwoScalar<   0, 0, 0b11011, "fcvtms">;
5605defm FCVTMU : SIMDFPTwoScalar<   1, 0, 0b11011, "fcvtmu">;
5606defm FCVTNS : SIMDFPTwoScalar<   0, 0, 0b11010, "fcvtns">;
5607defm FCVTNU : SIMDFPTwoScalar<   1, 0, 0b11010, "fcvtnu">;
5608defm FCVTPS : SIMDFPTwoScalar<   0, 1, 0b11010, "fcvtps">;
5609defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu">;
5610def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
5611defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
5612defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
// FRECPE / FRECPX / FRSQRTE pass HasNEONorSME as an extra multiclass argument.
5613defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe", HasNEONorSME>;
5614defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx", HasNEONorSME>;
5615defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte", HasNEONorSME>;
// NEG is matched as (sub 0, x) via an inline unary fragment.
5616defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
5617                                 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
5618defm SCVTF  : SIMDFPTwoScalarCVT<   0, 0, 0b11101, "scvtf", AArch64sitof>;
5619defm SQABS  : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
5620defm SQNEG  : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
5621defm SQXTN  : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
5622defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
5623defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
5624                                     int_aarch64_neon_suqadd>;
5625defm UCVTF  : SIMDFPTwoScalarCVT<   1, 0, 0b11101, "ucvtf", AArch64uitof>;
5626defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
5627defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
5628                                    int_aarch64_neon_usqadd>;
5629
// An arithmetic shift right by 63 on a v1i64 is selected as CMLTv1i64rz
// (presumably "compare less than zero", which yields the same all-ones /
// all-zeros sign mask).
5630def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
5631          (CMLTv1i64rz V64:$Rn)>;
5632
// The v1f64 -> v1i64 forms of the fcvt{a,m,n,p,z}{s,u} intrinsics select the
// scalar v1i64 conversion instruction of the same name.
5633def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
5634          (FCVTASv1i64 FPR64:$Rn)>;
5635def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
5636          (FCVTAUv1i64 FPR64:$Rn)>;
5637def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
5638          (FCVTMSv1i64 FPR64:$Rn)>;
5639def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
5640          (FCVTMUv1i64 FPR64:$Rn)>;
5641def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
5642          (FCVTNSv1i64 FPR64:$Rn)>;
5643def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
5644          (FCVTNUv1i64 FPR64:$Rn)>;
5645def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
5646          (FCVTPSv1i64 FPR64:$Rn)>;
5647def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
5648          (FCVTPUv1i64 FPR64:$Rn)>;
5649def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
5650          (FCVTZSv1i64 FPR64:$Rn)>;
5651def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
5652          (FCVTZUv1i64 FPR64:$Rn)>;
5653
// FRECPE selection. First group: the int_aarch64_neon_frecpe intrinsic for
// f16/f32/f64 and v1f64 (v1f64 shares the scalar FRECPEv1i64 instruction).
5654def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
5655          (FRECPEv1f16 FPR16:$Rn)>;
5656def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
5657          (FRECPEv1i32 FPR32:$Rn)>;
5658def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
5659          (FRECPEv1i64 FPR64:$Rn)>;
5660def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
5661          (FRECPEv1i64 FPR64:$Rn)>;
5662
// Second group: the AArch64frecpe node for scalar and vector f32/f64 types
// (no f16 forms are listed here).
5663def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
5664          (FRECPEv1i32 FPR32:$Rn)>;
5665def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
5666          (FRECPEv2f32 V64:$Rn)>;
5667def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
5668          (FRECPEv4f32 FPR128:$Rn)>;
5669def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
5670          (FRECPEv1i64 FPR64:$Rn)>;
5671def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
5672          (FRECPEv1i64 FPR64:$Rn)>;
5673def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
5674          (FRECPEv2f64 FPR128:$Rn)>;
5675
// FRECPS selection for the AArch64frecps node: scalar f32/f64 use the
// FRECPS32/FRECPS64 instructions, vectors use the matching FRECPSv* form.
5676def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
5677          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
5678def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
5679          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
5680def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
5681          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
5682def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
5683          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
5684def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
5685          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;
5686
// FRECPX selection for the scalar f16/f32/f64 forms of the frecpx intrinsic.
5687def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
5688          (FRECPXv1f16 FPR16:$Rn)>;
5689def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
5690          (FRECPXv1i32 FPR32:$Rn)>;
5691def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
5692          (FRECPXv1i64 FPR64:$Rn)>;
5693
// FRSQRTE selection. First group: the int_aarch64_neon_frsqrte intrinsic for
// f16/f32/f64 and v1f64 (v1f64 shares the scalar FRSQRTEv1i64 instruction).
5694def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
5695          (FRSQRTEv1f16 FPR16:$Rn)>;
5696def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
5697          (FRSQRTEv1i32 FPR32:$Rn)>;
5698def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
5699          (FRSQRTEv1i64 FPR64:$Rn)>;
5700def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
5701          (FRSQRTEv1i64 FPR64:$Rn)>;
5702
// Second group: the AArch64frsqrte node for scalar and vector f32/f64 types.
5703def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
5704          (FRSQRTEv1i32 FPR32:$Rn)>;
5705def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
5706          (FRSQRTEv2f32 V64:$Rn)>;
5707def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
5708          (FRSQRTEv4f32 FPR128:$Rn)>;
5709def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
5710          (FRSQRTEv1i64 FPR64:$Rn)>;
5711def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
5712          (FRSQRTEv1i64 FPR64:$Rn)>;
5713def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
5714          (FRSQRTEv2f64 FPR128:$Rn)>;
5715
// FRSQRTS selection for the AArch64frsqrts node: scalar f32/f64 use
// FRSQRTS32/FRSQRTS64, vectors use the matching FRSQRTSv* form.
5716def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
5717          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
5718def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
5719          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
5720def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
5721          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
5722def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
5723          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
5724def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
5725          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;
5726
5727// Some float -> int -> float conversion patterns for which we want to keep the
5728// int values in FP registers using the corresponding NEON instructions to
5729// avoid more costly int <-> fp register transfers.
5730let Predicates = [HasNEON] in {
// f64/f32 round trips (fp -> int -> fp, signed and unsigned): the inner
// conversion uses the scalar FCVTZ[SU]v1i{64,32} form and feeds its result
// straight into [SU]CVTFv1i{64,32}, so no GPR value appears in the output dag.
5731def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
5732          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
5733def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
5734          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
5735def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
5736          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
5737def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
5738          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
5739
// Half-precision fp -> int -> fp round trips, kept in FP registers via the
// scalar FCVTZ[SU]v1f16 / [SU]CVTFv1i16 instructions.
// A nested `let` replaces the enclosing Predicates list rather than extending
// it, so HasNEON has to be restated here together with HasFullFP16; otherwise
// these patterns would be guarded by HasFullFP16 alone.
5740let Predicates = [HasNEON, HasFullFP16] in {
5741def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
5742          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
5743def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
5744          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
5745}
5746// If an integer is about to be converted to a floating point value,
5747// just load it on the floating point unit.
5748// Here are the patterns for 8 and 16-bits to float.
5749// 8-bits -> float.
// Patterns that fold a register-offset load feeding uint_to_fp into a load
// directly to an FP register followed by UCVTF.
//   DstTy / SrcTy : result FP type and the integer type produced by loadop.
//   loadop        : the (extending) load node to match.
//   UCVTF         : conversion instruction applied to the loaded value.
//   ro            : register-offset addressing-mode descriptor supplying the
//                   W/X complex patterns and their extend operands.
//   LDRW / LDRX   : load instructions for a 32-bit / 64-bit offset register.
//   sub           : sub-register index at which the loaded value is inserted
//                   into an IMPLICIT_DEF of DstTy.
5750multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
5751                             SDPatternOperator loadop, Instruction UCVTF,
5752                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
5753                             SubRegIndex sub> {
  // 32-bit offset register: ro.Wpat paired with ro.Wext on both sides.
5754  def : Pat<(DstTy (uint_to_fp (SrcTy
5755                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
5756                                      ro.Wext:$extend))))),
5757           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
5758                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
5759                                 sub))>;
5760
  // 64-bit offset register: ro.Xpat paired with ro.Xext on both sides, so the
  // $extend operand has the same operand class in the source and result dags.
5761  def : Pat<(DstTy (uint_to_fp (SrcTy
5762                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
5763                                      ro.Xext:$extend))))),
5764           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
5765                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
5766                                 sub))>;
5767}
5768
// 8-bit zero-extending loads converted to f32: register-offset forms come
// from UIntToFPROLoadPat; the two explicit patterns cover the scaled unsigned
// immediate (LDRBui) and unscaled signed immediate (LDURBi) addressing modes.
// In every case the byte is loaded into the bsub sub-register of an
// IMPLICIT_DEF f32 and then converted with UCVTFv1i32.
5769defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
5770                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
5771def : Pat <(f32 (uint_to_fp (i32
5772               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
5773           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
5774                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
5775def : Pat <(f32 (uint_to_fp (i32
5776                     (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
5777           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
5778                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
5779// 16-bits -> float.
// Same three addressing modes as above, using the halfword loads and hsub.
5780defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
5781                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
5782def : Pat <(f32 (uint_to_fp (i32
5783                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
5784           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
5785                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
5786def : Pat <(f32 (uint_to_fp (i32
5787                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
5788           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
5789                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
5790// 32-bits are handled in target specific dag combine:
5791// performIntToFpCombine.
5792// 64-bits integer to 32-bits floating point, not possible with
5793// UCVTF on floating point registers (both source and destination
5794// must have the same size).
5795
5796// Here are the patterns for 8, 16, 32, and 64-bits to double.
5797// 8-bits -> double.
// Loads converted to f64 with UCVTFv1i64. For each source width the
// register-offset forms come from UIntToFPROLoadPat and the two explicit
// patterns cover the scaled-immediate and unscaled-immediate addressing
// modes; the loaded value is inserted at bsub / hsub / ssub of an
// IMPLICIT_DEF f64.
5798defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
5799                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
5800def : Pat <(f64 (uint_to_fp (i32
5801                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
5802           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5803                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
5804def : Pat <(f64 (uint_to_fp (i32
5805                  (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
5806           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5807                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
5808// 16-bits -> double.
5809defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
5810                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
5811def : Pat <(f64 (uint_to_fp (i32
5812                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
5813           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5814                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
5815def : Pat <(f64 (uint_to_fp (i32
5816                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
5817           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5818                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
5819// 32-bits -> double.
// Here the load node is a plain (non-extending) i32 `load`, placed at ssub.
5820defm : UIntToFPROLoadPat<f64, i32, load,
5821                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
5822def : Pat <(f64 (uint_to_fp (i32
5823                  (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
5824           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5825                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
5826def : Pat <(f64 (uint_to_fp (i32
5827                  (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
5828           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5829                          (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
5830// 64-bits -> double are handled in target specific dag combine:
5831// performIntToFpCombine.
5832} // let Predicates = [HasNEON]
5833
5834//===----------------------------------------------------------------------===//
5835// Advanced SIMD three different-sized vector instructions.
5836//===----------------------------------------------------------------------===//
5837
// Three-operand vector instructions whose operands differ in element size.
// Each defm passes encoding fields, the mnemonic and the operation to match
// to a multiclass defined elsewhere. Where no single node exists the
// operation is written inline as a BinOpFrag / TriOpFrag: the "L" forms
// extend both sources, the "W" forms extend only the right-hand source, and
// the multiply-accumulate forms add/subtract an [SU]MULL product to/from
// $LHS. The unsigned add/sub forms use `zanyext` for the extension.
5838defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
5839defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
5840defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
5841defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
5842defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
5843defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
5844                                             AArch64sabd>;
5845defm SABDL   : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
5846                                          AArch64sabd>;
5847defm SADDL   : SIMDLongThreeVectorBHS<   0, 0b0000, "saddl",
5848            BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
5849defm SADDW   : SIMDWideThreeVectorBHS<   0, 0b0001, "saddw",
5850                 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
5851defm SMLAL   : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
5852    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
5853defm SMLSL   : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
5854    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
5855defm SMULL   : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
5856defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
5857                                               int_aarch64_neon_sqadd>;
5858defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
5859                                               int_aarch64_neon_sqsub>;
5860defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
5861                                     int_aarch64_neon_sqdmull>;
5862defm SSUBL   : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
5863                 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
5864defm SSUBW   : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
5865                 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
5866defm UABAL   : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
5867                                              AArch64uabd>;
5868defm UADDL   : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
5869                 BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
5870defm UADDW   : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
5871                 BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
5872defm UMLAL   : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
5873    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
5874defm UMLSL   : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
5875    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
5876defm UMULL   : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
5877defm USUBL   : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
5878                 BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
5879defm USUBW   : SIMDWideThreeVectorBHS<   1, 0b0011, "usubw",
5880                 BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
5881
5882// Additional patterns for [SU]ML[AS]L
// Matches opnode($Ra, extract_subvector(vecopnode($Rn, $Rm), 0)) where only
// the low 64 bits of the widening multiply are consumed, and selects the
// 128-bit accumulating instruction instead of a separate multiply and
// add/sub.
//   opnode    : the accumulate operation applied to $Ra and the product.
//   vecopnode : the widening multiply node producing the 128-bit product.
//   INST8B / INST4H / INST2S : accumulating instructions for v8i8, v4i16 and
//               v2i32 sources respectively.
// $Ra is placed in the dsub half of an IMPLICIT_DEF 128-bit value to form the
// tied accumulator, and the dsub half of the instruction result is returned.
5883multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
5884  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
5885  def : Pat<(v4i16 (opnode
5886                    V64:$Ra,
5887                    (v4i16 (extract_subvector
5888                            (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
5889                            (i64 0))))),
5890             (EXTRACT_SUBREG (v8i16 (INST8B
5891                                     (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
5892                                     V64:$Rn, V64:$Rm)), dsub)>;
5893  def : Pat<(v2i32 (opnode
5894                    V64:$Ra,
5895                    (v2i32 (extract_subvector
5896                            (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
5897                            (i64 0))))),
5898             (EXTRACT_SUBREG (v4i32 (INST4H
5899                                     (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
5900                                     V64:$Rn, V64:$Rm)), dsub)>;
5901  def : Pat<(v1i64 (opnode
5902                    V64:$Ra,
5903                    (v1i64 (extract_subvector
5904                            (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
5905                            (i64 0))))),
5906             (EXTRACT_SUBREG (v2i64 (INST2S
5907                                     (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
5908                                     V64:$Rn, V64:$Rm)), dsub)>;
5909}
5910
// Instantiations: add/sub combined with the unsigned/signed widening multiply
// map to UMLAL, SMLAL, UMLSL and SMLSL respectively.
5911defm : Neon_mul_acc_widen_patterns<add, AArch64umull,
5912     UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
5913defm : Neon_mul_acc_widen_patterns<add, AArch64smull,
5914     SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
5915defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
5916     UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
5917defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
5918     SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
5919
5920
// Patterns for an add/sub whose operands are the low halves of extended
// 64-bit vectors, so that only a 64-bit result is needed.
//   opnode : the arithmetic node (add or sub in the instantiations below).
//   ext    : the extension applied to the narrow source(s).
//   Inst   : instruction name stem; "L..." / "W..." suffixes are appended and
//            resolved with !cast<Instruction>.
// In all cases the dsub half of the 128-bit instruction result is returned.
5921multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> {
  // Both operands extended: select the long ("L") form on the 64-bit sources.
5922  def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
5923                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
5924            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>;
5925  def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
5926                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
5927            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>;
5928  def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
5929                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
5930            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>;
5931
  // Only the second operand extended: select the wide ("W") form, with $Rn
  // inserted at dsub of an IMPLICIT_DEF 128-bit value as the wide operand.
5932  def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
5933                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
5934            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
5935  def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
5936                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
5937            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
5938  def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
5939                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
5940            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
5941}
5942
// Instantiations for the four stems; the multiclass appends the L/W suffixes,
// giving UADDL/UADDW, SADDL/SADDW, USUBL/USUBW and SSUBL/SSUBW.
5943defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
5944defm : Neon_addl_extract_patterns<add, sext, "SADD">;
5945defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
5946defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
5947
5948// CodeGen patterns for addhn and subhn instructions, which can actually be
5949// written in LLVM IR without too much difficulty.
5950
5951// Prioritize ADDHN and SUBHN over UZP2.
let AddedComplexity = 10 in {

// ADDHN: add two 128-bit vectors, logically shift each element right by half
// its width, then truncate to the narrow element type.
def : Pat<(v8i8 (trunc (v8i16
            (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32
            (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 16))))),
          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64
            (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 32))))),
          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;

// ADDHN forms whose narrowed result is concatenated after an existing 64-bit
// value $Rd; $Rd is supplied as the dsub part of the first operand.
def : Pat<(concat_vectors
            (v8i8 V64:$Rd),
            (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors
            (v4i16 V64:$Rd),
            (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 16))))),
          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors
            (v2i32 V64:$Rd),
            (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 32))))),
          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

// SUBHN: same shapes as ADDHN above, with sub in place of add.
def : Pat<(v8i8 (trunc (v8i16
            (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32
            (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 16))))),
          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64
            (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 32))))),
          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;

def : Pat<(concat_vectors
            (v8i8 V64:$Rd),
            (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors
            (v4i16 V64:$Rd),
            (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 16))))),
          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors
            (v2i32 V64:$Rd),
            (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 32))))),
          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

} // AddedComplexity = 10
6005
6006//----------------------------------------------------------------------------
6007// AdvSIMD bitwise extract from vector instruction.
6008//----------------------------------------------------------------------------
6009
defm EXT : SIMDBitwiseExtract<"ext">;

// Immediate transform: re-emit the immediate, increased by 8, as an i32
// target constant.
def AdjustExtImm : SDNodeXForm<imm, [{
  const uint64_t Adjusted = N->getZExtValue() + 8;
  return CurDAG->getTargetConstant(Adjusted, SDLoc(N), MVT::i32);
}]>;
// EXT selection patterns for one 64-bit/128-bit vector type pair. N is the
// element index at which the upper 64-bit half of VT128 begins.
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  // AArch64ext maps directly onto the 64-bit and 128-bit EXT instructions.
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;

  // Copying the upper 64 bits of a 128-bit vector (extract_subvector at N)
  // is done with an EXT of the register against itself at byte offset 8.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;

  // A 64-bit EXT whose operands are the low and high halves of the same
  // 128-bit register becomes a single 128-bit EXT of that register.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;

  // A 64-bit EXT whose first operand is the high half of a 128-bit register
  // uses a 128-bit EXT of the whole register with the immediate adjusted by
  // AdjustExtImm. The top half of the second operand is left unset, which is
  // harmless because it is never read.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)), V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG
              (EXTv16i8 V128:$Rn, (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                        (AdjustExtImm imm:$imm)),
              dsub)>;
}
6041
// One ExtPat instance per 64-bit/128-bit type pair; the last argument is the
// number of elements in the 64-bit type.
defm : ExtPat<v8i8,   v16i8,  8>;
defm : ExtPat<v4i16,  v8i16,  4>;
defm : ExtPat<v4f16,  v8f16,  4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32,  v4i32,  2>;
defm : ExtPat<v2f32,  v4f32,  2>;
defm : ExtPat<v1i64,  v2i64,  1>;
defm : ExtPat<v1f64,  v2f64,  1>;
6050
6051//----------------------------------------------------------------------------
6052// AdvSIMD zip vector
6053//----------------------------------------------------------------------------
6054
// Transpose, unzip and zip instructions. Each takes a 3-bit opcode field, the
// assembly mnemonic and the selection DAG node it implements.
defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;

defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;

defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
6061
// Concatenating the truncations of two 128-bit vectors selects UZP1.
def : Pat<(v16i8 (concat_vectors
                   (v8i8 (trunc (v8i16 V128:$Vn))),
                   (v8i8 (trunc (v8i16 V128:$Vm))))),
          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors
                   (v4i16 (trunc (v4i32 V128:$Vn))),
                   (v4i16 (trunc (v4i32 V128:$Vm))))),
          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors
                   (v2i32 (trunc (v2i64 V128:$Vn))),
                   (v2i32 (trunc (v2i64 V128:$Vm))))),
          (UZP1v4i32 V128:$Vn, V128:$Vm)>;

// The same three shapes with an assertzext node wrapped around each trunc;
// fptoi lowering can generate that form.
def : Pat<(v16i8 (concat_vectors
                   (v8i8 (assertzext (trunc (v8i16 V128:$Vn)))),
                   (v8i8 (assertzext (trunc (v8i16 V128:$Vm)))))),
          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors
                   (v4i16 (assertzext (trunc (v4i32 V128:$Vn)))),
                   (v4i16 (assertzext (trunc (v4i32 V128:$Vm)))))),
          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors
                   (v2i32 (assertzext (trunc (v2i64 V128:$Vn)))),
                   (v2i32 (assertzext (trunc (v2i64 V128:$Vm)))))),
          (UZP1v4i32 V128:$Vn, V128:$Vm)>;
6082
// Concatenating the truncated upper halves of each element (logical shift
// right by half the element width, then trunc) of two vectors selects UZP2.
def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn),
                                                            (i32 8)))),
                                 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm),
                                                            (i32 8)))))),
          (UZP2v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn),
                                                             (i32 16)))),
                                 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm),
                                                             (i32 16)))))),
          (UZP2v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn),
                                                             (i32 32)))),
                                 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm),
                                                             (i32 32)))))),
          (UZP2v4i32 V128:$Vn, V128:$Vm)>;
6095
6096//----------------------------------------------------------------------------
6097// AdvSIMD TBL/TBX instructions
6098//----------------------------------------------------------------------------
6099
defm TBL : SIMDTableLookup<0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

// Single-register table lookups from the tbl1 intrinsic.
def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn),
                                       (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri),
                                        (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

// tbx1 carries an additional leading $Rd operand, which is passed through as
// the first operand of the TBX instruction.
def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                                       (v16i8 VecListOne128:$Rn),
                                       (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                                        (v16i8 V128:$Ri),
                                        (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
6114
6115//----------------------------------------------------------------------------
6116// AdvSIMD LUT instructions
6117//----------------------------------------------------------------------------
// The LUTI2/LUTI4 definitions are only available under the HasLUT predicate.
let Predicates = [HasLUT] in {
  defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">;
  defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">;
} // Predicates = [HasLUT]
6122
6123//----------------------------------------------------------------------------
6124// AdvSIMD scalar DUP instruction
6125//----------------------------------------------------------------------------
6126
// Scalar DUP; the assembly mnemonic passed to the multiclass is "mov".
defm DUP : SIMDScalarDUP<"mov">;
6128
6129//----------------------------------------------------------------------------
6130// AdvSIMD scalar pairwise instructions
6131//----------------------------------------------------------------------------
6132
// Integer pairwise add.
defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;

// Floating-point pairwise add, max and min (including the NM variants).
defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
6139
6140// Only the lower half of the result of the inner FADDP is used in the patterns
6141// below, so the second operand does not matter. Re-use the first input
6142// operand, so no additional dependencies need to be introduced.
let Predicates = [HasFullFP16] in {
// v8f16: two vector FADDPs, then the scalar pairwise add on the low 64 bits.
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
          (FADDPv2i16p
            (EXTRACT_SUBREG
              (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
              dsub))>;
// v4f16: one vector FADDP, then the scalar pairwise add.
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
          (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
} // Predicates = [HasFullFP16]

// v4f32: one vector FADDP, then the scalar pairwise add on the low 64 bits.
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
          (FADDPv2i32p
            (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
// Two-element vectors reduce with a single scalar pairwise add.
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))), (FADDPv2i32p V64:$Rn)>;
def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))), (FADDPv2i64p V128:$Rn)>;
6161
// v2i64 saddv/uaddv both select ADDP; the scalar result is placed in the dsub
// part of an otherwise undefined v2i64.
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                         (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                         (ADDPv2i64p V128:$Rn), dsub)>;

// faddv intrinsic.
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p
            (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;

// Two-element floating-point max/min reductions map onto the scalar pairwise
// instructions.
def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))), (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))), (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))), (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))), (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))), (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))), (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))), (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))), (FMINPv2i64p V128:$Rn)>;
6188
6189//----------------------------------------------------------------------------
6190// AdvSIMD INS/DUP instructions
6191//----------------------------------------------------------------------------
6192
// DUP from a general-purpose register, one definition per vector arrangement.
def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b",  v8i8,  V64,  GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h",  v4i16, V64,  GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h",  v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s",  v2i32, V64,  GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s",  v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d",  v2i64, V128, GPR64>;

// DUP from a vector element, one definition per vector arrangement.
def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s",  v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s",  v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h",  v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h",  v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b",  v8i8,  V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
6208
6209// DUP from a 64-bit register to a 64-bit register is just a copy
def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;

// Splatting a floating-point scalar: place it in the lowest subregister of an
// undefined 128-bit register and duplicate lane 0.
def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                              FPR32:$Rn, ssub),
                               (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                              FPR32:$Rn, ssub),
                               (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                              FPR64:$Rn, dsub),
                               (i64 0)))>;

// Half-precision and bfloat scalars share the 16-bit lane DUP instructions.
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                                              FPR16:$Rn, hsub),
                               (i64 0)))>;
def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v4bf16 (DUPv4i16lane (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                                               FPR16:$Rn, hsub),
                                (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                                              FPR16:$Rn, hsub),
                               (i64 0)))>;
def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v8bf16 (DUPv8i16lane (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                                               FPR16:$Rn, hsub),
                                (i64 0)))>;
6243
// Lane duplication for floating-point vector types reuses the integer lane
// DUP instructions of the same element width.

// 16-bit lanes: f16.
def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

// 16-bit lanes: bf16.
def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

// 32-bit and 64-bit lanes.
def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
6260
6261// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
6262// instruction even if the types don't match: we just have to remap the lane
6263// carefully. N.b. this trick only applies to truncations.
// Lane-index transforms: each multiplies the matched immediate by a fixed
// factor and emits the product as an i64 target constant.
def VecIndex_x2 : SDNodeXForm<imm, [{
  const uint64_t Scaled = N->getZExtValue() * 2;
  return CurDAG->getTargetConstant(Scaled, SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  const uint64_t Scaled = N->getZExtValue() * 4;
  return CurDAG->getTargetConstant(Scaled, SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  const uint64_t Scaled = N->getZExtValue() * 8;
  return CurDAG->getTargetConstant(Scaled, SDLoc(N), MVT::i64);
}]>;
6273
// Select a lane DUP for (AArch64dup (vector_extract ...)) when the source
// vector has wider elements than the result. IdxXFORM rescales the source
// lane index into an index in units of the result element.
multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  // Source is a 128-bit vector: use it directly.
  def : Pat<(ResVT (AArch64dup
                     (ScalVT (vector_extract (Src128VT V128:$Rn), imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  // Source is a 64-bit vector: view it as the dsub part of a 128-bit register.
  def : Pat<(ResVT (AArch64dup
                     (ScalVT (vector_extract (Src64VT V64:$Rn), imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub),
                 (IdxXFORM imm:$idx))>;
}

// 64-bit results.
defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

// 128-bit results.
defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
6293
// Variant of the truncating DUP patterns for 64-bit source elements, where
// the truncation to i32 appears as an explicit trunc node.
multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  // Source is a v2i64 in a 128-bit register.
  def : Pat<(ResVT (AArch64dup
                     (i32 (trunc (extractelt (v2i64 V128:$Rn), imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  // Source is a v1i64 in a 64-bit register, viewed as the dsub part of a
  // 128-bit register.
  def : Pat<(ResVT (AArch64dup
                     (i32 (trunc (extractelt (v1i64 V64:$Rn), imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub),
                 (IdxXFORM imm:$idx))>;
}

// 64-bit results.
defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;

// 128-bit results.
defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;
6312
6313// SMOV and UMOV definitions, with some extra patterns for convenience
// Instruction definitions come from the SMov and UMov multiclasses; the
// selection patterns that use them follow.
defm SMOV : SMov;
defm UMOV : UMov;
6316
// A sign-extended lane extract selects SMOV directly. Each element width has
// one pattern per destination width (i32 and i64), distinguished by the
// result type of the output.
//
// The v8i16 -> SMOVvi16to32 pattern was previously listed twice; the second
// copy could never be selected and has been dropped.
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
// 32-bit elements only need the extension to i64.
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
6329
// The same sign-extending extracts when the i32 element has first been
// any-extended to i64: select the 64-bit SMOV forms.
def : Pat<(sext_inreg
            (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                              VectorIndexB:$idx)))),
            i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg
            (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                              VectorIndexH:$idx)))),
            i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
6336
6337// Extracting i8 or i16 elements will have the zero-extend transformed to
6338// an 'and' mask by type legalization since neither i8 nor i16 are legal types
6339// for AArch64. Match these patterns here since UMOV already zeroes out the high
6340// bits of the destination register.
// 32-bit results: the 'and' mask is folded into the UMOV.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;

// 64-bit results: the masked any-extend becomes a 32-bit UMOV placed in the
// sub_32 part of the 64-bit value via SUBREG_TO_REG.
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                      VectorIndexB:$idx)))),
                    (i64 0xff))),
          (SUBREG_TO_REG (i64 0),
                         (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)),
                         sub_32)>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                      VectorIndexH:$idx)))),
                    (i64 0xffff))),
          (SUBREG_TO_REG (i64 0),
                         (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)),
                         sub_32)>;
6354
defm INS : SIMDIns;

// scalar_to_vector from a 32-bit GPR into byte vectors: copy the value into
// an FPR32 and view it as the ssub part of the vector register.
def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG
            (i32 0), (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG
            (i32 0), (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

// A zero-extended GPR32 bitcast to v8i8 uses FMOVWSr, which leaves the top
// bits zero.
def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
          (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;

// scalar_to_vector from a 32-bit GPR into halfword vectors.
def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG
            (i32 0), (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG
            (i32 0), (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6374
// scalar_to_vector from a floating-point register: insert the scalar into
// the matching subregister (hsub/ssub/dsub) of an undefined vector register.
//
// The f16 and bf16 patterns used to be listed a second time after the v2i64
// pattern; those exact duplicates could never be selected and were removed.

// 16-bit floating-point scalars.
def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

// Integer scalars that already live in an FPR.
def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
            (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                  (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
            (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                  (i32 FPR32:$Rn), ssub))>;

def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
            (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                  (i64 FPR64:$Rn), dsub))>;

// 32-bit and 64-bit floating-point scalars.
def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
6413
// Insert an f16 scalar into a v4f16: widen both the vector and the scalar to
// 128-bit registers, copy lane 0 of the scalar's register into lane $imm with
// INSvi16lane, then take the low 64 bits of the result.
def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn), (f16 FPR16:$Rm),
                                (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;
6423
// Inserting floating-point +0.0 into a lane: use the GPR form of INS with the
// zero register (WZR/XZR) as the source. The 64-bit vector variants widen
// $Rn to 128 bits first and take the low 64 bits of the result.
def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0),
                         (i64 VectorIndexH:$imm)),
          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0),
                         (i64 VectorIndexH:$imm)),
          (EXTRACT_SUBREG
            (INSvi16gpr
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexH:$imm, WZR),
            dsub)>;
def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0),
                         (i64 VectorIndexS:$imm)),
          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0),
                         (i64 VectorIndexS:$imm)),
          (EXTRACT_SUBREG
            (INSvi32gpr
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm, WZR),
            dsub)>;
// The output uses VectorIndexD to match the operand class bound to $imm in
// the source pattern (it previously named VectorIndexS).
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;
6434
// Insert a floating-point scalar held in an FPR into a vector lane. The
// scalar is first placed in lane 0 of an otherwise undefined 128-bit
// register, then copied into lane $imm with the lane form of INS. 64-bit
// destination vectors are widened to 128 bits for the INS and narrowed again
// with EXTRACT_SUBREG.

// f16 into v8f16.
def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn), (f16 FPR16:$Rm),
                                (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn,
            VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

// bf16 into v4bf16.
def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn), (bf16 FPR16:$Rm),
                                 (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

// bf16 into v8bf16.
def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn), (bf16 FPR16:$Rm),
                                 (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn,
            VectorIndexH:$imm,
            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

// f32 into v2f32.
def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), (f32 FPR32:$Rm),
                                (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;

// f32 into v4f32.
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn), (f32 FPR32:$Rm),
                                (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn,
            VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;

// f64 into v2f64.
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), (f64 FPR64:$Rm),
                                (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn,
            VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;
6480
// Insert a GPR32 value into a lane of a 64-bit integer vector: widen the
// vector to 128 bits, use the GPR form of INS, then take the low 64 bits.
def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm),
                                (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32gpr
              (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm),
                                (i64 VectorIndexH:$imm))),
          (EXTRACT_SUBREG
            (INSvi16gpr
              (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexH:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm),
                               (i64 VectorIndexB:$imm))),
          (EXTRACT_SUBREG
            (INSvi8gpr
              (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexB:$imm, GPR32:$Rm),
            dsub)>;
6496
6497// Copy an element at a constant index in one vector into a constant indexed
6498// element of another.
// FIXME: refactor to a shared class/def parameterized on vector type, vector
// index type and INS extension.
// vcopy_lane intrinsic: lane $idx2 of $Vs is copied into lane $idx of $Vd
// using the lane form of INS for the element width.
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane (v16i8 V128:$Vd),
                                              VectorIndexB:$idx,
                                              (v16i8 V128:$Vs),
                                              VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane V128:$Vd, VectorIndexB:$idx,
                             V128:$Vs, VectorIndexB:$idx2))>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane (v8i16 V128:$Vd),
                                              VectorIndexH:$idx,
                                              (v8i16 V128:$Vs),
                                              VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane V128:$Vd, VectorIndexH:$idx,
                              V128:$Vs, VectorIndexH:$idx2))>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane (v4i32 V128:$Vd),
                                              VectorIndexS:$idx,
                                              (v4i32 V128:$Vs),
                                              VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane V128:$Vd, VectorIndexS:$idx,
                              V128:$Vs, VectorIndexS:$idx2))>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane (v2i64 V128:$Vd),
                                              VectorIndexD:$idx,
                                              (v2i64 V128:$Vs),
                                              VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane V128:$Vd, VectorIndexD:$idx,
                              V128:$Vs, VectorIndexD:$idx2))>;
6525
// Select the lane form of INS for a vector_insert whose inserted value is a
// vector_extract from another vector. All four combinations of 128-bit and
// 64-bit destination and source are covered; 64-bit registers are viewed as
// the dsub part of a 128-bit register, and a 64-bit result is taken back out
// with EXTRACT_SUBREG.
multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  // 128-bit destination, 128-bit source.
  def : Pat<(VT128 (vector_insert
                     V128:$src,
                     (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  // 128-bit destination, 64-bit source.
  def : Pat<(VT128 (vector_insert
                     V128:$src,
                     (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  // 64-bit destination, 128-bit source.
  def : Pat<(VT64 (vector_insert
                    V64:$src,
                    (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                    imm:$Immd)),
            (EXTRACT_SUBREG
              (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                   V128:$Rn, imm:$Immn),
              dsub)>;

  // 64-bit destination, 64-bit source.
  def : Pat<(VT64 (vector_insert
                    V64:$src,
                    (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                    imm:$Immd)),
            (EXTRACT_SUBREG
              (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                   (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
              dsub)>;
}
6554
// Floating-point element types.
defm : Neon_INS_elt_pattern<v8f16,  v4f16,  f16,  INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32,  v2f32,  f32,  INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64,  v1f64,  f64,  INSvi64lane>;

// Integer element types; 8-, 16- and 32-bit elements are extracted as i32.
defm : Neon_INS_elt_pattern<v16i8,  v8i8,   i32,  INSvi8lane>;
defm : Neon_INS_elt_pattern<v8i16,  v4i16,  i32,  INSvi16lane>;
defm : Neon_INS_elt_pattern<v4i32,  v2i32,  i32,  INSvi32lane>;
defm : Neon_INS_elt_pattern<v2i64,  v1i64,  i64,  INSvi64lane>;
6564
// Insert of a bitcast FP scalar into an integer vector.
// The scalar is placed in the low subregister of an otherwise undefined
// vector register and then copied across with a lane-to-lane INS:
//   vector_insert(src, bitcast(f32 n), lane)
//     -> INSvi32lane(src, lane, INSERT_SUBREG(-, n, ssub), 0)
def : Pat<(v4i32 (vector_insert v4i32:$src,
                                (i32 (bitconvert (f32 FPR32:$Sn))),
                                imm:$Immd)),
          (INSvi32lane V128:$src, imm:$Immd,
                       (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub),
                       0)>;

// 64-bit destination: widen $src to 128 bits, insert, and take dsub again.
def : Pat<(v2i32 (vector_insert v2i32:$src,
                                (i32 (bitconvert (f32 FPR32:$Sn))),
                                imm:$Immd)),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
              imm:$Immd,
              (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub),
              0),
            dsub)>;

def : Pat<(v2i64 (vector_insert v2i64:$src,
                                (i64 (bitconvert (f64 FPR64:$Sn))),
                                imm:$Immd)),
          (INSvi64lane V128:$src, imm:$Immd,
                       (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub),
                       0)>;
6576
// Bitcast of an integer lane extract to an FP scalar.
//   f32 bitcast(vector_extract(v4i32 src, lane))
//     -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane), ssub)
// When the lane is 0 the value is simply the low subregister of $src.
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
          (EXTRACT_SUBREG
            (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd),
            ssub)>;
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, ssub)>;

def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
          (EXTRACT_SUBREG
            (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd),
            dsub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, dsub)>;
6587
// Floating-point vector element extraction.
// Lane 0 is a plain subregister extraction of the source register.
def : Pat<(f64  (vector_extract (v2f64  V128:$Rn), (i64 0))),
          (f64  (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(f32  (vector_extract (v4f32  V128:$Rn), (i64 0))),
          (f32  (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(f16  (vector_extract (v8f16  V128:$Rn), (i64 0))),
          (f16  (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))),
          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;

// Any other lane number is extracted with a MOV (named DUP here).
def : Pat<(vector_extract (v2f64  V128:$Rn), VectorIndexD:$idx),
          (f64  (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32  V128:$Rn), VectorIndexS:$idx),
          (f32  (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16  V128:$Rn), VectorIndexH:$idx),
          (f16  (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
6609
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. The general case needs an instruction, and it might just as well be
// INS: $Rd becomes the low half of a 128-bit register and $Rn is inserted
// into 64-bit lane 1.
class ConcatPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
        (INSvi64lane
          (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
          (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;

def : ConcatPat<v2i64,  v1i64>;
def : ConcatPat<v2f64,  v1f64>;
def : ConcatPat<v4i32,  v2i32>;
def : ConcatPat<v4f32,  v2f32>;
def : ConcatPat<v8i16,  v4i16>;
def : ConcatPat<v8f16,  v4f16>;
def : ConcatPat<v8bf16, v4bf16>;
def : ConcatPat<v16i8,  v8i8>;
6626
// If the high lanes are undef, though, we can just ignore them: the source is
// placed in the dsub subregister of an otherwise undefined 128-bit register
// and no instruction is needed.
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;

// Instantiated for the same type pairs as ConcatPat. The half-precision pairs
// are included so that a concat with an undef high half never has to fall
// back to the INS-based ConcatPat for those types.
def : ConcatUndefPat<v2i64,  v1i64>;
def : ConcatUndefPat<v2f64,  v1f64>;
def : ConcatUndefPat<v4i32,  v2i32>;
def : ConcatUndefPat<v4f32,  v2f32>;
def : ConcatUndefPat<v8i16,  v4i16>;
def : ConcatUndefPat<v8f16,  v4f16>;
def : ConcatUndefPat<v8bf16, v4bf16>;
def : ConcatUndefPat<v16i8,  v8i8>;
6638
6639//----------------------------------------------------------------------------
6640// AdvSIMD across lanes instructions
6641//----------------------------------------------------------------------------
6642
// Integer across-lanes reductions.
defm ADDV  : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;

// Long (widening) across-lanes adds.
defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;

// Floating-point across-lanes min/max reductions, each tied to its DAG node.
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv",   AArch64fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv",   AArch64fminv>;
6654
// Selects an AArch64uaddv of a pairwise long add (addlp) as a single long
// across-lanes add. Opc is the instruction name prefix; the element-layout
// suffix ("v8i8v", ...) is appended to find the instruction.
multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv(x).
  // For 8-bit sources the i32 result is read from lane 0 of the reduction.
  def : Pat<(i32 (vector_extract
                   (v8i16 (insert_subvector
                            undef,
                            (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
                            (i64 0))),
                   (i64 0))),
            (EXTRACT_SUBREG
              (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                             (!cast<Instruction>(!strconcat(Opc, "v8i8v")) V64:$op),
                             hsub),
              ssub)>;
  def : Pat<(i32 (vector_extract
                   (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))),
                   (i64 0))),
            (EXTRACT_SUBREG
              (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                             (!cast<Instruction>(!strconcat(Opc, "v16i8v")) V128:$op),
                             hsub),
              ssub)>;
  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                           (!cast<Instruction>(!strconcat(Opc, "v8i16v")) V128:$op),
                           ssub)>;

  // Patterns for addp(addlp(x)) ==> addlv(x).
  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                           (!cast<Instruction>(!strconcat(Opc, "v4i16v")) V64:$op),
                           ssub)>;
  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                           (!cast<Instruction>(!strconcat(Opc, "v4i32v")) V128:$op),
                           dsub)>;
}

defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
6677
// GlobalISel counterpart: vecreduce_add of a pairwise long add (addlp) is
// selected directly as the long across-lanes add named by Opc.
multiclass SIMDAcrossLaneLongPairIntrinsicGISel<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv(x).
  def : Pat<(i16 (vecreduce_add (v4i16 (addlp (v8i8 V64:$Rn))))),
            (!cast<Instruction>(!strconcat(Opc, "v8i8v")) V64:$Rn)>;
  def : Pat<(i16 (vecreduce_add (v8i16 (addlp (v16i8 V128:$Rn))))),
            (!cast<Instruction>(!strconcat(Opc, "v16i8v")) V128:$Rn)>;
  def : Pat<(i32 (vecreduce_add (v4i32 (addlp (v8i16 V128:$Rn))))),
            (!cast<Instruction>(!strconcat(Opc, "v8i16v")) V128:$Rn)>;

  // Patterns for addp(addlp(x)) ==> addlv(x).
  def : Pat<(i32 (vecreduce_add (v2i32 (addlp (v4i16 V64:$Rn))))),
            (!cast<Instruction>(!strconcat(Opc, "v4i16v")) V64:$Rn)>;
  def : Pat<(i64 (vecreduce_add (v2i64 (addlp (v4i32 V128:$Rn))))),
            (!cast<Instruction>(!strconcat(Opc, "v4i32v")) V128:$Rn)>;
}

defm : SIMDAcrossLaneLongPairIntrinsicGISel<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsicGISel<"SADDLV", AArch64saddlp>;
6697
// Patterns for uaddlv(uaddlp(x)) ==> uaddlv(x).

// Scalar results from the int_aarch64_neon_uaddlv intrinsic.
def : Pat<(i64 (int_aarch64_neon_uaddlv
                 (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (i64 (EXTRACT_SUBREG
                 (v4i32 (SUBREG_TO_REG (i64 0),
                                       (UADDLVv8i16v V128:$op),
                                       ssub)),
                 dsub))>;
def : Pat<(i32 (int_aarch64_neon_uaddlv
                 (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (i32 (EXTRACT_SUBREG
                 (v8i16 (SUBREG_TO_REG (i64 0),
                                       (UADDLVv16i8v V128:$op),
                                       hsub)),
                 ssub))>;

// Vector results from the AArch64uaddlv node.
def : Pat<(v2i64 (AArch64uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (v2i64 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub))>;
def : Pat<(v4i32 (AArch64uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub))>;
def : Pat<(v4i32 (AArch64uaddlv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$op), hsub))>;
6717
// Selects the vector-returning long across-lanes add node (addlv) for each
// source layout. The instruction result is placed in the low subregister of
// the result vector with SUBREG_TO_REG.
multiclass SIMDAcrossLaneLongReductionIntrinsic<string Opc, SDPatternOperator addlv> {
  // 64-bit sources.
  def : Pat<(v4i32 (addlv (v8i8 V64:$Rn))),
            (v4i32 (SUBREG_TO_REG
                     (i64 0),
                     (!cast<Instruction>(!strconcat(Opc, "v8i8v")) V64:$Rn),
                     hsub))>;
  def : Pat<(v4i32 (addlv (v4i16 V64:$Rn))),
            (v4i32 (SUBREG_TO_REG
                     (i64 0),
                     (!cast<Instruction>(!strconcat(Opc, "v4i16v")) V64:$Rn),
                     ssub))>;

  // 128-bit sources.
  def : Pat<(v4i32 (addlv (v16i8 V128:$Rn))),
            (v4i32 (SUBREG_TO_REG
                     (i64 0),
                     (!cast<Instruction>(!strconcat(Opc, "v16i8v")) V128:$Rn),
                     hsub))>;
  def : Pat<(v4i32 (addlv (v8i16 V128:$Rn))),
            (v4i32 (SUBREG_TO_REG
                     (i64 0),
                     (!cast<Instruction>(!strconcat(Opc, "v8i16v")) V128:$Rn),
                     ssub))>;
  def : Pat<(v2i64 (addlv (v4i32 V128:$Rn))),
            (v2i64 (SUBREG_TO_REG
                     (i64 0),
                     (!cast<Instruction>(!strconcat(Opc, "v4i32v")) V128:$Rn),
                     dsub))>;
}

defm : SIMDAcrossLaneLongReductionIntrinsic<"UADDLV", AArch64uaddlv>;
defm : SIMDAcrossLaneLongReductionIntrinsic<"SADDLV", AArch64saddlv>;
6737
// Patterns for across-vector intrinsics that have a node equivalent which
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
  // If a lane instruction caught the vector_extract around opNode, opNode can
  // be matched directly to the instruction.
  def : Pat<(v8i8 (opNode V64:$Rn)),
            (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                           (!cast<Instruction>(baseOpc # "v8i8v") V64:$Rn),
                           bsub)>;
  def : Pat<(v16i8 (opNode V128:$Rn)),
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                           (!cast<Instruction>(baseOpc # "v16i8v") V128:$Rn),
                           bsub)>;
  def : Pat<(v4i16 (opNode V64:$Rn)),
            (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                           (!cast<Instruction>(baseOpc # "v4i16v") V64:$Rn),
                           hsub)>;
  def : Pat<(v8i16 (opNode V128:$Rn)),
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                           (!cast<Instruction>(baseOpc # "v8i16v") V128:$Rn),
                           hsub)>;
  def : Pat<(v4i32 (opNode V128:$Rn)),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                           (!cast<Instruction>(baseOpc # "v4i32v") V128:$Rn),
                           ssub)>;

  // If none did, fall back to the explicit patterns, which consume the
  // vector_extract of lane 0 as well.
  def : Pat<(i32 (vector_extract
                   (insert_subvector undef, (v8i8 (opNode V64:$Rn)), (i64 0)),
                   (i64 0))),
            (EXTRACT_SUBREG
              (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                             (!cast<Instruction>(baseOpc # "v8i8v") V64:$Rn),
                             bsub),
              ssub)>;
  def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
            (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                             (!cast<Instruction>(baseOpc # "v16i8v") V128:$Rn),
                             bsub),
              ssub)>;
  def : Pat<(i32 (vector_extract
                   (insert_subvector undef, (v4i16 (opNode V64:$Rn)), (i64 0)),
                   (i64 0))),
            (EXTRACT_SUBREG
              (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                             (!cast<Instruction>(baseOpc # "v4i16v") V64:$Rn),
                             hsub),
              ssub)>;
  def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
            (EXTRACT_SUBREG
              (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                             (!cast<Instruction>(baseOpc # "v8i16v") V128:$Rn),
                             hsub),
              ssub)>;
  def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
            (EXTRACT_SUBREG
              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                             (!cast<Instruction>(baseOpc # "v4i32v") V128:$Rn),
                             ssub),
              ssub)>;
}
6787
// Signed variant: everything from SIMDAcrossLanesIntrinsic, plus patterns
// that absorb a following sign extension.
multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
                                          SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
  // If there is a sign extension after this intrinsic, consume it: the SMOV
  // used to read lane 0 already performs it.
  def : Pat<(i32 (sext_inreg
                   (i32 (vector_extract
                          (insert_subvector undef,
                                            (opNode (v8i8 V64:$Rn)),
                                            (i64 0)),
                          (i64 0))),
                   i8)),
            (i32 (SMOVvi8to32
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v8i8v") V64:$Rn),
                                  bsub),
                   (i64 0)))>;
  def : Pat<(i32 (sext_inreg
                   (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
                   i8)),
            (i32 (SMOVvi8to32
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v16i8v") V128:$Rn),
                                  bsub),
                   (i64 0)))>;
  def : Pat<(i32 (sext_inreg
                   (i32 (vector_extract
                          (insert_subvector undef,
                                            (opNode (v4i16 V64:$Rn)),
                                            (i64 0)),
                          (i64 0))),
                   i16)),
            (i32 (SMOVvi16to32
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v4i16v") V64:$Rn),
                                  hsub),
                   (i64 0)))>;
  def : Pat<(i32 (sext_inreg
                   (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
                   i16)),
            (i32 (SMOVvi16to32
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v8i16v") V128:$Rn),
                                  hsub),
                   (i64 0)))>;
}
6818
// Unsigned variant: everything from SIMDAcrossLanesIntrinsic, plus patterns
// that absorb a following mask.
multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
                                            SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
  // If there is a masking operation that keeps only what has actually been
  // generated, consume it: the `and` is matched but not re-emitted.
  def : Pat<(i32 (and (i32 (vector_extract
                             (insert_subvector undef,
                                               (opNode (v8i8 V64:$Rn)),
                                               (i64 0)),
                             (i64 0))),
                      maski8_or_more)),
            (i32 (EXTRACT_SUBREG
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v8i8v") V64:$Rn),
                                  bsub),
                   ssub))>;
  def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
                      maski8_or_more)),
            (i32 (EXTRACT_SUBREG
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v16i8v") V128:$Rn),
                                  bsub),
                   ssub))>;
  def : Pat<(i32 (and (i32 (vector_extract
                             (insert_subvector undef,
                                               (opNode (v4i16 V64:$Rn)),
                                               (i64 0)),
                             (i64 0))),
                      maski16_or_more)),
            (i32 (EXTRACT_SUBREG
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v4i16v") V64:$Rn),
                                  hsub),
                   ssub))>;
  def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
                      maski16_or_more)),
            (i32 (EXTRACT_SUBREG
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v8i16v") V128:$Rn),
                                  hsub),
                   ssub))>;
}
6849
// vecreduce_add patterns; these are used by GlobalISel, not by SelectionDAG.
def : Pat<(i8  (vecreduce_add (v8i8  V64:$Rn))),  (i8  (ADDVv8i8v  V64:$Rn))>;
def : Pat<(i8  (vecreduce_add (v16i8 V128:$Rn))), (i8  (ADDVv16i8v V128:$Rn))>;
def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))),  (i16 (ADDVv4i16v V64:$Rn))>;
def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))), (i16 (ADDVv8i16v V128:$Rn))>;
// v2i32 is reduced with a pairwise add of the vector with itself.
def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>;
def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))), (i32 (ADDVv4i32v V128:$Rn))>;
// v2i64 uses the scalar pairwise add.
def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))), (i64 (ADDPv2i64p V128:$Rn))>;
6865
// Across-lanes integer reductions. Each instantiation is followed by the
// v2i32 case, which is selected as the pairwise instruction applied to the
// vector and itself.

// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))), (ADDPv2i32 V64:$Rn, V64:$Rn)>;

// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))), (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))), (SMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))), (SMINPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))), (UMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))), (UMINPv2i32 V64:$Rn, V64:$Rn)>;
6891
// For vecreduce_{opc}: used by GlobalISel, not by SelectionDAG at the moment,
// because GlobalISel allows the return register to be specified as an FPR.
multiclass SIMDAcrossLanesVecReductionIntrinsic<string baseOpc,
                                               SDPatternOperator opNode> {
  // 8-bit elements.
  def : Pat<(i8 (opNode (v8i8 FPR64:$Rn))),
            (!cast<Instruction>(baseOpc # "v8i8v") FPR64:$Rn)>;
  def : Pat<(i8 (opNode (v16i8 FPR128:$Rn))),
            (!cast<Instruction>(baseOpc # "v16i8v") FPR128:$Rn)>;

  // 16-bit elements.
  def : Pat<(i16 (opNode (v4i16 FPR64:$Rn))),
            (!cast<Instruction>(baseOpc # "v4i16v") FPR64:$Rn)>;
  def : Pat<(i16 (opNode (v8i16 FPR128:$Rn))),
            (!cast<Instruction>(baseOpc # "v8i16v") FPR128:$Rn)>;

  // 32-bit elements (128-bit source only).
  def : Pat<(i32 (opNode (v4i32 V128:$Rn))),
            (!cast<Instruction>(baseOpc # "v4i32v") V128:$Rn)>;
}
6911
// Min/max vector reductions. For the v2i32 source type the pairwise
// instruction can be used instead: it is applied to the vector and itself,
// and the result is read from ssub.
defm : SIMDAcrossLanesVecReductionIntrinsic<"UMINV", vecreduce_umin>;
def : Pat<(i32 (vecreduce_umin (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
                 (UMINPv2i32 V64:$Rn, V64:$Rn),
                 ssub))>;

defm : SIMDAcrossLanesVecReductionIntrinsic<"UMAXV", vecreduce_umax>;
def : Pat<(i32 (vecreduce_umax (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
                 (UMAXPv2i32 V64:$Rn, V64:$Rn),
                 ssub))>;

defm : SIMDAcrossLanesVecReductionIntrinsic<"SMINV", vecreduce_smin>;
def : Pat<(i32 (vecreduce_smin (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
                 (SMINPv2i32 V64:$Rn, V64:$Rn),
                 ssub))>;

defm : SIMDAcrossLanesVecReductionIntrinsic<"SMAXV", vecreduce_smax>;
def : Pat<(i32 (vecreduce_smax (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
                 (SMAXPv2i32 V64:$Rn, V64:$Rn),
                 ssub))>;
6928
// Scalar-returning signed long across-lanes intrinsic. baseOpc is the
// instruction name prefix; intOp is the intrinsic being selected.
multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
  // 8-bit sources: the result goes into hsub and is read as an i32 with
  // SMOVvi16to32 from lane 0.
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (SMOVvi16to32
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v8i8v") V64:$Rn),
                                  hsub),
                   (i64 0)))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (SMOVvi16to32
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v16i8v") V128:$Rn),
                                  hsub),
                   (i64 0)))>;

  // 16-bit sources: the result is read directly from ssub.
  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v4i16v") V64:$Rn),
                                  ssub),
                   ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v8i16v") V128:$Rn),
                                  ssub),
                   ssub))>;

  // 32-bit sources: the result is read directly from dsub.
  def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
            (i64 (EXTRACT_SUBREG
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v4i32v") V128:$Rn),
                                  dsub),
                   dsub))>;
}
6958
// Scalar-returning unsigned long across-lanes intrinsic. baseOpc is the
// instruction name prefix; intOp is the intrinsic being selected. Every
// result is read with a plain subregister extraction.
multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
                                                Intrinsic intOp> {
  // 8-bit sources: result inserted at hsub, read from ssub.
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v8i8v") V64:$Rn),
                                  hsub),
                   ssub))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v16i8v") V128:$Rn),
                                  hsub),
                   ssub))>;

  // 16-bit sources: result inserted at and read from ssub.
  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v4i16v") V64:$Rn),
                                  ssub),
                   ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v8i16v") V128:$Rn),
                                  ssub),
                   ssub))>;

  // 32-bit sources: result inserted at and read from dsub.
  def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
            (i64 (EXTRACT_SUBREG
                   (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                  (!cast<Instruction>(baseOpc # "v4i32v") V128:$Rn),
                                  dsub),
                   dsub))>;
}
6989
defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;

// The v2i32 forms have no across-lanes instruction here; they are selected as
// the pairwise long add instead.

// The vaddlv_s32 intrinsic gets mapped to SADDLP.
def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
                 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                (SADDLPv2i32_v1i64 V64:$Rn),
                                dsub),
                 dsub))>;

// The vaddlv_u32 intrinsic gets mapped to UADDLP.
def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
                 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                                (UADDLPv2i32_v1i64 V64:$Rn),
                                dsub),
                 dsub))>;
7005
7006//------------------------------------------------------------------------------
7007// AdvSIMD modified immediate instructions
7008//------------------------------------------------------------------------------
7009
// AdvSIMD BIC and ORR (modified immediate, tied destination).
defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;

// BIC aliases that omit the shift operand (it is passed as 0), first with the
// arrangement on the register, then with the arrangement on the mnemonic.
def : InstAlias<"bic $Vd.4h, $imm",
                (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.8h, $imm",
                (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm",
                (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.4s, $imm",
                (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"bic.4h $Vd, $imm",
                (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.8h $Vd, $imm",
                (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.2s $Vd, $imm",
                (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.4s $Vd, $imm",
                (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

// The same two alias spellings for ORR.
def : InstAlias<"orr $Vd.4h, $imm",
                (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.8h, $imm",
                (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.2s, $imm",
                (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4s, $imm",
                (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr.4h $Vd, $imm",
                (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.8h $Vd, $imm",
                (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.2s $Vd, $imm",
                (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.4s $Vd, $imm",
                (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7034
// AdvSIMD FMOV (vector, immediate). Each definition selects the AArch64fmov
// node for one result vector type.
def FMOVv2f64_ns
    : SIMDModifiedImmVectorNoShift<
          1, 1, 0, 0b1111, V128, fpimm8, "fmov", ".2d",
          [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv2f32_ns
    : SIMDModifiedImmVectorNoShift<
          0, 0, 0, 0b1111, V64, fpimm8, "fmov", ".2s",
          [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv4f32_ns
    : SIMDModifiedImmVectorNoShift<
          1, 0, 0, 0b1111, V128, fpimm8, "fmov", ".4s",
          [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;

// The half-precision forms additionally require HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
  def FMOVv4f16_ns
      : SIMDModifiedImmVectorNoShift<
            0, 0, 1, 0b1111, V64, fpimm8, "fmov", ".4h",
            [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
  def FMOVv8f16_ns
      : SIMDModifiedImmVectorNoShift<
            1, 0, 1, 0b1111, V128, fpimm8, "fmov", ".8h",
            [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
} // Predicates = [HasNEON, HasFullFP16]
7053
// AdvSIMD MOVI

// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
  def MOVID : SIMDModifiedImmScalarNoShift<
                  0, 1, 0b1110, "movi",
                  [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
}

// The movi_edit node carries the immediate in its already-encoded form, so a
// plain imm0_255 is used here.
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
          (MOVID imm0_255:$shift)>;

// EDIT byte mask: 2d

// As above, the movi_edit node carries the already-encoded immediate, so the
// pattern uses a plain imm0_255.
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
  def MOVIv2d_ns
      : SIMDModifiedImmVectorNoShift<
            1, 1, 0, 0b1110, V128, simdimmtype10, "movi", ".2d",
            [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
}
7074
// 128-bit all-zeros vectors: MOVI.2d with an encoded immediate of 0.
foreach VT = [v2i64, v4i32, v8i16, v16i8, v2f64, v4f32, v8f16, v8bf16] in
  def : Pat<(VT immAllZerosV), (MOVIv2d_ns (i32 0))>;

// 128-bit all-ones integer vectors: MOVI.2d with an encoded immediate of 255.
foreach VT = [v2i64, v4i32, v8i16, v16i8] in
  def : Pat<(VT immAllOnesV), (MOVIv2d_ns (i32 255))>;

// Set 64-bit integer vectors to all 0/1 by extracting from a 128-bit register,
// as the extract is free and this gives better MachineCSE results.
foreach VT = [v1i64, v2i32, v4i16, v8i8] in
  def : Pat<(VT immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;

// 64-bit floating-point zero vectors use the scalar MOVID directly.
foreach VT = [v1f64, v2f32, v4f16, v4bf16] in
  def : Pat<(VT immAllZerosV), (MOVID (i32 0))>;

foreach VT = [v1i64, v2i32, v4i16, v8i8] in
  def : Pat<(VT immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7104
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
  defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
}

let Predicates = [HasNEON] in {
  // Materialize matching f32 constants with MOVI (shift amount 24) and read
  // the scalar back from ssub.
  def : Pat<(f32 fpimm32SIMDModImmType4:$in),
            (EXTRACT_SUBREG
              (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in), (i32 24)),
              ssub)>;
}
7116
// MOVI aliases that omit the shift operand (it is passed as 0). The trailing
// 0 is the InstAlias emit argument.
def : InstAlias<"movi $Vd.4h, $imm",
                (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm",
                (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm",
                (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm",
                (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"movi.4h $Vd, $imm",
                (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm",
                (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm",
                (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm",
                (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

// Select the AArch64movi_shift node for each word/halfword vector type.
def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;

def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;

def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;

def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;
7135
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
  // EDIT per word: 2s & 4s with MSL shifter
  def MOVIv2s_msl
      : SIMDModifiedImmMoveMSL<
            0, 0, {1,1,0,?}, V64, "movi", ".2s",
            [(set (v2i32 V64:$Rd),
                  (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
  def MOVIv4s_msl
      : SIMDModifiedImmMoveMSL<
            1, 0, {1,1,0,?}, V128, "movi", ".4s",
            [(set (v4i32 V128:$Rd),
                  (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

  // Per byte: 8b & 16b
  def MOVIv8b_ns
      : SIMDModifiedImmVectorNoShift<
            0, 0, 0, 0b1110, V64, imm0_255, "movi", ".8b",
            [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
  def MOVIv16b_ns
      : SIMDModifiedImmVectorNoShift<
            1, 0, 0, 0b1110, V128, imm0_255, "movi", ".16b",
            [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
}
7154
// AdvSIMD MVNI

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
  defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
}

// MVNI aliases that omit the shift operand (it is passed as 0). The trailing
// 0 is the InstAlias emit argument.
def : InstAlias<"mvni $Vd.4h, $imm",
                (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm",
                (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm",
                (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.4s, $imm",
                (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"mvni.4h $Vd, $imm",
                (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm",
                (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm",
                (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm",
                (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

// Select the AArch64mvni_shift node for each word/halfword vector type.
def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;

def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;

def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;

def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
7179
// Per word: 2s & 4s with MSL shifter
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {

def MVNIv2s_msl
    : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
        [(set (v2i32 V64:$Rd),
              (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MVNIv4s_msl
    : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
        [(set (v4i32 V128:$Rd),
              (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

} // isReMaterializable = 1, isAsCheapAsAMove = 1
7189
7190//----------------------------------------------------------------------------
7191// AdvSIMD indexed element
7192//----------------------------------------------------------------------------
7193
let hasSideEffects = 0 in {
  defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">;
  defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">;
} // hasSideEffects = 0

// NOTE: The PatFrags below place node:$LHS (the addend) last in any_fma
// because the instruction expects the addend first, while the intrinsic
// expects it last.

// Multiplication is commutative, so FMLA is matched with the two multiplicands
// in either order.
defm : SIMDFPIndexedTiedPatterns<
    "FMLA", TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<
    "FMLA", TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;

// For FMLS, (-x) * y = x * (-y) in addition to commutativity, giving four
// operand arrangements: the fneg may sit on either multiplicand, in either
// position.
defm : SIMDFPIndexedTiedPatterns<
    "FMLS", TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<
    "FMLS", TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<
    "FMLS", TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<
    "FMLS", TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)>>;
7217
// Patterns that select the indexed FMLS instructions when the fneg has been
// applied to the source of the lane duplication / extraction (the whole
// vector or the scalar) instead of to the duplicated value. OpNode is the
// three-operand fragment to match; $Rd is the accumulator, $Rn the first
// multiplicand and $Rm the value underneath the fneg.
//
// The 64-bit-lane patterns use VectorIndexD throughout: a v2f64 has only two
// lanes, and the source and result of a pattern must agree on the operand
// used for $idx.
multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                           VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                           VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                           VectorIndexD:$idx))),
            (FMLSv2i64_indexed
                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexD:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (insert_subvector undef,
                                                    (v2f32 (fneg V64:$Rm)),
                                                    (i64 0))),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;

  // 1 variant for 64-bit scalar version: extract from .2d
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexD:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                V128:$Rm, VectorIndexD:$idx)>;
}
7293
// Instantiate the negated-source FMLS patterns for both orders of the two
// multiplicands; node:$LHS (the addend) stays last in any_fma.
defm : FMLSIndexedAfterNegPatterns<
    TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : FMLSIndexedAfterNegPatterns<
    TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;
7298
defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;

// Multiply by a duplicated scalar: place the scalar in the low subregister of
// an otherwise undefined 128-bit value and use the indexed FMUL with lane 0.
def : Pat<
    (v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
    (FMULv2i32_indexed
        V64:$Rn,
        (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
        (i64 0))>;
def : Pat<
    (v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
    (FMULv4i32_indexed
        V128:$Rn,
        (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
        (i64 0))>;
def : Pat<
    (v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
    (FMULv2i64_indexed
        V128:$Rn,
        (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
        (i64 0))>;
7314
// Indexed saturating doubling multiply-high instructions.
defm SQDMULH
    : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH
    : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;

// Extra patterns for the dedicated _lane / _laneq intrinsics.
defm SQDMULH
    : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
                            int_aarch64_neon_sqdmulh_laneq>;
defm SQRDMULH
    : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
                            int_aarch64_neon_sqrdmulh_laneq>;
7322
// Generated by MachineCombine (hence null_frag: no selection pattern here).
defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;

defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;

// Signed widening multiply, with accumulate / subtract variants expressed as
// add / sub of the accumulator ($LHS) and an AArch64smull node.
defm SMLAL
    : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
        TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL
    : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
        TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL
    : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;

// Saturating doubling forms.
defm SQDMLAL
    : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
                                  int_aarch64_neon_sqadd>;
defm SQDMLSL
    : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
                                  int_aarch64_neon_sqsub>;
defm SQRDMLAH
    : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
                                int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH
    : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
                                int_aarch64_neon_sqrdmlsh>;
defm SQDMULL
    : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;

// Unsigned widening multiply, mirroring the signed group above with
// AArch64umull.
defm UMLAL
    : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
        TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL
    : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
        TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL
    : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>;
7347
// When the second operand of a scalar sqdmull is a lane extracted from a
// v4i32, the indexed instruction encoding can consume the vector and lane
// index directly.
def : Pat<
    (int_aarch64_neon_sqdmulls_scalar
        (i32 FPR32:$Rn),
        (vector_extract (v4i32 V128:$Vm), VectorIndexS:$idx)),
    (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
7354
7355//----------------------------------------------------------------------------
7356// AdvSIMD scalar shift instructions
7357//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;

// Codegen patterns for the above. They are kept separate from the
// instruction definitions because TableGen's type inference cannot cope with
// fp <--> int conversions sharing the same base pattern.

// fp -> fixed-point, 32-bit.
def : Pat<
    (int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
    (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<
    (int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
    (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;

// fp -> fixed-point, 64-bit scalar.
def : Pat<
    (i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
    (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<
    (i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
    (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;

// fp -> fixed-point, single-element 64-bit vector.
def : Pat<
    (v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                        vecshiftR64:$imm)),
    (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<
    (v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                        vecshiftR64:$imm)),
    (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;

// fixed-point -> fp.
def : Pat<
    (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
    (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<
    (f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
    (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<
    (v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                        vecshiftR64:$imm)),
    (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<
    (f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
    (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<
    (v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                        vecshiftR64:$imm)),
    (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<
    (int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
    (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
7393
// Patterns for FP16 intrinsics. i16 is not supported as a type here, so the
// integer side lives in a wider register: inputs are read through the hsub
// subregister, and results are inserted into hsub of an undefined wider value.

// Signed fixed-point -> f16.
def : Pat<
    (f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)),
                                      vecshiftR16:$imm)),
    (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<
    (f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
    (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<
    (f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
    (SCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;

// Unsigned fixed-point -> f16.
def : Pat<
    (f16 (int_aarch64_neon_vcvtfxu2fp (and FPR32:$Rn, (i32 65535)),
                                      vecshiftR16:$imm)),
    (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<
    (f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
    (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<
    (f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
    (UCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;

// f16 -> signed fixed-point (i32 and i64 results).
def : Pat<
    (i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
    (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                        (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
                        hsub))>;
def : Pat<
    (i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
    (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
                        (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
                        hsub))>;

// f16 -> unsigned fixed-point (i32 and i64 results).
def : Pat<
    (i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
    (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                        (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
                        hsub))>;
def : Pat<
    (i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
    (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
                        (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
                        hsub))>;

// f16 absolute compares producing an i32 result.
def : Pat<
    (i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
    (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                        (FACGE16 FPR16:$Rn, FPR16:$Rm),
                        hsub))>;
def : Pat<
    (i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
    (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                        (FACGT16 FPR16:$Rn, FPR16:$Rm),
                        hsub))>;
7440
// Scalar shift-by-immediate instructions. The accumulating forms (SRSRA,
// SSRA, URSRA, USRA) are expressed as an add of the accumulator ($LHS) and
// the corresponding shift node.
defm SHL
    : SIMDScalarLShiftD<0, 0b01010, "shl", AArch64vshl>;
defm SLI
    : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
defm SQRSHRN
    : SIMDScalarRShiftBHS<0, 0b10011, "sqrshrn", int_aarch64_neon_sqrshrn>;
defm SQRSHRUN
    : SIMDScalarRShiftBHS<1, 0b10001, "sqrshrun", int_aarch64_neon_sqrshrun>;
defm SQSHLU
    : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL
    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN
    : SIMDScalarRShiftBHS<0, 0b10010, "sqshrn", int_aarch64_neon_sqshrn>;
defm SQSHRUN
    : SIMDScalarRShiftBHS<1, 0b10000, "sqshrun", int_aarch64_neon_sqshrun>;
defm SRI
    : SIMDScalarRShiftDTied<1, 0b01000, "sri">;
defm SRSHR
    : SIMDScalarRShiftD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA
    : SIMDScalarRShiftDTied<0, 0b00110, "srsra",
        TriOpFrag<(add node:$LHS, (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR
    : SIMDScalarRShiftD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA
    : SIMDScalarRShiftDTied<0, 0b00010, "ssra",
        TriOpFrag<(add_and_or_is_add node:$LHS,
                                     (AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN
    : SIMDScalarRShiftBHS<1, 0b10011, "uqrshrn", int_aarch64_neon_uqrshrn>;
defm UQSHL
    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN
    : SIMDScalarRShiftBHS<1, 0b10010, "uqshrn", int_aarch64_neon_uqshrn>;
defm URSHR
    : SIMDScalarRShiftD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA
    : SIMDScalarRShiftDTied<1, 0b00110, "ursra",
        TriOpFrag<(add node:$LHS, (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR
    : SIMDScalarRShiftD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA
    : SIMDScalarRShiftDTied<1, 0b00010, "usra",
        TriOpFrag<(add_and_or_is_add node:$LHS,
                                     (AArch64vlshr node:$MHS, node:$RHS))>>;
7475
7476//----------------------------------------------------------------------------
7477// AdvSIMD vector shift instructions
7478//----------------------------------------------------------------------------
// Vector fixed-point conversions, followed by RSHRN and SHL.
defm FCVTZS
    : SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU
    : SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF
    : SIMDVectorRShiftToFP<0, 0b11100, "scvtf", int_aarch64_neon_vcvtfxs2fp>;
defm RSHRN
    : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>;
defm SHL
    : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
7485
// X << 1 ==> X + X
//
// Rewrites a vector shift-left by one as an ADD of the register with itself.
// The instruction is looked up by name as "ADD" concatenated with the value
// type, e.g. ADDv16i8.
class SHLToADDPat<ValueType ty, RegisterClass regtype>
  : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
        (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>;

// 128-bit vector types.
def : SHLToADDPat<v16i8, FPR128>;
def : SHLToADDPat<v8i16, FPR128>;
def : SHLToADDPat<v4i32, FPR128>;
def : SHLToADDPat<v2i64, FPR128>;
// 64-bit vector types.
def : SHLToADDPat<v8i8,  FPR64>;
def : SHLToADDPat<v4i16, FPR64>;
def : SHLToADDPat<v2i32, FPR64>;
7498
// Vector shift-by-immediate instructions. SLI and SRI each get one extra
// pattern for the v1i64 type, selecting the scalar SLId / SRId instruction.
defm SHRN
    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
        BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;

defm SLI
    : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
def : Pat<
    (v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                        (i32 vecshiftL64:$imm))),
    (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;

defm SQRSHRN
    : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
                                int_aarch64_neon_sqrshrn>;
defm SQRSHRUN
    : SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
                                int_aarch64_neon_sqrshrun>;
defm SQSHLU
    : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL
    : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN
    : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
                                int_aarch64_neon_sqshrn>;
defm SQSHRUN
    : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
                                int_aarch64_neon_sqshrun>;

defm SRI
    : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
def : Pat<
    (v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                        (i32 vecshiftR64:$imm))),
    (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;

defm SRSHR
    : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA
    : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
        TriOpFrag<(add node:$LHS, (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHLL
    : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
        BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;

defm SSHR
    : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA
    : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
        TriOpFrag<(add_and_or_is_add node:$LHS,
                                     (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF
    : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN
    : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
                                int_aarch64_neon_uqrshrn>;
defm UQSHL
    : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN
    : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
                                int_aarch64_neon_uqshrn>;
defm URSHR
    : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA
    : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
        TriOpFrag<(add node:$LHS, (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHLL
    : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
        BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR
    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA
    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
        TriOpFrag<(add_and_or_is_add node:$LHS,
                                     (AArch64vlshr node:$MHS, node:$RHS))>>;
7545
// RADDHN patterns for when RSHRN shifts by half the size of the vector
// element. Each selects RADDHN with an all-zero second operand
// (MOVIv2d_ns 0).

// Expanded form: trunc(lshr(add(x, constant), half-width)).
def : Pat<
    (v8i8 (trunc (AArch64vlshr
                     (add (v8i16 V128:$Vn),
                          (AArch64movi_shift (i32 128), (i32 0))),
                     (i32 8)))),
    (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<
    (v4i16 (trunc (AArch64vlshr
                      (add (v4i32 V128:$Vn),
                           (AArch64movi_shift (i32 128), (i32 8))),
                      (i32 16)))),
    (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<
    (v2i32 (trunc (AArch64vlshr
                      (add (v2i64 V128:$Vn),
                           (AArch64dup (i64 2147483648))),
                      (i32 32)))),
    (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;

// Intrinsic form: int_aarch64_neon_rshrn with the half-width shift amount.
def : Pat<
    (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
    (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<
    (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
    (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
def : Pat<
    (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
    (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
7560
// RADDHN2 patterns for when RSHRN shifts by half the size of the vector
// element. The narrowed value is the second operand of a concat_vectors whose
// first operand $Vd is inserted into dsub of the tied 128-bit register
// operand.

// Expanded form: trunc(lshr(add(x, constant), half-width)).
def : Pat<
    (v16i8 (concat_vectors
               (v8i8 V64:$Vd),
               (v8i8 (trunc (AArch64vlshr
                                (add (v8i16 V128:$Vn),
                                     (AArch64movi_shift (i32 128), (i32 0))),
                                (i32 8)))))),
    (RADDHNv8i16_v16i8
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub),
        V128:$Vn,
        (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<
    (v8i16 (concat_vectors
               (v4i16 V64:$Vd),
               (v4i16 (trunc (AArch64vlshr
                                 (add (v4i32 V128:$Vn),
                                      (AArch64movi_shift (i32 128), (i32 8))),
                                 (i32 16)))))),
    (RADDHNv4i32_v8i16
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub),
        V128:$Vn,
        (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<
    (v4i32 (concat_vectors
               (v2i32 V64:$Vd),
               (v2i32 (trunc (AArch64vlshr
                                 (add (v2i64 V128:$Vn),
                                      (AArch64dup (i64 2147483648))),
                                 (i32 32)))))),
    (RADDHNv2i64_v4i32
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub),
        V128:$Vn,
        (v2i64 (MOVIv2d_ns (i32 0))))>;

// Intrinsic form: int_aarch64_neon_rshrn with the half-width shift amount.
def : Pat<
    (v16i8 (concat_vectors
               (v8i8 V64:$Vd),
               (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
    (RADDHNv8i16_v16i8
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub),
        V128:$Vn,
        (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<
    (v8i16 (concat_vectors
               (v4i16 V64:$Vd),
               (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
    (RADDHNv4i32_v8i16
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub),
        V128:$Vn,
        (v4i32 (MOVIv2d_ns (i32 0))))>;
def : Pat<
    (v4i32 (concat_vectors
               (v2i32 V64:$Vd),
               (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
    (RADDHNv2i64_v4i32
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub),
        V128:$Vn,
        (v2i64 (MOVIv2d_ns (i32 0))))>;
7599
// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result so it
// doesn't matter).
//
// In every pattern the result refers to $imm with the same operand as the
// source side (vecshiftR16Narrow / vecshiftR32Narrow / vecshiftR64Narrow for
// v8i16 / v4i32 / v2i64 inputs respectively).
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;

// Second-half forms: the narrowed value is concatenated after $Rd, which is
// inserted into dsub of the tied 128-bit register operand.
def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
                                                    vecshiftR16Narrow:$imm)))),
          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
                                                    vecshiftR32Narrow:$imm)))),
          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
                                                    vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;
7625
// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
// Extends of a 64-bit source: a widening shift-left by 0. sext uses SSHLL;
// zext and anyext both use USHLL.
def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),
          (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),
          (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),
          (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))),
          (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))),
          (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))),
          (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))),
          (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))),
          (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))),
          (USHLLv2i32_shift V64:$Rn, (i32 0))>;

// Also match an extend from the upper half of a 128 bit source register.
def : Pat<
    (v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))))),
    (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<
    (v8i16 (zext   (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))))),
    (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<
    (v8i16 (sext   (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))))),
    (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<
    (v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn))))),
    (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<
    (v4i32 (zext   (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn))))),
    (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<
    (v4i32 (sext   (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn))))),
    (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<
    (v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn))))),
    (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<
    (v2i64 (zext   (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn))))),
    (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<
    (v2i64 (sext   (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn))))),
    (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
7656
// sxtl / sxtl2 / uxtl / uxtl2 are aliases of SSHLL / USHLL with a shift of 0.
// Each alias is given in two spellings: arrangement as a mnemonic suffix, and
// arrangement on the registers.

// Vector shift sxtl aliases
def : InstAlias<
    "sxtl.8h $dst, $src1", (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<
    "sxtl $dst.8h, $src1.8b", (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<
    "sxtl.4s $dst, $src1", (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<
    "sxtl $dst.4s, $src1.4h", (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<
    "sxtl.2d $dst, $src1", (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<
    "sxtl $dst.2d, $src1.2s", (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift sxtl2 aliases
def : InstAlias<
    "sxtl2.8h $dst, $src1", (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<
    "sxtl2 $dst.8h, $src1.16b", (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<
    "sxtl2.4s $dst, $src1", (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<
    "sxtl2 $dst.4s, $src1.8h", (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<
    "sxtl2.2d $dst, $src1", (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<
    "sxtl2 $dst.2d, $src1.4s", (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;

// Vector shift uxtl aliases
def : InstAlias<
    "uxtl.8h $dst, $src1", (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<
    "uxtl $dst.8h, $src1.8b", (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<
    "uxtl.4s $dst, $src1", (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<
    "uxtl $dst.4s, $src1.4h", (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<
    "uxtl.2d $dst, $src1", (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<
    "uxtl $dst.2d, $src1.2s", (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift uxtl2 aliases
def : InstAlias<
    "uxtl2.8h $dst, $src1", (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<
    "uxtl2 $dst.8h, $src1.16b", (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<
    "uxtl2.4s $dst, $src1", (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<
    "uxtl2 $dst.4s, $src1.8h", (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<
    "uxtl2.2d $dst, $src1", (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<
    "uxtl2 $dst.2d, $src1.4s", (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
7712
7713// If an integer is about to be converted to a floating point value,
7714// just load it on the floating point unit.
7715// These patterns are more complex because floating point loads do not
7716// support sign extension.
7717// The sign extension has to be explicitly added and is only supported for
7718// one step: byte-to-half, half-to-word, word-to-doubleword.
7719// SCVTF GPR -> FPR is 9 cycles.
7720// SCVTF FPR -> FPR is 4 cycles.
7721// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
7722// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
7723// and still be faster.
7724// However, this is not good for code size.
7725// 8-bits -> float. 2 sizes step-up.
// Pattern template for (f32 (sint_to_fp (i32 (sextloadi8 addrmode)))).
//   addrmode - source DAG describing the address operand of the sextloadi8.
//   INST     - result DAG that produces the loaded byte; it is inserted into
//              the bsub subregister of an undefined f64.
// The result lengthens twice, SSHLLv8i8_shift then SSHLLv4i16_shift (both
// with an immediate of 0), taking dsub of the first result and ssub of the
// second, and finally applies SCVTFv1i32.
// Enabled only under NotForCodeSize, UseAlternateSExtLoadCVTF32 and HasNEON.
7726class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
7727  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
7728        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
7729                            (SSHLLv4i16_shift
7730                              (f64
7731                                (EXTRACT_SUBREG
7732                                  (SSHLLv8i8_shift
7733                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7734                                        INST,
7735                                        bsub),
7736                                    0),
7737                                  dsub)),
7738                               0),
7739                             ssub)))>,
7740    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7741
// Instantiate the 8-bit -> f32 pattern once per addressing form: register
// offset with a GPR32 index (ro8.Wpat / LDRBroW), register offset with a
// GPR64 index (ro8.Xpat / LDRBroX), am_indexed8 with a uimm12s1 offset
// (LDRBui), and am_unscaled8 with a simm9 offset (LDURBi).
7742def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
7743                          (LDRBroW  GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
7744def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
7745                          (LDRBroX  GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
7746def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
7747                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
7748def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
7749                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;
7750
7751// 16-bits -> float. 1 size step-up.
// Pattern template for (f32 (sint_to_fp (i32 (sextloadi16 addrmode)))).
//   addrmode - source DAG describing the address operand of the sextloadi16.
//   INST     - result DAG that produces the loaded halfword; it is inserted
//              into the hsub subregister of an undefined f64.
// The result lengthens once with SSHLLv4i16_shift (immediate 0), takes ssub
// of that result, and applies SCVTFv1i32.
// Enabled only under NotForCodeSize, UseAlternateSExtLoadCVTF32 and HasNEON.
7752class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
7753  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
7754        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
7755                            (SSHLLv4i16_shift
7756                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7757                                  INST,
7758                                  hsub),
7759                                0),
7760                            ssub)))>,
7761    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7762
// Instantiate the 16-bit -> f32 pattern once per addressing form: ro16.Wpat
// (LDRHroW), ro16.Xpat (LDRHroX), am_indexed16 with uimm12s2 (LDRHui), and
// am_unscaled16 with simm9 (LDURHi).
7763def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
7764                           (LDRHroW   GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
7765def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
7766                           (LDRHroX   GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
7767def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
7768                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
7769def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
7770                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
7771
7772// 32-bits to 32-bits are handled in target specific dag combine:
7773// performIntToFpCombine.
7774// 64-bits integer to 32-bits floating point, not possible with
7775// SCVTF on floating point registers (both source and destination
7776// must have the same size).
7777
7778// Here are the patterns for 8, 16, 32, and 64-bits to double.
7779// 8-bits -> double. 3 size step-up: give up.
7780// 16-bits -> double. 2 size step.
// Pattern template for (f64 (sint_to_fp (i32 (sextloadi16 addrmode)))).
//   addrmode - source DAG describing the address operand of the sextloadi16.
//   INST     - result DAG that produces the loaded halfword; it is inserted
//              into the hsub subregister of an undefined f64.
// The result lengthens twice, SSHLLv4i16_shift then SSHLLv2i32_shift (both
// with an immediate of 0), taking dsub after each step, and finally applies
// SCVTFv1i64.
// Enabled only under NotForCodeSize, UseAlternateSExtLoadCVTF32 and HasNEON.
7781class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
7782  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
7783           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
7784                              (SSHLLv2i32_shift
7785                                 (f64
7786                                  (EXTRACT_SUBREG
7787                                    (SSHLLv4i16_shift
7788                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7789                                        INST,
7790                                        hsub),
7791                                     0),
7792                                   dsub)),
7793                               0),
7794                             dsub)))>,
7795    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7796
// Instantiate the 16-bit -> f64 pattern once per addressing form: ro16.Wpat
// (LDRHroW), ro16.Xpat (LDRHroX), am_indexed16 with uimm12s2 (LDRHui), and
// am_unscaled16 with simm9 (LDURHi).
7797def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
7798                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
7799def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
7800                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
7801def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
7802                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
7803def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
7804                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
7805// 32-bits -> double. 1 size step-up.
7805// 32-bits -> double. 1 size step-up.
// Pattern template for (f64 (sint_to_fp (i32 (load addrmode)))).
//   addrmode - source DAG describing the address operand of the i32 load.
//   INST     - result DAG that produces the loaded word; it is inserted into
//              the ssub subregister of an undefined f64.
// The result lengthens once with SSHLLv2i32_shift (immediate 0), takes dsub
// of that result, and applies SCVTFv1i64.
// Enabled only under NotForCodeSize, UseAlternateSExtLoadCVTF32 and HasNEON.
7806class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
7807  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
7808           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
7809                              (SSHLLv2i32_shift
7810                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7811                                  INST,
7812                                  ssub),
7813                               0),
7814                             dsub)))>,
7815    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7816
// Instantiate the 32-bit -> f64 pattern once per addressing form: ro32.Wpat
// (LDRSroW), ro32.Xpat (LDRSroX), am_indexed32 with uimm12s4 (LDRSui), and
// am_unscaled32 with simm9 (LDURSi).
7817def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
7818                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
7819def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
7820                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
7821def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
7822                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
7823def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
7824                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;
7825
7826// 64-bits -> double are handled in target specific dag combine:
7827// performIntToFpCombine.
7828
7829
7830//----------------------------------------------------------------------------
7831// AdvSIMD Load-Store Structure
7832//----------------------------------------------------------------------------
// Multiple-structure loads and stores: LD1..LD4 and ST1..ST4 are expanded
// from the SIMDLd<N>Multiple / SIMDSt<N>Multiple multiclasses, each given
// only its assembly mnemonic. The multiclasses are defined outside this
// section.
7833defm LD1 : SIMDLd1Multiple<"ld1">;
7834defm LD2 : SIMDLd2Multiple<"ld2">;
7835defm LD3 : SIMDLd3Multiple<"ld3">;
7836defm LD4 : SIMDLd4Multiple<"ld4">;
7837
7838defm ST1 : SIMDSt1Multiple<"st1">;
7839defm ST2 : SIMDSt2Multiple<"st2">;
7840defm ST3 : SIMDSt3Multiple<"st3">;
7841defm ST4 : SIMDSt4Multiple<"st4">;
7842
// Selects a plain load of vector type `ty` from a GPR64sp base address to
// the one-register LD1 instruction `INST`.
7843class Ld1Pat<ValueType ty, Instruction INST>
7844  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;
7845
// One instantiation per integer vector type, paired with the LD1One variant
// of the same arrangement.
7846def : Ld1Pat<v16i8, LD1Onev16b>;
7847def : Ld1Pat<v8i16, LD1Onev8h>;
7848def : Ld1Pat<v4i32, LD1Onev4s>;
7849def : Ld1Pat<v2i64, LD1Onev2d>;
7850def : Ld1Pat<v8i8,  LD1Onev8b>;
7851def : Ld1Pat<v4i16, LD1Onev4h>;
7852def : Ld1Pat<v2i32, LD1Onev2s>;
7853def : Ld1Pat<v1i64, LD1Onev1d>;
7854
// Selects a plain store of a value of vector type `ty` to a GPR64sp base
// address to the one-register ST1 instruction `INST`.
7855class St1Pat<ValueType ty, Instruction INST>
7856  : Pat<(store ty:$Vt, GPR64sp:$Rn),
7857        (INST ty:$Vt, GPR64sp:$Rn)>;
7858
// One instantiation per integer vector type, paired with the ST1One variant
// of the same arrangement.
7859def : St1Pat<v16i8, ST1Onev16b>;
7860def : St1Pat<v8i16, ST1Onev8h>;
7861def : St1Pat<v4i32, ST1Onev4s>;
7862def : St1Pat<v2i64, ST1Onev2d>;
7863def : St1Pat<v8i8,  ST1Onev8b>;
7864def : St1Pat<v4i16, ST1Onev4h>;
7865def : St1Pat<v2i32, ST1Onev2s>;
7866def : St1Pat<v1i64, ST1Onev1d>;
7867
7868//---
7869// Single-element
7870//---
7871
// Load-and-replicate instructions (ld1r..ld4r), expanded from SIMDLdR.
// NOTE(review): the four trailing integers look like per-element-size
// post-index byte offsets (register count times 1/2/4/8) — confirm against
// the SIMDLdR multiclass, which is defined outside this section.
7872defm LD1R          : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
7873defm LD2R          : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
7874defm LD3R          : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
7875defm LD4R          : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
// Single-lane loads for ld1..ld4, one defm per element size (B/H/S/D), all
// marked mayLoad with no other side effects. The last argument is the
// GPR64pi<N> operand class, with N growing as register count times element
// size.
7876let mayLoad = 1, hasSideEffects = 0 in {
7877defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
7878defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
7879defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
7880defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
7881defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
7882defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
7883defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
7884defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
7885defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
7886defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
7887defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
7888defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
7889defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
7890defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
7891defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
7892defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
7893}
7894
// AArch64dup of a scalar integer load from a GPR64sp address selects LD1R of
// the matching arrangement. The 8- and 16-bit forms match an i32 result of
// extloadi8 / extloadi16; the 32- and 64-bit forms match a plain load.
7895def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
7896          (LD1Rv8b GPR64sp:$Rn)>;
7897def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
7898          (LD1Rv16b GPR64sp:$Rn)>;
7899def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
7900          (LD1Rv4h GPR64sp:$Rn)>;
7901def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
7902          (LD1Rv8h GPR64sp:$Rn)>;
7903def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
7904          (LD1Rv2s GPR64sp:$Rn)>;
7905def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
7906          (LD1Rv4s GPR64sp:$Rn)>;
7907def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
7908          (LD1Rv2d GPR64sp:$Rn)>;
7909def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
7910          (LD1Rv1d GPR64sp:$Rn)>;
7911
// AArch64duplane of lane 0 of a loaded vector also selects LD1R. For the
// 64-bit result types the source is a 64-bit vector load placed at index 0
// of an undef 128-bit vector via insert_subvector; for the 128-bit result
// types the source is the 128-bit vector load itself.
7912def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
7913          (LD1Rv8b GPR64sp:$Rn)>;
7914def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))),
7915          (LD1Rv16b GPR64sp:$Rn)>;
7916def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
7917          (LD1Rv4h GPR64sp:$Rn)>;
7918def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))),
7919          (LD1Rv8h GPR64sp:$Rn)>;
7920def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
7921          (LD1Rv2s GPR64sp:$Rn)>;
7922def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))),
7923          (LD1Rv4s GPR64sp:$Rn)>;
7924def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))),
7925          (LD1Rv2d GPR64sp:$Rn)>;
7926
7927// Grab the floating point version too
// AArch64dup of an f32 / f64 / f16 / bf16 load selects the LD1R instruction
// of the same element width as the integer forms (2s/4s, 2d/1d, 4h/8h).
7928def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
7929          (LD1Rv2s GPR64sp:$Rn)>;
7930def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
7931          (LD1Rv4s GPR64sp:$Rn)>;
7932def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
7933          (LD1Rv2d GPR64sp:$Rn)>;
7934def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
7935          (LD1Rv1d GPR64sp:$Rn)>;
7936def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
7937          (LD1Rv4h GPR64sp:$Rn)>;
7938def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
7939          (LD1Rv8h GPR64sp:$Rn)>;
7940def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
7941          (LD1Rv4h GPR64sp:$Rn)>;
7942def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
7943          (LD1Rv8h GPR64sp:$Rn)>;
7944
// Selects vector_insert of a loaded scalar into a 128-bit vector to a
// single-lane LD1.
//   scalar_load - load operator applied to the GPR64sp address.
//   VecIndex    - operand class of the lane index, passed through unchanged.
//   VTy / STy   - vector type of $Rd and scalar type of the loaded value.
//   LD1         - lane-load instruction taking (vector, index, address).
7945class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
7946                    ValueType VTy, ValueType STy, Instruction LD1>
7947  : Pat<(vector_insert (VTy VecListOne128:$Rd),
7948           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
7949        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
7950
// Integer byte/halfword lanes match extloadi8 / extloadi16 producing i32;
// all other element types match a plain load of the element type.
7951def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
7952def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
7953def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
7954def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
7955def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
7956def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;
7957def : Ld1Lane128Pat<load,       VectorIndexH, v8f16, f16, LD1i16>;
7958def : Ld1Lane128Pat<load,       VectorIndexH, v8bf16, bf16, LD1i16>;
7959
7960// Generate LD1 for extload if memory type does not match the
7961// destination type, for example:
7962//
7963//   (v4i32 (insert_vector_elt (load anyext from i8) idx))
7964//
7965// In this case, the index must be adjusted to match LD1 type.
7966//
// Same source pattern as a 128-bit lane insert of a loaded scalar, but the
// lane index is passed through the IdxOp transform before being handed to
// LD1.
//   IdxOp - SDNodeXForm applied to VecIndex:$idx in the result.
7967class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
7968                         VecIndex, ValueType VTy, ValueType STy,
7969                         Instruction LD1, SDNodeXForm IdxOp>
7970  : Pat<(vector_insert (VTy VecListOne128:$Rd),
7971                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
7972        (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;
7973
// 64-bit-vector counterpart of the index-transforming lane-load pattern.
// The 64-bit $Rd is placed in the dsub subregister with SUBREG_TO_REG, LD1
// is applied with the IdxOp-transformed index, and dsub of the result is
// extracted.
7974class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex,
7975                        ValueType VTy, ValueType STy, Instruction LD1,
7976                        SDNodeXForm IdxOp>
7977  : Pat<(vector_insert (VTy VecListOne64:$Rd),
7978                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
7979        (EXTRACT_SUBREG
7980            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
7981                (IdxOp VecIndex:$idx), GPR64sp:$Rn),
7982            dsub)>;
7983
// Immediate transforms that scale a lane index; each returns the scaled
// value as an i64 target constant.
// VectorIndexStoH: index * 2.
7984def VectorIndexStoH : SDNodeXForm<imm, [{
7985  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
7986}]>;
// VectorIndexStoB: index * 4.
7987def VectorIndexStoB : SDNodeXForm<imm, [{
7988  return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
7989}]>;
// VectorIndexHtoB: index * 2.
7990def VectorIndexHtoB : SDNodeXForm<imm, [{
7991  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
7992}]>;
7993
// 128-bit vectors: extending loads narrower than the vector element use the
// narrower LD1 lane instruction, with the index scaled by the matching
// transform (S->H, S->B, H->B).
7994def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>;
7995def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>;
7996def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>;
7997
// 64-bit vectors: the same three combinations for v2i32 and v4i16.
7998def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>;
7999def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>;
8000def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>;
8001
8002// Same as above, but the first element is populated using
8003// scalar_to_vector + insert_subvector instead of insert_vector_elt.
// Everything in this block is predicated on IsNeonAvailable.
8004let Predicates = [IsNeonAvailable] in {
  // Selects (ResultTy (scalar_to_vector (i32 (ExtLoad addr)))) to an LD1
  // lane load into lane 0 of an undefined VecTy register, then extracts dsub
  // as ResultTy.
8005  class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
8006                          SDPatternOperator ExtLoad, Instruction LD1>
8007    : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
8008            (ResultTy (EXTRACT_SUBREG
8009              (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
8010
8011  def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
8012  def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
8013  def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;
8014}
// Selects vector_insert of a loaded scalar into a 64-bit vector to a
// single-lane LD1. The 64-bit $Rd is placed in dsub with SUBREG_TO_REG, LD1
// is applied with the unchanged index, and dsub of the result is extracted.
8015class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
8016                   ValueType VTy, ValueType STy, Instruction LD1>
8017  : Pat<(vector_insert (VTy VecListOne64:$Rd),
8018           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
8019        (EXTRACT_SUBREG
8020            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
8021                          VecIndex:$idx, GPR64sp:$Rn),
8022            dsub)>;
8023
// Integer byte/halfword lanes match extloadi8 / extloadi16 producing i32;
// the remaining element types match a plain load of the element type.
8024def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
8025def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
8026def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
8027def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;
8028def : Ld1Lane64Pat<load,       VectorIndexH, v4f16, f16, LD1i16>;
8029def : Ld1Lane64Pat<load,       VectorIndexH, v4bf16, bf16, LD1i16>;
8030
8031
// Assembly aliases for the single-element ld1..ld4 forms, expanded from the
// SIMDLdSt<N>SingleAliases multiclasses (defined outside this section).
8032defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
8033defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
8034defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
8035defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
8036
8037// Stores
// Single-lane st1 stores, one defm per element size (B/H/S/D). Unlike the
// ST2..ST4 group later in this file, these are not wrapped in a
// `let mayStore = 1, hasSideEffects = 0` block.
8038defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
8039defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
8040defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
8041defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
8042
// Selects a scalar store of a lane extracted from a 128-bit vector to a
// single-lane ST1, passing the lane index through unchanged.
// AddedComplexity = 19 raises the priority of these patterns.
8043let AddedComplexity = 19 in
8044class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
8045                    ValueType VTy, ValueType STy, Instruction ST1>
8046  : Pat<(scalar_store
8047             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
8048             GPR64sp:$Rn),
8049        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;
8050
// Integer byte/halfword lanes match truncstorei8 / truncstorei16 of an i32
// extract; all other element types match a plain store of the element type.
8051def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
8052def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
8053def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
8054def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
8055def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
8056def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;
8057def : St1Lane128Pat<store,         VectorIndexH, v8f16, f16, ST1i16>;
8058def : St1Lane128Pat<store,         VectorIndexH, v8bf16, bf16, ST1i16>;
8059
// Selects a scalar store of a lane extracted from a 64-bit vector to a
// single-lane ST1. The 64-bit $Vt is first placed in the dsub subregister
// with SUBREG_TO_REG.
// AddedComplexity = 19 raises the priority of these patterns.
8060let AddedComplexity = 19 in
8061class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
8062                   ValueType VTy, ValueType STy, Instruction ST1>
8063  : Pat<(scalar_store
8064             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
8065             GPR64sp:$Rn),
8066        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
8067             VecIndex:$idx, GPR64sp:$Rn)>;
8068
// Integer byte/halfword lanes match truncstorei8 / truncstorei16 of an i32
// extract; the remaining element types match a plain store.
8069def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8, i32, ST1i8>;
8070def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
8071def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
8072def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;
8073def : St1Lane64Pat<store,         VectorIndexH, v4f16, f16, ST1i16>;
8074def : St1Lane64Pat<store,         VectorIndexH, v4bf16, bf16, ST1i16>;
8075
// Post-indexed lane stores from a 64-bit vector. Emits two patterns:
//   1. the increment is the constant `offset`: ST1 is given XZR as its last
//      operand;
//   2. the increment is a GPR64 register: ST1 is given $Rm.
// In both, the 64-bit $Vt is placed in dsub with SUBREG_TO_REG first.
8076multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
8077                             ValueType VTy, ValueType STy, Instruction ST1,
8078                             int offset> {
8079  def : Pat<(scalar_store
8080              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
8081              GPR64sp:$Rn, offset),
8082        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
8083             VecIndex:$idx, GPR64sp:$Rn, XZR)>;
8084
8085  def : Pat<(scalar_store
8086              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
8087              GPR64sp:$Rn, GPR64:$Rm),
8088        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
8089             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
8090}
8091
// Instantiations for 64-bit vectors. The final argument is the constant
// post-increment matched by the first pattern of the multiclass: 1, 2, 4 or
// 8 alongside ST1i8/ST1i16/ST1i32/ST1i64 respectively.
8092defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
8093defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
8094                        2>;
8095defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
8096defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
8097defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
8098defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
8099defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
8100defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>;
8101
// Post-indexed lane stores from a 128-bit vector. Emits two patterns:
//   1. the increment is the constant `offset`: ST1 is given XZR as its last
//      operand;
//   2. the increment is a GPR64 register: ST1 is given $Rm.
8102multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
8103                             ValueType VTy, ValueType STy, Instruction ST1,
8104                             int offset> {
8105  def : Pat<(scalar_store
8106              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
8107              GPR64sp:$Rn, offset),
8108        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;
8109
8110  def : Pat<(scalar_store
8111              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
8112              GPR64sp:$Rn, GPR64:$Rm),
8113        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
8114}
8115
// Instantiations for 128-bit vectors. The final argument is the constant
// post-increment matched by the first pattern of the multiclass: 1, 2, 4 or
// 8 alongside ST1i8/ST1i16/ST1i32/ST1i64 respectively.
8116defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
8117                         1>;
8118defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
8119                         2>;
8120defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
8121defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
8122defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
8123defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
8124defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
8125defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>;
8126
// Single-lane stores for st2..st4, one defm per element size (B/H/S/D), all
// marked mayStore with no other side effects. The last argument is the
// GPR64pi<N> operand class, with N growing as register count times element
// size.
8127let mayStore = 1, hasSideEffects = 0 in {
8128defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
8129defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
8130defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
8131defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
8132defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
8133defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
8134defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
8135defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
8136defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
8137defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
8138defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
8139defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
8140}
8141
// Assembly aliases for the single-element st1..st4 forms; these reuse the
// same SIMDLdSt<N>SingleAliases multiclasses as the loads.
8142defm ST1 : SIMDLdSt1SingleAliases<"st1">;
8143defm ST2 : SIMDLdSt2SingleAliases<"st2">;
8144defm ST3 : SIMDLdSt3SingleAliases<"st3">;
8145defm ST4 : SIMDLdSt4SingleAliases<"st4">;
8146
8147//----------------------------------------------------------------------------
8148// Crypto extensions
8149//----------------------------------------------------------------------------
8150
// AES instructions, available only under HasAES. AESE/AESD are built from
// AESTiedInst and AESMC/AESIMC from AESInst; each is bound to its
// int_aarch64_crypto_* intrinsic.
8151let Predicates = [HasAES] in {
8152def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
8153def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
8154def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
8155def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
8156}
8157
8158// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
8159// for AES fusion on some CPUs.
// Both pseudos take one V128 input and produce one V128 output, carry no
// selection pattern of their own (empty pattern list), and tie $Rn to $Rd
// via the "$Rn = $Rd" constraint. They are flagged as having no side
// effects and neither loading nor storing, and are scheduled as WriteVq.
8160let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
8161def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
8162                        Sched<[WriteVq]>;
8163def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
8164                         Sched<[WriteVq]>;
8165}
8166
8167// Only use constrained versions of AES(I)MC instructions if they are paired with
8168// AESE/AESD.
// aesmc(aese(a, b)) selects AESMCrrTied(AESErr(a, b)) when HasFuseAES holds.
8169def : Pat<(v16i8 (int_aarch64_crypto_aesmc
8170            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
8171                                            (v16i8 V128:$src2))))),
8172          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
8173                                             (v16i8 V128:$src2)))))>,
8174          Requires<[HasFuseAES]>;
8175
// aesimc(aesd(a, b)) selects AESIMCrrTied(AESDrr(a, b)) when HasFuseAES
// holds.
8176def : Pat<(v16i8 (int_aarch64_crypto_aesimc
8177            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
8178                                            (v16i8 V128:$src2))))),
8179          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
8180                                              (v16i8 V128:$src2)))))>,
8181          Requires<[HasFuseAES]>;
8182
// SHA1 and SHA256 instructions; the whole group, SHA1 included, is
// predicated on HasSHA2. Each definition is bound to its
// int_aarch64_crypto_* intrinsic.
8183let Predicates = [HasSHA2] in {
// Three-register forms.
8184def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
8185def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
8186def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
8187def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
8188def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
8189def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>;
8190def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>;
8191
// Two-register forms.
8192def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
8193def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
8194def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;
8195}
8196
8197//----------------------------------------------------------------------------
8198// Compiler-pseudos
8199//----------------------------------------------------------------------------
8200// FIXME: Like for X86, these should go in their own separate .td file.
8201
8202// For an anyext, we don't care what the high bits are, so we can perform an
8203// INSERT_SUBREG into an IMPLICIT_DEF.
// i64 anyext of a GPR32: insert the source into sub_32 of an undefined i64.
8204def : Pat<(i64 (anyext GPR32:$src)),
8205          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
8206
8207// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
8208// then assert the extension has happened.
// The MOV is spelled ORRWrs WZR, $src, 0; SUBREG_TO_REG then places the
// result in sub_32.
8209def : Pat<(i64 (zext GPR32:$src)),
8210          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
8211
8212// To sign extend, we use a signed bitfield move instruction (SBFM) on the
8213// containing super-reg.
// The sext_inreg forms pass 0 and (source width - 1) as the two SBFM
// immediates: 31, 15, 7 and 0 for i32, i16, i8 and i1.
8214def : Pat<(i64 (sext GPR32:$src)),
8215   (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
8216def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
8217def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
8218def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
8219def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
8220def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
8221def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
8222def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;
8223
// A left shift by an immediate applied to an extended value is selected to
// a single bitfield-move instruction. Both immediates are derived from the
// shift amount by transforms defined outside this section (i32shift_a /
// i64shift_a and the *_sext_i8 / *_sext_i16 / *_sext_i32 variants).
//
// shl of sext_inreg from i8.
8224def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
8225          (SBFMWri GPR32:$Rn, (i64 (i32shift_a       imm0_31:$imm)),
8226                              (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
8227def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
8228          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
8229                              (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
8230
// shl of sext_inreg from i16.
8231def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
8232          (SBFMWri GPR32:$Rn, (i64 (i32shift_a        imm0_31:$imm)),
8233                              (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
8234def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
8235          (SBFMXri GPR64:$Rn, (i64 (i64shift_a        imm0_63:$imm)),
8236                              (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
8237
// shl of an i64 sext from GPR32; the source is first inserted into sub_32 of
// an undefined i64.
8238def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
8239          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
8240                   (i64 (i64shift_a        imm0_63:$imm)),
8241                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
8242
// shl of an i64 zext from GPR32, using UBFMXri.
// NOTE(review): this reuses i64shift_sext_i32 for the second immediate;
// presumably the field value does not depend on the extension kind —
// confirm against the transform's definition.
8243def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
8244          (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
8245                   (i64 (i64shift_a        imm0_63:$imm)),
8246                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
8247
8248// sra patterns have an AddedComplexity of 10, so make sure we have a higher
8249// AddedComplexity for the following patterns since we want to match sext + sra
8250// patterns before we attempt to match a single sra node.
8251let AddedComplexity = 20 in {
8252// We support all sext + sra combinations which preserve at least one bit of the
8253// original value which is to be sign extended. E.g. we support shifts up to
8254// bitwidth-1 bits.
// Each pattern passes the shift amount as the first SBFM immediate and
// (source width - 1) as the second: 7, 15 or 31.
8255def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
8256          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
8257def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
8258          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
8259
8260def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
8261          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
8262def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
8263          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
8264
8265def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
8266          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
8267                   (i64 imm0_31:$imm), 31)>;
8268} // AddedComplexity = 20
8269
8270// To truncate, we can simply extract from a subregister.
// i32 trunc of a GPR64sp value: take the sub_32 subregister.
8271def : Pat<(i32 (trunc GPR64sp:$src)),
8272          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
8273
8274// __builtin_trap() uses the BRK instruction on AArch64.
// trap selects BRK 1; debugtrap selects BRK 0xF000.
8275def : Pat<(trap), (BRK 1)>;
8276def : Pat<(debugtrap), (BRK 0xF000)>;
8277
// Builds the BRK immediate for ubsantrap: the incoming target immediate
// OR'ed with ('U' << 8), returned as an i32 target constant.
8278def ubsan_trap_xform : SDNodeXForm<timm, [{
8279  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
8280}]>;
8281
// Accepts i32 target immediates that fit in 8 unsigned bits and applies
// ubsan_trap_xform to them.
8282def ubsan_trap_imm : TImmLeaf<i32, [{
8283  return isUInt<8>(Imm);
8284}], ubsan_trap_xform>;
8285
// ubsantrap with an accepted kind selects BRK with the transformed
// immediate.
8286def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
8287
8288// Multiply high patterns which multiply the lower subvector using smull/umull
8289// and the upper subvector with smull2/umull2. Then shuffle the high
8290// part of both results together.
// Signed multiply-high (mulhs): one widening SMULL on the dsub halves of the
// operands, one widening SMULL on the full V128 operands, combined by UZP2.
def : Pat<(v16i8 (mulhs V128:$Vn, V128:$Vm)),
          (UZP2v16i8 (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Vn, dsub),
                                      (EXTRACT_SUBREG V128:$Vm, dsub)),
                     (SMULLv16i8_v8i16 V128:$Vn, V128:$Vm))>;
def : Pat<(v8i16 (mulhs V128:$Vn, V128:$Vm)),
          (UZP2v8i16 (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Vn, dsub),
                                       (EXTRACT_SUBREG V128:$Vm, dsub)),
                     (SMULLv8i16_v4i32 V128:$Vn, V128:$Vm))>;
def : Pat<(v4i32 (mulhs V128:$Vn, V128:$Vm)),
          (UZP2v4i32 (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Vn, dsub),
                                       (EXTRACT_SUBREG V128:$Vm, dsub)),
                     (SMULLv4i32_v2i64 V128:$Vn, V128:$Vm))>;

// Unsigned multiply-high (mulhu): same shape using the UMULL instructions.
def : Pat<(v16i8 (mulhu V128:$Vn, V128:$Vm)),
          (UZP2v16i8 (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Vn, dsub),
                                      (EXTRACT_SUBREG V128:$Vm, dsub)),
                     (UMULLv16i8_v8i16 V128:$Vn, V128:$Vm))>;
def : Pat<(v8i16 (mulhu V128:$Vn, V128:$Vm)),
          (UZP2v8i16 (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Vn, dsub),
                                       (EXTRACT_SUBREG V128:$Vm, dsub)),
                     (UMULLv8i16_v4i32 V128:$Vn, V128:$Vm))>;
def : Pat<(v4i32 (mulhu V128:$Vn, V128:$Vm)),
          (UZP2v4i32 (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Vn, dsub),
                                       (EXTRACT_SUBREG V128:$Vm, dsub)),
                     (UMULLv4i32_v2i64 V128:$Vn, V128:$Vm))>;
8322
8323// Conversions within AdvSIMD types in the same register size are free.
8324// But because we need a consistent lane ordering, in big endian many
8325// conversions require one or more REV instructions.
8326//
8327// Consider a simple memory load followed by a bitconvert then a store.
8328//   v0 = load v2i32
8329//   v1 = BITCAST v2i32 v0 to v4i16
//        store v4i16 v1
8331//
8332// In big endian mode every memory access has an implicit byte swap. LDR and
8333// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
8334// is, they treat the vector as a sequence of elements to be byte-swapped.
8335// The two pairs of instructions are fundamentally incompatible. We've decided
8336// to use LD1/ST1 only to simplify compiler implementation.
8337//
8338// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
8339// the original code sequence:
8340//   v0 = load v2i32
8341//   v1 = REV v2i32                  (implicit)
8342//   v2 = BITCAST v2i32 v1 to v4i16
8343//   v3 = REV v4i16 v2               (implicit)
8344//        store v4i16 v3
8345//
8346// But this is now broken - the value stored is different to the value loaded
8347// due to lane reordering. To fix this, on every BITCAST we must perform two
8348// other REVs:
8349//   v0 = load v2i32
8350//   v1 = REV v2i32                  (implicit)
8351//   v2 = REV v2i32
8352//   v3 = BITCAST v2i32 v2 to v4i16
8353//   v4 = REV v4i16
8354//   v5 = REV v4i16 v4               (implicit)
8355//        store v4i16 v5
8356//
8357// This means an extra two instructions, but actually in most cases the two REV
8358// instructions can be combined into one. For example:
8359//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
8360//
8361// There is also no 128-bit REV instruction. This must be synthesized with an
8362// EXT instruction.
8363//
8364// Most bitconverts require some sort of conversion. The only exceptions are:
8365//   a) Identity conversions -  vNfX <-> vNiX
8366//   b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
8367//
8368
// Natural vector casts (64 bit): an AArch64NvCast between any two of the
// listed 64-bit types is selected to the source register itself.
foreach DstVT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in {
  foreach SrcVT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in {
    def : Pat<(DstVT (AArch64NvCast (SrcVT FPR64:$Vn))),
              (DstVT FPR64:$Vn)>;
  }
}
8374
// Natural vector casts (128 bit): an AArch64NvCast between any two of the
// listed 128-bit types is selected to the source register itself.
foreach DstVT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in {
  foreach SrcVT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in {
    def : Pat<(DstVT (AArch64NvCast (SrcVT FPR128:$Vn))),
              (DstVT FPR128:$Vn)>;
  }
}
8380
// Little endian: bitconverts between a 64-bit GPR and a multi-lane 64-bit
// vector are plain cross-register-class copies.
//
// i64 <- v1f64 is intentionally not listed here. The same pattern is defined
// without an endianness predicate alongside the scalar bitconverts further
// down in this file, so it already applies in little-endian mode; repeating it
// under IsLE only produced a duplicate pattern.
let Predicates = [IsLE] in {
  // GPR64 -> vector.
  foreach DstVT = [ v8i8, v4i16, v2i32, v4f16, v4bf16, v2f32 ] in
    def : Pat<(DstVT (bitconvert GPR64:$Xn)),
              (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;

  // Vector -> GPR64.
  foreach SrcVT = [ v8i8, v4i16, v2i32, v4f16, v4bf16, v2f32 ] in
    def : Pat<(i64 (bitconvert (SrcVT V64:$Vn))),
              (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
}
// Big endian: bitconverts between a 64-bit GPR and a multi-lane 64-bit vector
// pair the cross-class copy with a REV64 at the vector's element size.
// Patterns are grouped by the REV64 variant they use.
let Predicates = [IsBE] in {
  // GPR64 -> vector: copy into FPR64, then reverse the lanes.
  def : Pat<(v8i8 (bitconvert GPR64:$Xn)),
            (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
  foreach DstVT = [ v4i16, v4f16, v4bf16 ] in
    def : Pat<(DstVT (bitconvert GPR64:$Xn)),
              (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
  foreach DstVT = [ v2i32, v2f32 ] in
    def : Pat<(DstVT (bitconvert GPR64:$Xn)),
              (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;

  // Vector -> GPR64.
  // NOTE(review): here the REV64 is applied to the result of the copy to
  // GPR64 rather than to the vector before the copy. This ordering is kept
  // exactly as originally written; confirm it is intended.
  def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
            (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
  foreach SrcVT = [ v4i16, v4f16, v4bf16 ] in
    def : Pat<(i64 (bitconvert (SrcVT V64:$Vn))),
              (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
  foreach SrcVT = [ v2i32, v2f32 ] in
    def : Pat<(i64 (bitconvert (SrcVT V64:$Vn))),
              (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
// Single-lane 64-bit vectors: these patterns carry no endianness predicate.
// Both bitconvert and scalar_to_vector from a GPR64 are cross-class copies.
foreach DstVT = [ v1i64, v1f64 ] in {
  def : Pat<(DstVT (bitconvert GPR64:$Xn)),
            (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
  def : Pat<(DstVT (scalar_to_vector GPR64:$Xn)),
            (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
}

// v1i64 -> i64 is the reverse copy.
def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;

// An f64 already lives in an FPR64, so wrapping it in a v1f64 reuses the
// source register.
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
8440
// Scalar bitconverts between integer and floating-point types are copies
// between the GPR and FPR register classes.
def : Pat<(f32 (bitconvert (i32 GPR32:$Wn))),
          (COPY_TO_REGCLASS GPR32:$Wn, FPR32)>;
def : Pat<(i32 (bitconvert (f32 FPR32:$Sn))),
          (COPY_TO_REGCLASS FPR32:$Sn, GPR32)>;
def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (f64 FPR64:$Dn))),
          (COPY_TO_REGCLASS FPR64:$Dn, GPR64)>;
// v1f64 -> i64, valid for both endiannesses (no predicate).
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;

// f16 <-> bf16 share the FPR16 register class, so the bitconvert reuses the
// source register.
def : Pat<(f16  (bitconvert (bf16 FPR16:$Hn))), (f16  FPR16:$Hn)>;
def : Pat<(bf16 (bitconvert (f16  FPR16:$Hn))), (bf16 FPR16:$Hn)>;
8454
// Bitconverts producing v1i64 from the other 64-bit FPR types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ v2i32, v4i16, v8i8, v4f16, v4bf16, v2f32 ] in
    def : Pat<(v1i64 (bitconvert (SrcVT FPR64:$src))), (v1i64 FPR64:$src)>;
}
// Big endian: a REV64 at the source element size is required.
let Predicates = [IsBE] in {
  foreach SrcVT = [ v2i32, v2f32 ] in
    def : Pat<(v1i64 (bitconvert (SrcVT FPR64:$src))),
              (v1i64 (REV64v2i32 FPR64:$src))>;
  foreach SrcVT = [ v4i16, v4f16, v4bf16 ] in
    def : Pat<(v1i64 (bitconvert (SrcVT FPR64:$src))),
              (v1i64 (REV64v4i16 FPR64:$src))>;
  def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
            (v1i64 (REV64v8i8 FPR64:$src))>;
}
// Single-lane and scalar 64-bit sources need no predicate.
foreach SrcVT = [ v1f64, f64 ] in
  def : Pat<(v1i64 (bitconvert (SrcVT FPR64:$src))), (v1i64 FPR64:$src)>;
8479
// Bitconverts producing v2i32 from the other 64-bit FPR types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ v1i64, v4i16, v8i8, f64, v1f64, v4f16, v4bf16 ] in
    def : Pat<(v2i32 (bitconvert (SrcVT FPR64:$src))), (v2i32 FPR64:$src)>;
}
// Big endian: grouped by the REV instruction needed.
let Predicates = [IsBE] in {
  foreach SrcVT = [ v1i64, f64, v1f64 ] in
    def : Pat<(v2i32 (bitconvert (SrcVT FPR64:$src))),
              (v2i32 (REV64v2i32 FPR64:$src))>;
  foreach SrcVT = [ v4i16, v4f16, v4bf16 ] in
    def : Pat<(v2i32 (bitconvert (SrcVT FPR64:$src))),
              (v2i32 (REV32v4i16 FPR64:$src))>;
  def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))),
            (v2i32 (REV32v8i8 FPR64:$src))>;
}
// Same lane layout (v2f32): no predicate, no REV.
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
8506
// Bitconverts producing v4i16 from the other 64-bit FPR types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ v1i64, v2i32, v8i8, f64, v2f32, v1f64 ] in
    def : Pat<(v4i16 (bitconvert (SrcVT FPR64:$src))), (v4i16 FPR64:$src)>;
}
// Big endian: grouped by the REV instruction needed.
let Predicates = [IsBE] in {
  foreach SrcVT = [ v1i64, f64, v1f64 ] in
    def : Pat<(v4i16 (bitconvert (SrcVT FPR64:$src))),
              (v4i16 (REV64v4i16 FPR64:$src))>;
  foreach SrcVT = [ v2i32, v2f32 ] in
    def : Pat<(v4i16 (bitconvert (SrcVT FPR64:$src))),
              (v4i16 (REV32v4i16 FPR64:$src))>;
  def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
            (v4i16 (REV16v8i8 FPR64:$src))>;
}
// Same lane layout (v4f16, v4bf16): no predicate, no REV.
foreach SrcVT = [ v4f16, v4bf16 ] in
  def : Pat<(v4i16 (bitconvert (SrcVT FPR64:$src))), (v4i16 FPR64:$src)>;
8531
// Bitconverts producing v4f16 / v4bf16 from the other 64-bit FPR types. The
// two destination types use identical patterns, so they share one loop.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach DstVT = [ v4f16, v4bf16 ] in
    foreach SrcVT = [ v1i64, v2i32, v8i8, f64, v2f32, v1f64 ] in
      def : Pat<(DstVT (bitconvert (SrcVT FPR64:$src))), (DstVT FPR64:$src)>;
}
// Big endian: grouped by the REV instruction needed.
let Predicates = [IsBE] in {
  foreach DstVT = [ v4f16, v4bf16 ] in {
    foreach SrcVT = [ v1i64, f64, v1f64 ] in
      def : Pat<(DstVT (bitconvert (SrcVT FPR64:$src))),
                (DstVT (REV64v4i16 FPR64:$src))>;
    foreach SrcVT = [ v2i32, v2f32 ] in
      def : Pat<(DstVT (bitconvert (SrcVT FPR64:$src))),
                (DstVT (REV32v4i16 FPR64:$src))>;
    def : Pat<(DstVT (bitconvert (v8i8 FPR64:$src))),
              (DstVT (REV16v8i8 FPR64:$src))>;
  }
}
// Same lane layout (v4i16): no predicate, no REV.
foreach DstVT = [ v4f16, v4bf16 ] in
  def : Pat<(DstVT (bitconvert (v4i16 FPR64:$src))), (DstVT FPR64:$src)>;
8576
// Bitconverts producing v8i8 from the other 64-bit FPR types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ v1i64, v2i32, v4i16, f64, v2f32, v1f64, v4f16, v4bf16 ] in
    def : Pat<(v8i8 (bitconvert (SrcVT FPR64:$src))), (v8i8 FPR64:$src)>;
}
// Big endian: grouped by the REV instruction needed.
let Predicates = [IsBE] in {
  foreach SrcVT = [ v1i64, f64, v1f64 ] in
    def : Pat<(v8i8 (bitconvert (SrcVT FPR64:$src))),
              (v8i8 (REV64v8i8 FPR64:$src))>;
  foreach SrcVT = [ v2i32, v2f32 ] in
    def : Pat<(v8i8 (bitconvert (SrcVT FPR64:$src))),
              (v8i8 (REV32v8i8 FPR64:$src))>;
  foreach SrcVT = [ v4i16, v4f16, v4bf16 ] in
    def : Pat<(v8i8 (bitconvert (SrcVT FPR64:$src))),
              (v8i8 (REV16v8i8 FPR64:$src))>;
}
8605
// Bitconverts producing f64 from the 64-bit vector types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ v2i32, v4i16, v2f32, v8i8, v4f16, v4bf16 ] in
    def : Pat<(f64 (bitconvert (SrcVT FPR64:$src))), (f64 FPR64:$src)>;
}
// Big endian: a REV64 at the source element size is required.
let Predicates = [IsBE] in {
  foreach SrcVT = [ v2i32, v2f32 ] in
    def : Pat<(f64 (bitconvert (SrcVT FPR64:$src))),
              (f64 (REV64v2i32 FPR64:$src))>;
  foreach SrcVT = [ v4i16, v4f16, v4bf16 ] in
    def : Pat<(f64 (bitconvert (SrcVT FPR64:$src))),
              (f64 (REV64v4i16 FPR64:$src))>;
  def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
            (f64 (REV64v8i8 FPR64:$src))>;
}
// Single-lane sources need no predicate.
foreach SrcVT = [ v1i64, v1f64 ] in
  def : Pat<(f64 (bitconvert (SrcVT FPR64:$src))), (f64 FPR64:$src)>;
8630
// Bitconverts producing v1f64 from the other 64-bit FPR types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ v2i32, v4i16, v8i8, v2f32, v4f16, v4bf16 ] in
    def : Pat<(v1f64 (bitconvert (SrcVT FPR64:$src))), (v1f64 FPR64:$src)>;
}
// Big endian: a REV64 at the source element size is required.
let Predicates = [IsBE] in {
  foreach SrcVT = [ v2i32, v2f32 ] in
    def : Pat<(v1f64 (bitconvert (SrcVT FPR64:$src))),
              (v1f64 (REV64v2i32 FPR64:$src))>;
  foreach SrcVT = [ v4i16, v4f16, v4bf16 ] in
    def : Pat<(v1f64 (bitconvert (SrcVT FPR64:$src))),
              (v1f64 (REV64v4i16 FPR64:$src))>;
  def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
            (v1f64 (REV64v8i8 FPR64:$src))>;
}
// Single-lane and scalar 64-bit sources need no predicate.
foreach SrcVT = [ v1i64, f64 ] in
  def : Pat<(v1f64 (bitconvert (SrcVT FPR64:$src))), (v1f64 FPR64:$src)>;
8655
// Bitconverts producing v2f32 from the other 64-bit FPR types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ v1i64, v4i16, v8i8, v1f64, f64, v4f16, v4bf16 ] in
    def : Pat<(v2f32 (bitconvert (SrcVT FPR64:$src))), (v2f32 FPR64:$src)>;
}
// Big endian: grouped by the REV instruction needed.
let Predicates = [IsBE] in {
  foreach SrcVT = [ v1i64, v1f64, f64 ] in
    def : Pat<(v2f32 (bitconvert (SrcVT FPR64:$src))),
              (v2f32 (REV64v2i32 FPR64:$src))>;
  foreach SrcVT = [ v4i16, v4f16, v4bf16 ] in
    def : Pat<(v2f32 (bitconvert (SrcVT FPR64:$src))),
              (v2f32 (REV32v4i16 FPR64:$src))>;
  def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))),
            (v2f32 (REV32v8i8 FPR64:$src))>;
}
// Same lane layout (v2i32): no predicate, no REV.
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
8682
// Bitconverts producing f128 from the 128-bit vector types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ v2i64, v4i32, v8i16, v2f64, v4f32, v8f16, v8bf16, v16i8 ] in
    def : Pat<(f128 (bitconvert (SrcVT FPR128:$src))), (f128 FPR128:$src)>;
}
// Big endian: every source goes through EXTv16i8 with immediate 8 (there is no
// 128-bit REV); sources with lanes narrower than 64 bits are first run through
// REV64 at their element size.
let Predicates = [IsBE] in {
  foreach SrcVT = [ v2i64, v2f64 ] in
    def : Pat<(f128 (bitconvert (SrcVT FPR128:$src))),
              (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
  foreach SrcVT = [ v4i32, v4f32 ] in
    def : Pat<(f128 (bitconvert (SrcVT FPR128:$src))),
              (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                              (REV64v4i32 FPR128:$src), (i32 8)))>;
  foreach SrcVT = [ v8i16, v8f16, v8bf16 ] in
    def : Pat<(f128 (bitconvert (SrcVT FPR128:$src))),
              (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                              (REV64v8i16 FPR128:$src), (i32 8)))>;
  def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
            (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                            (REV64v16i8 FPR128:$src), (i32 8)))>;
}
8717
// Bitconverts producing v2f64 from the other 128-bit FPR types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ f128, v4i32, v8i16, v8f16, v8bf16, v16i8, v4f32 ] in
    def : Pat<(v2f64 (bitconvert (SrcVT FPR128:$src))), (v2f64 FPR128:$src)>;
}
// Big endian: f128 uses EXTv16i8 with immediate 8; vector sources use REV64 at
// the source element size.
let Predicates = [IsBE] in {
  def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
            (v2f64 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
  foreach SrcVT = [ v4i32, v4f32 ] in
    def : Pat<(v2f64 (bitconvert (SrcVT FPR128:$src))),
              (v2f64 (REV64v4i32 FPR128:$src))>;
  foreach SrcVT = [ v8i16, v8f16, v8bf16 ] in
    def : Pat<(v2f64 (bitconvert (SrcVT FPR128:$src))),
              (v2f64 (REV64v8i16 FPR128:$src))>;
  def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
            (v2f64 (REV64v16i8 FPR128:$src))>;
}
// Same lane layout (v2i64): no predicate, no REV.
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
8745
// Bitconverts producing v4f32 from the other 128-bit FPR types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ f128, v8i16, v8f16, v8bf16, v16i8, v2i64, v2f64 ] in
    def : Pat<(v4f32 (bitconvert (SrcVT FPR128:$src))), (v4f32 FPR128:$src)>;
}
// Big endian: grouped by the REV/EXT sequence needed.
let Predicates = [IsBE] in {
  def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
            (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                             (REV64v4i32 FPR128:$src), (i32 8)))>;
  foreach SrcVT = [ v8i16, v8f16, v8bf16 ] in
    def : Pat<(v4f32 (bitconvert (SrcVT FPR128:$src))),
              (v4f32 (REV32v8i16 FPR128:$src))>;
  def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
            (v4f32 (REV32v16i8 FPR128:$src))>;
  foreach SrcVT = [ v2i64, v2f64 ] in
    def : Pat<(v4f32 (bitconvert (SrcVT FPR128:$src))),
              (v4f32 (REV64v4i32 FPR128:$src))>;
}
// Same lane layout (v4i32): no predicate, no REV.
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
8773
// Bitconverts producing v2i64 from the other 128-bit FPR types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ f128, v4i32, v8i16, v16i8, v4f32, v8f16, v8bf16 ] in
    def : Pat<(v2i64 (bitconvert (SrcVT FPR128:$src))), (v2i64 FPR128:$src)>;
}
// Big endian: f128 uses EXTv16i8 with immediate 8; vector sources use REV64 at
// the source element size.
let Predicates = [IsBE] in {
  def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
            (v2i64 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
  foreach SrcVT = [ v4i32, v4f32 ] in
    def : Pat<(v2i64 (bitconvert (SrcVT FPR128:$src))),
              (v2i64 (REV64v4i32 FPR128:$src))>;
  foreach SrcVT = [ v8i16, v8f16, v8bf16 ] in
    def : Pat<(v2i64 (bitconvert (SrcVT FPR128:$src))),
              (v2i64 (REV64v8i16 FPR128:$src))>;
  def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
            (v2i64 (REV64v16i8 FPR128:$src))>;
}
// Same lane layout (v2f64): no predicate, no REV.
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
8801
// Bitconverts producing v4i32 from the other 128-bit FPR types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ f128, v2i64, v8i16, v16i8, v2f64, v8f16, v8bf16 ] in
    def : Pat<(v4i32 (bitconvert (SrcVT FPR128:$src))), (v4i32 FPR128:$src)>;
}
// Big endian: grouped by the REV/EXT sequence needed.
let Predicates = [IsBE] in {
  def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
            (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                             (REV64v4i32 FPR128:$src), (i32 8)))>;
  foreach SrcVT = [ v2i64, v2f64 ] in
    def : Pat<(v4i32 (bitconvert (SrcVT FPR128:$src))),
              (v4i32 (REV64v4i32 FPR128:$src))>;
  foreach SrcVT = [ v8i16, v8f16, v8bf16 ] in
    def : Pat<(v4i32 (bitconvert (SrcVT FPR128:$src))),
              (v4i32 (REV32v8i16 FPR128:$src))>;
  def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
            (v4i32 (REV32v16i8 FPR128:$src))>;
}
// Same lane layout (v4f32): no predicate, no REV.
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
8830
// Bitconverts producing v8i16 from the other 128-bit FPR types.
// Little endian: the source register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [ f128, v2i64, v4i32, v16i8, v2f64, v4f32 ] in
    def : Pat<(v8i16 (bitconvert (SrcVT FPR128:$src))), (v8i16 FPR128:$src)>;
}
// Big endian: grouped by the REV/EXT sequence needed.
let Predicates = [IsBE] in {
  def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
            (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                             (REV64v8i16 FPR128:$src), (i32 8)))>;
  foreach SrcVT = [ v2i64, v2f64 ] in
    def : Pat<(v8i16 (bitconvert (SrcVT FPR128:$src))),
              (v8i16 (REV64v8i16 FPR128:$src))>;
  foreach SrcVT = [ v4i32, v4f32 ] in
    def : Pat<(v8i16 (bitconvert (SrcVT FPR128:$src))),
              (v8i16 (REV32v8i16 FPR128:$src))>;
  def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
            (v8i16 (REV16v16i8 FPR128:$src))>;
}
// Same lane layout (v8f16, v8bf16): no predicate, no REV.
foreach SrcVT = [ v8f16, v8bf16 ] in
  def : Pat<(v8i16 (bitconvert (SrcVT FPR128:$src))), (v8i16 FPR128:$src)>;
8857
8858let Predicates = [IsLE] in {
8859def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))), (v8f16 FPR128:$src)>;
8860def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
8861def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
8862def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
8863def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
8864def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
8865
8866def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))), (v8bf16 FPR128:$src)>;
8867def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
8868def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
8869def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
8870def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
8871def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
8872}
// Big-endian bitconverts into the 16-bit floating-point vector types. The
// v8f16 and v8bf16 groups are identical apart from the result type, and both
// use the same instruction per source type:
//   * v2i64 / v2f64 source -> REV64v8i16
//   * v4i32 / v4f32 source -> REV32v8i16
//   * v16i8 source         -> REV16v16i8
//   * f128 source          -> REV64v8i16 on both EXT inputs, combined by
//                             EXTv16i8 with an immediate of 8.
8873let Predicates = [IsBE] in {
// Destination type v8f16.
8874def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))),
8875                             (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
8876                                              (REV64v8i16 FPR128:$src),
8877                                              (i32 8)))>;
8878def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
8879                             (v8f16 (REV64v8i16 FPR128:$src))>;
8880def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
8881                             (v8f16 (REV32v8i16 FPR128:$src))>;
8882def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
8883                             (v8f16 (REV16v16i8 FPR128:$src))>;
8884def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
8885                             (v8f16 (REV64v8i16 FPR128:$src))>;
8886def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
8887                             (v8f16 (REV32v8i16 FPR128:$src))>;
8888
// Destination type v8bf16.
8889def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))),
8890                             (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
8891                                              (REV64v8i16 FPR128:$src),
8892                                              (i32 8)))>;
8893def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
8894                             (v8bf16 (REV64v8i16 FPR128:$src))>;
8895def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
8896                             (v8bf16 (REV32v8i16 FPR128:$src))>;
8897def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
8898                             (v8bf16 (REV16v16i8 FPR128:$src))>;
8899def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
8900                             (v8bf16 (REV64v8i16 FPR128:$src))>;
8901def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
8902                             (v8bf16 (REV32v8i16 FPR128:$src))>;
8903}
8904def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
8905def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;
8906
8907let Predicates = [IsLE] in {
8908def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))), (v16i8 FPR128:$src)>;
8909def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
8910def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
8911def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
8912def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
8913def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
8914def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
8915def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
8916}
// Big-endian bitconverts into v16i8. Every pattern uses a byte-granular
// (..v16i8) reversal whose container width follows the source element size:
//   * v2i64 / v2f64 source          -> REV64v16i8
//   * v4i32 / v4f32 source          -> REV32v16i8
//   * v8i16 / v8f16 / v8bf16 source -> REV16v16i8
//   * f128 source                   -> REV64v16i8 on both EXT inputs, combined
//                                      by EXTv16i8 with an immediate of 8.
8917let Predicates = [IsBE] in {
8918def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))),
8919                             (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
8920                                              (REV64v16i8 FPR128:$src),
8921                                              (i32 8)))>;
8922def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
8923                             (v16i8 (REV64v16i8 FPR128:$src))>;
8924def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
8925                             (v16i8 (REV32v16i8 FPR128:$src))>;
8926def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
8927                             (v16i8 (REV16v16i8 FPR128:$src))>;
8928def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
8929                             (v16i8 (REV64v16i8 FPR128:$src))>;
8930def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
8931                             (v16i8 (REV32v16i8 FPR128:$src))>;
8932def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
8933                             (v16i8 (REV16v16i8 FPR128:$src))>;
8934def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
8935                             (v16i8 (REV16v16i8 FPR128:$src))>;
8936}
8937
8938def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
8939           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8940def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
8941           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8942def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
8943           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8944def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
8945           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8946def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
8947           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8948def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
8949           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8950def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
8951           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8952def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
8953           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8954
// 64-bit subvector extracts at index 1 from a 128-bit register: 64-bit lane 1
// is duplicated with DUPv2i64lane and the dsub subregister of the result is
// taken.
// NOTE(review): all four patterns use (i64 1) as the subvector index, while
// the output always selects 64-bit lane 1. For the v2i64 -> v1i64 case these
// obviously agree; for the v16i8/v8i16/v4i32 sources an index of 1 does not
// name the upper half in element units (that would be 8/4/2). Confirm whether
// these three patterns are intentional or can ever match.
8955def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
8956          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
8957def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
8958          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
8959def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
8960          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
8961def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
8962          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
8963
8964// A 64-bit subvector insert to the first 128-bit vector position
8965// is a subregister copy that needs no instruction.
8966multiclass InsertSubvectorUndef<ValueType Ty> {
  // Ty is the value type of the constant-zero index operand of
  // insert_subvector; the multiclass is instantiated for both i32 and i64
  // below. Each pattern maps one 64-bit vector type held in FPR64 onto the
  // dsub subregister of an IMPLICIT_DEF of the 128-bit type with the same
  // element type and twice as many elements.
8967  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
8968            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8969  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
8970            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8971  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
8972            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8973  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
8974            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8975  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
8976            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8977  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
8978            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8979  def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
8980            (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8981  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
8982            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8983}
8984
8985defm : InsertSubvectorUndef<i32>;
8986defm : InsertSubvectorUndef<i64>;
8987
8988// Use pair-wise add instructions when summing lanes 0 and 1 of a v2i64 or
8989// v2f64 vector, or the low two lanes of a v4f32 or v8f16 vector.
// Each pattern requires both vector_extract operands to read the same $Rn.
8990def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
8991                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
8992           (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
8993def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
8994                         (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
8995           (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
8996    // vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
8997    // so we match on v4f32 here, not v2f32. This will also catch adding
8998    // the low two lanes of a true v4f32 vector.
8999def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
9000                    (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
9001          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
// Same shape for half precision: lanes 0 and 1 of a v8f16 are summed by
// FADDPv2i16p applied to the dsub subregister of $Rn.
9002def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
9003                    (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
9004          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
9005
9006// Prefer using the bottom lanes of addp Rn, Rn compared to
9007// addp extractlow(Rn), extracthigh(Rn)
9008def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))),
9009                       (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))),
9010          (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>;
9011def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))),
9012                       (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))),
9013          (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>;
9014def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))),
9015                       (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))),
9016          (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>;
9017
9018def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))),
9019                        (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))),
9020          (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>;
9021def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))),
9022                        (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))),
9023          (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>;
9024
9025// Scalar 64-bit shifts in FPR64 registers.
9026def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
9027          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
9028def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
9029          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
9030def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
9031          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
9032def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
9033          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
9034
9035// Patterns for nontemporal/no-allocate stores.
9036// We have to resort to tricks to turn a single-input store into a store pair,
9037// because there is no single-input nontemporal store, only STNP.
9038let Predicates = [IsLE] in {
9039let AddedComplexity = 15 in {
// 128-bit vector nontemporal store: the first STNPDi operand is the dsub
// subregister of $Rt and the second is 64-bit lane 1 obtained with DUPi64.
9040class NTStore128Pat<ValueType VT> :
9041  Pat<(nontemporalstore (VT FPR128:$Rt),
9042        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
9043      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
9044              (DUPi64 FPR128:$Rt, (i64 1)),
9045              GPR64sp:$Rn, simm7s8:$offset)>;
9046
9047def : NTStore128Pat<v2i64>;
9048def : NTStore128Pat<v4i32>;
9049def : NTStore128Pat<v8i16>;
9050def : NTStore128Pat<v16i8>;
9051
// 64-bit vector nontemporal store: the first STNPSi operand is the ssub
// subregister of $Rt. For the second, $Rt is first placed in a wider register
// with SUBREG_TO_REG (as its dsub) so that DUPi32 can read 32-bit lane 1.
9052class NTStore64Pat<ValueType VT> :
9053  Pat<(nontemporalstore (VT FPR64:$Rt),
9054        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
9055      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
9056              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
9057              GPR64sp:$Rn, simm7s4:$offset)>;
9058
9059// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
9060def : NTStore64Pat<v1f64>;
9061def : NTStore64Pat<v1i64>;
9062def : NTStore64Pat<v2i32>;
9063def : NTStore64Pat<v4i16>;
9064def : NTStore64Pat<v8i8>;
9065
// 64-bit GPR nontemporal store: STNPWi receives the sub_32 subregister of $Rt
// and the sub_32 subregister of (UBFMXri $Rt, 32, 63).
9066def : Pat<(nontemporalstore GPR64:$Rt,
9067            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
9068          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
9069                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
9070                  GPR64sp:$Rn, simm7s4:$offset)>;
9071} // AddedComplexity = 15
9072} // Predicates = [IsLE]
9073
9074// Tail call return handling. These are all compiler pseudo-instructions,
9075// so no encoding information or anything like that.
9076let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
9077  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
9078                   Sched<[WriteBrReg]>;
9079  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
9080                   Sched<[WriteBrReg]>;
9081  // Indirect tail-call with any register allowed, used by MachineOutliner when
9082  // this is proven safe.
9083  // FIXME: If we have to add any more hacks like this, we should instead relax
9084  // some verifier checks for outlined functions.
9085  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
9086                      Sched<[WriteBrReg]>;
9087  // Indirect tail-call limited to only use registers (x16 and x17) which are
9088  // allowed to tail-call a "BTI c" instruction.
9089  def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
9090                      Sched<[WriteBrReg]>;
9091}
9092
// Selection of AArch64tcret onto the tail-call pseudos:
//   * register target, NotUseBTI -> TCRETURNri    (tcGPR64 operand)
//   * register target, UseBTI    -> TCRETURNriBTI (rtcGPR64 operand)
//   * tglobaladdr / texternalsym -> TCRETURNdi
9093def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
9094          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
9095      Requires<[NotUseBTI]>;
9096def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
9097          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
9098      Requires<[UseBTI]>;
// NOTE(review): the source operand here is tglobaladdr but the output operand
// is spelled texternalsym (same as the pattern that follows) -- confirm this
// is intentional.
9099def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
9100          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
9101def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
9102          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
9103
9104def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
9105def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;
9106
9107// Extracting lane zero is a special case where we can just use a plain
9108// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
9109// rest of the compiler, especially the register allocator and copy propagation,
9110// to reason about, so is preferred when it's possible to use it.
9111let AddedComplexity = 10 in {
9112  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
9113  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
9114  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
9115}
9116
9117// dot_v4i8
// mul_v4i8: matches the product of two `ldop` loads taken at the same
// $offset from the base addresses $Rn and $Rm.
9118class mul_v4i8<SDPatternOperator ldop> :
9119  PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
9120          (mul (ldop (add node:$Rn, node:$offset)),
9121               (ldop (add node:$Rm, node:$offset)))>;
// mulz_v4i8: the offset-zero form of mul_v4i8, where the loads use $Rn and
// $Rm directly with no `add` node.
9122class mulz_v4i8<SDPatternOperator ldop> :
9123  PatFrag<(ops node:$Rn, node:$Rm),
9124          (mul (ldop node:$Rn), (ldop node:$Rm))>;
9125
// load_v4i8: output fragment that loads from $R with LDRWui at immediate 0,
// copies the result to FPR32 and inserts it as the ssub subregister of an
// otherwise undefined v2i32.
9126def load_v4i8 :
9127  OutPatFrag<(ops node:$R),
9128             (INSERT_SUBREG
9129              (v2i32 (IMPLICIT_DEF)),
9130               (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
9131              ssub)>;
9132
// dot_v4i8: matches the i32 sum of four products at offsets 3, 2, 1 and 0
// from $Rn and $Rm, and emits DOT with an accumulator built by
// (DUPv2i32gpr WZR) and two load_v4i8 inputs. The sub_32 subregister of the
// DOT result is the pattern result. Only selected when HasDotProd holds.
9133class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
9134  Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
9135           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
9136           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
9137                (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
9138      (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
9139                                (load_v4i8 GPR64sp:$Rn),
9140                                (load_v4i8 GPR64sp:$Rm))),
9141                      sub_32)>, Requires<[HasDotProd]>;
9142
9143// dot_v8i8
// ee_v8i8 ("extend, extract"): applies `extend` to $V giving a v8i16, then
// takes the v4i16 subvector starting at index $K.
9144class ee_v8i8<SDPatternOperator extend> :
9145  PatFrag<(ops node:$V, node:$K),
9146          (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;
9147
// mul_v8i8: `mulop` applied to the ee_v8i8 halves of $M and $N taken at the
// same index $K.
9148class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
9149  PatFrag<(ops node:$M, node:$N, node:$K),
9150          (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
9151                 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;
9152
// idot_v8i8: input side of the v8i8 dot-product idiom. Adds the mul_v8i8
// products for K = 0 and K = 4, reduces the sum with AArch64uaddv and reads
// element 0 of the v4i32 result as an i32.
9153class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
9154  PatFrag<(ops node:$M, node:$N),
9155          (i32 (extractelt
9156           (v4i32 (AArch64uaddv
9157            (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
9158                 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
9159           (i64 0)))>;
9160
9161// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
9162def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;
9163
// odot_v8i8: output side. Runs DOT on the two v8i8 operands with an
// accumulator built by (DUPv2i32gpr WZR), folds the result with VADDV_32 and
// returns its sub_32 subregister.
9164class odot_v8i8<Instruction DOT> :
9165  OutPatFrag<(ops node:$Vm, node:$Vn),
9166             (EXTRACT_SUBREG
9167              (VADDV_32
9168               (i64 (DOT (DUPv2i32gpr WZR),
9169                         (v8i8 node:$Vm),
9170                         (v8i8 node:$Vn)))),
9171              sub_32)>;
9172
// dot_v8i8: ties idot_v8i8 to odot_v8i8 for V64 operands; requires
// HasDotProd.
9173class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
9174                    SDPatternOperator extend> :
9175  Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
9176      (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
9177  Requires<[HasDotProd]>;
9178
9179// dot_v16i8
// ee_v16i8: takes the v8i8 subvector of $V at index $K1, applies `extend` to
// get a v8i16, then takes the v4i16 subvector of that at index $K2.
9180class ee_v16i8<SDPatternOperator extend> :
9181  PatFrag<(ops node:$V, node:$K1, node:$K2),
9182          (v4i16 (extract_subvector
9183           (v8i16 (extend
9184            (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;
9185
// mul_v16i8: v4i32 result of `mulop` on the ee_v16i8 pieces of $M and $N
// selected by the same ($K1, $K2) pair.
9186class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
9187  PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
9188          (v4i32
9189           (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
9190                  (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;
9191
// idot_v16i8: input side of the v16i8 dot-product idiom. Sums the four
// mul_v16i8 products for (K1, K2) in {0, 8} x {0, 4} in the exact add-tree
// shape written below, reduces with AArch64uaddv and reads element 0 as i32.
9192class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
9193  PatFrag<(ops node:$M, node:$N),
9194          (i32 (extractelt
9195           (v4i32 (AArch64uaddv
9196            (add
9197             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
9198                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
9199             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
9200                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
9201           (i64 0)))>;
9202
// odot_v16i8: output side. DOT with an accumulator built by
// (DUPv4i32gpr WZR), followed by ADDVv4i32v to produce the i32 result.
9203class odot_v16i8<Instruction DOT> :
9204  OutPatFrag<(ops node:$Vm, node:$Vn),
9205             (i32 (ADDVv4i32v
9206              (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;
9207
// dot_v16i8: ties idot_v16i8 to odot_v16i8 for V128 operands; requires
// HasDotProd.
9208class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
9209                SDPatternOperator extend> :
9210  Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
9211      (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
9212  Requires<[HasDotProd]>;
9213
9214let AddedComplexity = 10 in {
9215  def : dot_v4i8<SDOTv8i8, sextloadi8>;
9216  def : dot_v4i8<UDOTv8i8, zextloadi8>;
9217  def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
9218  def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
9219  def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
9220  def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;
9221
9222  // FIXME: add patterns to generate vector by element dot product.
9223  // FIXME: add SVE dot-product patterns.
9224}
9225
9226// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
9227// so that it can be used as input to inline asm, and vice versa.
9228def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
9229def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
9230def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
9231                             GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
9232          (REG_SEQUENCE GPR64x8Class,
9233              $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
9234              $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
9235foreach i = 0-7 in {
9236  def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
9237            (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
9238}
9239
9240let Predicates = [HasLS64] in {
9241  def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
9242                                          (outs GPR64x8:$Rt)>;
9243  def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
9244                                          (outs)>;
9245  def ST64BV:   Store64BV<0b011, "st64bv">;
9246  def ST64BV0:  Store64BV<0b010, "st64bv0">;
9247
9248  class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
9249    : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
9250          (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;
9251
9252  def : ST64BPattern<int_aarch64_st64b, ST64B>;
9253  def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
9254  def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
9255}
9256
9257let Predicates = [HasMOPS] in {
9258  let Defs = [NZCV] in {
9259    defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;
9260
9261    defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;
9262
9263    defm SETP : MOPSMemorySetInsns<0b00, "setp">;
9264  }
9265  let Uses = [NZCV] in {
9266    defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
9267    defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;
9268
9269    defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
9270    defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;
9271
9272    defm SETM : MOPSMemorySetInsns<0b01, "setm">;
9273    defm SETE : MOPSMemorySetInsns<0b10, "sete">;
9274  }
9275}
9276let Predicates = [HasMOPS, HasMTE] in {
9277  let Defs = [NZCV] in {
9278    defm SETGP     : MOPSMemorySetTaggingInsns<0b00, "setgp">;
9279  }
9280  let Uses = [NZCV] in {
9281    defm SETGM     : MOPSMemorySetTaggingInsns<0b01, "setgm">;
9282    // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
9283    defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
9284  }
9285}
9286
9287// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
9288// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
9289def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
9290def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
9291def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
9292def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
9293def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;
9294
9295// MOPS operations always contain three 4-byte instructions
// Pseudo-instructions for the MOPS memory operations. Each one is declared
// with Size = 12, defines NZCV, and has no selection pattern of its own (the
// pattern list is empty). The constraint string ties every *_wb output to the
// input of the same name.
9296let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
  // Copy and move read as well as write memory: three tied operands
  // (Rd, Rs, Rn).
9297  let mayLoad = 1 in {
9298    def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
9299                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
9300                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
9301    def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
9302                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
9303                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
9304  }
  // Set only stores: Rd and Rn are tied to writeback outputs, while $Rm is a
  // plain input with no writeback.
9305  let mayLoad = 0 in {
9306    def MOPSMemorySetPseudo  : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
9307                                      (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
9308                                      [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
9309  }
9310}
// Tagging variant of the set pseudo: same operands and constraints as
// MOPSMemorySetPseudo, but additionally predicated on HasMTE.
9311let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
9312  def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
9313                                          (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
9314                                          [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
9315}
9316
9317//-----------------------------------------------------------------------------
9318// v8.3 Pointer Authentication late patterns
9319
9320let Predicates = [HasPAuth] in {
9321def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm),
9322          (MOVKXi GPR64:$Rd, (trunc_imm imm64_0_65535:$imm), 48)>;
9323def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
9324          (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>;
9325}
9326
9327//-----------------------------------------------------------------------------
9328
9329// This gets lowered into an instruction sequence of 20 bytes
9330let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
9331def StoreSwiftAsyncContext
9332      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
9333               []>, Sched<[]>;
9334
9335def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
9336def : Pat<(AArch64AssertZExtBool GPR32:$op),
9337          (i32 GPR32:$op)>;
9338
9339//===----------------------------===//
9340// 2022 Architecture Extensions:
9341//===----------------------------===//
9342
// "clrbhb" is an alias of HINT 22. It is declared twice with a different third
// InstAlias argument: 0 for the unpredicated alias and 1 for the alias guarded
// by HasCLRBHB.
// NOTE(review): the third argument is presumably InstAlias's emit priority
// (0 = accept when parsing but never print), which would make "clrbhb" always
// assemble but only be printed when HasCLRBHB is set -- confirm against
// Target.td.
9343def : InstAlias<"clrbhb",  (HINT 22), 0>;
9344let Predicates = [HasCLRBHB] in {
9345  def : InstAlias<"clrbhb",  (HINT 22), 1>;
9346}
9347
9348//===----------------------------------------------------------------------===//
9349// Translation Hardening Extension (FEAT_THE)
9350//===----------------------------------------------------------------------===//
9351defm RCW     : ReadCheckWriteCompareAndSwap;
9352
9353defm RCWCLR  : ReadCheckWriteOperation<0b001, "clr">;
9354defm RCWSET  : ReadCheckWriteOperation<0b011, "set">;
9355defm RCWSWP  : ReadCheckWriteOperation<0b010, "swp">;
9356
9357//===----------------------------------------------------------------------===//
9358// General Data-Processing Instructions (FEAT_V94_DP)
9359//===----------------------------------------------------------------------===//
9360defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>;
9361defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
9362defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;
9363
9364defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
9365defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
9366defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
9367defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;
9368
// RPRFM: assembled as "rprfm $Rt, $Rm, [$Rn]". It has no outputs, no selection
// pattern, and is marked as neither loading nor storing but as having side
// effects.
9369def RPRFM:
9370    I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
9371      "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
9372    Sched<[]> {
  // $Rt is a 6-bit operand whose bits are scattered across the encoding:
  // Rt{2-0} -> Inst{2-0}, Rt{4-3} -> Inst{13-12}, Rt{5} -> Inst{15}.
9373  bits<6> Rt;
9374  bits<5> Rn;
9375  bits<5> Rm;
9376  let Inst{2-0} = Rt{2-0};
9377  let Inst{4-3} = 0b11;
9378  let Inst{9-5} = Rn;
9379  let Inst{11-10} = 0b10;
9380  let Inst{13-12} = Rt{4-3};
9381  let Inst{14} = 0b1;
9382  let Inst{15} = Rt{5};
9383  let Inst{20-16} = Rm;
9384  let Inst{31-21} = 0b11111000101;
9385  let mayLoad = 0;
9386  let mayStore = 0;
9387  let hasSideEffects = 1;
9388  // RPRFM overlaps with PRFM (reg), when the decoder method of PRFM returns
9389  // Fail, the decoder should attempt to decode RPRFM. This requires setting
9390  // the decoder namespace to "Fallback".
9391  let DecoderNamespace = "Fallback";
9392}
9393
9394//===----------------------------------------------------------------------===//
9395// 128-bit Atomics (FEAT_LSE128)
9396//===----------------------------------------------------------------------===//
9397let Predicates = [HasLSE128] in {
9398  def SWPP     : LSE128Base<0b000, 0b00, 0b1, "swpp">;
9399  def SWPPA    : LSE128Base<0b000, 0b10, 0b1, "swppa">;
9400  def SWPPAL   : LSE128Base<0b000, 0b11, 0b1, "swppal">;
9401  def SWPPL    : LSE128Base<0b000, 0b01, 0b1, "swppl">;
9402  def LDCLRP   : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
9403  def LDCLRPA  : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
9404  def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
9405  def LDCLRPL  : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;
9406  def LDSETP   : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
9407  def LDSETPA  : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
9408  def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
9409  def LDSETPL  : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
9410}
9411
9412//===----------------------------------------------------------------------===//
9413// RCPC Instructions (FEAT_LRCPC3)
9414//===----------------------------------------------------------------------===//
9415
9416let Predicates = [HasRCPC3] in {
9417  //                                              size   opc    opc2
9418  def STILPWpre:   BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
9419  def STILPXpre:   BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
9420  def STILPW:      BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
9421  def STILPX:      BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
9422  def LDIAPPWpost: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
9423  def LDIAPPXpost: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
9424  def LDIAPPW:     BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
9425  def LDIAPPX:     BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
9426
9427  def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
9428  def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;
9429
9430  // Aliases for when offset=0
9431  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32: $Rt, GPR32: $Rt2, GPR64sp:$Rn)>;
9432  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64: $Rt, GPR64: $Rt2, GPR64sp:$Rn)>;
9433
9434  //                                         size   opc
9435  def STLRWpre:   BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback),            (ins GPR32:$Rt, GPR64sp:$Rn), "stlr",  "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
9436  def STLRXpre:   BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback),            (ins GPR64:$Rt, GPR64sp:$Rn), "stlr",  "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
9437  def LDAPRWpost: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn),            "ldapr", "\t$Rt, [$Rn], #4",   "$Rn = $wback">;
9438  def LDAPRXpost: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn),            "ldapr", "\t$Rt, [$Rn], #8",   "$Rn = $wback">;
9439}
9440
9441let Predicates = [HasRCPC3, HasNEON] in {
      // FP/SIMD-register variants of stlur/ldapur, plus the stl1/ldap1 forms that
      // take a VecListOned list and a VectorIndexD index. Everything in this scope
      // requires both HasRCPC3 and HasNEON.
      //
      // Unscaled-offset forms: the stores read $Rt and have no outputs, the loads
      // write $Rt. All of them take a GPR64sp base ($Rn) and a simm9 offset
      // ($simm). The FPR128 (q) entries reuse size=0b00 and differ from the FPR8
      // (b) entries only in opc (0b10/0b11 instead of 0b00/0b01).
9442  //                                              size   opc regtype
9443  defm STLURb:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8  , (outs), (ins FPR8  :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9444  defm STLURh:  LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9445  defm STLURs:  LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9446  defm STLURd:  LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9447  defm STLURq:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9448  defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8  , (outs FPR8  :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9449  defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9450  defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9451  defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9452  defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9453
      // stl1/ldap1: take a VecListOned register list ($Vt) together with a
      // VectorIndexD index ($Q) and a base register ($Rn). The first template
      // argument is the field labelled L below (0 for stl1, 1 for ldap1).
      // NOTE(review): ldap1 passes "$Vt = $dst", which looks like a tied-operand
      // constraint between the input list and the result -- confirm in
      // LRCPC3NEONLdStSingle. ldap1 also uses GPR64sp0 for $Rn where stl1 uses
      // GPR64sp; presumably GPR64sp0 already accepts an optional "#0", which
      // would explain why only stl1 gets an explicit alias below -- verify.
9454  //                                L
9455  def STL1:  LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn) , "stl1", "">;
9456  def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">;
9457
9458  // Alias so that stl1 also accepts an explicit zero offset ("[$Rn, #0]").
9459  def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
9460}
9461
9462//===----------------------------------------------------------------------===//
9463// 128-bit System Instructions (FEAT_SYSINSTR128)
9464//===----------------------------------------------------------------------===//
9465let Predicates = [HasD128] in {
      // General "sysp" form; operands and encoding come from SystemPXtI.
9466  def SYSPxt  : SystemPXtI<0, "sysp">;
9467
      // "sysp" variant whose register-pair operand is the XZR pair. The pair is
      // an assembly operand only: bits 4-0 of the encoding are fixed below.
9468  def SYSPxt_XZR
9469    : BaseSystemI<0, (outs),
9470        (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
9471        "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
9472      Sched<[WriteSys]>
9473  {
9474    // A custom decoder is needed here. TableGen treats this instruction as having only 4
9475    // fields (reason unclear), so the autogenerated decoder (decodeToMCInst) builds an MC
9476    // representation with 4 operands. When printing, however, 5 operands are expected (one
9477    // extra for the XZR), because AArch64InstPrinter::printInstruction in
9478    // AArch64GenAsmWriter.inc appears to be driven by the asm template.
9479    // Declaring a bits<5> xzr_pair field would not help: without a way to constrain it to
9480    // 0b11111 here, this encoding would overlap with the main SYSP instruction.
9481    let DecoderMethod = "DecodeSyspXzrInstruction";
9482    bits<3> op1;
9483    bits<4> Cn;
9484    bits<4> Cm;
9485    bits<3> op2;
9486    let Inst{22}    = 0b1; // override BaseSystemI
9487    let Inst{20-19} = 0b01;
9488    let Inst{18-16} = op1;
9489    let Inst{15-12} = Cn;
9490    let Inst{11-8}  = Cm;
9491    let Inst{7-5}   = op2;
9492    let Inst{4-0}   = 0b11111; // fixed value; $xzr_pair has no encoded field (see above)
9493  }
9494
      // Short spelling without a register pair: maps onto SYSPxt_XZR with XZR
      // supplied for the $xzr_pair operand.
9495  def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
9496                  (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
9497}
9498
9499//---
9500// 128-bit System Registers (FEAT_SYSREG128)
9501//---
9502
9503// Instruction encoding:
9504//
9505//          31       22|21|20|19|18 16|15 12|11 8|7 5|4 0
9506// MRRS      1101010101| 1| 1|o0|  op1|   Cn|  Cm|op2| Rt
9507// MSRR      1101010101| 0| 1|o0|  op1|   Cn|  Cm|op2| Rt
9508
9509// Instruction syntax:
9510//
9511// MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
9512// MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
9513//
9514// ...where t is even (X0, X2, etc).
9515
9516let Predicates = [HasD128] in {
      // mrrs: takes a system register ($systemreg, mrs_sysreg_op) as input and
      // produces the register pair $Rt. The 16-bit systemreg value is placed in
      // Inst{20-5}.
      // NOTE(review): the leading 1 matches bit 21 of the MRRS row in the encoding
      // table above; presumably that is what RtSystemI128's first parameter
      // sets -- confirm against the class definition.
9517  def MRRS : RtSystemI128<1,
9518    (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
9519    "mrrs", "\t$Rt, $systemreg">
9520  {
9521    bits<16> systemreg;
9522    let Inst{20-5} = systemreg;
9523  }
9524
      // msrr: takes a system register ($systemreg, msr_sysreg_op) and the register
      // pair $Rt as inputs and has no outputs. Same Inst{20-5} placement of the
      // 16-bit systemreg value as mrrs; the leading template argument is 0 here.
9525  def MSRR : RtSystemI128<0,
9526    (outs),  (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
9527    "msrr", "\t$systemreg, $Rt">
9528  {
9529    bits<16> systemreg;
9530    let Inst{20-5} = systemreg;
9531  }
9532}
9533
9534//===----------------------------===//
9535// 2023 Architecture Extensions:
9536//===----------------------------===//
9537
9538let Predicates = [HasFP8] in {
      // Instructions gated on HasFP8.
      // The four *CVTL entries all instantiate SIMDMixedTwoVectorFP8 and differ
      // only in the 2-bit selector (0b00..0b11) and the mnemonic.
9539  defm F1CVTL  : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
9540  defm F2CVTL  : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
9541  defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">;
9542  defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">;
      // Both entries below use the "fcvtn" mnemonic but instantiate different
      // multiclasses.
9543  defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">;
9544  defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">;
      // NOTE(review): null_frag is passed as the last argument, presumably meaning
      // no selection pattern is attached to fscale here -- confirm.
9545  defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
9546} // End let Predicates = [HasFP8]
9547
9548let Predicates = [HasFAMINMAX] in {
      // famax/famin: both instantiate SIMDThreeSameVectorFP with identical
      // arguments apart from the first template bit (0b0 vs 0b1) and the mnemonic.
9549  defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
9550  defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
9551} // End let Predicates = [HasFAMINMAX]
9552
9553let Predicates = [HasFP8FMA] in {
      // Forms built from the *Index multiclasses (record names end in "lane").
9554  defm FMLALBlane   : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
9555  defm FMLALTlane   : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt">;
9556  defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb">;
9557  defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt">;
9558  defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb">;
9559  defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt">;
9560
      // Same mnemonics and selector bits, built from the non-Index multiclasses.
9561  defm FMLALB       : SIMDThreeSameVectorMLA<0b0, "fmlalb">;
9562  defm FMLALT       : SIMDThreeSameVectorMLA<0b1, "fmlalt">;
9563  defm FMLALLBB     : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb">;
9564  defm FMLALLBT     : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt">;
9565  defm FMLALLTB     : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb">;
9566  defm FMLALLTT     : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">;
9567} // End let Predicates = [HasFP8FMA]
9568
9569let Predicates = [HasFP8DOT2] in {
      // "fdot" gated on HasFP8DOT2: one entry from the DOT2 Index multiclass and
      // one from the plain DOT2 multiclass.
9570  defm FDOTlane : SIMDThreeSameVectorFP8DOT2Index<"fdot">;
9571  defm FDOT     : SIMDThreeSameVectorDOT2<"fdot">;
9572} // End let Predicates = [HasFP8DOT2]
9573
9574let Predicates = [HasFP8DOT4] in {
      // "fdot" gated on HasFP8DOT4: one entry from the DOT4 Index multiclass and
      // one from the plain DOT4 multiclass.
9575  defm FDOTlane : SIMDThreeSameVectorFP8DOT4Index<"fdot">;
9576  defm FDOT     : SIMDThreeSameVectorDOT4<"fdot">;
9577} // End let Predicates = [HasFP8DOT4]
9578
9579//===----------------------------------------------------------------------===//
9580// Checked Pointer Arithmetic (FEAT_CPA)
9581//===----------------------------------------------------------------------===//
9582let Predicates = [HasCPA] in {
      // All four records below are gated on HasCPA. In each pair the leading
      // template argument is 0 for the add form and 1 for the subtract form.
9583  // Scalar add/subtract (multiclass instantiations, hence defm)
9584  defm ADDPT : AddSubCPA<0, "addpt">;
9585  defm SUBPT : AddSubCPA<1, "subpt">;
9586
9587  // Scalar multiply-add/subtract (single records, hence def)
9588  def MADDPT : MulAccumCPA<0, "maddpt">;
9589  def MSUBPT : MulAccumCPA<1, "msubpt">;
9590}
9591
9592include "AArch64InstrAtomics.td"
9593include "AArch64SVEInstrInfo.td"
9594include "AArch64SMEInstrInfo.td"
9595include "AArch64InstrGISel.td"
9596